Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
47e57be4
Commit
47e57be4
authored
Sep 29, 2018
by
Zhi
Committed by
Tianqi Chen
Sep 29, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
support of multiple devices for tvm.build (#1773)
parent
bea0b00f
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
168 additions
and
117 deletions
+168
-117
python/tvm/build_module.py
+163
-92
tests/python/unittest/test_runtime_heterogeneous.py
+5
-25
No files found.
python/tvm/build_module.py
View file @
47e57be4
...
...
@@ -379,92 +379,32 @@ def lower(sch,
return
stmt
return
ir_pass
.
MakeAPI
(
stmt
,
name
,
arg_list
,
0
,
cfg
.
restricted_func
)
def
build
(
sch
,
args
=
None
,
target
=
None
,
target_host
=
None
,
name
=
"default_function"
,
binds
=
None
,
postpone_host_codegen
=
False
):
"""Build a function with arguments as signature. Code will be generated
for a device specified by the target. For homogeneous execution, a module
that contains both host and device code is returned. For heterogeneous
execution, a list of lowered functions for the host and a module containing
device code are returned, but actual code generation for the host module is
postponed after code generation is finished for all devices.
def
_build_for_device
(
flist
,
target
,
target_host
):
"""Build the lowered functions for a device with the given compilation
target.
Parameters
----------
sch : tvm.Schedule, or LoweredFunc
The schedule to be built
args : list of Buffer or Tensor or Var, optional
The argument lists to the function.
flist : list of LoweredFunc
The schedule to be built.
target : str or :any:`tvm.target.Target`
, optional
target : str or :any:`tvm.target.Target`
The target and option of the compilation.
target_host : str or :any:`tvm.target.Target`, optional
Host compilation target, if target is device.
When TVM compiles device specific program such as CUDA,
we also need host(CPU) side code to interact with the driver
setup the dimensions and parameters correctly.
target_host is used to specify the host side codegen target.
By default, llvm is used if it is enabled,
otherwise a stackvm interpreter is used.
name : str, optional
The name of result function.
binds : dict, optional
Dictionary that maps the binding of symbolic buffer to Tensor.
By default, a new buffer is created for each tensor in the argument.
postpone_host_codegen : bool, optional
A bool value that indicates if code generation for the host module
should be postponed. This variable is set to be true for heterogeneous
execution. Otherwise, it is defaulted to false.
target_host : str or :any:`tvm.target.Target`
The host compilation target.
Returns
-------
ret : tvm.module, or (list of LoweredFunc, tvm.module) tuple
A module that combines both host and device code is returned when
postpone_host_codegen is not set. Otherwise, a list of lowered
functions for the host and a module contains only device code are
returned.
fhost : list of LoweredFunc
A list of lowered functions for the host.
Note
----
See the note on :any:`tvm.target` on target string format.
mdev : tvm.module
A module that contains device code.
"""
if
isinstance
(
sch
,
schedule
.
Schedule
):
if
args
is
None
:
raise
ValueError
(
"args must be given for build from schedule"
)
flist
=
lower
(
sch
,
args
,
name
=
name
,
binds
=
binds
)
if
isinstance
(
flist
,
container
.
LoweredFunc
):
flist
=
[
flist
]
elif
isinstance
(
sch
,
container
.
LoweredFunc
):
if
args
:
raise
ValueError
(
"args must be done when build from LoweredFunc"
)
flist
=
[
sch
]
elif
isinstance
(
sch
,
(
list
,
tuple
,
container
.
Array
)):
flist
=
sch
else
:
raise
ValueError
(
"sch have to be Schedule, LoweredFunc or list of LoweredFunc"
)
fname_set
=
set
()
for
x
in
flist
:
if
not
isinstance
(
x
,
container
.
LoweredFunc
):
raise
ValueError
(
"sch have to be Schedule, LoweredFunc or list of LoweredFunc"
)
if
x
.
name
in
fname_set
:
raise
ValueError
(
"Duplicate function name
%
s"
%
x
.
name
)
fname_set
.
add
(
x
.
name
)
target
=
_target
.
current_target
()
if
target
is
None
else
target
target
=
_target
.
create
(
target
)
if
target
else
_target
.
create
(
"llvm"
)
target
=
_target
.
create
(
target
)
device_type
=
ndarray
.
context
(
target
.
target_name
,
0
)
.
device_type
fhost
=
[]
fdevice
=
[]
for
func
in
flist
:
...
...
@@ -496,31 +436,162 @@ def build(sch,
if
"gpu"
in
target
.
keys
and
not
fdevice
:
warnings
.
warn
(
"Specified target
%
s, but cannot find device code, did you do bind?"
%
target
)
"Specified target
%
s, but cannot find device code, did you do "
"bind?"
%
target
)
fhost
=
[
ir_pass
.
BindDeviceType
(
x
,
device_type
)
for
x
in
fhost
]
fhost
=
[
ir_pass
.
LowerTVMBuiltin
(
x
)
for
x
in
fhost
]
if
not
target_host
:
if
device_type
==
ndarray
.
cpu
(
0
)
.
device_type
:
target_host
=
target
assert
not
fdevice
else
:
target_host
=
"llvm"
if
module
.
enabled
(
"llvm"
)
else
"stackvm"
if
device_type
==
ndarray
.
cpu
(
0
)
.
device_type
and
target_host
==
target
:
assert
not
fdevice
target_host
=
_target
.
create
(
target_host
)
target_device
=
target
fdevice
=
[
ir_pass
.
LowerIntrin
(
x
,
target_device
.
target_name
)
for
x
in
fdevice
]
fdevice
=
[
ir_pass
.
LowerIntrin
(
x
,
target
.
target_name
)
for
x
in
fdevice
]
fhost
=
[
ir_pass
.
LowerIntrin
(
x
,
target_host
.
target_name
)
for
x
in
fhost
]
fhost
=
[
ir_pass
.
CombineContextCall
(
x
)
for
x
in
fhost
]
mdev
=
codegen
.
build_module
(
fdevice
,
str
(
target
))
if
fdevice
else
None
return
fhost
,
mdev
def build(inputs,
          args=None,
          target=None,
          target_host=None,
          name="default_function",
          binds=None):
    """Build a function with arguments as signature. Code will be generated
    for devices coupled with target information.

    Parameters
    ----------
    inputs : tvm.Schedule, LoweredFunc, or dict of target to LoweredFunc list
        The schedule to be built

    args : list of Buffer or Tensor or Var, optional
        The argument lists to the function.

    target : str or :any:`tvm.target.Target`, optional
        The target and option of the compilation.

    target_host : str or :any:`tvm.target.Target`, optional
        Host compilation target, if target is device.
        When TVM compiles device specific program such as CUDA,
        we also need host(CPU) side code to interact with the driver
        setup the dimensions and parameters correctly.
        target_host is used to specify the host side codegen target.
        By default, llvm is used if it is enabled,
        otherwise a stackvm interpreter is used.

    name : str, optional
        The name of result function.

    binds : dict, optional
        Dictionary that maps the binding of symbolic buffer to Tensor.
        By default, a new buffer is created for each tensor in the argument.

    Returns
    -------
    ret : tvm.module
        A module that combines both host and device code.

    Examples
    --------
    There are two typical example uses of this function depending on the type
    of the argument `inputs`:
    1. it is a list of lowered functions:

    .. code-block:: python

        n = 2
        A = tvm.placeholder((n,), name='A')
        B = tvm.placeholder((n,), name='B')
        C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s = tvm.create_schedule(C.op)
        f = tvm.lower(s, [A, B, C], name="test_add")
        m = tvm.build(f, target="llvm")

    2. it is a dict of compilation target to list of lowered functions:

    .. code-block:: python

        n = 2
        A = tvm.placeholder((n,), name='A')
        B = tvm.placeholder((n,), name='B')
        C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
        s1 = tvm.create_schedule(C.op)
        s2 = topi.cpp.cuda.schedule_injective("cuda", [C])
        f1 = tvm.lower(s1, [A, B, C], name="test_add1")
        f2 = tvm.lower(s2, [A, B, C], name="test_add2")
        m = tvm.build({"llvm": [f1], "cuda": [f2]}, target_host="llvm")

    Note
    ----
    See the note on :any:`tvm.target` on target string format.
    """
    # Normalize `inputs` into a list of LoweredFunc (`flist`), unless it is
    # already a target -> LoweredFunc-list mapping.
    if isinstance(inputs, schedule.Schedule):
        if args is None:
            raise ValueError("args must be given for build from schedule")
        flist = lower(inputs, args, name=name, binds=binds)
        if isinstance(flist, container.LoweredFunc):
            flist = [flist]
    elif isinstance(inputs, container.LoweredFunc):
        if args:
            raise ValueError("args must be done when build from LoweredFunc.")
        flist = [inputs]
    elif isinstance(inputs, (list, tuple, container.Array)):
        flist = inputs
    elif not isinstance(inputs, (dict, container.Map)):
        raise ValueError("inputs must be Schedule, LoweredFunc, list of "
                         "LoweredFunc, or dict of target to list of "
                         "LoweredFunc.")

    # Build the target -> function-list mapping; a non-dict input becomes a
    # single-entry mapping keyed by the (possibly defaulted) target.
    if not isinstance(inputs, (dict, container.Map)):
        target = _target.current_target() if target is None else target
        target = target if target else "llvm"
        target_flist = {target: flist}
    else:
        target_flist = inputs

    # Validate keys and check for duplicate function names per target.
    for tar, flist in target_flist.items():
        if not isinstance(tar, (str, _target.Target)):
            raise ValueError("The key of inputs must be str or "
                             "_target.Target when inputs is dict.")
        fname_set = set()
        for x in flist:
            if not isinstance(x, container.LoweredFunc):
                raise ValueError("inputs must be Schedule, LoweredFunc, list "
                                 "of LoweredFunc, or dict of str to list of "
                                 "LoweredFunc.")
            if x.name in fname_set:
                raise ValueError("Duplicate function name %s" % x.name)
            fname_set.add(x.name)

    # Pick a host target: prefer any CPU target among the keys, otherwise
    # fall back to llvm (or stackvm when llvm is unavailable).
    if not target_host:
        for tar, _ in target_flist.items():
            tar = _target.create(tar)
            device_type = ndarray.context(tar.target_name, 0).device_type
            if device_type == ndarray.cpu(0).device_type:
                target_host = tar
                break
    if not target_host:
        target_host = "llvm" if module.enabled("llvm") else "stackvm"

    fhost_all = []
    device_modules = []
    for tar, flist in target_flist.items():
        fhost, mdev = _build_for_device(flist, tar, target_host)
        # Save the current lowered functions of the host and the device module.
        fhost_all += fhost
        device_modules.append(mdev)

    # Generate a unified host module.
    mhost = codegen.build_module(fhost_all, str(target_host))

    # Import all modules.
    for mdev in device_modules:
        if mdev:
            mhost.import_module(mdev)
    return mhost
tests/python/unittest/test_runtime_heterogeneous.py
View file @
47e57be4
...
...
@@ -124,9 +124,6 @@ def test_simplex_data_transferring():
schedule_add
=
topi
.
cpp
.
cuda
.
schedule_injective
(
target
,
[
elemwise_add
])
lower_add
=
tvm
.
lower
(
schedule_add
,
[
tensor_a
,
tensor_b
,
elemwise_add
],
name
=
"elemwise_add"
)
host_funcs_add
,
lib_add
=
tvm
.
build
(
lower_add
,
target
=
target_device
,
name
=
"elemwise_add"
,
postpone_host_codegen
=
True
)
# Insert copy. Neither compute nor schedule is required for the copy
# node. The compute will be performed at runtime which is just data
...
...
@@ -142,16 +139,8 @@ def test_simplex_data_transferring():
elemwise_sub
],
name
=
"elemwise_sub"
)
host_funcs_sub
,
lib_sub
=
tvm
.
build
(
lower_sub
,
target
=
target_host
,
name
=
"elemwise_sub"
,
postpone_host_codegen
=
True
)
host_funcs
=
host_funcs_add
+
host_funcs_sub
mhost
=
tvm
.
codegen
.
build_module
(
host_funcs
,
target_host
)
if
lib_add
:
mhost
.
import_module
(
lib_add
)
if
lib_sub
:
mhost
.
import_module
(
lib_sub
)
target_flist
=
{
target_device
:
[
lower_add
],
target_host
:
[
lower_sub
]}
mhost
=
tvm
.
build
(
target_flist
,
target_host
=
target_host
)
ctx
=
[
host_ctx
,
device_ctx
]
mod
=
graph_runtime
.
create
(
graph
,
mhost
,
ctx
)
params
=
{}
...
...
@@ -338,10 +327,6 @@ def test_duplex_data_transferring():
lower_add1
=
tvm
.
lower
(
add_schedule1
,
[
tensor_d
,
copy_sub_add
,
elemwise_add1
],
name
=
"elemwise_add1"
)
host_funcs_add
,
lib_add
=
tvm
.
build
([
lower_add0
,
lower_add1
],
target
=
target_device
,
postpone_host_codegen
=
True
)
# Create module for sub whose target is the host.
tensor_c
=
tvm
.
placeholder
(
shape
,
name
=
"C"
)
elemwise_sub
=
tvm
.
compute
(
shape
,
lambda
*
i
:
copy_add_sub
(
*
i
)
...
...
@@ -350,15 +335,10 @@ def test_duplex_data_transferring():
lower_sub
=
tvm
.
lower
(
sub_schedule
,
[
copy_add_sub
,
tensor_c
,
elemwise_sub
],
name
=
"elemwise_sub"
)
host_funcs_sub
,
lib_sub
=
tvm
.
build
(
lower_sub
,
target
=
target_host
,
postpone_host_codegen
=
True
)
host_funcs
=
host_funcs_add
+
host_funcs_sub
mhost
=
tvm
.
codegen
.
build_module
(
host_funcs
,
target_host
)
if
lib_add
:
mhost
.
import_module
(
lib_add
)
if
lib_sub
:
mhost
.
import_module
(
lib_sub
)
target_flist
=
{
target_device
:
[
lower_add0
,
lower_add1
],
target_host
:
[
lower_sub
]}
mhost
=
tvm
.
build
(
target_flist
,
target_host
=
target_host
)
ctx
=
[
host_ctx
,
device_ctx
]
params
=
{}
params
[
"A"
]
=
tensor_a
=
np
.
random
.
uniform
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment