Commit 7c3ec7df by Zhi, committed by Tianqi Chen

Heterogeneous Runtime (#1695)

parent 7beafddd
@@ -384,8 +384,14 @@ def build(sch,
           target=None,
           target_host=None,
           name="default_function",
-          binds=None):
-    """Build a function with arguments as signiture.
+          binds=None,
+          postpone_host_codegen=False):
+    """Build a function with arguments as signature. Code will be generated
+    for a device specified by the target. For homogeneous execution, a module
+    that contains both host and device code is returned. For heterogeneous
+    execution, a list of lowered functions for the host and a module
+    containing device code are returned, but actual code generation for the
+    host module is postponed until code generation is finished for all devices.
     Parameters
     ----------
@@ -414,10 +420,18 @@ def build(sch,
         Dictionary that maps the binding of symbolic buffer to Tensor.
         By default, a new buffer is created for each tensor in the argument.
+    postpone_host_codegen : bool, optional
+        A flag that indicates whether code generation for the host module
+        should be postponed. It is set to True for heterogeneous execution
+        and defaults to False otherwise.
     Returns
     -------
-    f : Function, or pair of functions
-        The result function.
+    ret : tvm.module, or (list of LoweredFunc, tvm.module) tuple
+        A module that combines both host and device code is returned when
+        postpone_host_codegen is not set. Otherwise, a list of lowered
+        functions for the host and a module containing only device code are
+        returned.
     Note
     ----
@@ -498,9 +512,15 @@ def build(sch,
     fdevice = [ir_pass.LowerIntrin(x, target_device.target_name) for x in fdevice]
     fhost = [ir_pass.LowerIntrin(x, target_host.target_name) for x in fhost]
     fhost = [ir_pass.CombineContextCall(x) for x in fhost]
-    mhost = codegen.build_module(fhost, str(target_host))
+    # When host code generation is postponed, return the lowered host
+    # functions together with the device module. All device modules will be
+    # imported into the host module after they have been collected.
+    mdev = codegen.build_module(fdevice, str(target_device)) if fdevice else None
+    if postpone_host_codegen:
+        return fhost, mdev
+    mhost = codegen.build_module(fhost, str(target_host))
     if fdevice:
-        mdev = codegen.build_module(fdevice, str(target_device))
         mhost.import_module(mdev)
     return mhost
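For context, the following is a minimal sketch (not part of the commit) of how a caller could drive the postponed host codegen path when compiling the same compute for two devices. The compute definition, schedules, function names, and the assumption of a CUDA-enabled build are all illustrative; the final combination step simply mirrors what build() itself does when postpone_host_codegen is not set.

    import tvm

    # A toy compute: B[i] = A[i] + 1, scheduled once for CPU and once for GPU.
    n = tvm.var("n")
    A = tvm.placeholder((n,), name="A")
    B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")

    s_cpu = tvm.create_schedule(B.op)

    s_gpu = tvm.create_schedule(B.op)
    bx, tx = s_gpu[B].split(B.op.axis[0], factor=64)
    s_gpu[B].bind(bx, tvm.thread_axis("blockIdx.x"))
    s_gpu[B].bind(tx, tvm.thread_axis("threadIdx.x"))

    host_funcs = []   # lowered host functions collected across devices
    device_mods = []  # device-only modules collected across devices
    for sch, target, fname in [(s_gpu, "cuda", "add_gpu"), (s_cpu, "llvm", "add_cpu")]:
        # With postpone_host_codegen=True, build() returns the lowered host
        # functions and a module containing only device code (or None when
        # there is no device code, e.g. for a pure LLVM target).
        fhost, mdev = tvm.build(sch, [A, B], target=target, target_host="llvm",
                                name=fname, postpone_host_codegen=True)
        host_funcs += fhost
        if mdev is not None:
            device_mods.append(mdev)

    # Generate host code once, after all device modules have been collected,
    # then import every device module into the single host module.
    mhost = tvm.codegen.build_module(host_funcs, "llvm")
    for mdev in device_mods:
        mhost.import_module(mdev)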
@@ -3,26 +3,24 @@ import numpy as np
 from .._ffi.base import string_types
 from .._ffi.function import get_global_func
+from .._ffi.runtime_ctypes import TVMContext
 from ..rpc import base as rpc_base
-from .. import ndarray as nd
 def create(graph_json_str, libmod, ctx):
     """Create a runtime executor module given a graph and module.
     Parameters
     ----------
     graph_json_str : str or graph class
         The graph to be deployed in json format output by nnvm graph.
         The graph can only contain one operator(tvm_op) that
         points to the name of PackedFunc in the libmod.
     libmod : tvm.Module
         The module of the corresponding function
-    ctx : TVMContext
-        The context to deploy the module, can be local or remote.
+    ctx : TVMContext or list of TVMContext
+        The context to deploy the module. It can be local or remote when
+        there is only one TVMContext. Otherwise, the first context in the
+        list is used as the primary/fallback context. All contexts should be
+        given for heterogeneous execution.
     Returns
     -------
     graph_module : GraphModule
@@ -33,17 +31,42 @@ def create(graph_json_str, libmod, ctx):
         graph_json_str = graph_json_str._tvm_graph_json()
     except AttributeError:
         raise ValueError("Type %s is not supported" % type(graph_json_str))
-    device_type = ctx.device_type
-    device_id = ctx.device_id
-    if device_type >= rpc_base.RPC_SESS_MASK:
-        assert libmod.type_key == "rpc"
-        assert rpc_base._SessTableIndex(libmod) == ctx._rpc_sess._tbl_index
+    if isinstance(ctx, TVMContext):
+        ctx = [ctx]
+    elif not isinstance(ctx, (list, tuple)):
+        raise ValueError("ctx has to be the type of TVMContext or a list of "
+                         "TVMContext")
+    for cur_ctx in ctx:
+        if not isinstance(cur_ctx, TVMContext):
+            raise ValueError("ctx has to be the type of TVMContext or a list "
+                             "of TVMContext")
+    # device_type_id[0] and device_type_id[1] are used as the primary/fallback
+    # context type and id. All other entries are used as device contexts for
+    # heterogeneous execution.
+    num_rpc_ctx = 0
+    device_type_id = []
+    for cur_ctx in ctx:
+        device_type = cur_ctx.device_type
+        if device_type >= rpc_base.RPC_SESS_MASK:
+            assert libmod.type_key == "rpc"
+            assert rpc_base._SessTableIndex(
+                libmod) == cur_ctx._rpc_sess._tbl_index
+            num_rpc_ctx += 1
+            device_type = cur_ctx.device_type % rpc_base.RPC_SESS_MASK
+        device_type_id.append(device_type)
+        device_type_id.append(cur_ctx.device_id)
+    if 0 < num_rpc_ctx < len(ctx):
+        raise ValueError("Either all or none of the contexts should be rpc.")
+    if num_rpc_ctx == len(ctx):
         hmod = rpc_base._ModuleHandle(libmod)
-        fcreate = ctx._rpc_sess.get_function("tvm.graph_runtime.remote_create")
-        device_type = device_type % rpc_base.RPC_SESS_MASK
-        return GraphModule(fcreate(graph_json_str, hmod, device_type, device_id), ctx)
+        fcreate = ctx[0]._rpc_sess.get_function("tvm.graph_runtime.remote_create")
+        return GraphModule(fcreate(graph_json_str, hmod, *device_type_id))
     fcreate = get_global_func("tvm.graph_runtime.create")
-    return GraphModule(fcreate(graph_json_str, libmod, device_type, device_id), ctx)
+    return GraphModule(fcreate(graph_json_str, libmod, *device_type_id))
 class GraphModule(object):
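Below is a small sketch (not from the commit) of how create() can now be called. Here graph_json and lib stand in for the output of an earlier compilation step, and the context list is illustrative.

    import tvm
    from tvm.contrib import graph_runtime

    # Single-context (homogeneous) use is unchanged.
    module = graph_runtime.create(graph_json, lib, tvm.cpu(0))

    # For heterogeneous execution, pass every context the graph uses. The
    # first entry is treated as the primary/fallback context; the remaining
    # entries are the additional device contexts.
    module = graph_runtime.create(graph_json, lib, [tvm.cpu(0), tvm.gpu(0)])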
@@ -58,18 +81,13 @@ class GraphModule(object):
     module : Module
         The internal tvm module that holds the actual graph functions.
-    ctx : TVMContext
-        The context this module is under
     Attributes
     ----------
     module : Module
         The internal tvm module that holds the actual graph functions.
-    ctx : TVMContext
-        The context this module is under
     """
-    def __init__(self, module, ctx):
+    def __init__(self, module):
         self.module = module
         self._set_input = module["set_input"]
         self._run = module["run"]
@@ -81,7 +99,6 @@ class GraphModule(object):
         except AttributeError:
             pass
         self._load_params = module["load_params"]
-        self.ctx = ctx
     def set_input(self, key=None, value=None, **params):
         """Set inputs to the module via kwargs
@@ -98,14 +115,14 @@ class GraphModule(object):
            Additional arguments
         """
         if key:
-            self._set_input(key, nd.array(value, ctx=self.ctx))
+            self._get_input(key).copyfrom(value)
         if params:
             # upload big arrays first to avoid memory issue in rpc mode
             keys = list(params.keys())
             keys.sort(key=lambda x: -np.prod(params[x].shape))
             for k in keys:
-                self._set_input(k, nd.array(params[k], ctx=self.ctx))
+                self._get_input(k).copyfrom(params[k])
     def run(self, **input_dict):
         """Run forward execution of the graph
@@ -177,7 +194,8 @@ class GraphModule(object):
         if hasattr(self, '_debug_get_output'):
             self._debug_get_output(node, out)
         else:
-            raise RuntimeError("Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0")
+            raise RuntimeError(
+                "Please compile runtime with USE_GRAPH_RUNTIME_DEBUG = 0")
         return out
     def load_params(self, params_bytes):
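As a usage note, here is a rough sketch (not part of the commit) of feeding inputs after this change: set_input now copies data into the runtime's own input buffers through the module's get_input packed function and copyfrom(), so GraphModule no longer carries a TVMContext of its own. The module object is assumed to be a GraphModule such as the one returned by graph_runtime.create above; the input name "data", the shape, and the params dict are illustrative.

    import numpy as np

    data = np.random.uniform(size=(1, 3, 224, 224)).astype("float32")
    params = {"weight": np.zeros((1000, 512), dtype="float32")}  # placeholder

    module.set_input("data", data)  # copied into the runtime-owned input buffer
    module.set_input(**params)      # parameter dict follows the same copy path
    module.run()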