Commit ec3a4251 by Marina Kolpakova, committed by Tianqi Chen

A couple of fixes for GEN (#2593)

parent 77718f8e
@@ -302,7 +302,7 @@ def lower(sch,
Parameters
----------
sch : tvm.schedule.Schedule
- The schedule to be builded
+ The schedule to be built
args : list of Buffer or Tensor or Var
The argument lists to the function.
......
@@ -159,11 +159,12 @@ class GraphModuleDebug(graph_runtime.GraphModule):
self.debug_datum = debug_result.DebugResult(graph_json, self._dump_path)
def _run_debug(self):
"""Execute the node spcified with index will be executed.
"""Execute the node specified with index will be executed.
Each debug output will be copied to the buffer
- Time consumed for each execuion will be set as debug output.
+ Time consumed for each execution will be set as debug output.
"""
+ self.debug_datum._time_list = []
for i, node in enumerate(self.debug_datum.get_graph_nodes()):
start_time = datetime.now().time()
@@ -177,7 +178,7 @@ class GraphModuleDebug(graph_runtime.GraphModule):
self.debug_datum._output_tensor_list.append(out_tensor)
def debug_get_output(self, node, out):
"""Run graph upto node and get the output to out
"""Run graph up to node and get the output to out
Parameters
----------
......
@@ -130,7 +130,7 @@ struct ThreadScope {
};
- /*! \brief workload speccification */
+ /*! \brief workload specification */
struct ThreadWorkLoad {
// array, first three are thread configuration.
size_t work_size[6];
......
@@ -31,7 +31,7 @@ import numpy as np
######################################################################
# We first write a very simple vector add and build it with the default schedule. Then, we use
- # our customized lowering pass to manipulate the IR directly instead of using schedule premitives.
+ # our customized lowering pass to manipulate the IR directly instead of using schedule primitives.
#
n = tvm.const(128, "int32")
......
@@ -94,7 +94,7 @@ bx, tx = s[C].split(C.op.axis[0], factor=64)
# compute grid. These are GPU specific constructs that allows us
# to generate code that runs on GPU.
#
if tgt == "cuda":
if tgt == "cuda" or tgt.startswith('opencl'):
s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
s[C].bind(tx, tvm.thread_axis("threadIdx.x"))
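For context, the target check changed above sits inside the tutorial's vector-add schedule. Below is a minimal sketch of that surrounding code as it would read with this change applied; the placeholder definitions and the example tgt value are assumptions taken from the rest of the tutorial, not part of this hunk.

import tvm

tgt = "opencl"  # assumed example target; "cuda" and plain "llvm" follow the same flow

# Vector-add compute definition used throughout the tutorial.
n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
B = tvm.placeholder((n,), name="B")
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")

s = tvm.create_schedule(C.op)
bx, tx = s[C].split(C.op.axis[0], factor=64)

# GPU-style targets (CUDA and the OpenCL variants) need the split axes bound
# to the block/thread hierarchy; CPU targets simply skip this step.
if tgt == "cuda" or tgt.startswith('opencl'):
    s[C].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[C].bind(tx, tvm.thread_axis("threadIdx.x"))

fadd = tvm.build(s, [A, B, C], tgt, target_host="llvm", name="myadd")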
@@ -149,7 +149,7 @@ tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
#
# The following code fetches the device module and prints the content code.
#
if tgt == "cuda":
if tgt == "cuda" or tgt.startswith('opencl'):
dev_module = fadd.imported_modules[0]
print("-----GPU code-----")
print(dev_module.get_source())
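Between this hunk and the previous one the tutorial runs the built function on the device and checks the result (only the assert line is visible in the hunk header above). A rough sketch of that step, continuing the names from the previous sketch:

import numpy as np
import tvm

ctx = tvm.context(tgt, 0)  # device context matching the chosen target
n = 1024
a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)

fadd(a, b, c)  # launches the generated kernel on the device
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())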
@@ -193,6 +193,8 @@ temp = util.tempdir()
fadd.save(temp.relpath("myadd.o"))
if tgt == "cuda":
fadd.imported_modules[0].save(temp.relpath("myadd.ptx"))
+ if tgt.startswith('opencl'):
+ fadd.imported_modules[0].save(temp.relpath("myadd.cl"))
cc.create_shared(temp.relpath("myadd.so"), [temp.relpath("myadd.o")])
print(temp.listdir())
@@ -200,29 +202,34 @@ print(temp.listdir())
# .. note:: Module Storage Format
#
# The CPU(host) module is directly saved as a shared library(so).
- # There can be multiple customed format on the device code.
+ # There can be multiple customized format on the device code.
# In our example, device code is stored in ptx, as well as a meta
- # data json file. They can be loaded and linked seperatedly via import.
+ # data json file. They can be loaded and linked separately via import.
#
######################################################################
# Load Compiled Module
# --------------------
# We can load the compiled module from the file system and run the code.
- # The following code load the host and device module seperatedly and
+ # The following code load the host and device module separately and
# re-link them together. We can verify that the newly loaded function works.
#
fadd1 = tvm.module.load(temp.relpath("myadd.so"))
if tgt == "cuda":
fadd1_dev = tvm.module.load(temp.relpath("myadd.ptx"))
fadd1.import_module(fadd1_dev)
+ if tgt.startswith('opencl'):
+ fadd1_dev = tvm.module.load(temp.relpath("myadd.cl"))
+ fadd1.import_module(fadd1_dev)
fadd1(a, b, c)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
######################################################################
# Pack Everything into One Library
# --------------------------------
- # In the above example, we store the device and host code seperatedly.
+ # In the above example, we store the device and host code separately.
# TVM also supports export everything as one shared library.
# Under the hood, we pack the device modules into binary blobs and link
# them together with the host code.
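The note above has no accompanying code in this diff. A short sketch of what packing everything into one library looks like, assuming the fadd module and the a, b, c arrays from the earlier sketches and the tutorial's util.tempdir() helper; the myadd_pack.so file name is only illustrative.

import tvm
from tvm.contrib import util

temp = util.tempdir()
# export_library packs the host code and any imported device modules
# (PTX, OpenCL source, ...) into a single shared library.
fadd.export_library(temp.relpath("myadd_pack.so"))

# Loading that one file restores both host and device parts,
# so no separate import_module step is needed.
fadd2 = tvm.module.load(temp.relpath("myadd_pack.so"))
fadd2(a, b, c)
tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())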
@@ -254,8 +261,8 @@ tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
# The following codeblocks generate opencl code, creates array on opencl
# device, and verifies the correctness of the code.
#
if tgt == "opencl":
fadd_cl = tvm.build(s, [A, B, C], "opencl", name="myadd")
if tgt.startswith('opencl'):
fadd_cl = tvm.build(s, [A, B, C], tgt, name="myadd")
print("------opencl code------")
print(fadd_cl.imported_modules[0].get_source())
ctx = tvm.cl(0)
......
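The last hunk switches the OpenCL section over to the generic tgt string. A sketch of how that section continues, creating the arrays on the OpenCL device and verifying the result; it continues the names from the sketches above and mirrors the CUDA path earlier in the tutorial.

import numpy as np
import tvm

if tgt.startswith('opencl'):
    fadd_cl = tvm.build(s, [A, B, C], tgt, name="myadd")
    ctx = tvm.cl(0)  # OpenCL device 0
    n = 1024
    a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
    fadd_cl(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())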