[PASS] Revert the change of intel gpu warp index (#1127)

f48befc6 · Tianqi Chen · GitHub · c7e7e7f5 · f48befc6 · f48befc6
Commit f48befc6, authored Apr 20, 2018 by Tianqi Chen; committed by GitHub on Apr 20, 2018
Hide whitespace changes
Inline Side-by-side

Showing 1 addition and 16 deletions

include/tvm/build_module.h
+0 -6

python/tvm/build_module.py
+0 -7

src/codegen/build_module.cc
+0 -2

src/pass/inject_copy_intrin.cc
+1 -1

No files found.
--- a/include/tvm/build_module.h
+++ b/include/tvm/build_module.h
@@ -32,11 +32,6 @@ class TargetNode : public Node {
  int max_num_threads = 1;
  /*! \brief The warp size that should be used by the LowerThreadAllreduce pass */
  int thread_warp_size = 1;
-  /*!
-   * \brief The thread index that is the lowest(correspond to warp)
-   *  In cuda it is threadIdx.x, but can be different in some platform.
-   */
-  int thread_warp_index = 0;
  /*! \brief Keys for this target */
  Array<Expr> keys_array;
  /*! \brief Options for this target */
@@ -53,7 +48,6 @@ class TargetNode : public Node {
    v->Visit("device_type", &device_type);
    v->Visit("max_num_threads", &max_num_threads);
    v->Visit("thread_warp_size", &thread_warp_size);
-    v->Visit("thread_warp_index", &thread_warp_index);
    v->Visit("keys_array", &keys_array);
    v->Visit("options_array", &options_array);
    v->Visit("libs_array", &libs_array);

--- a/python/tvm/build_module.py
+++ b/python/tvm/build_module.py
@@ -470,13 +470,6 @@ def build(sch,
    for i, func in enumerate(fdevice):
        warp_size = target.thread_warp_size
        fdevice[i] = ir_pass.LowerWarpMemory(func, warp_size)
-        warp_index = target.thread_warp_index
-        if warp_index != 0:
-            assert warp_index == 2
-            # swap z and x
-            tmap = {api.convert("threadIdx.z"): api.thread_axis("threadIdx.x"),
-                    api.convert("threadIdx.x"): api.thread_axis("threadIdx.z")}
-            fdevice[i] = ir_pass.RemapThreadAxis(func, tmap)
    if "gpu" in target.keys and not fdevice:
        warnings.warn(

--- a/src/codegen/build_module.cc
+++ b/src/codegen/build_module.cc
@@ -78,8 +78,6 @@ Target CreateTarget(const std::string& target_name,
    t->max_num_threads = 256;
    if (t->device_name == "intel_gpu") {
      t->thread_warp_size = 16;
-      // use threadIdx.z for index
-      t->thread_warp_index = 2;
    }
  } else if (target_name == "metal" || target_name == "vulkan") {
    if (target_name == "metal") {

--- a/src/pass/inject_copy_intrin.cc
+++ b/src/pass/inject_copy_intrin.cc
@@ -156,7 +156,7 @@ class CopyIntrinInjector : public IRMutator {
    }
  }
  // pragma key
-  const std::string& pragma_key_;
+  std::string pragma_key_;
  // function to lower copy intrinsics.
  const PackedFunc& flower_copy_fromto_;
  // Storage scope