Unverified Commit f3ae3f20 by Tianqi Chen Committed by GitHub

[TOPI] Fix atleast1d for reduce and squeeze (#2147)

parent bac22073
......@@ -28,6 +28,17 @@ inline tvm::Array<tvm::Expr> ShapeToArray(TShape shape) {
return result;
}
/*!
 * \brief Helper function to convert TShape to a TVM Integer array.
 * Useful for passing axis/shape data from NNVM param structures to
 * TOPI ops that take Array<Integer>.
 *
 * \param shape The shape to convert
 *
 * \return An Array of Integer, where each element is a constant int32
 */
inline tvm::Array<tvm::Integer> ShapeToIntArray(TShape shape) {
// Re-wraps the node produced by ShapeToArray; that helper emits constant
// int32 Exprs, so viewing them as Integer is safe — TODO(review) confirm
// ShapeToArray never emits a non-Integer Expr.
return tvm::Array<tvm::Integer>(ShapeToArray(shape).node_);
}
} // namespace compiler
} // namespace nnvm
#endif // NNVM_COMPILER_UTIL_H_
......@@ -3,9 +3,6 @@
* \file reduce.cc
* \brief reduce operator.
*/
// Enforce TOPI to use old behavior that reduces to at least 1d
#define TOPI_REDUCE_ATLEAST1D 1
#include <nnvm/op.h>
#include <nnvm/node.h>
#include <nnvm/op_attr_types.h>
......@@ -20,13 +17,12 @@
#include "topi/reduction.h"
#include "topi/transform.h"
static_assert(TOPI_REDUCE_ATLEAST1D, "need to use legacy reduce behavior");
namespace nnvm {
namespace top {
using namespace tvm;
using namespace nnvm::compiler;
// reduce
DMLC_REGISTER_PARAMETER(ReduceParam);
......@@ -168,9 +164,9 @@ Example::
TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
param.axis, param.exclude);
if (!r_axes.ndim()) return Array<Tensor> { topi::identity(inputs[0]) };
auto axis = ShapeToArray(r_axes);
auto axis = ShapeToIntArray(r_axes);
return Array<Tensor>{
topi::sum(inputs[0], axis, param.keepdims) };
topi::sum(inputs[0], axis, param.keepdims, true) };
})
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
......@@ -202,9 +198,9 @@ NNVM_REGISTER_REDUCE_OP(max)
const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
param.axis, param.exclude);
auto axis = ShapeToArray(r_axes);
auto axis = ShapeToIntArray(r_axes);
return Array<Tensor>{
topi::max(inputs[0], axis, param.keepdims) };
topi::max(inputs[0], axis, param.keepdims, true) };
})
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
......@@ -235,9 +231,9 @@ NNVM_REGISTER_REDUCE_OP(min)
const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
param.axis, param.exclude);
auto axis = ShapeToArray(r_axes);
auto axis = ShapeToIntArray(r_axes);
return Array<Tensor>{
topi::min(inputs[0], axis, param.keepdims) };
topi::min(inputs[0], axis, param.keepdims, true) };
})
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
......@@ -299,8 +295,8 @@ values over a given axis.
const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
param.axis, param.exclude);
auto axis = ShapeToArray(r_axes);
Tensor out = topi::argmax(inputs[0], axis, param.keepdims);
auto axis = ShapeToIntArray(r_axes);
Tensor out = topi::argmax(inputs[0], axis, param.keepdims, true);
if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype);
return Array<Tensor>{out};
});
......@@ -322,8 +318,8 @@ values over a given axis.
const ReduceParam& param = nnvm::get<ReduceParam>(attrs.parsed);
TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
param.axis, param.exclude);
auto axis = ShapeToArray(r_axes);
Tensor out = topi::argmin(inputs[0], axis, param.keepdims);
auto axis = ShapeToIntArray(r_axes);
Tensor out = topi::argmin(inputs[0], axis, param.keepdims, true);
if (param.dtype == kFloat32) out = topi::cast(out, out_info[0]->dtype);
return Array<Tensor>{out};
});
......@@ -352,7 +348,7 @@ Example::
TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
param.axis, param.exclude);
if (!r_axes.ndim()) return Array<Tensor> { topi::identity(inputs[0]) };
auto axis = ShapeToArray(r_axes);
auto axis = ShapeToIntArray(r_axes);
Expr count = make_const(inputs[0]->dtype, 1);
for (auto& i : r_axes) {
......@@ -360,7 +356,7 @@ Example::
}
return Array<Tensor>{
topi::divide(topi::sum(inputs[0], axis, param.keepdims), count) };
topi::divide(topi::sum(inputs[0], axis, param.keepdims, true), count) };
});
NNVM_REGISTER_REDUCE_OP(prod)
......@@ -387,9 +383,9 @@ Example::
TShape r_axes = GetReduceAxes(inputs[0]->shape.size(),
param.axis, param.exclude);
if (!r_axes.ndim()) return Array<Tensor> { topi::identity(inputs[0]) };
auto axis = ShapeToArray(r_axes);
auto axis = ShapeToIntArray(r_axes);
return Array<Tensor>{
topi::prod(inputs[0], axis, param.keepdims) };
topi::prod(inputs[0], axis, param.keepdims, true) };
});
......
......@@ -756,8 +756,8 @@ Examples::
const Array<Tensor>& inputs,
const Array<Tensor>& out_info) {
const SqueezeParam& param = nnvm::get<SqueezeParam>(attrs.parsed);
auto axis = ShapeToArray(param.axis);
return Array<Tensor>{ topi::squeeze(inputs[0], axis) };
auto axis = ShapeToIntArray(param.axis);
return Array<Tensor>{ topi::squeeze(inputs[0], axis, true) };
})
.set_attr<FGradient>(
"FGradient", [](const NodePtr& n,
......
......@@ -14,22 +14,16 @@ using namespace tvm;
/*!
* \brief Fuse all of the given args
*
*
* \param stage The stage in which to apply the fuse
* \param args The iteration variables to be fused
*
* \return The fused iteration variable
*/
inline IterVar Fuse(Stage stage, const Array<IterVar>& args) {
  // Delegate to the Stage::fuse overload that accepts the whole axis array.
  // Unlike the old pairwise loop (whose leftover copy made the lines after
  // `return fused;` unreachable dead code), this also handles an empty
  // `args`, which is required when scheduling reductions to 0-d tensors.
  IterVar res;
  stage.fuse(args, &res);
  return res;
}
} // namespace detail
......
......@@ -27,7 +27,7 @@ using namespace tvm;
*/
inline Tensor l2_normalize(const Tensor& data,
float eps,
const Array<Expr>& axis,
const Array<Integer>& axis,
std::string name = "tensor",
std::string tag = "l2_normalize") {
CHECK_EQ(data->shape.size(), 4) << "L2 normalization requires 4-D input";
......
......@@ -40,7 +40,7 @@ inline Tensor softmax(const Tensor &x,
auto k1 = tvm::reduce_axis(Range(0, input_shape[axis]), "k1");
auto k2 = tvm::reduce_axis(Range(0, input_shape[axis]), "k2");
auto reduced_shape = MakeReduceTargetShape({axis}, x, false);
auto reduced_shape = MakeReduceTargetShape({axis}, x, false, false);
auto insert_reduce_index = [axis, ndim](const Array<Var> &indices,
const IterVar &reduce_index) {
......
......@@ -196,30 +196,34 @@ inline Tensor reshape(const Tensor& x,
* \param x The input tensor
* \param axis Indices of the dimensions to remove. If this is empty,
* all entries with a constant size of 1 will be removed.
* \param atleast1d Whether the output needs to be at least 1-dimensional.
* \param name The name of the operation
* \param tag The tag to mark the operation
*
* \return A Tensor whose op member is the squeeze operation
*/
inline Tensor squeeze(const Tensor& x,
Array<Expr> axis,
Array<Integer> axis,
bool atleast1d = false,
std::string name = "tensor",
std::string tag = kInjective) {
auto axis_val = GetConstIntValues(axis, "axis");
auto ndim = x->shape.size();
if (axis_val.size() == 0) {
std::vector<int> axis_val;
if (!axis.defined() || axis.size() == 0) {
for (size_t i = 0; i < ndim; ++i) {
if (IsConstInt(x->shape[i]) && GetConstInt(x->shape[i]) == 1) {
axis_val.push_back(static_cast<int>(i));
}
}
} else {
for (size_t i = 0; i < axis_val.size(); ++i) {
if (axis_val[i] < 0) {
axis_val[i] += static_cast<int>(x->shape.size());
for (size_t i = 0; i < axis.size(); ++i) {
int64_t val = axis[i]->value;
if (val < 0) {
val += static_cast<int>(x->shape.size());
}
CHECK_EQ(GetConstInt(x->shape[axis_val[i]]), 1) <<
"Dimension " << axis[i] << " must have size 1";
CHECK_EQ(GetConstInt(x->shape[val]), 1) <<
"Dimension " << val << " must have size 1";
axis_val.push_back(val);
}
}
......@@ -231,7 +235,7 @@ inline Tensor squeeze(const Tensor& x,
out_shape.push_back(x->shape[i]);
}
}
if (out_shape.size() == 0) {
if (out_shape.size() == 0 && atleast1d) {
out_shape.push_back(1);
}
......
......@@ -63,10 +63,12 @@ def _schedule_reduce(op, sch, is_idx_reduce=False):
sch[temp_val_input].compute_at(sch[real_output], outer_in)
else:
if is_idx_reduce:
spatial_axis = sch[real_output].fuse(*(sch[real_output].op.axis))
sch[real_output].bind(spatial_axis, tvm.thread_axis("blockIdx.x"))
sch[temp_idx_input].compute_at(sch[real_output],
sch[real_output].op.axis[0])
spatial_axis)
sch[temp_val_input].compute_at(sch[real_output],
sch[real_output].op.axis[0])
spatial_axis)
sch[real_output].set_store_predicate(thread_x.equal(0))
return sch
......
......@@ -59,9 +59,9 @@ using namespace tvm;
using namespace tvm::runtime;
/*! \brief Canonicalize an argument that may be Array<Expr> or int to Array<Integer> */
Array<Expr> ArrayOrInt(TVMArgValue arg) {
Array<Integer> ArrayOrInt(TVMArgValue arg) {
if (arg.type_code() == kDLInt || arg.type_code() == kDLUInt) {
Array<Expr> result;
Array<Integer> result;
result.push_back(arg.operator int());
return result;
} else {
......
......@@ -97,6 +97,10 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32")
def test_reduce_map():
verify_reduce_map_ele(in_shape=(32,),
axis=0,
keepdims=False,
type="argmax")
verify_reduce_map_ele(in_shape=(128, 24, 128, 24),
axis=(1, 2, 3),
keepdims=True,
......
......@@ -91,10 +91,7 @@ def verify_squeeze(src_shape, axis):
data_npy = np.random.normal(size=src_shape).astype(A.dtype)
out_npy = np.squeeze(data_npy, axis=axis)
data_nd = tvm.nd.array(data_npy, ctx)
if out_npy.shape == ():
out_nd_shape = (1,)
else:
out_nd_shape = out_npy.shape
out_nd_shape = out_npy.shape
out_nd = tvm.nd.empty(out_nd_shape, ctx=ctx, dtype=B.dtype)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
......
......@@ -100,10 +100,7 @@ def verify_squeeze(src_shape, axis):
data_npy = np.random.normal(size=src_shape).astype(A.dtype)
out_npy = np.squeeze(data_npy, axis=axis)
data_nd = tvm.nd.array(data_npy, ctx)
if out_npy.shape == ():
out_nd_shape = (1,)
else:
out_nd_shape = out_npy.shape
out_nd_shape = out_npy.shape
out_nd = tvm.nd.empty(out_nd_shape, ctx=ctx, dtype=B.dtype)
foo(data_nd, out_nd)
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment