Commit 6d4eb2e8
authored Feb 19, 2018 by masahi
Committed by Tianqi Chen Feb 18, 2018
[TOPI] update c++ pool and softmax (#905)
* update c++ pool and softmax
* clean up reduce axis
parent 99c96962
Showing 5 changed files with 176 additions and 21 deletions
topi/include/topi/nn/pooling.h               +119  -1
topi/include/topi/nn/softmax.h               +53   -16
topi/src/topi.cc                             +2    -2
topi/tests/python_cpp/test_topi_pooling.py   +1    -1
topi/tests/python_cpp/test_topi_softmax.py   +1    -1
topi/include/topi/nn/pooling.h
...
...
@@ -36,7 +36,8 @@ enum PoolType : int {
*
* \return The output tensor in NCHW order
*/
-inline Tensor pool(const Tensor& x,
+inline Tensor pool_nchw(const Tensor& x,
                    const Array<Expr>& kernel_size,
                    const Array<Expr>& stride_size,
                    const Array<Expr>& padding_size,
...
...
@@ -113,6 +114,123 @@ inline Tensor pool(const Tensor& x,
}
/*!
* \brief Perform pooling on data in NHWC order
*
* \param x The input tensor in NHWC order
* \param kernel_size Vector of two ints: {kernel_height, kernel_width}
* \param stride_size Vector of two ints: {stride_height, stride_width}
* \param padding_size Vector of two ints: {padding_height, padding_width}
* \param pool_type The type of pooling operator
* \param ceil_mode Whether to use ceil when calculating the output size
*
* \return The output tensor in NCHW order
*/
inline Tensor pool_nhwc(const Tensor& x,
                        const Array<Expr>& kernel_size,
                        const Array<Expr>& stride_size,
                        const Array<Expr>& padding_size,
                        PoolType pool_type,
                        bool ceil_mode) {
  CHECK_EQ(x->shape.size(), 4) << "Pooling input must be 4-D";
  CHECK_EQ(kernel_size.size(), 2) << "Pooling kernel_size must have 2 elements";
  CHECK_EQ(stride_size.size(), 2) << "Pooling stride_size must have 2 elements";
  CHECK_EQ(padding_size.size(), 2) << "Pooling padding_size must have 2 elements";

  auto kernel_height = kernel_size[0];
  auto kernel_width = kernel_size[1];
  auto stride_height = stride_size[0];
  auto stride_width = stride_size[1];
  auto padding_height = padding_size[0];
  auto padding_width = padding_size[1];

  auto batch = x->shape[0];
  auto height = x->shape[1];
  auto width = x->shape[2];
  auto channel = x->shape[3];

  auto pad_tuple = detail::GetPadTuple(padding_height, padding_width);
  auto pad_top = pad_tuple[0];
  auto pad_left = pad_tuple[1];
  auto pad_down = pad_tuple[2];
  auto pad_right = pad_tuple[3];

  if (ceil_mode) {
    // Additional padding to ensure we do ceil instead of floor when
    // dividing by stride.
    pad_down += stride_height - 1;
    pad_right += stride_width - 1;
  }

  Array<Expr> pad_before{ 0, pad_top, pad_left, 0 };
  Array<Expr> pad_after{ 0, pad_down, pad_right, 0 };

  auto out_height = tvm::ir::Simplify(
    (height - kernel_height + pad_top + pad_down) / stride_height + 1);
  auto out_width = tvm::ir::Simplify(
    (width - kernel_width + pad_left + pad_right) / stride_width + 1);

  auto dheight = tvm::reduce_axis(Range(0, kernel_height));
  auto dwidth = tvm::reduce_axis(Range(0, kernel_width));

  if (pool_type == kMaxPool) {
    auto temp = pad(x, pad_before, pad_after, x->dtype.min(), "pad_temp");
    return tvm::compute(
      { batch, out_height, out_width, channel },
      [&](Var n, Var h, Var w, Var c) {
        return tvm::max(temp(n, h * stride_height + dheight,
                             w * stride_width + dwidth, c),
                        { dheight, dwidth });
      }, "tensor", "pool_max");
  } else if (pool_type == kAvgPool) {
    auto temp = pad(x, pad_before, pad_after, 0, "pad_temp");
    auto tsum = tvm::compute(
      { batch, out_height, out_width, channel },
      [&](Var n, Var h, Var w, Var c) {
        return tvm::sum(temp(n, h * stride_height + dheight,
                             w * stride_width + dwidth, c),
                        { dheight, dwidth });
      }, "tensor", "pool_avg");
    return tvm::compute(
      { batch, out_height, out_width, channel },
      [&](Var n, Var h, Var w, Var c) {
        return tsum(n, h, w, c) / (kernel_height * kernel_width);
      }, "tensor", kElementWise);
  } else {
    LOG(ERROR) << "Unrecognized pool_type: " << pool_type;
    return x;
  }
}
/*!
* \brief Perform pooling on data
*
* \param x The input tensor in NCHW or NHWC order
* \param kernel_size Vector of two ints: {kernel_height, kernel_width}
* \param stride_size Vector of two ints: {stride_height, stride_width}
* \param padding_size Vector of two ints: {padding_height, padding_width}
* \param pool_type The type of pooling operator
* \param ceil_mode Whether to use ceil when calculating the output size
* \param layout The input layout
*
* \return The output tensor in NCHW order
*/
inline Tensor pool(const Tensor& x,
                   const Array<Expr>& kernel_size,
                   const Array<Expr>& stride_size,
                   const Array<Expr>& padding_size,
                   PoolType pool_type,
                   bool ceil_mode,
                   const std::string& layout = "NCHW") {
  CHECK(layout == "NCHW" || layout == "NHWC") << "Unsupported layout.";
  if (layout == "NCHW")
    return pool_nchw(x, kernel_size, stride_size, padding_size, pool_type, ceil_mode);
  else
    return pool_nhwc(x, kernel_size, stride_size, padding_size, pool_type, ceil_mode);
}
/*!
* \brief Perform global pooling on data in NCHW order
*
* \param x The input tensor in NCHW order
...
...
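For orientation, a minimal sketch (not part of this commit) of how the new layout-aware entry point can be called from C++: pool() now takes a layout string and dispatches to pool_nchw or pool_nhwc. The input shape, the helper name example_pool_nhwc, and the placeholder construction are illustrative assumptions; only the pool(...) signature comes from the diff above.

#include "topi/nn/pooling.h"
#include "tvm/tvm.h"

// Hypothetical usage sketch: max-pool a 1x32x32x16 NHWC placeholder with a
// 2x2 window and stride 2; the trailing "NHWC" routes the call to pool_nhwc.
inline tvm::Tensor example_pool_nhwc() {
  tvm::Tensor x = tvm::placeholder({1, 32, 32, 16}, tvm::Float(32), "x");
  return topi::nn::pool(x,
                        {2, 2},              // kernel_size
                        {2, 2},              // stride_size
                        {0, 0},              // padding_size
                        topi::nn::kMaxPool,
                        false,               // ceil_mode
                        "NHWC");             // layout (defaults to "NCHW")
}

Keeping pool() as a thin dispatcher preserves existing NCHW call sites through the default layout argument, while NHWC users opt in with one extra parameter.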
topi/include/topi/nn/softmax.h
...
...
@@ -9,6 +9,7 @@
#include <algorithm>
#include <string>
#include "topi/reduction.h"
#include "topi/tags.h"
#include "tvm/tvm.h"
...
...
@@ -19,33 +20,69 @@ using namespace tvm;
/*!
* \brief Softmax activation
*
- * \param x The input tensor. 2-D where softmax is performed along the second dimension
+ * \param x The input tensor. Can be any dimension
+ * \param axis The channel axis along which softmax is performed
* \param name The name of the operation
* \param tag The tag to mark the operation
*
* \return A Tensor whose op member is the softmax operation
*/
-inline Tensor softmax(const Tensor& x,
+inline Tensor softmax(const Tensor& x,
+                      int axis = -1,
                       std::string name = "tensor",
                       std::string tag = "softmax_output") {
-  CHECK_EQ(x->shape.size(), 2) << "Softmax requires 2-D input";
+  auto input_shape = x->shape;
+  auto ndim = input_shape.size();
+  if (axis < 0) {
+    axis = ndim + axis;
+  }
+  CHECK_LT(axis, ndim) << "axis parameter should be less than input dim";
-  Expr m = x->shape[0];
-  Expr n = x->shape[1];
+  auto k1 = tvm::reduce_axis(Range(0, input_shape[axis]), "k1");
+  auto k2 = tvm::reduce_axis(Range(0, input_shape[axis]), "k2");
+  auto reduced_shape = MakeReduceTargetShape({axis}, x, false);
-  auto k = tvm::reduce_axis(Range(0, n), "k");
-  auto max_elem = tvm::compute(
-    { m }, [&](Var i) {
-      return tvm::max(x(i, k), Array<IterVar>{ k });
-    });
-  k = tvm::reduce_axis(Range(0, n), "k");
+  auto insert_reduce_index = [axis, ndim](const Array<Var> &indices,
+                                          const IterVar &reduce_index) {
+    Array<Expr> eval_range;
+    int arg_counter = 0;
+    for (size_t i = 0; i < ndim; ++i) {
+      if (i == axis)
+        eval_range.push_back(reduce_index);
+      else
+        eval_range.push_back(indices[arg_counter++]);
+    }
+    return eval_range;
+  };
-  auto expsum = tvm::compute(
-    { m }, [&](Var i) {
-      return tvm::sum(tvm::exp(x(i, k) - max_elem(i)), { k });
-    });
+  auto _compute_max = [&](const Array<Var> &indices) {
+    auto eval_range = insert_reduce_index(indices, k1);
+    return topi::MaxOp(x(eval_range), { k1 });
+  };
-  return tvm::compute(
-    x->shape, [&](Var i, Var j) {
-      return tvm::exp(x(i, j) - max_elem(i)) / expsum(i);
+  auto _compute_expsum = [&](const Tensor &max_elem,
+                             const Array<Var> &indices) {
+    auto eval_range = insert_reduce_index(indices, k2);
+    return tvm::sum(tvm::exp(x(eval_range) - max_elem(indices)), { k2 });
+  };
+  auto _normalize = [&](const Tensor &max_elem, const Tensor &expsum,
+                        const Array<Var> &indices) {
+    Array<Expr> non_reduce_indices;
+    for (size_t i = 0; i < ndim; ++i) {
+      if (i != axis)
+        non_reduce_indices.push_back(indices[i]);
+    }
+    return tvm::exp(x(indices) - max_elem(non_reduce_indices)) /
+           expsum(non_reduce_indices);
+  };
+  auto max_elem = tvm::compute(reduced_shape, _compute_max);
+  auto expsum = tvm::compute(reduced_shape, [&](const Array<Var> &indices) {
+    return _compute_expsum(max_elem, indices);
+  });
+  return tvm::compute(input_shape, [&](const Array<Var> &indices) {
+    return _normalize(max_elem, expsum, indices);
+  });
}
...
...
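Again as a sketch only (not part of this commit): the generalized softmax keeps the numerically stable form exp(x - max) / sum(exp(x - max)), but the max and sum are now reduced over a chosen axis of an arbitrary-rank tensor instead of being hard-coded to the second axis of a 2-D input. The 3-D shape and helper name below are assumptions; the softmax(x, axis, ...) signature is the one introduced above.

#include "topi/nn/softmax.h"
#include "tvm/tvm.h"

// Hypothetical usage sketch: normalize a (4, 8, 16) tensor along axis 1.
// The new default axis = -1 would normalize over the last dimension, so a
// plain 2-D call such as softmax(x) keeps its previous behaviour.
inline tvm::Tensor example_softmax_axis1() {
  tvm::Tensor x = tvm::placeholder({4, 8, 16}, tvm::Float(32), "x");
  return topi::nn::softmax(x, /*axis=*/1);
}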
topi/src/topi.cc
...
...
@@ -320,7 +320,7 @@ TVM_REGISTER_GLOBAL("topi.nn.pool")
  .set_body([](TVMArgs args, TVMRetValue *rv) {
    *rv = nn::pool(args[0], args[1], args[2], args[3],
                   static_cast<nn::PoolType>(static_cast<int>(args[4])),
-                  args[5]);
+                  args[5], args[6]);
  });
TVM_REGISTER_GLOBAL("topi.nn.global_pool")
...
...
@@ -332,7 +332,7 @@ TVM_REGISTER_GLOBAL("topi.nn.global_pool")
/* Ops from nn/softmax.h */
TVM_REGISTER_GLOBAL("topi.nn.softmax")
  .set_body([](TVMArgs args, TVMRetValue *rv) {
-    *rv = nn::softmax(args[0]);
+    *rv = nn::softmax(args[0], args[1]);
  });
TVM_REGISTER_GLOBAL("topi.nn.log_softmax")
...
...
topi/tests/python_cpp/test_topi_pooling.py
...
...
@@ -16,7 +16,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode):
    ph, pw = padding
    A = tvm.placeholder((n, ic, ih, iw), name='A')
    B = topi.cpp.nn.pool(A, [kh, kw], [sh, sw], padding,
-                        pool_code[pool_type], ceil_mode)
+                        pool_code[pool_type], ceil_mode, "NCHW")
    B = topi.cpp.nn.relu(B)
    dtype = A.dtype
...
...
topi/tests/python_cpp/test_topi_softmax.py
...
...
@@ -8,7 +8,7 @@ from topi.util import get_const_tuple
def verify_softmax(m, n):
    A = tvm.placeholder((m, n), name='A')
-    B = topi.cpp.nn.softmax(A)
+    B = topi.cpp.nn.softmax(A, 1)
    # confirm lower works
    s = tvm.create_schedule([B.op])
    tvm.lower(s, [A, B], simple_mode=True)
...
...