Commit 6d4eb2e8 by masahi Committed by Tianqi Chen

[TOPI] update c++ pool and softmax (#905)

* update c++ pool and softmax

* clean up reduce axis
parent 99c96962
......@@ -36,7 +36,8 @@ enum PoolType : int {
*
* \return The output tensor in NCHW order
*/
inline Tensor pool(const Tensor& x,
inline Tensor pool_nchw(const Tensor& x,
const Array<Expr>& kernel_size,
const Array<Expr>& stride_size,
const Array<Expr>& padding_size,
......@@ -113,6 +114,123 @@ inline Tensor pool(const Tensor& x,
}
/*!
* \brief Perform pooling on data in NHWC order
*
* \param x The input tensor in NHWC order
* \param kernel_size Vector of two ints: {kernel_height, kernel_width}
* \param stride_size Vector of two ints: {stride_height, stride_width}
* \param padding_size Vector of two ints: {padding_height, padding_width}
* \param pool_type The type of pooling operator
* \param ceil_mode Whether to use ceil when calculating the output size
*
* \return The output tensor in NHWC order
*/
inline Tensor pool_nhwc(const Tensor& x,
const Array<Expr>& kernel_size,
const Array<Expr>& stride_size,
const Array<Expr>& padding_size,
PoolType pool_type,
bool ceil_mode) {
CHECK_EQ(x->shape.size(), 4) << "Pooling input must be 4-D";
CHECK_EQ(kernel_size.size(), 2) << "Pooling kernel_size must have 2 elements";
CHECK_EQ(stride_size.size(), 2) << "Pooling stride_size must have 2 elements";
CHECK_EQ(padding_size.size(), 2) << "Pooling padding_size must have 2 elements";
auto kernel_height = kernel_size[0];
auto kernel_width = kernel_size[1];
auto stride_height = stride_size[0];
auto stride_width = stride_size[1];
auto padding_height = padding_size[0];
auto padding_width = padding_size[1];
auto batch = x->shape[0];
auto height = x->shape[1];
auto width = x->shape[2];
auto channel = x->shape[3];
auto pad_tuple = detail::GetPadTuple(padding_height, padding_width);
auto pad_top = pad_tuple[0];
auto pad_left = pad_tuple[1];
auto pad_down = pad_tuple[2];
auto pad_right = pad_tuple[3];
if (ceil_mode) {
// Additional padding to ensure we do ceil instead of floor when
// dividing by stride.
pad_down += stride_height - 1;
pad_right += stride_width - 1;
}
Array<Expr> pad_before{ 0, pad_top, pad_left, 0};
Array<Expr> pad_after{ 0, pad_down, pad_right, 0};
auto out_height = tvm::ir::Simplify(
(height - kernel_height + pad_top + pad_down) / stride_height + 1);
auto out_width = tvm::ir::Simplify(
(width - kernel_width + pad_left + pad_right) / stride_width + 1);
auto dheight = tvm::reduce_axis(Range(0, kernel_height));
auto dwidth = tvm::reduce_axis(Range(0, kernel_width));
if (pool_type == kMaxPool) {
auto temp = pad(x, pad_before, pad_after, x->dtype.min(), "pad_temp");
return tvm::compute(
{ batch, out_height, out_width, channel },
[&](Var n, Var h, Var w, Var c) {
return tvm::max(temp(n, h * stride_height + dheight, w * stride_width + dwidth, c),
{ dheight, dwidth });
}, "tensor", "pool_max");
} else if (pool_type == kAvgPool) {
auto temp = pad(x, pad_before, pad_after, 0, "pad_temp");
auto tsum = tvm::compute(
{ batch, out_height, out_width, channel },
[&](Var n, Var h, Var w, Var c) {
return tvm::sum(temp(n, h * stride_height + dheight, w * stride_width + dwidth, c),
{ dheight, dwidth });
}, "tensor", "pool_avg");
return tvm::compute(
{ batch, out_height, out_width, channel },
[&](Var n, Var h, Var w, Var c) {
return tsum(n, h, w, c) / (kernel_height * kernel_width);
}, "tensor", kElementWise);
} else {
LOG(ERROR) << "Unrecognized pool_type: " << pool_type;
return x;
}
}
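A quick sanity check of the ceil_mode padding above (illustrative numbers, not part of this commit): with height = 6, kernel_height = 3, stride_height = 2 and zero padding, floor division gives (6 - 3) / 2 + 1 = 2 output rows, whereas ceil_mode adds stride_height - 1 = 1 to pad_down so the same expression evaluates to (6 - 3 + 1) / 2 + 1 = 3, matching ceil((6 - 3) / 2) + 1.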
/*!
* \brief Perform pooling on data
*
* \param x The input tensor in NCHW or NHWC order
* \param kernel_size Vector of two ints: {kernel_height, kernel_width}
* \param stride_size Vector of two ints: {stride_height, stride_width}
* \param padding_size Vector of two ints: {padding_height, padding_width}
* \param pool_type The type of pooling operator
* \param ceil_mode Whether to use ceil when calculating the output size
* \param layout The input layout, either "NCHW" or "NHWC" (defaults to "NCHW")
*
* \return The output tensor in the same layout as the input
*/
inline Tensor pool(const Tensor& x,
const Array<Expr>& kernel_size,
const Array<Expr>& stride_size,
const Array<Expr>& padding_size,
PoolType pool_type,
bool ceil_mode,
const std::string& layout = "NCHW") {
CHECK(layout == "NCHW" || layout == "NHWC") << "Unsupported layout.";
if (layout == "NCHW")
return pool_nchw(x, kernel_size, stride_size, padding_size, pool_type, ceil_mode);
else
return pool_nhwc(x, kernel_size, stride_size, padding_size, pool_type, ceil_mode);
}
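As a rough C++ usage sketch of the new layout-aware dispatcher (the include paths, placeholder shape, and function name below are illustrative assumptions, not part of this commit):
#include <tvm/tvm.h>
#include <topi/nn/pooling.h>
// Minimal sketch: 2x2 max pooling over an NHWC tensor via the new layout argument.
inline tvm::Tensor nhwc_maxpool_example() {
  using namespace tvm;
  // Assumed NHWC input: batch = 1, height = 224, width = 224, channel = 16
  Tensor x = placeholder({1, 224, 224, 16}, Float(32), "x");
  // kernel {2, 2}, stride {2, 2}, no padding, floor mode, NHWC layout
  return topi::nn::pool(x, {2, 2}, {2, 2}, {0, 0},
                        topi::nn::kMaxPool, false, "NHWC");
}
Passing "NCHW" (or omitting the layout argument) routes the same call through pool_nchw instead.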
/*!
* \brief Perform global pooling on data in NCHW order
*
* \param x The input tensor in NCHW order
......
......@@ -9,6 +9,7 @@
#include <algorithm>
#include <string>
#include "topi/reduction.h"
#include "topi/tags.h"
#include "tvm/tvm.h"
......@@ -19,33 +20,69 @@ using namespace tvm;
/*!
* \brief Softmax activation
*
* \param x The input tensor. 2-D where softmax is performed along the second dimension
* \param x The input tensor. Can have any number of dimensions
* \param axis The channel axis along which softmax is performed
* \param name The name of the operation
* \param tag The tag to mark the operation
*
* \return A Tensor whose op member is the softmax operation
*/
inline Tensor softmax(const Tensor& x,
inline Tensor softmax(const Tensor &x,
int axis = -1,
std::string name = "tensor",
std::string tag = "softmax_output") {
CHECK_EQ(x->shape.size(), 2) << "Softmax requires 2-D input";
auto input_shape = x->shape;
auto ndim = input_shape.size();
if (axis < 0) {
axis = ndim + axis;
}
CHECK_LT(axis, ndim) << "axis parameter should be less than input dim";
Expr m = x->shape[0];
Expr n = x->shape[1];
auto k1 = tvm::reduce_axis(Range(0, input_shape[axis]), "k1");
auto k2 = tvm::reduce_axis(Range(0, input_shape[axis]), "k2");
auto reduced_shape = MakeReduceTargetShape({axis}, x, false);
auto k = tvm::reduce_axis(Range(0, n), "k");
auto max_elem = tvm::compute(
{ m }, [&](Var i) {
return tvm::max(x(i, k), Array<IterVar>{ k }); });
k = tvm::reduce_axis(Range(0, n), "k");
auto insert_reduce_index = [axis, ndim](const Array<Var> &indices,
const IterVar &reduce_index) {
Array<Expr> eval_range;
int arg_counter = 0;
for (size_t i = 0; i < ndim; ++i) {
if (i == axis)
eval_range.push_back(reduce_index);
else
eval_range.push_back(indices[arg_counter++]);
}
return eval_range;
};
auto expsum = tvm::compute(
{ m }, [&](Var i) {
return tvm::sum(tvm::exp(x(i, k) - max_elem(i)), { k }); });
auto _compute_max = [&](const Array<Var> &indices) {
auto eval_range = insert_reduce_index(indices, k1);
return topi::MaxOp(x(eval_range), {k1});
};
return tvm::compute(
x->shape, [&](Var i, Var j) {
return tvm::exp(x(i, j) - max_elem(i)) / expsum(i);
auto _compute_expsum = [&](const Tensor &max_elem,
const Array<Var> &indices) {
auto eval_range = insert_reduce_index(indices, k2);
return tvm::sum(tvm::exp(x(eval_range) - max_elem(indices)), {k2});
};
auto _normalize = [&](const Tensor &max_elem, const Tensor &expsum,
const Array<Var> &indices) {
Array<Expr> non_reduce_indices;
for (size_t i = 0; i < ndim; ++i) {
if (i != axis)
non_reduce_indices.push_back(indices[i]);
}
return tvm::exp(x(indices) - max_elem(non_reduce_indices)) /
expsum(non_reduce_indices);
};
auto max_elem = tvm::compute(reduced_shape, _compute_max);
auto expsum = tvm::compute(reduced_shape, [&](const Array<Var> &indices) {
return _compute_expsum(max_elem, indices);
});
return tvm::compute(input_shape, [&](const Array<Var> &indices) {
return _normalize(max_elem, expsum, indices);
});
}
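For the generalized softmax, a minimal C++ sketch (the shape and function name are assumptions for illustration only):
#include <tvm/tvm.h>
#include <topi/nn/softmax.h>
// Minimal sketch: softmax over one axis of a 3-D tensor, which the old 2-D-only version rejected.
inline tvm::Tensor softmax_axis_example() {
  using namespace tvm;
  // Assumed 3-D input of shape {4, 8, 16}
  Tensor x = placeholder({4, 8, 16}, Float(32), "x");
  // Normalize over the middle axis; axis = -1 (the default) would pick the last axis
  return topi::nn::softmax(x, /*axis=*/1);
}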
......
......@@ -320,7 +320,7 @@ TVM_REGISTER_GLOBAL("topi.nn.pool")
.set_body([](TVMArgs args, TVMRetValue *rv) {
*rv = nn::pool(args[0], args[1], args[2], args[3],
static_cast<nn::PoolType>(static_cast<int>(args[4])),
args[5]);
args[5], args[6]);
});
TVM_REGISTER_GLOBAL("topi.nn.global_pool")
......@@ -332,7 +332,7 @@ TVM_REGISTER_GLOBAL("topi.nn.global_pool")
/* Ops from nn/softmax.h */
TVM_REGISTER_GLOBAL("topi.nn.softmax")
.set_body([](TVMArgs args, TVMRetValue *rv) {
*rv = nn::softmax(args[0]);
*rv = nn::softmax(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("topi.nn.log_softmax")
......
......@@ -16,7 +16,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode):
ph, pw = padding
A = tvm.placeholder((n, ic, ih, iw), name='A')
B = topi.cpp.nn.pool(A, [kh, kw], [sh, sw], padding,
pool_code[pool_type], ceil_mode)
pool_code[pool_type], ceil_mode, "NCHW")
B = topi.cpp.nn.relu(B)
dtype = A.dtype
......
......@@ -8,7 +8,7 @@ from topi.util import get_const_tuple
def verify_softmax(m, n):
A = tvm.placeholder((m, n), name='A')
B = topi.cpp.nn.softmax(A)
B = topi.cpp.nn.softmax(A, 1)
# confirm lower works
s = tvm.create_schedule([B.op])
tvm.lower(s, [A, B], simple_mode=True)
......