Commit 41959ed2 by optima2005, committed by masahi

[TOPI] implement pool3d op (#4478)

* [TOPI] implement pool3d op

* use PoolInferCorrectLayout for both 2d and 3d pooling

* unify MakeMaxPool and MakeAvgPool
parent 8c2d4f65
@@ -406,6 +406,68 @@ struct AdaptivePool2DAttrs : public tvm::AttrsNode<AdaptivePool2DAttrs> {
};

/*! \brief Attributes for 3D max pool operator */
struct MaxPool3DAttrs : public tvm::AttrsNode<MaxPool3DAttrs> {
  Array<IndexExpr> pool_size;
  Array<IndexExpr> strides;
  Array<IndexExpr> padding;
  std::string layout;
  bool ceil_mode;

  TVM_DECLARE_ATTRS(MaxPool3DAttrs, "relay.attrs.MaxPool3DAttrs") {
    TVM_ATTR_FIELD(pool_size)
      .describe("Size of the pooling windows.");
    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1, 1}))
      .describe("Specifies the strides of the pooling.");
    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0, 0}))
      .describe("If padding is non-zero, then the input is implicitly zero-padded. "
                "Padding supports both symmetric and asymmetric modes: "
                "one int : the same padding is used on all sides; "
                "three ints : back, bottom and right use the same padding as "
                "front, top and left; "
                "six ints : padding widths in the order "
                "(front, top, left, back, bottom, right).");
    TVM_ATTR_FIELD(layout).set_default("NCDHW")
      .describe("Dimension ordering of the input data. Can be 'NCDHW', 'NDHWC', etc. "
                "'N', 'C', 'D', 'H' and 'W' stand for batch, channel, depth, height "
                "and width dimensions respectively. Pooling is applied on the 'D', "
                "'H' and 'W' dimensions.");
    TVM_ATTR_FIELD(ceil_mode).set_default(false)
      .describe("When true, use ceil instead of floor to compute the output shape.");
  }
};

/*! \brief Attributes for 3D avg pool operator */
struct AvgPool3DAttrs : public tvm::AttrsNode<AvgPool3DAttrs> {
  Array<IndexExpr> pool_size;
  Array<IndexExpr> strides;
  Array<IndexExpr> padding;
  std::string layout;
  bool ceil_mode;
  bool count_include_pad;

  TVM_DECLARE_ATTRS(AvgPool3DAttrs, "relay.attrs.AvgPool3DAttrs") {
    TVM_ATTR_FIELD(pool_size)
      .describe("Size of the pooling windows.");
    TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1, 1}))
      .describe("Specifies the strides of the pooling.");
    TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0, 0}))
      .describe("If padding is non-zero, then the input is implicitly zero-padded. "
                "Padding supports both symmetric and asymmetric modes: "
                "one int : the same padding is used on all sides; "
                "three ints : back, bottom and right use the same padding as "
                "front, top and left; "
                "six ints : padding widths in the order "
                "(front, top, left, back, bottom, right).");
    TVM_ATTR_FIELD(layout).set_default("NCDHW")
      .describe("Dimension ordering of the input data. Can be 'NCDHW', 'NDHWC', etc. "
                "'N', 'C', 'D', 'H' and 'W' stand for batch, channel, depth, height "
                "and width dimensions respectively. Pooling is applied on the 'D', "
                "'H' and 'W' dimensions.");
    TVM_ATTR_FIELD(ceil_mode).set_default(false)
      .describe("When true, use ceil instead of floor to compute the output shape.");
    TVM_ATTR_FIELD(count_include_pad).set_default(false)
      .describe("When true, padding is included when computing the average.");
  }
};
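The two structs above differ only in count_include_pad; they share the same padding convention and ceil_mode semantics. As a rough illustration (this helper is not part of the commit), the output extent along one pooled dimension follows the usual pooling arithmetic, mirroring the shape asserts in the tests further down:

import math

def pool3d_out_dim(in_size, kernel, stride, pad_before, pad_after, ceil_mode):
    # Output extent along one pooled dimension; ceil_mode picks ceil
    # instead of floor before the final +1, as the attrs above describe.
    size = float(in_size + pad_before + pad_after - kernel)
    rounded = math.ceil(size / stride) if ceil_mode else math.floor(size / stride)
    return int(rounded) + 1

# e.g. a 32-deep input, kernel 2, stride 2, no padding -> 16 output slices
assert pool3d_out_dim(32, 2, 2, 0, 0, ceil_mode=False) == 16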

/*! \brief Attributes for dense operator */
struct DenseAttrs : public tvm::AttrsNode<DenseAttrs> {
  IndexExpr units;
@@ -216,3 +216,60 @@ def adaptive_pool(data,
        n-D in the same layout
    """
    return cpp.nn.adaptive_pool(data, output_size, POOL_TYPE_CODE[pool_type], layout)


def pool3d(data,
           kernel,
           stride,
           padding,
           pool_type,
           ceil_mode=False,
           layout="NCDHW",
           count_include_pad=True):
    """Perform pooling on the depth, height and width dimensions of data.

    The depth, height and width dimensions are determined according to the layout
    string, in which 'D', 'H' and 'W' mean depth, height and width respectively.
    The depth, height and width dimensions cannot be split.
    For example, NCDHW, NCDHW16c, etc. are valid for pool,
    while NCDHW16d, NCDHW16h and NCDHW16w are not.
    See parameter `layout` for more information about the layout string convention.

    Parameters
    ----------
    data : tvm.Tensor
        n-D with shape of layout

    kernel : list/tuple of three ints
        Kernel size, [kernel_depth, kernel_height, kernel_width]

    stride : list/tuple of three ints
        Stride size, [stride_depth, stride_height, stride_width]

    padding : list/tuple of six ints
        Pad size, [pad_front, pad_top, pad_left, pad_back, pad_bottom, pad_right]

    pool_type : str
        Pool type, 'max' or 'avg'

    ceil_mode : bool
        Whether to use ceil when calculating the output size.

    layout : string
        Layout of the input data.
        The layout is supposed to be composed of upper cases, lower cases and numbers,
        where an upper case indicates a dimension and
        the corresponding lower case with a factor size indicates a split dimension.
        For example, NCDHW16c can describe a 6-D tensor of
        [batch_size, channel, depth, height, width, channel_block],
        in which channel_block=16 is a split of the channel dimension.

    count_include_pad : bool
        Whether to include padding in the calculation when pool_type is 'avg'.

    Returns
    -------
    output : tvm.Tensor
        n-D in the same layout
    """
    return cpp.nn.pool3d(data, kernel, stride, padding,
                         POOL_TYPE_CODE[pool_type], ceil_mode, layout, count_include_pad)
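For reference, a minimal end-to-end sketch of this API on the LLVM target, patterned after the test added in this commit (the shapes here are illustrative):

import numpy as np
import tvm
import topi

# 5-D input in NCDHW layout: batch, channel, depth, height, width.
A = tvm.placeholder((1, 16, 32, 32, 32), name='A')
B = topi.nn.pool3d(A, kernel=[2, 2, 2], stride=[2, 2, 2],
                   padding=[0, 0, 0, 0, 0, 0], pool_type='max',
                   ceil_mode=False, layout='NCDHW')

with tvm.target.create('llvm'):
    s = topi.generic.schedule_pool(B, 'NCDHW')

f = tvm.build(s, [A, B], 'llvm')
ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=(1, 16, 32, 32, 32)).astype('float32'), ctx)
b = tvm.nd.array(np.zeros((1, 16, 16, 16, 16), dtype='float32'), ctx)
f(a, b)  # b now holds the 3-D max-pooled output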
@@ -535,6 +535,13 @@ TVM_REGISTER_GLOBAL("topi.nn.adaptive_pool")
                          args[3]);
});
TVM_REGISTER_GLOBAL("topi.nn.pool3d")
.set_body([](TVMArgs args, TVMRetValue *rv) {
*rv = nn::pool3d(args[0], args[1], args[2], args[3],
static_cast<nn::PoolType>(static_cast<int>(args[4])),
args[5], args[6], args[7]);
});
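The Python wrapper above reaches this implementation through TVM's packed-function registry: cpp.nn.pool3d resolves to the global registered here. A sketch of the same lookup done by hand, assuming topi's usual pool-type coding (POOL_TYPE_CODE maps 'avg' to 0 and 'max' to 1 in topi/python/topi/nn/pooling.py):

import tvm
import topi
from topi.nn.pooling import POOL_TYPE_CODE

# Look up the packed function by the name used in TVM_REGISTER_GLOBAL above.
fpool3d = tvm.get_global_func("topi.nn.pool3d")

A = tvm.placeholder((1, 16, 32, 32, 32), name='A')
# Positional arguments match the lambda: data, kernel, stride, padding,
# pool type code, ceil_mode, layout, count_include_pad.
B = fpool3d(A, [2, 2, 2], [2, 2, 2], [0, 0, 0, 0, 0, 0],
            POOL_TYPE_CODE['max'], False, 'NCDHW', True)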
/* Ops from nn/softmax.h */
TVM_REGISTER_GLOBAL("topi.nn.softmax")
.set_body([](TVMArgs args, TVMRetValue *rv) {
@@ -599,7 +606,7 @@ TVM_REGISTER_GLOBAL("topi.generic.schedule_injective")
TVM_REGISTER_GLOBAL("topi.generic.schedule_injective_from_existing")
.set_body([](TVMArgs args, TVMRetValue *rv) {
  *rv = topi::generic::schedule_injective_from_existing(args[0], args[1]);
});
/* x86 schedules */
TVM_REGISTER_GLOBAL("topi.x86.schedule_binarize_pack")
@@ -629,7 +636,7 @@ TVM_REGISTER_GLOBAL("topi.x86.schedule_injective")
TVM_REGISTER_GLOBAL("topi.x86.schedule_injective_from_existing")
.set_body([](TVMArgs args, TVMRetValue *rv) {
  *rv = topi::x86::schedule_injective_from_existing(args[0], args[1]);
});
/* ROCm schedules */
TVM_REGISTER_GLOBAL("topi.rocm.dense_cuda")
@@ -701,7 +708,7 @@ TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective")
TVM_REGISTER_GLOBAL("topi.cuda.schedule_injective_from_existing")
.set_body([](TVMArgs args, TVMRetValue *rv) {
  *rv = topi::cuda::schedule_injective_from_existing(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("topi.cuda.schedule_pool")
.set_body([](TVMArgs args, TVMRetValue *rv) {
@@ -824,7 +831,8 @@ inline PackedFunc WrapScheduleFromExisting(FTVMScheduleFromExistingBuilder build
TVM_REGISTER_GENERIC_FUNC(schedule_injective_from_existing)
.set_default(WrapScheduleFromExisting(topi::generic::schedule_injective_from_existing))
.register_func({ "cpu" }, WrapScheduleFromExisting(topi::x86::schedule_injective_from_existing))
.register_func({ "cuda", "gpu" }, WrapScheduleFromExisting(topi::cuda::schedule_injective_from_existing));
.register_func({ "cuda", "gpu" }, WrapScheduleFromExisting(
topi::cuda::schedule_injective_from_existing));
/*! \brief Builder function for instantiating dense ops. */
using FTVMDenseOpBuilder = std::function<tvm::Tensor(const Target& target,
@@ -264,9 +264,96 @@ def test_adaptive_pool():
    verify_adaptive_pool((1, 14, 56, 78), (34, 13), "max")
    verify_adaptive_pool((1, 5, 46, 97), (4, 96), "avg")


def verify_pool3d(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True):
    iz = iw = ih
    kz = kw = kh
    sz = sw = sh
    pf, pt, pl, pk, pb, pr = padding
    layout = "NCDHW"
    A = tvm.placeholder((n, ic, iz, ih, iw), name='A')
    B = topi.nn.pool3d(A, kernel=[kz, kh, kw], stride=[sz, sh, sw], padding=padding,
                       pool_type=pool_type, ceil_mode=ceil_mode,
                       layout="NCDHW", count_include_pad=count_include_pad)
    B = topi.nn.relu(B)
    dtype = A.dtype
    bshape = get_const_tuple(B.shape)
    ashape = get_const_tuple(A.shape)
    if ceil_mode:
        assert bshape[2] == int(math.ceil(float(ashape[2] - kz + pf + pk) / sz) + 1)
        assert bshape[3] == int(math.ceil(float(ashape[3] - kh + pt + pb) / sh) + 1)
        assert bshape[4] == int(math.ceil(float(ashape[4] - kw + pl + pr) / sw) + 1)
    else:
        assert bshape[2] == int(math.floor(float(ashape[2] - kz + pf + pk) / sz) + 1)
        assert bshape[3] == int(math.floor(float(ashape[3] - kh + pt + pb) / sh) + 1)
        assert bshape[4] == int(math.floor(float(ashape[4] - kw + pl + pr) / sw) + 1)

    a_np = np.random.uniform(low=0.001, size=(n, ic, iz, ih, iw)).astype(dtype)
    pad_np = np.zeros(shape=(n, ic, iz+pf+pk, ih+pt+pb, iw+pl+pr)).astype(dtype)
    no_zero = (range(n), range(ic), range(pf, iz+pf), range(pt, ih+pt), range(pl, iw+pl))
    pad_np[np.ix_(*no_zero)] = a_np
    _, oc, oz, oh, ow = get_const_tuple(B.shape)
    b_np = np.zeros(shape=(n, oc, oz, oh, ow)).astype(dtype)

    if pool_type == 'avg':
        for k in range(oz):
            for i in range(oh):
                for j in range(ow):
                    if count_include_pad:
                        b_np[:, :, k, i, j] = np.mean(
                            pad_np[:, :, k*sz:k*sz+kz, i*sh:i*sh+kh, j*sw:j*sw+kw],
                            axis=(2, 3, 4))
                    else:
                        pad_count = np.sum(
                            pad_np[:, :, k*sz:k*sz+kz, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0,
                            axis=(2, 3, 4))
                        b_np[:, :, k, i, j] = np.sum(
                            pad_np[:, :, k*sz:k*sz+kz, i*sh:i*sh+kh, j*sw:j*sw+kw],
                            axis=(2, 3, 4)) / np.maximum(pad_count, 1)
    elif pool_type == 'max':
        for k in range(oz):
            for i in range(oh):
                for j in range(ow):
                    b_np[:, :, k, i, j] = np.max(
                        pad_np[:, :, k*sz:k*sz+kz, i*sh:i*sh+kh, j*sw:j*sw+kw],
                        axis=(2, 3, 4))
    b_np = np.maximum(b_np, 0.0)

    def check_device(device):
        ctx = tvm.context(device, 0)
        if not ctx.exist:
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.create(device):
            s = topi.generic.schedule_pool(B, layout)

        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)


def test_pool3d():
    verify_pool3d(1, 256, 32, 2, 2, [0, 0, 0, 0, 0, 0], 'avg', False, True)
    verify_pool3d(1, 256, 31, 3, 3, [1, 1, 2, 2, 2, 1], 'avg', False, True)
    verify_pool3d(1, 256, 32, 2, 2, [1, 1, 2, 2, 2, 1], 'avg', False, False)
    verify_pool3d(1, 256, 31, 4, 4, [3, 3, 3, 3, 3, 3], 'avg', False, False)
    verify_pool3d(1, 256, 31, 4, 4, [0, 0, 0, 0, 0, 0], 'avg', False, False)
    verify_pool3d(1, 256, 32, 2, 2, [0, 0, 0, 0, 0, 0], 'max', False)
    verify_pool3d(1, 256, 31, 3, 3, [2, 2, 1, 1, 1, 2], 'max', False)
    verify_pool3d(1, 256, 31, 3, 3, [2, 2, 1, 1, 1, 2], 'max', True)
    verify_pool3d(1, 256, 31, 3, 3, [2, 1, 0, 5, 4, 3], 'avg', False, True)
    verify_pool3d(1, 256, 32, 2, 2, [0, 5, 4, 3, 2, 1], 'avg', False, False)
    verify_pool3d(1, 256, 31, 3, 3, [1, 0, 5, 4, 3, 2], 'max', False)
    verify_pool3d(1, 256, 31, 3, 3, [3, 2, 1, 0, 5, 4], 'max', True)


if __name__ == "__main__":
    test_pool()
    test_pool_grad()
    test_global_pool()
    test_adaptive_pool()
    test_pool3d()