Commit 55592ece by Tianqi Chen

[TOP] complete level2 (#8)

* [TOP] complete level2

* [TOP] add split
parent 13388655
@@ -8,20 +8,20 @@
#include <dmlc/base.h>
#include <dmlc/parameter.h>
#include <nnvm/tuple.h>
namespace nnvm {
namespace top {
// Layout flags used by spatial conv and pooling.
enum LayoutFlag {
kNCHW = 0,
kNCHW,
kNHWC,
kCHWN,
kNCW = 1 << 3,
kNCW,
kNWC,
kCWN,
kNCDHW = 1 << 5,
kNCDHW,
kNDHWC,
kCDHWN
};
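// A usage sketch (mirroring the inference code later in this commit):
// these flags encode the dimension order of conv/pool tensors, with a
// 4-d NCHW family for 2-d spatial ops, a 3-d NCW family for 1-d ops,
// and a 5-d NCDHW family for 3-d ops. Shape inference normalizes a
// shape to the canonical member of its family with ConvertLayout
// (defined in nn_common.h below), computes there, and converts back:
//   dshape = ConvertLayout(dshape, param.layout, kNCHW);
//   ... compute oshape in NCHW ...
//   oshape = ConvertLayout(oshape, kNCHW, param.layout);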
@@ -101,7 +101,7 @@ struct LeakyReLUParam : public dmlc::Parameter<LeakyReLUParam> {
}
};
struct Conv2DParam : public dmlc::Parameter<Conv2DParam> {
struct ConvParam : public dmlc::Parameter<ConvParam> {
int channels;
TShape kernel_size;
TShape strides;
@@ -111,7 +111,7 @@ struct Conv2DParam : public dmlc::Parameter<Conv2DParam> {
int layout;
bool use_bias;
DMLC_DECLARE_PARAMETER(Conv2DParam) {
DMLC_DECLARE_PARAMETER(ConvParam) {
DMLC_DECLARE_FIELD(channels)
.describe("The dimensionality of the output space"
"i.e. the number of output channels in the convolution.");
@@ -141,10 +141,14 @@ struct Conv2DParam : public dmlc::Parameter<Conv2DParam> {
DMLC_DECLARE_FIELD(use_bias).set_default(true)
.describe("Whether the layer uses a bias vector.");
}
// constants
static constexpr int kData = 0;
static constexpr int kWeight = 1;
static constexpr int kBias = 2;
};
struct Conv2DTransposeParam : public dmlc::Parameter<Conv2DTransposeParam> {
struct ConvTransposeParam : public dmlc::Parameter<ConvTransposeParam> {
int channels;
TShape kernel_size;
TShape strides;
@@ -155,7 +159,7 @@ struct Conv2DTransposeParam : public dmlc::Parameter<Conv2DTransposeParam> {
int layout;
bool use_bias;
DMLC_DECLARE_PARAMETER(Conv2DTransposeParam) {
DMLC_DECLARE_PARAMETER(ConvTransposeParam) {
DMLC_DECLARE_FIELD(channels)
.describe("The dimensionality of the output space"
"i.e. the number of output channels in the convolution.");
@@ -187,9 +191,14 @@ struct Conv2DTransposeParam : public dmlc::Parameter<Conv2DTransposeParam> {
DMLC_DECLARE_FIELD(use_bias).set_default(true)
.describe("Whether the layer uses a bias vector.");
}
// constants
static constexpr int kData = 0;
static constexpr int kWeight = 1;
static constexpr int kBias = 2;
};
struct Pool2DParam : public dmlc::Parameter<Pool2DParam> {
struct PoolParam : public dmlc::Parameter<PoolParam> {
TShape pool_size;
TShape strides;
TShape padding;
@@ -197,7 +206,7 @@ struct Pool2DParam : public dmlc::Parameter<Pool2DParam> {
int layout;
bool ceil_mode;
DMLC_DECLARE_PARAMETER(Pool2DParam) {
DMLC_DECLARE_PARAMETER(PoolParam) {
DMLC_DECLARE_FIELD(pool_size)
.describe("Size of the pooling windows..");
DMLC_DECLARE_FIELD(strides).set_default(TShape({1, 1}))
@@ -225,10 +234,10 @@ struct Pool2DParam : public dmlc::Parameter<Pool2DParam> {
};
struct GlobalPool2DParam : public dmlc::Parameter<GlobalPool2DParam> {
struct GlobalPoolParam : public dmlc::Parameter<GlobalPoolParam> {
int layout;
DMLC_DECLARE_PARAMETER(GlobalPool2DParam) {
DMLC_DECLARE_PARAMETER(GlobalPoolParam) {
DMLC_DECLARE_FIELD(layout)
.add_enum("NCHW", kNCHW)
.add_enum("NHWC", kNHWC)
......
@@ -6,6 +6,10 @@
#ifndef NNVM_TOP_TENSOR_H_
#define NNVM_TOP_TENSOR_H_
#include <dmlc/base.h>
#include <dmlc/parameter.h>
#include <nnvm/tuple.h>
namespace nnvm {
namespace top {
@@ -17,6 +21,19 @@ struct ConcatenateParam : public dmlc::Parameter<ConcatenateParam> {
}
};
struct SplitParam : public dmlc::Parameter<SplitParam> {
// Numpy-style parameter: a single integer gives the number of equal
// sections; a tuple gives explicit section sizes.
Tuple<int> indices_or_sections;
int axis;
DMLC_DECLARE_PARAMETER(SplitParam) {
DMLC_DECLARE_FIELD(indices_or_sections)
.describe("Number of outputs to be splitted");
DMLC_DECLARE_FIELD(axis).set_lower_bound(0).set_default(1)
.describe("the axis to be splitted.");
}
};
enum TypeFlag {
kFloat32 = 0,
kFloat64 = 1,
@@ -56,8 +73,6 @@ struct ScalarParam : public dmlc::Parameter<ScalarParam> {
}
};
} // namespace top
} // namespace nnvm
......
/*!
* Copyright (c) 2017 by Contributors
* \file convolution.cc
* \brief Convolution operators
*/
#include <nnvm/op.h>
#include <nnvm/node.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/top/nn.h>
#include "./nn_common.h"
#include "../op_common.h"
#include "../elemwise_op_common.h"
namespace nnvm {
namespace top {
// conv2d
DMLC_REGISTER_PARAMETER(ConvParam);
inline bool Conv2DInferShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape>* in_shape,
std::vector<TShape>* out_shape) {
const ConvParam& param = nnvm::get<ConvParam>(attrs.parsed);
if (param.use_bias) {
CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]";
} else {
CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]";
}
CHECK_EQ(out_shape->size(), 1U);
TShape dshape = in_shape->at(0);
if (dshape.ndim() == 0) return false;
dshape = ConvertLayout(dshape, param.layout, kNCHW);
CHECK_EQ(dshape.ndim(), 4U) << "Input data should be 4D";
CHECK_EQ(param.kernel_size.ndim(), 2U);
CHECK_EQ(param.strides.ndim(), 2U)
<< "incorrect stride size: " << param.strides;
CHECK_EQ(param.dilation.ndim(), 2U)
<< "incorrect dilate size: " << param.dilation;
CHECK_EQ(dshape[1] % param.groups, 0U)
<< "input channels must be divisible by the number of groups";
CHECK_EQ(param.channels % param.groups, 0U)
<< "output channels must be divisible by the number of groups";
TShape wshape({param.channels / param.groups,
dshape[1] / param.groups,
param.kernel_size[0],
param.kernel_size[1]});
wshape = ConvertLayout(wshape, kNCHW, param.layout);
wshape[0] *= param.groups;
NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, ConvParam::kWeight, wshape);
if (param.use_bias) {
NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape,
ConvParam::kBias, TShape({param.channels}));
}
// dilation
dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0];
dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1];
TShape oshape({dshape[0], param.channels, 0, 0});
if (dshape[2] != 0) {
oshape[2] = (dshape[2] + param.padding[0] * 2 - dilated_ksize_y) / param.strides[0] + 1;
}
if (dshape[3] != 0) {
oshape[3] = (dshape[3] + param.padding[1] * 2 - dilated_ksize_x) / param.strides[1] + 1;
}
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0,
ConvertLayout(oshape, kNCHW, param.layout));
// Perform incomplete shape inference. Fill in the missing values in data shape.
// 1) We can always fill in the batch_size.
// 2) We can back-calculate the input height/width if the corresponding stride is 1.
oshape = ConvertLayout((*out_shape)[0], param.layout, kNCHW);
dshape[0] = oshape[0];
if (oshape[2] && param.strides[0] == 1) {
dshape[2] = oshape[2] + dilated_ksize_y - 1 - 2 * param.padding[0];
}
if (oshape[3] && param.strides[1] == 1) {
dshape[3] = oshape[3] + dilated_ksize_x - 1 - 2 * param.padding[1];
}
NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, ConvParam::kData,
ConvertLayout(dshape, kNCHW, param.layout));
// Check whether the kernel sizes are valid
if (dshape[2] != 0) {
CHECK_LE(dilated_ksize_y, dshape[2] + 2 * param.padding[0])
<< "kernel size exceed input";
}
if (dshape[3] != 0) {
CHECK_LE(dilated_ksize_x, dshape[3] + 2 * param.padding[1])
<< "kernel size exceed input";
}
return true;
}
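// Worked example of the arithmetic above (a sketch matching test_conv2d()
// later in this commit, assuming the defaults strides=(1, 1),
// padding=(0, 0), dilation=(1, 1)): data (4, 10, 12, 4) in NHWC,
// channels=5, kernel_size=(3, 5):
//   dilated_ksize_y = 1 + (3 - 1) * 1 = 3
//   dilated_ksize_x = 1 + (5 - 1) * 1 = 5
//   out_height = (10 + 0 - 3) / 1 + 1 = 8
//   out_width  = (12 + 0 - 5) / 1 + 1 = 8
// giving (4, 8, 8, 5) in NHWC, as asserted in test_conv2d().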
NNVM_REGISTER_OP(conv2d)
.describe(R"code(2D convolution layer (e.g. spatial convolution over images).
This layer creates a convolution kernel that is convolved
with the layer input to produce a tensor of
outputs. If `use_bias` is True,
a bias vector is created and added to the outputs.
- **data**: This depends on the `layout` parameter. Input is 4D array of shape
(batch_size, in_channels, height, width) if `layout` is `NCHW`.
- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
- **bias**: (channels,)
- **out**: This depends on the `layout` parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "4D Tensor", "Input data.")
.add_argument("weight", "4D Tensor", "Weight matrix.")
.add_argument("bias", "1D Tensor", "Bias parameter.")
.add_arguments(ConvParam::__FIELDS__())
.set_attr_parser(ParamParser<ConvParam>)
.set_num_outputs(1)
.set_num_inputs(UseBiasNumInputs<ConvParam>)
.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<ConvParam>)
.set_attr<FInferShape>("FInferShape", Conv2DInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
.set_support_level(2);
DMLC_REGISTER_PARAMETER(ConvTransposeParam);
inline bool ConvTransposeInferShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape>* in_shape,
std::vector<TShape>* out_shape) {
const ConvTransposeParam& param = nnvm::get<ConvTransposeParam>(attrs.parsed);
if (param.use_bias) {
CHECK_EQ(in_shape->size(), 3U) << "Input:[data, weight, bias]";
} else {
CHECK_EQ(in_shape->size(), 2U) << "Input:[data, weight]";
}
CHECK_EQ(out_shape->size(), 1U);
const TShape& dshape = (*in_shape)[ConvTransposeParam::kData];
if (dshape.ndim() == 0) return false;
TShape dshape_nchw = ConvertLayout(dshape, param.layout, kNCHW);
CHECK_EQ(dshape_nchw[1] % param.groups, 0U)
<< "input channels must be divisible by the number of groups";
CHECK_EQ(param.channels % param.groups, 0U)
<< "output channels must be divisible by the number of groups";
CHECK_EQ(param.kernel_size.ndim(), 2U)
<< "incorrect kernel size: " << param.kernel_size;
CHECK_EQ(param.strides.ndim(), 2U)
<< "incorrect stride size: " << param.strides;
CHECK_EQ(param.dilation.ndim(), 2U)
<< "incorrect dilate size: " << param.dilation;
TShape wshape({dshape_nchw[1],
param.channels / param.groups,
param.kernel_size[0], param.kernel_size[1]});
wshape = ConvertLayout(wshape, kNCHW, param.layout);
NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape, ConvTransposeParam::kWeight, wshape);
if (param.use_bias) {
NNVM_ASSIGN_INPUT_SHAPE(attrs, *in_shape,
ConvTransposeParam::kBias,
TShape({param.channels}));
}
// dilation
dim_t dilated_ksize_y = 1 + (param.kernel_size[0] - 1) * param.dilation[0];
dim_t dilated_ksize_x = 1 + (param.kernel_size[1] - 1) * param.dilation[1];
// output shape.
TShape oshape({dshape_nchw[0], param.channels, 0, 0});
oshape[2] = (param.strides[0] * (dshape_nchw[2] - 1) + dilated_ksize_y -
2 * param.padding[0] + param.output_padding[0]);
oshape[3] = (param.strides[1] * (dshape_nchw[3] - 1) + dilated_ksize_x -
2 * param.padding[1] + param.output_padding[1]);
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0,
ConvertLayout(oshape, kNCHW, param.layout));
return true;
}
NNVM_REGISTER_OP(conv2d_transpose)
.describe(R"code(Transposed 2D convolution layer (sometimes called Deconvolution).
The need for transposed convolutions generally arises
from the desire to use a transformation going in the opposite direction
of a normal convolution, i.e., from something that has the shape of the
output of some convolution to something that has the shape of its input
while maintaining a connectivity pattern that is compatible with
said convolution.
- **data**: This depends on the `layout` parameter. Input is 4D array of shape
(batch_size, in_channels, height, width) if `layout` is `NCHW`.
- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1])
- **bias**: (channels,)
- **out**: This depends on the `layout` parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
out_height and out_width are calculated as::
out_height = (height-1)*strides[0]-2*padding[0]+kernel_size[0]+output_padding[0]
out_width = (width-1)*strides[1]-2*padding[1]+kernel_size[1]+output_padding[1]
)code" NNVM_ADD_FILELINE)
.add_argument("data", "4D Tensor", "Input data.")
.add_argument("weight", "4D Tensor", "Weight matrix.")
.add_argument("bias", "1D Tensor", "Bias parameter.")
.add_arguments(ConvTransposeParam::__FIELDS__())
.set_attr_parser(ParamParser<ConvTransposeParam>)
.set_num_outputs(1)
.set_num_inputs(UseBiasNumInputs<ConvTransposeParam>)
.set_attr<FListInputNames>("FListInputNames", UseBiasListInputNames<ConvTransposeParam>)
.set_attr<FInferShape>("FInferShape", ConvTransposeInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
.set_support_level(2);
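// Worked example of the formula above (a sketch matching
// test_conv2d_transpose() later in this commit, assuming the default
// strides=(1, 1)): data (4, 10, 10, 12) in NCHW, channels=15,
// kernel_size=(3, 5), padding=(1, 1), output_padding=(1, 1):
//   out_height = (10 - 1) * 1 - 2 * 1 + 3 + 1 = 11
//   out_width  = (12 - 1) * 1 - 2 * 1 + 5 + 1 = 15
// giving (4, 15, 11, 15), as asserted in test_conv2d_transpose().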
} // namespace top
} // namespace nnvm
/*!
* Copyright (c) 2017 by Contributors
* \file nn_common.h
* \brief Common utilities for nn ops.
*/
#ifndef NNVM_TOP_NN_NN_COMMON_H_
#define NNVM_TOP_NN_NN_COMMON_H_
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <nnvm/top/nn.h>
#include <string>
#include <vector>
#include <algorithm>
namespace nnvm {
namespace top {
template<typename ParamType>
inline uint32_t UseBiasNumInputs(const NodeAttrs& attrs) {
const ParamType& param = get<ParamType>(attrs.parsed);
return param.use_bias ? 3 : 2;
}
template<typename ParamType>
inline std::vector<std::string> UseBiasListInputNames(const NodeAttrs& attrs) {
const ParamType& param = nnvm::get<ParamType>(attrs.parsed);
if (param.use_bias) {
return {"data", "weight", "bias"};
} else {
return {"data", "weight"};
}
}
/*!
* \brief Convert shape in src_layout to shape in dst_layout
* \param src original shape
* \param src_layout layout of original shape
* \param dst_layout target layout
* \return shape in target layout
*/
inline TShape ConvertLayout(TShape src, int src_layout, int dst_layout) {
if (src_layout == dst_layout) return src;
TShape dst = src;
if (src.ndim() == 3) {
switch (src_layout) {
case kNCW: break;
case kNWC: {
std::swap(dst[1], dst[2]);
break;
}
default: {
LOG(FATAL) << "inavlid layout for 3d shape" << src_layout;
}
}
switch (dst_layout) {
case kNCW: break;
case kNWC: {
std::swap(dst[1], dst[2]);
break;
}
default: {
LOG(FATAL) << "inavlid layout for 3d shape" << dst_layout;
}
}
} else if (src.ndim() == 4) {
switch (src_layout) {
case kNCHW: break;
case kNHWC: {
dst[2] = src[1];
dst[3] = src[2];
dst[1] = src[3];
break;
}
default: {
LOG(FATAL) << "inavlid layout for 4d shape" << src_layout;
}
}
src = dst;
switch (dst_layout) {
case kNCHW: break;
case kNHWC: {
dst[1] = src[2];
dst[2] = src[3];
dst[3] = src[1];
break;
}
default: {
LOG(FATAL) << "inavlid layout for 4d shape" << dst_layout;
}
}
} else if (src.ndim() == 5) {
switch (src_layout) {
case kNCDHW: break;
case kNDHWC: {
dst[2] = src[1];
dst[3] = src[2];
dst[4] = src[3];
dst[1] = src[4];
break;
}
default: {
LOG(FATAL) << "inavlid layout for 5d shape" << src_layout;
}
}
src = dst;
switch (dst_layout) {
case kNCDHW: break;
case kNDHWC: {
dst[1] = src[2];
dst[2] = src[3];
dst[3] = src[4];
dst[4] = src[1];
break;
}
default: {
LOG(FATAL) << "inavlid layout for 5d shape" << dst_layout;
}
}
} else {
LOG(FATAL) << "no layout option for " << dst.ndim() << " dimensions";
}
return dst;
}
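// Example round trip (a sketch; values follow the 4-d case above):
//   TShape nhwc({4, 10, 12, 3});                      // N, H, W, C
//   TShape nchw = ConvertLayout(nhwc, kNHWC, kNCHW);  // (4, 3, 10, 12)
//   TShape back = ConvertLayout(nchw, kNCHW, kNHWC);  // (4, 10, 12, 3)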
} // namespace top
} // namespace nnvm
#endif // NNVM_TOP_NN_NN_COMMON_H_
/*!
* Copyright (c) 2017 by Contributors
* \file pooling.cc
* \brief Property def of pooling operators.
*/
#include <nnvm/op.h>
#include <nnvm/node.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/top/nn.h>
#include "./nn_common.h"
#include "../op_common.h"
#include "../elemwise_op_common.h"
namespace nnvm {
namespace top {
DMLC_REGISTER_PARAMETER(PoolParam);
inline bool Pool2DInferShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape>* in_shape,
std::vector<TShape>* out_shape) {
const PoolParam& param = nnvm::get<PoolParam>(attrs.parsed);
CHECK_EQ(in_shape->size(), 1U);
CHECK_EQ(out_shape->size(), 1U);
TShape dshape = (*in_shape)[0];
if (dshape.ndim() == 0) return false;
dshape = ConvertLayout(dshape, param.layout, kNCHW);
TShape oshape = dshape;
CHECK_EQ(dshape.ndim(), 4U)
<< "Pooling: Input data should be 4D";
CHECK(param.pool_size[0] <= dshape[2] + 2 * param.padding[0])
<< "pool size (" << param.pool_size[0] << ") exceeds input (" << dshape[2]
<< " padded to " << (dshape[2] + 2*param.padding[0]) << ")";
CHECK(param.pool_size[1] <= dshape[3] + 2 * param.padding[1])
<< "pool size (" << param.pool_size[1] << ") exceeds input (" << dshape[3]
<< " padded to " << (dshape[3] + 2*param.padding[1]) << ")";
if (!param.ceil_mode) {
oshape[2] = ((dshape[2] + 2 * param.padding[0] - param.pool_size[0]) /
param.strides[0]) + 1;
oshape[3] = ((dshape[3] + 2 * param.padding[1] - param.pool_size[1]) /
param.strides[1]) + 1;
} else {
oshape[2] = ((dshape[2] + 2 * param.padding[0] - param.pool_size[0] +
param.strides[0] - 1) / param.strides[0]) + 1;
oshape[3] = ((dshape[3] + 2 * param.padding[1] - param.pool_size[1] +
param.strides[1] - 1) / param.strides[1]) + 1;
}
oshape = ConvertLayout(oshape, kNCHW, param.layout);
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
return true;
}
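// Worked example of the two branches above (a sketch matching
// test_max_pool2d() later in this commit): input (4, 10, 12, 12) in
// NCHW with pool_size=(3, 3), padding=(1, 1), strides=(2, 2):
//   floor mode: (12 + 2 * 1 - 3) / 2 + 1 = 6
//   ceil mode:  (12 + 2 * 1 - 3 + (2 - 1)) / 2 + 1 = 7
// giving (4, 10, 6, 6) and (4, 10, 7, 7) respectively.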
NNVM_REGISTER_OP(max_pool2d)
.describe(R"code(Max pooling operation for one dimensional data.
- **data**: This depends on the `layout` parameter. Input is 4D array of shape
(batch_size, channels, height, width) if `layout` is `NCHW`.
- **out**: This depends on the `layout` parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
out_height and out_width are calculated as::
out_height = floor((height+2*padding[0]-pool_size[0])/strides[0])+1
out_width = floor((width+2*padding[1]-pool_size[1])/strides[1])+1
When `ceil_mode` is `True`, ceil will be used instead of floor in this
equation.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "4D Tensor", "Input data.")
.add_arguments(PoolParam::__FIELDS__())
.set_attr_parser(ParamParser<PoolParam>)
.set_num_outputs(1)
.set_num_inputs(1)
.set_attr<FInferShape>("FInferShape", Pool2DInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(2);
NNVM_REGISTER_OP(avg_pool2d)
.describe(R"code(Average pooling operation for one dimensional data.
- **data**: This depends on the `layout` parameter. Input is 4D array of shape
(batch_size, channels, height, width) if `layout` is `NCHW`.
- **out**: This depends on the `layout` parameter. Output is 4D array of shape
(batch_size, channels, out_height, out_width) if `layout` is `NCHW`.
out_height and out_width are calculated as::
out_height = floor((height+2*padding[0]-pool_size[0])/strides[0])+1
out_width = floor((width+2*padding[1]-pool_size[1])/strides[1])+1
When `ceil_mode` is `True`, ceil will be used instead of floor in this
equation.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "4D Tensor", "Input data.")
.add_arguments(PoolParam::__FIELDS__())
.set_attr_parser(ParamParser<PoolParam>)
.set_num_outputs(1)
.set_num_inputs(1)
.set_attr<FInferShape>("FInferShape", Pool2DInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(2);
DMLC_REGISTER_PARAMETER(GlobalPoolParam);
inline bool GlobalPool2DInferShape(const nnvm::NodeAttrs& attrs,
std::vector<TShape>* in_shape,
std::vector<TShape>* out_shape) {
const GlobalPoolParam& param = nnvm::get<GlobalPoolParam>(attrs.parsed);
CHECK_EQ(in_shape->size(), 1U);
CHECK_EQ(out_shape->size(), 1U);
TShape dshape = (*in_shape)[0];
if (dshape.ndim() == 0) return false;
dshape = ConvertLayout(dshape, param.layout, kNCHW);
TShape oshape = dshape;
oshape[2] = oshape[3] = 1;
oshape = ConvertLayout(oshape, kNCHW, param.layout);
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, 0, oshape);
return true;
}
NNVM_REGISTER_OP(global_max_pool2d)
.describe(R"code(Global max pooling operation for 2D data.
- **data**: This depends on the `layout` parameter. Input is 4D array of shape
(batch_size, channels, height, width) if `layout` is `NCHW`.
- **out**: This depends on the `layout` parameter. Output is 4D array of shape
(batch_size, channels, 1, 1) if `layout` is `NCHW`.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "4D Tensor", "Input data.")
.add_arguments(GlobalPoolParam::__FIELDS__())
.set_attr_parser(ParamParser<GlobalPoolParam>)
.set_num_outputs(1)
.set_num_inputs(1)
.set_attr<FInferShape>("FInferShape", GlobalPool2DInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(2);
NNVM_REGISTER_OP(global_avg_pool2d)
.describe(R"code(Global average pooling operation for 2D data.
- **data**: This depends on the `layout` parameter. Input is 4D array of shape
(batch_size, channels, height, width) if `layout` is `NCHW`.
- **out**: This depends on the `layout` parameter. Output is 4D array of shape
(batch_size, channels, 1, 1) if `layout` is `NCHW`.
)code" NNVM_ADD_FILELINE)
.add_argument("data", "4D Tensor", "Input data.")
.add_arguments(GlobalPoolParam::__FIELDS__())
.set_attr_parser(ParamParser<GlobalPoolParam>)
.set_num_outputs(1)
.set_num_inputs(1)
.set_attr<FInferShape>("FInferShape", GlobalPool2DInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<1, 1>)
.set_support_level(2);
} // namespace top
} // namespace nnvm
@@ -15,8 +15,8 @@ namespace top {
// flatten
inline bool FlattenInferShape(const NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
std::vector<TShape>* in_attrs,
std::vector<TShape>* out_attrs) {
CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
CHECK_EQ(out_attrs->size(), 1U);
const TShape &dshape = (*in_attrs)[0];
@@ -25,7 +25,8 @@ inline bool FlattenInferShape(const NodeAttrs& attrs,
for (uint32_t i = 1; i < dshape.ndim(); ++i) {
target_dim *= dshape[i];
}
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, TShape({dshape[0], target_dim}));
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0,
TShape({dshape[0], target_dim}));
return true;
}
@@ -62,8 +63,8 @@ Example::
DMLC_REGISTER_PARAMETER(ConcatenateParam);
inline bool ConcatenateInferShape(const NodeAttrs& attrs,
std::vector<TShape> *in_shape,
std::vector<TShape> *out_shape) {
std::vector<TShape>* in_shape,
std::vector<TShape>* out_shape) {
const ConcatenateParam& param = nnvm::get<ConcatenateParam>(attrs.parsed);
TShape dshape;
dim_t size = 0;
@@ -140,12 +141,78 @@ Example::
.set_support_level(1);
// split
DMLC_REGISTER_PARAMETER(SplitParam);
inline bool SplitInferShape(const NodeAttrs& attrs,
std::vector<TShape>* in_shape,
std::vector<TShape>* out_shape) {
const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed);
const TShape& dshape = (*in_shape)[0];
if (dshape.ndim() == 0) return false;
if (param.indices_or_sections.ndim() == 1) {
int num_outputs = param.indices_or_sections[0];
CHECK_EQ(out_shape->size(), static_cast<size_t>(num_outputs));
CHECK_LT(param.axis, dshape.ndim());
TShape oshape = dshape;
CHECK_EQ(oshape[param.axis] % num_outputs, 0)
<< "the number of sections must evenly divide input.shape[axis]";
oshape[param.axis] /= num_outputs;
for (size_t i = 0; i < out_shape->size(); ++i) {
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, i, oshape);
}
} else {
dim_t num_outputs = param.indices_or_sections.ndim();
CHECK_EQ(out_shape->size(), static_cast<size_t>(num_outputs));
CHECK_LT(param.axis, dshape.ndim());
TShape oshape = dshape;
dim_t total = 0;
for (size_t i = 0; i < out_shape->size(); ++i) {
oshape[param.axis] = param.indices_or_sections[i];
total += oshape[param.axis];
NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_shape, i, oshape);
}
CHECK_EQ(total, dshape[param.axis])
<< "The sum of sections must match the input.shape[axis]";
}
return true;
}
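// A sketch of the two modes, matching test_split() later in this
// commit, for input shape (10, 20) and axis=1:
//   indices_or_sections=2        -> two outputs of shape (10, 10)
//   indices_or_sections=[11, 9]  -> outputs (10, 11) and (10, 9)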
inline uint32_t SplitNumOutputs(const NodeAttrs& attrs) {
const SplitParam& param = nnvm::get<SplitParam>(attrs.parsed);
if (param.indices_or_sections.ndim() == 1) {
return static_cast<uint32_t>(param.indices_or_sections[0]);
} else {
return static_cast<uint32_t>(param.indices_or_sections.ndim());
}
}
NNVM_REGISTER_OP(split)
.describe(R"code(Splits an array along a particular axis into multiple sub-arrays.
**Note** that an integer `indices_or_sections` must evenly divide the length
of the axis along which to split the array; a tuple is interpreted as
explicit section sizes, which must sum to the length of that axis.
)code" NNVM_ADD_FILELINE)
.set_num_inputs(1)
.set_attr_parser(ParamParser<SplitParam>)
.set_num_outputs(SplitNumOutputs)
.add_argument("data", "Tensor", "List of arrays to concatenate")
.set_attr<FInferShape>("FInferShape", SplitInferShape)
.set_attr<FInferType>("FInferType", ElemwiseType<-1, 1>)
.add_arguments(SplitParam::__FIELDS__())
.set_support_level(1);
// cast
DMLC_REGISTER_PARAMETER(CastParam);
inline bool CastInferType(const NodeAttrs& attrs,
std::vector<int> *in_attrs,
std::vector<int> *out_attrs) {
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
const CastParam& param = nnvm::get<CastParam>(attrs.parsed);
CHECK_EQ(out_attrs->size(), 1U);
NNVM_ASSIGN_OUTPUT_TYPE(attrs, *out_attrs, 0, param.dtype);
@@ -170,8 +237,8 @@ NNVM_REGISTER_OP(cast)
DMLC_REGISTER_PARAMETER(ReshapeParam);
inline bool ReshapeInferShape(const NodeAttrs& attrs,
std::vector<TShape> *in_attrs,
std::vector<TShape> *out_attrs) {
std::vector<TShape>* in_attrs,
std::vector<TShape>* out_attrs) {
const ReshapeParam& param = nnvm::get<ReshapeParam>(attrs.parsed);
CHECK_GT(param.shape.ndim(), 0);
CHECK_EQ(in_attrs->size(), 1U) << "Input: [data]";
......
@@ -36,6 +36,18 @@ def test_concatenate():
assert(sdict["concat"][0] == [20, 20])
def test_split():
x1 = sym.Variable("x", shape=(10, 20))
z = sym.split(x1, indices_or_sections=[11, 9], name="y")
sdict = infer_shape(z)
assert(sdict["y"][0] == [10, 11])
assert(sdict["y"][1] == [10, 9])
z = sym.split(x1, indices_or_sections=2, name="y")
sdict = infer_shape(z)
assert(sdict["y"][0] == [10, 10])
assert(sdict["y"][1] == [10, 10])
def test_batchnorm():
x = sym.Variable("x", shape=(10, 20))
y = sym.batch_norm(1 / x, name="bn")
@@ -50,6 +62,115 @@ def test_flatten():
sdict = infer_shape(y)
assert(sdict["y"][0] == [10, 200])
# Level 2
def test_conv2d():
def check(in_shape, out_shape, **kwargs):
x = sym.Variable("x", shape=in_shape)
y = sym.conv2d(x, name="y", **kwargs)
sdict = infer_shape(y)
assert(tuple(sdict["y"][0]) == tuple(out_shape))
check((4, 10, 10, 12),
(4, 12, 10, 12),
channels=12,
kernel_size=(3,3),
padding=(1,1))
check((4, 10, 12, 4),
(4, 8, 8, 5),
channels=5,
kernel_size=(3, 5),
layout="NHWC")
check((4, 10, 12, 4),
(4, 6, 8, 5),
channels=5,
dilation=(2, 2),
kernel_size=(3, 3),
layout="NHWC")
check((4, 10, 12, 4),
(4, 5, 6, 5),
channels=5,
strides=(2, 2),
kernel_size=(3, 3),
padding=(1, 1),
layout="NHWC")
def test_conv2d_transpose():
def check(in_shape, out_shape, **kwargs):
x = sym.Variable("x", shape=in_shape)
y = sym.conv2d_transpose(x, name="y", **kwargs)
sdict = infer_shape(y)
assert(tuple(sdict["y"][0]) == tuple(out_shape))
check((4, 10, 10, 12),
(4, 15, 10, 12),
channels=15,
kernel_size=(3,3),
padding=(1,1))
check((4, 10, 10, 12),
(4, 15, 10, 14),
channels=15,
kernel_size=(3, 5),
padding=(1, 1))
check((4, 10, 10, 12),
(4, 15, 11, 15),
channels=15,
kernel_size=(3, 5),
padding=(1, 1),
output_padding=(1, 1))
check((4, 10, 10, 12),
(4, 15, 15, 11),
channels=11,
kernel_size=(5, 5),
output_padding=(1, 1),
layout="NHWC")
def test_max_pool2d():
def check(in_shape, out_shape, **kwargs):
x = sym.Variable("x", shape=in_shape)
y = sym.max_pool2d(x, name="y", **kwargs)
sdict = infer_shape(y)
assert(tuple(sdict["y"][0]) == tuple(out_shape))
check((4, 10, 12, 12),
(4, 10, 12, 12),
pool_size=(3,3),
padding=(1,1))
check((4, 10, 12, 12),
(4, 10, 6, 6),
pool_size=(3, 3),
padding=(1, 1),
strides=(2, 2))
check((4, 10, 12, 12),
(4, 10, 7, 7),
pool_size=(3, 3),
padding=(1, 1),
strides=(2, 2),
ceil_mode=True)
check((4, 12, 14, 10),
(4, 6, 7, 10),
pool_size=(3, 3),
padding=(1, 1),
strides=(2, 2),
layout="NHWC")
def test_global_pool2d():
def check(in_shape, out_shape, **kwargs):
x = sym.Variable("x", shape=in_shape)
y = sym.global_max_pool2d(x, name="y", **kwargs)
sdict = infer_shape(y)
assert(tuple(sdict["y"][0]) == tuple(out_shape))
check((4, 10, 12, 12),
(4, 10, 1, 1))
check((4, 10, 12, 12),
(4, 1, 1, 12),
layout="NHWC")
# Level 3
def test_reshape():
def check(in_shape, tshape, out_shape):
@@ -77,6 +198,11 @@ def test_reshape():
if __name__ == "__main__":
test_dense()
test_concatenate()
test_split()
test_batchnorm()
test_flatten()
test_conv2d()
test_conv2d_transpose()
test_max_pool2d()
test_global_pool2d()
test_reshape()
@@ -7,11 +7,16 @@ def test_fullc():
x3 = sym.softmax(x2)
assert x2.list_input_names() == ['x', 'dense_weight', 'dense_bias']
def test_concatenate():
def test_concatenate_split():
x = sym.Variable('x')
y = sym.Variable('y')
y = sym.concatenate(x, y)
assert y.list_input_names() == ['x', 'y']
z = sym.split(y, indices_or_sections=10)
assert len(z.list_output_names()) == 10
z = sym.split(y, indices_or_sections=[10, 20])
assert len(z.list_output_names()) == 2
def test_unary():
x = sym.Variable('x')
@@ -29,7 +34,7 @@ def test_batchnorm():
if __name__ == "__main__":
test_concatenate()
test_concatenate_split()
test_fullc()
test_unary()
test_batchnorm()