Commit ef58291d by Leyuan Wang, committed by Yizhi Liu

[Relay][TOPI][OP] intel_graphics conv2d alterlayout support relay, added stack op (#2729)

* add stack op frontend

* concatenate compute moved to C++

* topi stack added

* stack added

* fix stack bugs and tested

* conv2d alterlayout updated for relay

* fix pylint

* fix cmake warnings

* cmake warnings fixed
parent 154e054d
......@@ -72,6 +72,7 @@ List of operators
topi.logical_or
topi.logical_not
topi.arange
topi.stack
topi.layout_transform
topi.image.resize
......@@ -130,6 +131,7 @@ topi
.. autofunction:: topi.greater
.. autofunction:: topi.less
.. autofunction:: topi.arange
.. autofunction:: topi.stack
.. autofunction:: topi.layout_transform
topi.nn
......
......@@ -96,6 +96,7 @@ This level enables additional math and transform operators.
tvm.relay.cast
tvm.relay.split
tvm.relay.arange
tvm.relay.stack
**Level 4: Broadcast and Reductions**
......@@ -220,6 +221,7 @@ Level 3 Definitions
.. autofunction:: tvm.relay.cast
.. autofunction:: tvm.relay.split
.. autofunction:: tvm.relay.arange
.. autofunction:: tvm.relay.stack
Level 4 Definitions
......
......@@ -115,6 +115,15 @@ struct ArangeAttrs : public tvm::AttrsNode<ArangeAttrs> {
}
}; // struct ArangeAttrs
/*! \brief Attributes used in stack operators */
struct StackAttrs : public tvm::AttrsNode<StackAttrs> {
Integer axis;
TVM_DECLARE_ATTRS(StackAttrs, "relay.attrs.StackAttrs") {
TVM_ATTR_FIELD(axis).set_default(0)
.describe("The axis in the result array along which the input arrays are stacked.");
}
}; // struct StackAttrs
/*! \brief Attributes used in squeeze operators */
struct SqueezeAttrs : public tvm::AttrsNode<SqueezeAttrs> {
// use axis to make the name numpy compatible.
......
......@@ -253,6 +253,11 @@ def _mx_concat(inputs, attrs):
return _op.concatenate(tuple(inputs), axis=axis)
def _mx_stack(inputs, attrs):
axis = attrs.get_int("axis", 0)
return _op.stack(tuple(inputs), axis=axis)
def _mx_expand_dims(inputs, attrs):
axis = attrs.get_int("axis")
return _op.expand_dims(inputs[0], axis=axis)
......@@ -474,6 +479,7 @@ _convert_map = {
"expand_dims" : _mx_expand_dims,
"Concat" : _mx_concat,
"concat" : _mx_concat,
"stack" : _mx_stack,
"batch_dot" : _mx_batch_dot,
"LeakyReLU" : _mx_leaky_relu,
"_arange" : _mx_arange,
......
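With the "stack" entry registered in _convert_map above, an MXNet graph that uses stack can be imported into Relay. A minimal round-trip sketch, not part of this commit; the shapes are illustrative, and the exact return value of from_mxnet varies across TVM versions:

import mxnet as mx
from tvm import relay

a, b = mx.sym.var("a"), mx.sym.var("b")
net = mx.sym.stack(a, b, axis=0)  # MXNet-side stack
# The converter table maps "stack" to _mx_stack, which emits the relay stack op.
converted = relay.frontend.from_mxnet(net, shape={"a": (2, 3), "b": (2, 3)})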
"""Backend compiler related feature registration"""
# pylint: disable=invalid-name,unused-argument
from __future__ import absolute_import
import topi
from . import op as _reg
from ._reduce import _schedule_reduce
from .op import schedule_injective, OpPattern
......@@ -27,16 +26,10 @@ _reg.register_schedule("split", schedule_injective)
_reg.register_schedule("take", schedule_injective)
_reg.register_schedule("transpose", schedule_injective)
_reg.register_schedule("where", schedule_broadcast)
_reg.register_schedule("stack", schedule_injective)
_reg.register_schedule("concatenate", schedule_injective)
_reg.register_schedule("_contrib_reverse_reshape", schedule_injective)
# layout_transform
_reg.register_schedule("layout_transform", schedule_injective)
_reg.register_pattern("layout_transform", OpPattern.INJECTIVE)
# concatenate
@_reg.register_compute("concatenate")
def concatenate_compute(attrs, inputs, output_type, target):
return [topi.concatenate(inputs, axis=attrs.axis)]
_reg.register_schedule("concatenate", schedule_injective)
_reg.register_pattern("concatenate", OpPattern.INJECTIVE)
......@@ -294,6 +294,28 @@ def arange(start, stop=None, step=1, dtype="float32"):
return _make.arange(start, stop, step, dtype)
def stack(data, axis):
    """Join a sequence of arrays along a new axis.

    Parameters
    ----------
    data : relay.Expr
        A Tuple expression holding the tensors to be stacked. A plain
        Python list or tuple of expressions is wrapped into a Tuple first.

    axis : int
        The axis in the result array along which the input arrays are stacked.

    .. note::
        Each array in the input array sequence must have the same shape.

    Returns
    -------
    ret : relay.Expr
        The stacked tensor.
    """
    if isinstance(data, (list, tuple)):
        # A bare Python sequence is not a relay expression; wrap it in a
        # Tuple so the input matches the TupleType expected by StackRel.
        from .. import expr as _expr
        data = _expr.Tuple(list(data))
    return _make.stack(data, axis)
def where(condition, x, y):
"""Selecting elements from either x or y depending on the value of the
condition.
......
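A minimal usage sketch for the new Relay op, not part of this commit: the inputs are wrapped in a relay Tuple, matching the TupleType that the Stack type relation expects, and stacking two (2, 2) tensors at axis 0 yields a (2, 2, 2) result.

import tvm
from tvm import relay

x = relay.var("x", shape=(2, 2))
y = relay.var("y", shape=(2, 2))
z = relay.stack(relay.Tuple([x, y]), axis=0)  # inferred type: Tensor[(2, 2, 2)]
func = relay.Function([x, y], z)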
......@@ -206,6 +206,15 @@ bool ConcatenateRel(const Array<Type>& types,
return true;
}
Array<Tensor> ConcatenateCompute(const Attrs& attrs,
const Array<Tensor>& inputs,
const Type& out_type,
const Target& target) {
const ConcatenateAttrs *param = attrs.as<ConcatenateAttrs>();
CHECK(param != nullptr);
return { topi::concatenate(inputs, param->axis) };
}
Array<Array<Layout>> ConcatenateLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
......@@ -268,7 +277,96 @@ RELAY_REGISTER_OP("concatenate")
.add_argument("data", "Tensor", "The input list of tensors.")
.set_support_level(1)
.add_type_rel("Concatenate", ConcatenateRel)
.set_attr<FInferCorrectLayout>("FInferCorrectLayout", ConcatenateLayout);
.set_attr<FInferCorrectLayout>("FInferCorrectLayout", ConcatenateLayout)
.set_attr<FTVMCompute>("FTVMCompute", ConcatenateCompute)
.set_attr<TOpPattern>("TOpPattern", kInjective);
TVM_REGISTER_NODE_TYPE(StackAttrs);
bool StackRel(const Array<Type>& types,
int num_inputs,
const Attrs& attrs,
const TypeReporter& reporter) {
// types: [data, result]
CHECK_EQ(types.size(), 2);
const auto* tensor_tuple = types[0].as<TupleTypeNode>();
if (tensor_tuple == nullptr) {
CHECK(types[0].as<IncompleteTypeNode>())
<< "stack: expected input type to be TupleType but got "
<< types[0];
return false;
}
const auto* param = attrs.as<StackAttrs>();
const auto& first = Downcast<TensorType>(tensor_tuple->fields[0]);
// Sanity check: ndim and dtype.
const int ndim = static_cast<int>(first->shape.size());
const DataType dtype = first->dtype;
for (const Type& ele : tensor_tuple->fields) {
const auto& e = Downcast<TensorType>(ele);
int e_ndim = static_cast<int>(e->shape.size());
const DataType& e_dtype = e->dtype;
CHECK_EQ(e_ndim, ndim) << "relay.stack requires all tensors have the same ndim";
CHECK_EQ(e_dtype, dtype) << "relay.stack requires all tensors have the same dtype";
}
// Sanity check: axis
int axis = param->axis;
CHECK(-(ndim + 1) <= axis && axis <= ndim)
<< "stack only accepts `axis` in [-(ndim+1), ndim]"
<< ", but got axis = " << axis
<< ", and ndim = " << ndim;
axis = axis < 0 ? ndim + axis + 1 : axis;
// Calculate shape
std::vector<IndexExpr> oshape;
oshape.reserve(ndim + 1);
const int stack_dim = static_cast<int>(tensor_tuple->fields.size());
for (int i = 0; i < axis; ++i) {
oshape.emplace_back(first->shape[i]);
}
oshape.emplace_back(stack_dim);
for (int i = axis; i < ndim; ++i) {
oshape.emplace_back(first->shape[i]);
}
reporter->Assign(types[1], TensorTypeNode::make(oshape, dtype));
return true;
}
Array<Tensor> StackCompute(const Attrs& attrs,
const Array<Tensor>& inputs,
const Type& out_type,
const Target& target) {
const StackAttrs *param = attrs.as<StackAttrs>();
CHECK(param != nullptr);
return { topi::stack(inputs, param->axis) };
}
Expr MakeStack(Expr data,
int axis) {
auto attrs = make_node<StackAttrs>();
attrs->axis = axis;
static const Op& op = Op::Get("stack");
return CallNode::make(op, {data}, Attrs(attrs), {});
}
TVM_REGISTER_API("relay.op._make.stack")
.set_body([](const TVMArgs& args, TVMRetValue* rv) {
runtime::detail::unpack_call<Expr, 2>(MakeStack, args, rv);
});
RELAY_REGISTER_OP("stack")
.describe(R"code(Stack the input tensors along the given axis.
- **data** : A list of tensors.
- **axis** : The axis along which the tensors are stacked.
)code" TVM_ADD_FILELINE)
.set_attrs_type_key("relay.attrs.StackAttrs")
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input list of tensors.")
.set_support_level(1)
.add_type_rel("Stack", StackRel)
.set_attr<FTVMCompute>("FTVMCompute", StackCompute)
.set_attr<TOpPattern>("TOpPattern", kInjective);
/* relay.transpose */
TVM_REGISTER_NODE_TYPE(TransposeAttrs);
......
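The shape arithmetic in StackRel above mirrors NumPy: the output rank is ndim + 1, a negative axis normalizes to ndim + axis + 1, and the stack dimension equals the number of input tensors. A NumPy illustration, not from the diff:

import numpy as np

parts = [np.zeros((2, 3)) for _ in range(3)]
print(np.stack(parts, axis=0).shape)   # (3, 2, 3): new dim inserted in front
print(np.stack(parts, axis=1).shape)   # (2, 3, 3)
print(np.stack(parts, axis=-1).shape)  # (2, 3, 3): -1 normalizes to ndim + (-1) + 1 = 2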
......@@ -324,6 +324,56 @@ inline Tensor concatenate(const Array<Tensor>& inputs,
}
/*!
* \brief Join a sequence of tensors along a new axis.
*
* \param inputs The input tensors
* \param axis The axis along which the tensors will be stacked
* \param name The name of the operation
* \param tag The tag to mark the operation
*
* \return A Tensor whose op member is the stack operation
*/
inline Tensor stack(const Array<Tensor>& inputs,
int axis = 0,
std::string name = "tensor",
std::string tag = kInjective) {
int ndim = static_cast<int>(inputs[0]->shape.size());
CHECK(-ndim - 1 <= axis && axis <= ndim)
<< "stack only accepts `axis` in [-(ndim+1), ndim]"
<< ", but got axis = " << axis
<< ", and ndim = " << ndim;
if (axis < 0) {
axis += ndim + 1;
}
CHECK_LT(axis, inputs[0]->shape.size() + 1) <<
"axis out of bounds";
const int stack_size = static_cast<int>(inputs.size());
Array<Expr> out_shape;
for (size_t i = 0; i < static_cast<size_t>(axis); ++i)
out_shape.push_back(inputs[0]->shape[i]);
out_shape.push_back(stack_size);
for (size_t i = static_cast<size_t>(axis); i < static_cast<size_t>(ndim); ++i)
out_shape.push_back(inputs[0]->shape[i]);
return compute(
out_shape, [&](const Array<Var>& indices) {
Array<Expr> idx;
for (size_t i = 0; i < indices.size(); ++i)
if (i != static_cast<size_t>(axis))
idx.push_back(indices[i]);
auto ind = indices[axis];
auto ret = inputs[0](idx);
for (int i = 0; i < static_cast<int>(inputs.size() - 1); ++i) {
ret = tvm::if_then_else(ind == i + 1,
inputs[i + 1](idx),
ret);
}
return ret;
}, name, tag);
}
/*!
* \brief Split a tensor into multiple sub-tensors
*
* \param x The input tensor
......
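The stack compute rule above builds, for every output coordinate, a chain of if_then_else selects: it drops the stack-axis component to index into the inputs, then picks input k when the stack-axis coordinate equals k. A per-element NumPy rendering of that logic, illustration only, using a hypothetical stack_ref helper and assuming axis is already non-negative:

import numpy as np

def stack_ref(inputs, axis, indices):
    # Drop the stack axis to obtain an index into the input tensors.
    idx = indices[:axis] + indices[axis + 1:]
    ind = indices[axis]                # coordinate along the new axis
    ret = inputs[0][idx]
    for i in range(len(inputs) - 1):   # mirrors the if_then_else chain
        ret = inputs[i + 1][idx] if ind == i + 1 else ret
    return ret

parts = [np.arange(6).reshape(2, 3) + 10 * k for k in range(3)]
out = np.empty((2, 3, 3))
for pos in np.ndindex(out.shape):
    out[pos] = stack_ref(parts, 1, pos)
assert (out == np.stack(parts, axis=1)).all()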
......@@ -3,7 +3,6 @@
from __future__ import absolute_import as _abs
import warnings
import tvm
from .. import generic
......@@ -40,10 +39,6 @@ def tile_and_bind3d(s, tensor, z, y, x, z_factor=2, y_factor=None, x_factor=None
@conv2d_alter_layout.register(["intel_graphics"])
def _alter_conv2d_layout(attrs, inputs, tinfos, F):
import nnvm.symbol as sym
if F != sym:
warnings.warn("Only support alter layout for intel graphics in NNVM now. "
"This pass is ignored in relay.")
return None
copy_inputs = [s for s in inputs]
......@@ -51,8 +46,8 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, F):
kernel = tinfos[1]
import ast
padding = ast.literal_eval(attrs['padding'])
stride = ast.literal_eval(attrs['strides'])
padding = ast.literal_eval(str(attrs['padding']))
stride = ast.literal_eval(str(attrs['strides']))
wkl = _get_workload(data, kernel, stride, padding, data.dtype)
oc_bn = 1
......@@ -64,7 +59,12 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, F):
new_attrs = {k: attrs[k] for k in attrs.keys()}
new_attrs['kernel_layout'] = 'OIHW%do' % (oc_bn)
return sym.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs)
if F == sym:
out = F.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs)
else:
out = F.nn.contrib_conv2d_nchwc(*copy_inputs, **new_attrs)
return out
@conv2d_NCHWc.register(["intel_graphics"])
def _decl_conv2d(data, kernel, stride, padding, dilation, layout, out_layout, out_dtype='float32'):
......
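The str(...) wrapper around the padding and stride attributes above makes one parsing path serve both frontends: NNVM hands attributes over as strings, while Relay hands back structured values whose str() form is still a Python literal. A small stand-alone illustration; the Relay-style value here is a stand-in:

import ast

nnvm_padding = "(1, 1)"       # NNVM: attribute is already a string
relay_padding = [1, 1]        # Relay: structured value; str() gives "[1, 1]"
print(ast.literal_eval(str(nnvm_padding)))   # (1, 1)
print(ast.literal_eval(str(relay_padding)))  # [1, 1]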
......@@ -191,6 +191,25 @@ def concatenate(a_tuple, axis=0):
return cpp.concatenate(a_tuple, axis)
def stack(a, axis):
    """Join a sequence of tensors along a new axis.

    Parameters
    ----------
    a : tuple or list of tvm.Tensor
        The tensors to be stacked. All tensors must have the same shape.

    axis : int
        The axis in the result array along which the input arrays are stacked.

    Returns
    -------
    ret : tvm.Tensor
        The stacked tensor.
    """
    return cpp.stack(a, axis)
def split(ary, indices_or_sections, axis=0):
"""Split an array into multiple sub-arrays.
......
......@@ -266,6 +266,11 @@ TVM_REGISTER_GLOBAL("topi.concatenate")
*rv = concatenate(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("topi.stack")
.set_body([](TVMArgs args, TVMRetValue *rv) {
*rv = stack(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("topi.split")
.set_body([](TVMArgs args, TVMRetValue *rv) {
if (args[1].type_code() == kDLInt || args[1].type_code() == kDLUInt) {
......
......@@ -124,6 +124,31 @@ def verify_concatenate(shapes, axis):
for device in get_all_backend():
check_device(device)
def verify_stack(shapes, axis):
tensor_l = []
for i, shape in enumerate(shapes):
tensor_l.append(tvm.placeholder(shape, name="A" + str(i)))
out_tensor = topi.stack(tensor_l, axis)
def check_device(device):
ctx = tvm.context(device, 0)
if not ctx.exist:
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
with tvm.target.create(device):
s = topi.generic.schedule_broadcast(out_tensor)
foo = tvm.build(s, tensor_l + [out_tensor], device, name="stack")
data_npys = [np.random.normal(size=shape).astype(tensor_l[0].dtype) for shape in shapes]
out_npy = np.stack(data_npys, axis=axis)
data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
foo(*(data_nds + [out_nd]))
tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
for device in get_all_backend():
check_device(device)
def verify_split(src_shape, indices_or_sections, axis):
A = tvm.placeholder(shape=src_shape, name="A")
......@@ -383,7 +408,7 @@ def test_squeeze():
def test_concatenate():
verify_concatenate([(2,), (2,), (2,)], 0)
verify_concatenate([(2,), (2,), (2,)], -1)
verify_concatenate([(2, 3, 4), (2, 2, 4), (2, 5, 4)], 1)
verify_concatenate([(1, 2, 4), (1, 2, 3), (1, 2, 7), (1, 2, 8), (1, 2, 1)], -1)
verify_concatenate([(5, 6, 7, 3),
......@@ -393,6 +418,14 @@ def test_concatenate():
(2, 6, 7, 3)], 0)
def test_stack():
verify_stack([(2,), (2,), (2,)], -1)
verify_stack([(2,), (2,), (2,)], 1)
verify_stack([(2,), (2,), (2,)], 0)
verify_stack([(2, 2, 4), (2, 2, 4), (2, 2, 4)], 1)
verify_stack([(2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4), (2, 2, 3, 4)], -1)
def test_split():
verify_split((2, 12, 3), 3, 1)
verify_split((2, 12, 3), [2, 4], 1)
......@@ -480,6 +513,7 @@ def test_layout_transform():
if __name__ == "__main__":
test_strided_slice()
test_concatenate()
test_stack()
test_tranpose()
test_expand_dims()
test_reshape()
......