Commit f9788871 by Yida Wang, committed by Yizhi Liu

[TOPI] add basic scheduling for conv2d_transpose on x86 (#3491)

* initialize conv2d transpose scheduling on x86

* refine the scheduler a bit

* fix for lint

* address review comments; remove duplicate code

* fix lint
parent 59448fed
topi/python/topi/nn/conv2d_transpose.py
@@ -51,11 +51,15 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype):
     Output : tvm.Tensor
         4-D with shape [batch, out_channel, out_height, out_width]
     """
-    batch, in_c, in_h, in_w = Input.shape
-    _, out_c, filter_h, filter_w = Filter.shape
+    return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype)
+
+
+def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype):
+    """Implementation of conv2d transpose"""
+    batch, in_c, in_h, in_w = data.shape
+    _, out_c, filter_h, filter_w = kernel.shape
     stride_h, stride_w = strides
     # dilate stage
-    DilatedInput = dilate(Input, [1, 1, stride_h, stride_w], name='DilatedInput')
+    DilatedInput = dilate(data, [1, 1, stride_h, stride_w], name='DilatedInput')
     # padding stage
     fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w))
     bpad_top = filter_h - 1 - fpad_top
@@ -78,7 +82,7 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype):
         (batch, out_c, out_h, out_w),
         lambda b, c, h, w: tvm.sum(
             PaddedInput[b, dc, h+dh, w+dw].astype(out_dtype) *
-            Filter[dc, c, filter_h-1-dh, filter_w-1-dw].astype(out_dtype),
+            kernel[dc, c, filter_h-1-dh, filter_w-1-dw].astype(out_dtype),
             axis=[dc, dh, dw]), tag="conv2d_transpose_nchw")
     return Output
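For context: the compute above realizes the transposed convolution as dilate, then pad, then an ordinary convolution over the spatially flipped kernel, so the output height works out to out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h (and analogously for the width). A minimal sketch of that shape arithmetic, with made-up numbers:

    # Illustrative shape arithmetic for conv2d_transpose_nchw (numbers are made up)
    in_h, stride_h, filter_h = 16, 2, 4
    fpad_top, fpad_bottom = 1, 1   # as returned by get_pad_tuple(padding, (filter_h, filter_w))
    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
    assert out_h == 32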
topi/python/topi/x86/__init__.py
@@ -14,3 +14,4 @@ from .depthwise_conv2d import schedule_depthwise_conv2d_NCHWc
 from .dense import _schedule_dense, _schedule_dense_pack, _schedule_dense_nopack
 from .batch_matmul import schedule_batch_matmul
 from .roi_align import roi_align_nchw
+from .conv2d_transpose import schedule_conv2d_transpose

New file: topi/python/topi/x86/conv2d_transpose.py
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name,unused-variable,unused-argument,no-member
"""Conv2D Transpose schedule on x86"""
import tvm
from tvm import autotvm
from .. import generic, tag
from ..nn.conv2d_transpose import conv2d_transpose_nchw, declaration_conv2d_transpose_impl

@autotvm.register_topi_compute(conv2d_transpose_nchw, 'cpu', ['direct'])
def _declaration_conv2d_transpose(cfg, data, kernel, strides, padding, out_dtype):
    # TODO cfg is not used for now
    return declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype)


@autotvm.register_topi_schedule(generic.schedule_conv2d_transpose_nchw, 'cpu', ['direct'])
def schedule_conv2d_transpose(cfg, outs):
    """Create schedule for tensors"""
    outs = [outs] if isinstance(outs, tvm.tensor.Tensor) else outs
    s = tvm.create_schedule([x.op for x in outs])
    scheduled_ops = []

    def traverse(op):
        """Traverse operators from computation graph"""
        # inline all one-to-one-mapping operators except the last stage (output)
        if tag.is_injective(op.tag):
            if op not in s.outputs:
                s[op].compute_inline()
            for tensor in op.input_tensors:
                if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                    traverse(tensor.op)

        if 'conv2d_transpose_nchw' in op.tag:
            C = op.output(0)

            N, OC, OH, OW = C.op.axis
            rc, ry, rx = C.op.reduce_axis
            OH, oh = s[C].split(OH, factor=2)
            OC, oc = s[C].split(OC, factor=32)
            IC, ic = s[C].split(rc, factor=32)

            s[C].reorder(N, OC, OH, OW, oc, IC, ry, rx, ic)
            N = s[C].fuse(N, OC)
            s[C].vectorize(oc)
            s[C].parallel(N)

        scheduled_ops.append(op)

    traverse(outs[0].op)
    return s
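The schedule splits the output channels by 32 and vectorizes the inner chunk, splits the output height by 2, fuses the batch axis with the outer output-channel axis for parallelism, and splits the reduction channel by 32. A minimal usage sketch (assuming the TOPI/TVM Python APIs of this era; shapes and names are illustrative, and under an llvm target the generic dispatcher should select the x86 schedule registered above):

    import tvm
    import topi

    # NCHW input and IOHW kernel; shapes are illustrative only.
    data = tvm.placeholder((1, 32, 32, 32), name='data')
    kernel = tvm.placeholder((32, 16, 4, 4), name='kernel')

    with tvm.target.create('llvm'):
        out = topi.nn.conv2d_transpose_nchw(data, kernel, [2, 2], [1, 1], data.dtype)
        s = topi.generic.schedule_conv2d_transpose_nchw([out])
        func = tvm.build(s, [data, kernel, out], 'llvm')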