Commit 81e03ee7 by Haichen Shen, committed by Tianqi Chen

Revert "[Relay][TOPI]Fix meaning of conv2d_transpose output_padding parameter (#4318)" (#4708)

This reverts commit dcf7fbf1.
parent 7f7dc073
@@ -46,16 +46,16 @@ AUTOTVM_TOPHUB_ROOT_PATH = os.path.join(os.path.expanduser('~'), ".tvm", "tophub
 # the version of each package
 PACKAGE_VERSION = {
-    'arm_cpu': "v0.05",
+    'arm_cpu': "v0.04",
     'llvm': "v0.03",
-    'cuda': "v0.07",
+    'cuda': "v0.06",
     'rocm': "v0.03",
     'opencl': "v0.03",
     'mali': "v0.05",
     'intel_graphics': "v0.01",
-    'vta': "v0.07",
+    'vta': "v0.06",
 }

 logger = logging.getLogger('autotvm')
...
@@ -339,7 +339,6 @@ def compute_conv2d_transpose(attrs, inputs, out_dtype, target):
     padding = get_const_tuple(attrs.padding)
     strides = get_const_tuple(attrs.strides)
     dilation = get_const_tuple(attrs.dilation)
-    output_padding = get_const_tuple(attrs.output_padding)
     groups = attrs.groups
     layout = attrs.data_layout
     out_dtype = attrs.out_dtype
@@ -349,7 +348,10 @@ def compute_conv2d_transpose(attrs, inputs, out_dtype, target):
     assert dilation == (1, 1), "not support dilate now"
     assert groups == 1, "only support groups == 1 for now"
     out = topi.nn.conv2d_transpose_nchw(
-        inputs[0], inputs[1], strides, padding, out_dtype, output_padding)
+        inputs[0], inputs[1], strides, padding, out_dtype)
+    output_padding = get_const_tuple(attrs.output_padding)
+    out = topi.nn.pad(out,
+                      [0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]])
     return [out]
@@ -442,8 +444,10 @@ def compute_conv1d_transpose(attrs, inputs, out_dtype, target):
     assert dilation == (1,), "conv1d_transpose dilation is not supported"
     assert groups == 1, "conv1d_transpose groups == 1 only supported"
     out = topi.nn.conv1d_transpose_ncw(
-        inputs[0], inputs[1], strides, padding, out_dtype,
-        get_const_tuple(attrs.output_padding))
+        inputs[0], inputs[1], strides, padding, out_dtype)
+    output_padding = get_const_tuple(attrs.output_padding)
+    out = topi.nn.pad(out,
+                      [0, 0, 0], [0, 0, output_padding[0]])
     return [out]
...
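After this revert, the Relay compute functions above no longer pass `output_padding` into the TOPI kernels; they compute the unpadded result and append zero rows/columns with `topi.nn.pad`. A minimal NumPy sketch of that padding step, with made-up shapes (not code from this commit):

```python
import numpy as np

# Hypothetical conv2d_transpose result in NCHW layout, plus a hypothetical output_padding.
out = np.random.uniform(size=(1, 8, 14, 14)).astype("float32")
output_padding = (1, 1)

# topi.nn.pad(out, [0, 0, 0, 0], [0, 0, oph, opw]) appends zeros at the bottom/right
# of the spatial dimensions; in NumPy terms:
padded = np.pad(out,
                ((0, 0), (0, 0), (0, output_padding[0]), (0, output_padding[1])),
                mode="constant")
assert padded.shape == (1, 8, 15, 15)
```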
@@ -570,8 +570,11 @@ def test_conv2d_transpose_nchw_run():
     dtype = "float32"
     data = np.random.uniform(size=dshape).astype(dtype)
     kernel = np.random.uniform(size=kshape).astype(dtype)
-    ref_res = topi.testing.conv2d_transpose_nchw_python(
-        data, kernel, 2, 1, (2, 2))
+    c_np = topi.testing.conv2d_transpose_nchw_python(
+        data, kernel, 2, 1)
+    d_np = np.zeros(shape=oshape)
+    d_np[:,:,0:c_np.shape[2],0:c_np.shape[3]] = c_np
+    ref_res = d_np

     for target, ctx in ctx_list():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
@@ -596,14 +599,9 @@ def test_conv2d_transpose_nhwc_run():
     data = np.random.uniform(size=dshape_nhwc).astype(dtype)
     kernel = np.random.uniform(size=kshape_hwoi).astype(dtype)
     # use true kshape layout here - HWOI
-    ref_res = topi.testing.conv2d_transpose_nhwc_python(data, kernel, 'HWOI',
-                                                        2, 1, output_padding=(2, 2))
-    for target, ctx in ctx_list():
-        intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
-        op_res1 = intrp1.evaluate(func)(data, kernel)
-        tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5, atol=1e-5)
+    c_np = topi.testing.conv2d_transpose_nhwc_python(data, kernel, 'HWOI', 2, 1)
+    d_np = np.zeros(shape=oshape_nhwc)
+    d_np[:,0:c_np.shape[1],0:c_np.shape[2],:] = c_np

 def test_conv1d_transpose_ncw_run():
@@ -619,8 +617,11 @@ def test_conv1d_transpose_ncw_run():
     dtype = "float32"
     data = np.random.uniform(size=dshape).astype(dtype)
     kernel = np.random.uniform(size=kshape).astype(dtype)
-    ref_res = topi.testing.conv1d_transpose_ncw_python(
-        data, kernel, 2, 1, output_padding=(2,))
+    c_np = topi.testing.conv1d_transpose_ncw_python(
+        data, kernel, 2, 1)
+    d_np = np.zeros(shape=oshape)
+    d_np[:,:,0:c_np.shape[2]] = c_np
+    ref_res = d_np

     for target, ctx in ctx_list():
         intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
...
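The reverted tests build their references the same way in 1-D and 2-D: compute the reference result without output padding and copy it into the low-index corner of a zero array of the final output shape. A small helper sketching that pattern for NCW/NCHW layouts (illustrative only, not part of the test suite):

```python
import numpy as np

def pad_reference(c_np, output_padding):
    """Zero-pad a transposed-conv reference on its trailing spatial axes.

    For NCW/NCHW results the spatial axes are the last len(output_padding) axes,
    which is what the d_np[...] = c_np assignments in the tests above do.
    """
    oshape = list(c_np.shape)
    for offset, extra in enumerate(output_padding):
        oshape[c_np.ndim - len(output_padding) + offset] += extra
    d_np = np.zeros(oshape, dtype=c_np.dtype)
    d_np[tuple(slice(0, s) for s in c_np.shape)] = c_np
    return d_np
```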
@@ -27,8 +27,7 @@ from ..util import get_const_tuple, traverse_inline
 from .conv2d_spatial_pack import schedule_conv2d_spatial_pack_nchw

 @autotvm.task.register_topi_compute(conv2d_transpose_nchw, "arm_cpu", "direct")
-def conv2d_transpose_nchw_arm(cfg, Input, Filter, strides, padding, out_dtype,
-                              output_padding=(0, 0)):
+def conv2d_transpose_nchw_arm(cfg, Input, Filter, strides, padding, out_dtype):
     """Transposed 2D convolution nchw forward operator.

     Parameters
@@ -48,33 +47,27 @@ def conv2d_transpose_nchw_arm(cfg, Input, Filter, strides, padding, out_dtype,
     out_dtype: str
         The output data type. This is used for mixed precision.

-    output_padding : tuple of int
-        Used to get the right output shape in gradients
-
     Returns
     -------
     Output : tvm.Tensor
         4-D with shape [batch, out_channel, out_height, out_width]
     """
-    return _decl_spatial_pack(cfg, Input, Filter, strides, padding, "NCHW", out_dtype, 2,
-                              output_padding)
+    return _decl_spatial_pack(cfg, Input, Filter, strides, padding, "NCHW", out_dtype, 2)

-def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, num_tile,
-                       output_padding):
+def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, num_tile):
     assert layout == "NCHW", "Only support NCHW"
     out_dtype = out_dtype or data.dtype
     N, CI, IH, IW = get_const_tuple(data.shape)
     _, CO, KH, KW = get_const_tuple(kernel.shape)
-    opad_h, opad_w = output_padding

     pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (KH, KW))
-    bpad_top, bpad_bottom = KH - 1 - pad_top, KH - 1 - pad_bottom + opad_h
-    bpad_left, bpad_right = KW - 1 - pad_left, KW - 1 - pad_right + opad_w
+    bpad_top, bpad_bottom = KH - 1 - pad_top, KH - 1 - pad_bottom
+    bpad_left, bpad_right = KW - 1 - pad_left, KW - 1 - pad_right
     HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides)
-    OH = (IH - 1) * HSTR - pad_top - pad_bottom + KH + opad_h
-    OW = (IW - 1) * WSTR - pad_left - pad_right + KW + opad_w
+    OH = (IH - 1) * HSTR - pad_top - pad_bottom + KH
+    OW = (IW - 1) * WSTR - pad_left - pad_right + KW

     dilated_input = dilate(data, [1, 1, HSTR, WSTR])
     data_pad = pad(dilated_input, [0, 0, bpad_top, bpad_left], [0, 0, bpad_bottom, bpad_right])
...
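For reference, the back-padding and output-size arithmetic that `_decl_spatial_pack` is left with after the revert, written out as plain Python (a sketch of the formulas, not the TOPI code itself):

```python
def conv2d_transpose_shapes(IH, IW, KH, KW, strides, pads):
    """Back padding around the dilated input and the resulting output size.

    pads is (pad_top, pad_left, pad_bottom, pad_right), as returned by get_pad_tuple.
    """
    HSTR, WSTR = strides
    pad_top, pad_left, pad_bottom, pad_right = pads
    # zeros added around the dilated input before the stride-1 convolution;
    # no "+ opad_h" / "+ opad_w" terms any more
    bpads = (KH - 1 - pad_top, KH - 1 - pad_bottom,
             KW - 1 - pad_left, KW - 1 - pad_right)
    OH = (IH - 1) * HSTR - pad_top - pad_bottom + KH
    OW = (IW - 1) * WSTR - pad_left - pad_right + KW
    return bpads, (OH, OW)

# e.g. a 32x32 input, 4x4 kernel, stride 2, padding 1 on every side:
assert conv2d_transpose_shapes(32, 32, 4, 4, (2, 2), (1, 1, 1, 1)) == \
    ((2, 2, 2, 2), (64, 64))
```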
@@ -23,7 +23,7 @@ from .. import nn, generic
 from ..util import get_const_tuple, traverse_inline

 @autotvm.task.register_topi_compute(nn.conv1d_transpose_ncw, ['cuda', 'gpu'], "direct")
-def conv1d_transpose_ncw_cuda(cfg, data, kernel, stride, padding, out_dtype, output_padding=(0,)):
+def conv1d_transpose_ncw_cuda(cfg, data, kernel, stride, padding, out_dtype):
     """Transposed 1D convolution ncw forward operator.

     Parameters
@@ -53,11 +53,10 @@ def conv1d_transpose_ncw_cuda(cfg, data, kernel, stride, padding, out_dtype, out
     cfg.stride = stride
     batch, inp_channels, inp_width = get_const_tuple(data.shape)
     _, out_channels, kernel_size = get_const_tuple(kernel.shape)
-    opad = output_padding[0]
     pad_left, pad_right = nn.get_pad_tuple1d(padding, kernel_size)
-    out_width = (inp_width - 1) * stride + kernel_size - pad_left - pad_right + opad
+    out_width = (inp_width - 1) * stride + kernel_size - pad_left - pad_right
     pad_left = kernel_size - 1 - pad_left
-    pad_right = kernel_size - 1 - pad_right + opad
+    pad_right = kernel_size - 1 - pad_right
     dilated_width = stride * (inp_width - 1) + 1
     data = tvm.compute(
         (batch, inp_channels, pad_left + dilated_width + pad_right),
...
@@ -25,8 +25,7 @@ from ..util import equal_const_int, get_const_tuple, traverse_inline

 @autotvm.task.register_topi_compute(nn.conv2d_transpose_nchw, ['cuda', 'gpu'], "direct")
-def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype,
-                               output_padding=(0, 0)):
+def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype):
     """Transposed 2D convolution nchw forward operator.

     Parameters
@@ -52,7 +51,6 @@ def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype,
     batch, in_c, in_h, in_w = get_const_tuple(Input.shape)
     _, out_c, filter_h, filter_w = get_const_tuple(Filter.shape)
     stride_h, stride_w = strides
-    opad_h, opad_w = output_padding

     # attach stride info to config, this is used in schedule space definition
     cfg.stride = strides
@@ -60,9 +58,9 @@ def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype,
     # padding stage
     fpad_top, fpad_left, fpad_bottom, fpad_right = nn.get_pad_tuple(padding, (filter_h, filter_w))
     bpad_top = filter_h - 1 - fpad_top
-    bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
+    bpad_bottom = filter_h - 1 - fpad_bottom
     bpad_left = filter_w - 1 - fpad_left
-    bpad_right = filter_w - 1 - fpad_right + opad_w
+    bpad_right = filter_w - 1 - fpad_right

     # padding stage
     FirstPad = nn.pad(Input,
@@ -97,8 +95,8 @@ def conv2d_transpose_nchw_cuda(cfg, Input, Filter, strides, padding, out_dtype,
             return data(*index_tuple)

     # convolution stage
-    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h + opad_h
-    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad_w
+    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
+    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
     dc = tvm.reduce_axis((0, in_c), name='dc')
     dh = tvm.reduce_axis((0, filter_h), name='dh')
     dw = tvm.reduce_axis((0, filter_w), name='dw')
...
@@ -25,8 +25,7 @@ from .util import get_pad_tuple1d

 @tvm.target.generic_func
-def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype,
-                         output_padding=(0,)):
+def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype):
     """Transposed 1D convolution ncw forward operator.

     Parameters
@@ -57,12 +56,11 @@ def conv1d_transpose_ncw(data, kernel, stride, padding, out_dtype,
     stride = stride[0]
     batch, channels_in, data_width = data.shape
     _, channels_out, kernel_width = kernel.shape
-    opad = output_padding[0]
     channels_out = simplify(channels_out)
     data = dilate(data, [1, 1, stride], name='data_dilate')
     pad_left, pad_right = get_pad_tuple1d(padding, (kernel_width,))
     pad_left = kernel_width - 1 - pad_left
-    pad_right = kernel_width - 1 - pad_right + opad
+    pad_right = kernel_width - 1 - pad_right
     data = pad(data, [0, 0, pad_left], [0, 0, pad_right], name='data_pad')

     # transpose kernel, switch kernel layout to IOW
...
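The generic `conv1d_transpose_ncw` above reduces to dilate, back-pad, then a stride-1 convolution with a spatially flipped, channel-swapped kernel. A NumPy transcription of that pipeline, assuming symmetric integer padding (a sketch, not the TOPI implementation):

```python
import numpy as np

def conv1d_transpose_ncw_np(data, kernel, stride, pad):
    """data: (N, CI, W), kernel: (CI, CO, KW); symmetric integer padding assumed."""
    n, ci, w = data.shape
    _, co, kw = kernel.shape
    # dilate: insert (stride - 1) zeros between input elements
    dil = np.zeros((n, ci, (w - 1) * stride + 1), data.dtype)
    dil[:, :, ::stride] = data
    # back-pad with (KW - 1 - pad) zeros on each side (no output_padding term)
    bpad = kw - 1 - pad
    x = np.pad(dil, ((0, 0), (0, 0), (bpad, bpad)), mode="constant")
    # stride-1 convolution with the flipped, IOW -> OIW transposed kernel
    k = kernel[:, :, ::-1].transpose(1, 0, 2)
    out_w = x.shape[2] - kw + 1
    out = np.zeros((n, co, out_w), data.dtype)
    for i in range(out_w):
        out[:, :, i] = np.einsum('ncw,ocw->no', x[:, :, i:i + kw], k)
    return out
```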
@@ -26,7 +26,7 @@ from ..util import simplify

 @tvm.target.generic_func
-def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype, output_padding=(0, 0)):
+def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype):
     """Transposed 2D convolution nchw forward operator.

     Parameters
@@ -46,33 +46,28 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding, out_dtype, output_pad
     out_dtype : str
         The output data type. This is used for mixed precision.

-    output_padding : tuple of ints
-        Used to get the right output shape for gradients
-
     Returns
     -------
     Output : tvm.Tensor
         4-D with shape [batch, out_channel, out_height, out_width]
     """
-    return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype,
-                                             output_padding=output_padding)
+    return declaration_conv2d_transpose_impl(Input, Filter, strides, padding, out_dtype)

-def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype, output_padding):
+def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype):
     """Preprocess data and kernel to make the compute pattern
        of conv2d_transpose the same as conv2d"""
     batch, in_c, in_h, in_w = data.shape
     _, out_c, filter_h, filter_w = kernel.shape
     stride_h, stride_w = strides
-    opad_h, opad_w = output_padding
     # dilate data
     data_dilate = dilate(data, [1, 1, stride_h, stride_w], name='data_dilate')
     # pad data
     fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w))
     bpad_top = filter_h - 1 - fpad_top
-    bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
+    bpad_bottom = filter_h - 1 - fpad_bottom
     bpad_left = filter_w - 1 - fpad_left
-    bpad_right = filter_w - 1 - fpad_right + opad_w
+    bpad_right = filter_w - 1 - fpad_right
     data_pad = pad(data_dilate, \
                    [0, 0, bpad_top, bpad_left], \
                    [0, 0, bpad_bottom, bpad_right], \
@@ -84,17 +79,18 @@ def conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype,
     return data_pad, kernel_transform

-def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype, output_padding):
+def declaration_conv2d_transpose_impl(data, kernel, strides, padding, out_dtype):
     """Implementation of conv2d transpose"""
     data_pad, kernel_transform = \
-        conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype, output_padding)
+        conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype)
     batch, in_c, in_h, in_w = data_pad.shape
     out_c, _, filter_h, filter_w = kernel_transform.shape
+    stride_h, stride_w = strides

     # convolution stage
     out_c = simplify(out_c)
-    out_h = simplify(in_h - filter_h + 1 + output_padding[0])
-    out_w = simplify(in_w - filter_w + 1 + output_padding[1])
+    out_h = simplify(in_h - filter_h + 1)
+    out_w = simplify(in_w - filter_w + 1)
     dc = tvm.reduce_axis((0, in_c), name='dc')
     dh = tvm.reduce_axis((0, filter_h), name='dh')
     dw = tvm.reduce_axis((0, filter_w), name='dw')
...
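After `conv2d_transpose_nchw_preprocess`, the stride and padding are already folded into `data_pad`, so the stride-1 "valid" convolution output is simply `in_h - filter_h + 1`. A quick arithmetic check of that identity with made-up sizes (not code from this commit):

```python
# Hypothetical sizes, only to check the algebra used by declaration_conv2d_transpose_impl.
IH, KH, stride_h, pad_top, pad_bottom = 16, 4, 2, 1, 1

dilated_h = (IH - 1) * stride_h + 1
padded_h = dilated_h + (KH - 1 - pad_top) + (KH - 1 - pad_bottom)
out_h_from_padded = padded_h - KH + 1                            # stride-1 "valid" conv
out_h_direct = (IH - 1) * stride_h - pad_top - pad_bottom + KH   # closed form
assert out_h_from_padded == out_h_direct == 32
```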
@@ -21,7 +21,7 @@ import scipy
 import topi
 from topi.nn.util import get_pad_tuple1d

-def conv1d_transpose_ncw_python(a_np, w_np, stride, padding, output_padding):
+def conv1d_transpose_ncw_python(a_np, w_np, stride, padding):
     """Transposed 1D convolution operator in NCW layout.

     Parameters
@@ -47,7 +47,6 @@ def conv1d_transpose_ncw_python(a_np, w_np, stride, padding, output_padding):
     """
     batch, in_c, in_w = a_np.shape
     _, out_c, filter_w = w_np.shape
-    opad = output_padding[0]
     if isinstance(stride, int):
         stride_w = stride
     else:
@@ -57,11 +56,11 @@ def conv1d_transpose_ncw_python(a_np, w_np, stride, padding, output_padding):
     dilated_a_np = topi.testing.dilate_python(a_np, [1, 1, stride_w])
     # padding stage
     bpad_left = filter_w - 1 - fpad_left
-    bpad_right = filter_w - 1 - fpad_right + opad
+    bpad_right = filter_w - 1 - fpad_right
     padded_a_np = np.zeros((batch, in_c, dilated_a_np.shape[2]+bpad_left+bpad_right))
     padded_a_np[:, :, bpad_left:dilated_a_np.shape[2]+bpad_left] = dilated_a_np
     # convolution stage
-    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad
+    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
     b_np = np.zeros((batch, out_c, out_w))
     for n in range(batch):
         for f in range(out_c):
...
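With the `output_padding` argument gone, the 1-D reference helper takes four arguments and its output width follows the usual transposed-convolution formula. A usage sketch with hypothetical shapes:

```python
import numpy as np
import topi.testing

a = np.random.uniform(size=(1, 3, 8)).astype("float32")   # NCW input
w = np.random.uniform(size=(3, 16, 4)).astype("float32")  # IOW weight
b = topi.testing.conv1d_transpose_ncw_python(a, w, 2, 1)  # stride=2, padding=1

# out_w = (in_w - 1) * stride - 2 * padding + kernel = 7 * 2 - 2 + 4 = 16
assert b.shape == (1, 16, 16)
```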
@@ -22,7 +22,7 @@ import topi
 from topi.nn.util import get_pad_tuple

-def conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding=(0, 0)):
+def conv2d_transpose_nchw_python(a_np, w_np, stride, padding):
     """Transposed convolution operator in NCHW layout.

     Parameters
@@ -50,22 +50,21 @@ def conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding=(0,
         stride_h = stride_w = stride
     else:
         stride_h, stride_w = stride
-    opad_h, opad_w = output_padding
     # dilate stage
     dilated_a_np = topi.testing.dilate_python(a_np, [1, 1, stride_h, stride_w])
     # padding stage
     fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (filter_h, filter_w))
     bpad_top = filter_h - 1 - fpad_top
-    bpad_bottom = filter_h - 1 - fpad_bottom + opad_h
+    bpad_bottom = filter_h - 1 - fpad_bottom
     bpad_left = filter_w - 1 - fpad_left
-    bpad_right = filter_w - 1 - fpad_right + opad_w
+    bpad_right = filter_w - 1 - fpad_right
     padded_a_np = np.zeros((batch, in_c, dilated_a_np.shape[2]+bpad_top+bpad_bottom, \
                             dilated_a_np.shape[3]+bpad_left+bpad_right))
     padded_a_np[:, :, bpad_top:dilated_a_np.shape[2]+bpad_top, \
                 bpad_left:dilated_a_np.shape[3]+bpad_left] = dilated_a_np
     # convolution stage
-    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h + opad_h
-    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w + opad_w
+    out_h = (in_h - 1) * stride_h - fpad_top - fpad_bottom + filter_h
+    out_w = (in_w - 1) * stride_w - fpad_left - fpad_right + filter_w
     b_np = np.zeros((batch, out_c, out_h, out_w))
     for n in range(batch):
         for f in range(out_c):
@@ -76,8 +75,7 @@ def conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding=(0,
     return b_np

-def conv2d_transpose_nhwc_python(a_nhwc, weight, weight_format, stride, padding,
-                                 output_padding=(0, 0)):
+def conv2d_transpose_nhwc_python(a_nhwc, weight, weight_format, stride, padding):
     """Transposed convolution operator in NHWC layout.

     Parameters
@@ -119,7 +117,6 @@ def conv2d_transpose_nhwc_python(a_nhwc, weight, weight_format, stride, padding,
     else:
         raise ValueError('Valid weight_formats are HWIO, HWOI, OIHW or IOHW')

-    res_nchw = conv2d_transpose_nchw_python(a_nchw, w_iohw, stride, padding,
-                                            output_padding=output_padding)
+    res_nchw = conv2d_transpose_nchw_python(a_nchw, w_iohw, stride, padding)
     res_nhwc = np.transpose(res_nchw, (0, 2, 3, 1))
     return res_nhwc
@@ -28,9 +28,9 @@ from .conv2d import _declaration_conv_impl, \

 @autotvm.register_topi_compute(conv2d_transpose_nchw, 'cpu', ['direct'])
-def _conv2d_transpose_nchw(cfg, data, kernel, strides, padding, out_dtype, output_padding=(0, 0)):
+def _conv2d_transpose_nchw(cfg, data, kernel, strides, padding, out_dtype):
     data_pad, kernel_transform = \
-        conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype, output_padding)
+        conv2d_transpose_nchw_preprocess(data, kernel, strides, padding, out_dtype)

     # reuse conv2d implementation
     _create_tuning_space_conv2d(cfg, data_pad, kernel_transform, strides=(1, 1), \
                                 padding=(0, 0), dilation=(1, 1), layout="NCHW")
...
@@ -37,7 +37,7 @@ def verify_conv1d_transpose_ncw(batch, in_channel, in_size, num_filter, kernel,
     def get_ref_data():
         a_np = np.random.uniform(size=a_shape).astype(dtype)
         w_np = np.random.uniform(size=w_shape).astype(dtype)
-        b_np = topi.testing.conv1d_transpose_ncw_python(a_np, w_np, stride, padding, (0,))
+        b_np = topi.testing.conv1d_transpose_ncw_python(a_np, w_np, stride, padding)
         c_np = np.maximum(b_np, 0)
         return a_np, w_np, b_np, c_np
...
@@ -24,7 +24,7 @@ from topi.util import get_const_tuple
 from common import get_all_backend

-def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding, output_padding):
+def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel, stride, padding):
     in_height = in_width = in_size
     A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
@@ -38,7 +38,7 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
     def get_ref_data():
         a_np = np.random.uniform(size=a_shape).astype(dtype)
         w_np = np.random.uniform(size=w_shape).astype(dtype)
-        b_np = topi.testing.conv2d_transpose_nchw_python(a_np, w_np, stride, padding, output_padding)
+        b_np = topi.testing.conv2d_transpose_nchw_python(a_np, w_np, stride, padding)
         c_np = np.maximum(b_np, 0)
         return a_np, w_np, b_np, c_np
@@ -51,7 +51,7 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
             return
         print("Running on target: %s" % device)
         with tvm.target.create(device):
-            B = topi.nn.conv2d_transpose_nchw(A, W, [stride, stride], [padding, padding], A.dtype, output_padding)
+            B = topi.nn.conv2d_transpose_nchw(A, W, [stride, stride], [padding, padding], A.dtype)
             C = topi.nn.relu(B)
             s1 = topi.generic.schedule_conv2d_transpose_nchw([B])
             s2 = topi.generic.schedule_conv2d_transpose_nchw([C])
@@ -72,13 +72,11 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
 def test_conv2d_transpose_nchw():
-    verify_conv2d_transpose_nchw(1, 3, 224, 32, 3, 1, 0, (0, 0))
-    verify_conv2d_transpose_nchw(1, 3, 224, 32, 3, 2, 1, (0, 0))
-    verify_conv2d_transpose_nchw(1, 3, 224, 32, 3, 2, 1, (1, 0))
-    verify_conv2d_transpose_nchw(1, 3, 224, 32, 2, 2, 0, (0, 0))
-    verify_conv2d_transpose_nchw(1, 3, 224, 32, 2, 2, 0, (1, 1))
-    verify_conv2d_transpose_nchw(1, 32, 32, 128, 5, 1, 0, (0, 0))
-    verify_conv2d_transpose_nchw(1, 32, 32, 128, 5, 2, 1, (0, 0))
+    verify_conv2d_transpose_nchw(1, 3, 224, 32, 3, 1, 0)
+    verify_conv2d_transpose_nchw(1, 3, 224, 32, 3, 2, 1)
+    verify_conv2d_transpose_nchw(1, 3, 224, 32, 2, 2, 0)
+    verify_conv2d_transpose_nchw(1, 32, 32, 128, 5, 1, 0)
+    verify_conv2d_transpose_nchw(1, 32, 32, 128, 5, 2, 1)

 if __name__ == "__main__":
...
@@ -27,28 +27,24 @@ from topi.nn.util import get_pad_tuple
 from ..environment import get_env

 @autotvm.register_topi_compute(topi.nn.conv2d_transpose_nchw, 'vta', 'direct')
-def _declaration_conv2d_transpose(cfg,
+def _declatation_conv2d_transpose(cfg,
                                   data,
                                   kernel,
                                   strides,
                                   padding,
-                                  out_dtype,
-                                  output_padding=(0, 0)):
+                                  out_dtype):
     ishape = get_const_tuple(data.shape)
     kshape = get_const_tuple(kernel.shape)
     b, c_i, i_h, i_w, t_b, t_ci = ishape
     c_o, _, k_h, k_w, t_co, t_ci = kshape
     stride_h, stride_w = strides
-    opad_h, opad_w = output_padding
-    # FIXME(tmoreau89): currently IR pass breaks when output padding != (0,0)
-    assert opad_h == 0 and opad_w == 0, "VTA does not support output padding for now"

     # derive padding parameters
     fpad_top, fpad_left, fpad_bottom, fpad_right = get_pad_tuple(padding, (k_h, k_w))
     bpad_top = k_h - 1 - fpad_top
-    bpad_bottom = k_h - 1 - fpad_bottom + opad_h
+    bpad_bottom = k_h - 1 - fpad_bottom
     bpad_left = k_w - 1 - fpad_left
-    bpad_right = k_w - 1 - fpad_right + opad_w
+    bpad_right = k_w - 1 - fpad_right

     # padding stage
     dilated_input = topi.nn.dilate(data, [1, 1, stride_h, stride_w, 1, 1])
@@ -57,8 +53,8 @@ def _declaration_conv2d_transpose(cfg,
                               [0, 0, bpad_bottom, bpad_right, 0, 0])

     # convolution transpose stage
-    out_h = (i_h - 1) * stride_h - fpad_top - fpad_bottom + k_h + opad_h
-    out_w = (i_w - 1) * stride_w - fpad_left - fpad_right + k_w + opad_w
+    out_h = (i_h - 1) * stride_h - fpad_top - fpad_bottom + k_h
+    out_w = (i_w - 1) * stride_w - fpad_left - fpad_right + k_w
     oshape = (b, c_o, out_h, out_w, t_b, t_co)
     d_c = tvm.reduce_axis((0, c_i), name='d_c')
     d_h = tvm.reduce_axis((0, k_h), name='d_h')
...
@@ -33,15 +33,13 @@ env = vta.get_env()
 Workload = namedtuple("Conv2DTransposeWorkload",
                       ['batch', 'height', 'width', 'in_filter', 'out_filter',
-                       'hkernel', 'wkernel', 'hpad', 'wpad', 'hstride', 'wstride',
-                       'o_hpad', 'o_wpad'])
+                       'hkernel', 'wkernel', 'hpad', 'wpad', 'hstride', 'wstride'])

-# DCGAN workloads
 dcgan_wkls = [
     # dcgan
-    ('DCGAN.CT1', Workload(env.BATCH, 4, 4, 1024, 512, 4, 4, 1, 1, 2, 2, 0, 0)),
-    ('DCGAN.CT2', Workload(env.BATCH, 8, 8, 512, 256, 4, 4, 1, 1, 2, 2, 0, 0)),
-    ('DCGAN.CT3', Workload(env.BATCH, 16, 16, 256, 128, 4, 4, 1, 1, 2, 2, 0, 0)),
+    ('DCGAN.CT1', Workload(env.BATCH, 4, 4, 1024, 512, 4, 4, 1, 1, 2, 2)),
+    ('DCGAN.CT2', Workload(env.BATCH, 8, 8, 512, 256, 4, 4, 1, 1, 2, 2)),
+    ('DCGAN.CT3', Workload(env.BATCH, 16, 16, 256, 128, 4, 4, 1, 1, 2, 2)),
 ]

 @tvm.tag_scope(tag=topi.tag.ELEMWISE)
@@ -53,7 +51,7 @@ def my_clip(x, a_min, a_max):
     x = tvm.compute(x.shape, lambda *i: tvm.max(x(*i), const_min), name="clipB")
     return x

-def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding, opadding):
+def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding):
     data_shape = (N//env.BATCH, CI//env.BLOCK_IN, H, W, env.BATCH, env.BLOCK_IN)
     kernel_shape = (CO//env.BLOCK_OUT, CI//env.BLOCK_IN, KH, KW, env.BLOCK_OUT, env.BLOCK_IN)
@@ -66,9 +64,7 @@ def conv2d_transpose(N, CI, H, W, CO, KH, KW, strides, padding, opadding):
             Filter=kernel,
             strides=strides,
             padding=padding,
-            out_dtype=env.acc_dtype,
-            output_padding=opadding
-        )
+            out_dtype=env.acc_dtype)
         res = topi.right_shift(res, env.WGT_WIDTH)
         res = my_clip(res, 0, (1 << env.OUT_WIDTH - 1) - 1)
         res = topi.cast(res, env.out_dtype)
@@ -113,12 +109,11 @@ if __name__ == '__main__':
         KW = wl.wkernel
         strides = (wl.hstride, wl.wstride)
         padding = (wl.hpad, wl.wpad)
-        opadding = (wl.o_hpad, wl.o_wpad)

         # Create task
         task = autotvm.task.create(
             conv2d_transpose,
-            args=(N, CI, H, W, CO, KH, KW, strides, padding, opadding),
+            args=(N, CI, H, W, CO, KH, KW, strides, padding),
             target=tvm.target.vta(),
             target_host=env.target_host,
             template_key='direct')
...
@@ -37,8 +37,7 @@ from vta.testing import simulator
 Workload = namedtuple("Conv2DTransposeWorkload",
                       ['batch', 'height', 'width', 'in_filter', 'out_filter',
-                       'hkernel', 'wkernel', 'hpad', 'wpad', 'hstride', 'wstride',
-                       'o_hpad', 'o_wpad'])
+                       'hkernel', 'wkernel', 'hpad', 'wpad', 'hstride', 'wstride'])

 # Get batch info from env
 env = vta.get_env()
@@ -46,9 +45,9 @@ env = vta.get_env()
 # DCGAN workloads
 dcgan_wklds = [
     # dcgan
-    ('DCGAN.CT1', Workload(env.BATCH, 4, 4, 1024, 512, 4, 4, 1, 1, 2, 2, 0, 0)),
-    ('DCGAN.CT2', Workload(env.BATCH, 8, 8, 512, 256, 4, 4, 1, 1, 2, 2, 0, 0)),
-    ('DCGAN.CT3', Workload(env.BATCH, 16, 16, 256, 128, 4, 4, 1, 1, 2, 2, 0, 0)),
+    ('DCGAN.CT1', Workload(env.BATCH, 4, 4, 1024, 512, 4, 4, 1, 1, 2, 2)),
+    ('DCGAN.CT2', Workload(env.BATCH, 8, 8, 512, 256, 4, 4, 1, 1, 2, 2)),
+    ('DCGAN.CT3', Workload(env.BATCH, 16, 16, 256, 128, 4, 4, 1, 1, 2, 2)),
 ]

 # FIXME: we need a custom clip operator to circumvent a pattern detection limitation
@@ -103,8 +102,7 @@ def run_conv2d_transpose(env, remote, wl, target,
     # Define base computation schedule
     with target:
         res = topi.nn.conv2d_transpose_nchw(
-            data, kernel, (wl.hstride, wl.wstride),
-            (wl.hpad, wl.wpad), env.acc_dtype, (wl.o_hpad, wl.o_wpad))
+            data, kernel, (wl.hstride, wl.wstride), (wl.hpad, wl.wpad), env.acc_dtype)
         res = topi.right_shift(res, env.WGT_WIDTH)
         res = my_clip(res, 0, (1 << env.OUT_WIDTH - 1) - 1)
         res = topi.cast(res, env.out_dtype)
@@ -114,8 +112,8 @@ def run_conv2d_transpose(env, remote, wl, target,
     print(vta.lower(s, [data, kernel, res], simple_mode=True))

     # Derive number of ops
-    fout_height = (wl.height - 1) * wl.hstride - 2 * wl.hpad + wl.hkernel + wl.o_hpad
-    fout_width = (wl.width - 1) * wl.wstride - 2 * wl.wpad + wl.wkernel + wl.o_wpad
+    fout_height = (wl.height - 1) * wl.hstride - 2 * wl.hpad + wl.hkernel
+    fout_width = (wl.width - 1) * wl.wstride - 2 * wl.wpad + wl.wkernel
     num_ops = 2 * wl.batch * fout_height * fout_width * wl.hkernel * wl.wkernel * wl.out_filter * wl.in_filter

     # @memoize("vta.tests.test_benchmark_topi.conv2d.verify_nhwc")
...
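The derived output size and op count in the benchmark follow the same formula; for the DCGAN.CT1 workload they work out as below (a sanity-check sketch, batch taken as 1 for the arithmetic):

```python
# DCGAN.CT1: 4x4 input, 1024 -> 512 channels, 4x4 kernel, pad 1, stride 2
batch, H, W, CI, CO, KH, KW, pad, stride = 1, 4, 4, 1024, 512, 4, 4, 1, 2

fout_height = (H - 1) * stride - 2 * pad + KH   # = 8
fout_width = (W - 1) * stride - 2 * pad + KW    # = 8
num_ops = 2 * batch * fout_height * fout_width * KH * KW * CO * CI

assert (fout_height, fout_width) == (8, 8)
assert num_ops == 1073741824                     # ~1.07 GFLOP per image
```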