Commit 76b79671 by Cody Hao Yu Committed by Wuwei Lin

[TOPI][CUDA] Fix Winograd Kernel Size Support (#4276)

* fix_winograd_cuda_kernel_size

* add unit test
parent 5bcd3313
......@@ -18,6 +18,7 @@
"""
import numpy as np
import tvm
from tvm import autotvm
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import ctx_list
......@@ -174,6 +175,76 @@ def test_conv2d_run():
run_test_conv2d("float32", "float32", 1, dshape, kshape,
padding=(1, 1), channels=10, kernel_size=(3 ,3), dilation=(3, 3))
def test_conv2d_winograd():
class WinogradFallback(autotvm.FallbackContext):
def _query_inside(self, target, workload):
key = (target, workload)
if key in self.memory:
return self.memory[key]
cfg = autotvm.task.space.FallbackConfigEntity()
cfg.template_key = 'winograd'
cfg.is_fallback = False
cfg['tile_b'] = autotvm.task.space.SplitEntity([-1, 1, 1, 1])
cfg['tile_y'] = autotvm.task.space.SplitEntity([-1, 1, 1, 1])
cfg['tile_x'] = autotvm.task.space.SplitEntity([-1, 1, 1, 1])
cfg['tile_rc'] = autotvm.task.space.SplitEntity([-1, 1])
cfg['auto_unroll_max_setp'] = autotvm.task.space.OtherOptionEntity(1500)
cfg['unroll_explicit'] = autotvm.task.space.OtherOptionEntity(1)
self.memory[key] = cfg
return cfg
def run_test_conv2d_cuda(dtype, out_dtype, scale, dshape, kshape,
padding=(1, 1),
groups=1,
dilation=(1, 1),
**attrs):
x = relay.var("x", shape=dshape, dtype=dtype)
w = relay.var("w", shape=kshape, dtype=dtype)
y = relay.nn.conv2d(x, w,
padding=padding,
dilation=dilation,
groups=groups,
**attrs)
func = relay.Function([x, w], y)
mod = relay.Module()
mod['main'] = func
mod = relay.transform.InferType()(mod)
data = np.random.uniform(-scale, scale, size=dshape).astype(dtype)
kernel = np.random.uniform(-scale, scale, size=kshape).astype(dtype)
ref_res = topi.testing.conv2d_nchw_python(
data.astype(out_dtype), kernel.astype(out_dtype), 1, padding,
groups=groups)
with WinogradFallback(), relay.build_config(opt_level=3):
for target, ctx in ctx_list():
if target != 'cuda':
continue
params = {'w': tvm.nd.array(kernel)}
graph, lib, params = relay.build_module.build(mod, target=target, params=params)
module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
module.set_input('x', tvm.nd.array(data))
module.set_input(**params)
module.run()
op_res1 = module.get_output(0)
tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-3, atol=1e-3)
# normal winograd: stride 1, padding 1, kernel 3x3
dshape = (1, 80, 73, 73)
kshape = (192, 80, 3, 3)
run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
padding=(1, 1), channels=192, kernel_size=(3, 3))
# extended winograd: stride 1, padding N, kernel 3x3
run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
padding=(0, 0), channels=192, kernel_size=(3, 3))
run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
padding=(2, 2), channels=192, kernel_size=(3, 3))
# extended winograd: stride 1, padding N, kernel NxN
kshape = (192, 80, 7, 7)
run_test_conv2d_cuda("float32", "float32", 1, dshape, kshape,
padding=(2, 2), channels=192, kernel_size=(7, 7))
def test_conv2d_transpose_infer_type():
# symbolic in batch dimension
......@@ -702,6 +773,7 @@ if __name__ == "__main__":
test_conv2d_transpose_infer_type()
test_conv2d_transpose_run()
test_conv2d_run()
test_conv2d_winograd()
test_bitserial_conv2d_infer_type()
test_batch_flatten()
test_upsampling()
......
......@@ -55,12 +55,13 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, layout, out_dty
if dilation_h != 1 or dilation_w != 1:
kernel = dilation(kernel, (1, 1, dilation_h, dilation_w))
CO, CI, KH, KW = get_const_tuple(kernel.shape)
alpha = KW + tile_size - 1
assert HSTR == 1 and WSTR == 1 and KH == KW
else:
# kernel tensor is pre-transfomred. this op is created by alter op layout.
# dilation is not supported
_, _, CI, CO = get_const_tuple(kernel.shape)
KH = KW = 3
alpha, _, CI, CO = get_const_tuple(kernel.shape)
KH = KW = alpha + 1 - tile_size
assert HSTR == 1 and WSTR == 1 and dilation_h == 1 and dilation_w == 1
HPAD, WPAD, _, _ = nn.get_pad_tuple(padding, kernel)
......@@ -68,7 +69,6 @@ def winograd_cuda(cfg, data, kernel, strides, padding, dilation, layout, out_dty
r = KW
m = tile_size
alpha = m + r - 1
A, B, G = winograd_transform_matrices(m, r, out_dtype)
H = (H + 2 * HPAD - KH) // HSTR + 1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment