Commit 88daa2bc, authored by Lianmin Zheng, committed by Yizhi Liu

[TOPI] Fix mali conv2d performance regression (#3131)

* [TOPI] fix mali conv

* fix typo

* address comments
parent 48c92376
...@@ -42,7 +42,7 @@ PACKAGE_VERSION = { ...@@ -42,7 +42,7 @@ PACKAGE_VERSION = {
'cuda': "v0.04", 'cuda': "v0.04",
'rocm': "v0.02", 'rocm': "v0.02",
'opencl': "v0.02", 'opencl': "v0.02",
'mali': "v0.04", 'mali': "v0.05",
'vta': "v0.04", 'vta': "v0.04",
} }
......
...@@ -352,9 +352,11 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, layout, out_dt ...@@ -352,9 +352,11 @@ def _decl_winograd(cfg, data, kernel, strides, padding, dilation, layout, out_dt
# unpack output # unpack output
output = tvm.compute((N, CO, H, W), lambda n, co, h, w: output = tvm.compute((N, CO, H, W), lambda n, co, h, w:
Y[co][n * nH * nW + (h//m) * nW + w//m][h % m][w % m] Y[co][n * nH * nW + (h//m) * nW + w//m][h % m][w % m]
# thw following term is used to make the padding effective, # The following hack term is used to make the padding in batch gemm ("M")
# otherwise the padding will be eliminated by bound inference # effective, otherwise the padding will be eliminated by bound inference.
+ tvm.const(0, out_dtype) * M[alpha-1][alpha-1][CO-1][P_round-1], # Use `tvm.expr.Mul` instead of `*` to avoid issues in const folding.
+ tvm.expr.Mul(tvm.const(0, out_dtype),
M[alpha-1][alpha-1][CO-1][P_round-1]),
name='output', tag='winograd_conv2d_output') name='output', tag='winograd_conv2d_output')
# we have to manually assign effective GFLOP for winograd # we have to manually assign effective GFLOP for winograd
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment