Commit 5a8ab8fe by hlu1 Committed by Lianmin Zheng

[TOPI][ARM] Improve injective schedule (#2801)

parent fa709832
...@@ -4,3 +4,4 @@ from . import conv2d ...@@ -4,3 +4,4 @@ from . import conv2d
from . import depthwise_conv2d from . import depthwise_conv2d
from . import conv2d_transpose from . import conv2d_transpose
from . import bitserial_conv2d from . import bitserial_conv2d
from . import injective
# pylint: disable=invalid-name, unused-variable
"""Schedule for injective operators"""
import tvm
from .. import generic
@generic.schedule_injective.register(["arm_cpu"])
def schedule_injective(outs):
    """Build the ARM CPU schedule for an injective op.

    Only the first tensor in ``outs`` is scheduled explicitly: its last
    axis is split with 8 and the inner part is vectorized, then up to
    three of its leading axes (never the last one) are fused and marked
    parallel. Injective stages are auto-inlined in between.

    Parameters
    ----------
    outs: Array of Tensor
        The computation graph description of injective in the format
        of an array of tensors. A single Tensor is wrapped in a list.

    Returns
    -------
    sch: Schedule
        The computation schedule for the op.
    """
    if isinstance(outs, tvm.tensor.Tensor):
        outs = [outs]
    sch = tvm.create_schedule([tensor.op for tensor in outs])

    # Everything below operates on the stage of the first output only.
    stage = sch[outs[0]]
    axes = list(stage.op.axis)

    if axes:
        # Original author's note: do not vectorize for broadcast.
        # The guard skips the split when the op exposes no axes at all.
        _outer, inner = stage.split(axes[-1], 8)
        stage.vectorize(inner)
    tvm.schedule.AutoInlineInjective(sch)

    # Leading axes to run in parallel: all but the last axis, capped at 3.
    #   >= 4 axes -> fuse axes 0..2; 3 axes -> fuse axes 0..1;
    #   2 axes -> axis 0 alone; fewer -> nothing is parallelized.
    n_leading = min(len(axes) - 1, 3)
    if n_leading >= 2:
        stage.parallel(stage.fuse(*axes[:n_leading]))
    elif n_leading == 1:
        stage.parallel(axes[0])
    return sch
...@@ -5,6 +5,8 @@ import topi ...@@ -5,6 +5,8 @@ import topi
import topi.testing import topi.testing
import math import math
from common import get_all_backend
def verify_bilinear_scale(batch, in_channel, in_height, in_width, out_height, out_width, layout='NCHW', align_corners=False): def verify_bilinear_scale(batch, in_channel, in_height, in_width, out_height, out_width, layout='NCHW', align_corners=False):
if layout == 'NCHW': if layout == 'NCHW':
...@@ -40,7 +42,7 @@ def verify_bilinear_scale(batch, in_channel, in_height, in_width, out_height, ou ...@@ -40,7 +42,7 @@ def verify_bilinear_scale(batch, in_channel, in_height, in_width, out_height, ou
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3) tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
for device in ['llvm', 'cuda', 'vulkan', 'nvptx']: for device in get_all_backend():
check_device(device) check_device(device)
def test_resize(): def test_resize():
......
...@@ -5,6 +5,8 @@ import topi ...@@ -5,6 +5,8 @@ import topi
import topi.testing import topi.testing
import math import math
from common import get_all_backend
def verify_upsampling(batch, in_channel, in_height, in_width, scale, layout='NCHW', method="NEAREST_NEIGHBOR"): def verify_upsampling(batch, in_channel, in_height, in_width, scale, layout='NCHW', method="NEAREST_NEIGHBOR"):
...@@ -45,7 +47,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale, layout='NCH ...@@ -45,7 +47,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale, layout='NCH
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5) tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
for device in ['llvm', 'cuda', 'vulkan', 'nvptx']: for device in get_all_backend():
check_device(device) check_device(device)
def test_upsampling(): def test_upsampling():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment