Commit 84bd230c by Jian Weng, committed by Tianqi Chen

[FRONTEND] [HYBRID] [TEST] Add GPU shared memory test! (#1338)

parent 50681784
@@ -47,6 +47,15 @@ TVM_REGISTER_API("ir_pass.CanonicalSimplify")
     }
   });

+TVM_REGISTER_API("ir_pass.Substitute")
+.set_body([](TVMArgs args, TVMRetValue *ret) {
+    if (args[0].IsNodeType<Stmt>()) {
+      *ret = Substitute(args[0].operator Stmt(), args[1].operator Map<Var, Expr>());
+    } else {
+      *ret = Substitute(args[0].operator Expr(), args[1].operator Map<Var, Expr>());
+    }
+  });
+
 TVM_REGISTER_API("ir_pass.Equal")
 .set_body([](TVMArgs args, TVMRetValue *ret) {
     if (args[0].IsNodeType<Stmt>()) {
...
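The hunk above registers the existing Substitute pass with the Python FFI, so it becomes callable as tvm.ir_pass.Substitute on either a Stmt or an Expr together with a Var-to-Expr map. The run_and_check helper added below relies on it to bind symbolic shape variables to concrete sizes; a minimal sketch of that use (variable names here are illustrative only):

    import tvm

    n = tvm.var('n')
    # Replace the symbolic extent n with a concrete value, then constant-fold.
    expr = tvm.ir_pass.Substitute(n + 1, {n: 128})
    expr = tvm.ir_pass.Simplify(expr)
    assert expr.value == 129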
-import tvm, inspect, sys, traceback, numpy
+import tvm, inspect, sys, traceback, numpy, nose
 from tvm.hybrid import script
 from tvm.hybrid.intrin import HYBRID_GLOBALS

+@nose.tools.nottest
+def run_and_check(func, args, outs, var_dict={}, target='llvm'):
+    def tvm_val_2_py_val(val):
+        val = tvm.ir_pass.Substitute(val, var_dict)
+        val = tvm.ir_pass.Simplify(val)
+        assert isinstance(val, (tvm.expr.IntImm, tvm.expr.UIntImm))
+        return val.value
+
+    ctx = tvm.context(target, 0)
+    emu_args = []
+    nd_args = []
+    to_check = []
+    for i in args:
+        if isinstance(i, tvm.tensor.Tensor):
+            shape = [tvm_val_2_py_val(j) for j in i.shape]
+            if i in outs:
+                emu_args.append(numpy.zeros(shape).astype(i.dtype))
+                nd_args.append(tvm.nd.array(emu_args[-1], ctx))
+                to_check.append((nd_args[-1], emu_args[-1]))
+            else:
+                emu_args.append(numpy.random.randn(*shape).astype(i.dtype))
+                nd_args.append(tvm.nd.array(emu_args[-1], ctx))
+        else:
+            assert isinstance(i, tvm.expr.Var)
+            emu_args.append(tvm_val_2_py_val(i))
+            nd_args.append(emu_args[-1])
+
+    func(*emu_args)
+
+    lowered_func = tvm.lower(func(*args), args)
+    module = tvm.build(lowered_func, target=target)
+    assert module
+    module(*nd_args)
+
+    for nd, np in to_check:
+        numpy.testing.assert_allclose(nd.asnumpy(), np, rtol=1e-5, atol=1e-5)
+
 @script
 def outer_product(n, m, a, b, c):
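The run_and_check helper above factors out the build-run-compare boilerplate the individual tests used to repeat: it emulates the hybrid script in pure Python, lowers and builds it for the requested target, runs the module, and compares the designated output tensors. A sketch of the pattern the rewritten tests below follow (add_one is a hypothetical script, not part of this commit):

    @script
    def add_one(a, b):
        for i in range(10):
            b[i] = a[i] + 1.0

    a = tvm.placeholder((10, ), dtype='float32', name='a')
    b = tvm.placeholder((10, ), dtype='float32', name='b')
    # b is listed in outs, so the numpy emulation of the script and the
    # built module's result for b are checked against each other.
    run_and_check(add_one, [a, b], [b])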
@@ -45,18 +83,7 @@ def test_outer_product():
     func = tvm.lower(ir, [n, m, a, b, c])
     func = tvm.build(func)

-    _n = 999
-    _m = 1001
-    _a = numpy.random.rand(_n).astype('float32')
-    _b = numpy.random.rand(_m).astype('float32')
-    c_python = numpy.zeros((_n, _m), dtype='float32')
-    outer_product(_n, _m, _a, _b, c_python)
-    tvm_a = tvm.ndarray.array(_a)
-    tvm_b = tvm.ndarray.array(_b)
-    tvm_c = tvm.ndarray.array(numpy.zeros((_n, _m), dtype='float32'))
-    func(_n, _m, tvm_a, tvm_b, tvm_c)
-    numpy.testing.assert_allclose(tvm_c.asnumpy(), c_python, rtol=1e-5)
+    run_and_check(outer_product, [n, m, a, b, c], [c], {n: 999, m: 1001})

     for key, _ in HYBRID_GLOBALS.items():
         assert key not in globals().keys()
@@ -135,19 +162,7 @@ def test_fanout():
     assert len(write.value.args) == 1
     assert write.value.args[0].value == 0

-    func = tvm.build(tvm.lower(ir, [n, a, b]))
-    assert func
-    np_a = numpy.random.randn(10).astype('float32')
-    np_b = numpy.zeros(7).astype('float32')
-    nd_a = tvm.ndarray.array(np_a)
-    nd_b = tvm.ndarray.array(np_b)
-    fanout(10, np_a, np_b)
-    func(10, nd_a, nd_b)
-    numpy.testing.assert_allclose(nd_b.asnumpy(), np_b, rtol=1e-5, atol=1e-5)
+    run_and_check(fanout, [n, a, b], [b], {n: 10})

 @script
@@ -160,7 +175,7 @@ def test_failure():
         tvm.hybrid.parse(failure, [])
     except IOError as err:
         assert sys.version_info[0] == 2
-        print('[Warning] Python2 cannot do the failure case because "%s"' % str(err))
+        print('[Warning] Case test_failure is skipped by Python2 because "%s"' % str(err))
     except Exception as err:
         assert str(err) == 'You CAN NEVER overwrite a loop variable!'
@@ -186,22 +201,7 @@ def test_looptype():
     assert jloop.for_type == tvm.stmt.For.Vectorized
     assert kloop.for_type == tvm.stmt.For.Unrolled

-    func = tvm.build(tvm.lower(ir, [a, b, c]))
-    np_a = numpy.zeros((8, )).astype('int32')
-    np_b = numpy.zeros((8, )).astype('int32')
-    np_c = numpy.zeros((8, )).astype('int32')
-    nd_a = tvm.ndarray.array(np_a)
-    nd_b = tvm.ndarray.array(np_b)
-    nd_c = tvm.ndarray.array(np_c)
-    looptype(np_a, np_b, np_c)
-    func(nd_a, nd_b, nd_c)
-    numpy.testing.assert_allclose(np_a, nd_a.asnumpy())
-    numpy.testing.assert_allclose(np_b, nd_b.asnumpy())
-    numpy.testing.assert_allclose(np_c, nd_c.asnumpy())
+    run_and_check(looptype, [a, b, c], [a, b, c])

 def test_if():
@@ -217,26 +217,13 @@ def test_if():
     a = tvm.placeholder((10, ), dtype='int32', name='a')
     b = tvm.placeholder((10, ), dtype='int32', name='b')

-    ir = if_then_else(a, b)
-    func = tvm.lower(ir, [a, b])
-    func = tvm.build(func)
-    assert func
-    _a = numpy.zeros((10, ), dtype = 'int32')
-    _b = numpy.zeros((10, ), dtype = 'int32')
-    if_then_else(_a, _b)
-    tvm_a = tvm.ndarray.array(numpy.zeros((10, ), dtype='int32'))
-    tvm_b = tvm.ndarray.array(numpy.zeros((10, ), dtype='int32'))
-    func(tvm_a, tvm_b)
-    numpy.testing.assert_allclose(tvm_a.asnumpy(), _a, rtol=1e-5)
-    numpy.testing.assert_allclose(tvm_b.asnumpy(), _b, rtol=1e-5)
-    numpy.testing.assert_allclose(tvm_a.asnumpy(), tvm_b.asnumpy(), rtol=1e-5)
+    run_and_check(if_then_else, [a, b], [a, b])

 def test_bind():
     if not tvm.gpu(0).exist:
-        print('No GPU found! Skip this test!')
+        print('[Warning] No GPU found! Skip bind test!')
         return
     @script
     def vec_add(a, b, c):
@@ -246,24 +233,8 @@ def test_bind():
     a = tvm.placeholder((1000, ), dtype='float32', name='a')
     b = tvm.placeholder((1000, ), dtype='float32', name='b')
     c = tvm.placeholder((1000, ), dtype='float32', name='c')

-    ir = vec_add(a, b, c)
-    func = tvm.lower(ir, [a, b, c])
-    func = tvm.build(func, target = 'cuda')
-    _a = numpy.random.rand(1000).astype('float32')
-    _b = numpy.random.rand(1000).astype('float32')
-    _c = numpy.zeros((1000, ), dtype = 'float32')
-    tvm_a = tvm.ndarray.array(_a, tvm.gpu(0))
-    tvm_b = tvm.ndarray.array(_b, tvm.gpu(0))
-    tvm_c = tvm.ndarray.array(_c, tvm.gpu(0))
-    func(tvm_a, tvm_b, tvm_c)
-    vec_add(_a, _b, _c)
-    numpy.testing.assert_allclose(_c, tvm_c.asnumpy(), rtol=1e-5)
+    run_and_check(vec_add, [a, b, c], [c], target='cuda')

 def test_math_intrin():
     @script
@@ -277,9 +248,9 @@ def test_math_intrin():
         a[6] = min(a[4], a[5])
         a[7] = max(a[5], a[6])

-    a6 = tvm.placeholder((8, ), dtype='float32', name='a')
-    ir = intrin_real(a6)
-    func = tvm.build(tvm.lower(ir, [a6]))
+    a8 = tvm.placeholder((8, ), dtype='float32', name='a')
+    ir = intrin_real(a8)
+    func = tvm.build(tvm.lower(ir, [a8]))
     assert func
     a = numpy.arange(2, 10).astype('float32')
     tvm_a = tvm.ndarray.array(a)
@@ -312,23 +283,12 @@ def test_non_zero():
                         s = s + a[i-di, j-dj]
                 b[i-2, j-2] = s / 9.0

     try:
-        np_a = numpy.random.randn(32, 32).astype('float32')
-        np_b = numpy.zeros((30, 30), dtype='float32')
-        blur(np_a, np_b)
-        ph_a = tvm.placeholder((32, 32), 'float32', 'a')
-        ph_b = tvm.placeholder((30, 30), 'float32', 'b')
-        ir = tvm.hybrid.parse(blur, [ph_a, ph_b])
-        func = tvm.lower(ir, [ph_a, ph_b])
-        func = tvm.build(func)
-        nd_a = tvm.ndarray.array(np_a)
-        nd_b = tvm.ndarray.array(np_b)
-        func(nd_a, nd_b)
-        numpy.testing.assert_allclose(np_b, nd_b.asnumpy(), atol=1e-5, rtol=1e-5)
-    except IOError:
-        print('[Warning] Non-zero first test skipped by Python2')
+        a = tvm.placeholder((32, 32), 'float32', 'a')
+        b = tvm.placeholder((30, 30), 'float32', 'b')
+        run_and_check(blur, [a, b], [b])
+    except IOError as err:
+        assert sys.version_info[0] == 2
+        print('[Warning] Case test_non_zero is skipped by Python2 because "%s"' % str(err))

     @tvm.hybrid.script
     def triangle(a, b, c):
@@ -340,20 +300,7 @@ def test_non_zero():
     b = tvm.placeholder((10, ), dtype='float32', name='b')
     c = tvm.placeholder((10, 10), dtype='float32', name='c')

-    np_a = numpy.random.randn(10).astype('float32')
-    np_b = numpy.random.randn(10).astype('float32')
-    np_c = numpy.zeros((10, 10)).astype('float32')
-    nd_a = tvm.ndarray.array(np_a)
-    nd_b = tvm.ndarray.array(np_b)
-    nd_c = tvm.ndarray.array(np_c)
-    triangle(np_a, np_b, np_c)
-    func = tvm.build(tvm.lower(triangle(a, b, c), [a, b, c]))
-    assert func
-    func(nd_a, nd_b, nd_c)
-    numpy.testing.assert_allclose(nd_c.asnumpy(), np_c)
+    run_and_check(triangle, [a, b, c], [c])

 def test_allocate():
     @tvm.hybrid.script
@@ -369,19 +316,27 @@ def test_allocate():
     a = tvm.placeholder((32, 32), 'float32', 'a')
     b = tvm.placeholder((30, 30), 'float32', 'b')

-    func = tvm.build(tvm.lower(blur2d(a, b), [a, b]))
-    assert func
-    np_a = numpy.random.randn(32, 32).astype('float32')
-    np_b = numpy.zeros((30, 30)).astype('float32')
-    nd_a = tvm.ndarray.array(np_a)
-    nd_b = tvm.ndarray.array(np_b)
-    func(nd_a, nd_b)
-    blur2d(np_a, np_b)
-    numpy.testing.assert_allclose(nd_b.asnumpy(), np_b, atol=1e-5, rtol=1e-5)
+    run_and_check(blur2d, [a, b], [b])
+
+    if tvm.gpu().exist:
+        @tvm.hybrid.script
+        def share_vec_add(a, b, c):
+            shared = allocate((256, ), 'float32', 'shared')
+            for i in bind("threadIdx.x", 256):
+                shared[i] = a[i]
+            local = allocate((256, ), 'float32', 'local')
+            for i in bind("threadIdx.x", 256):
+                local[i] = b[i]
+            for i in bind("threadIdx.x", 256):
+                c[i] = shared[i] + local[i]
+
+        a = tvm.placeholder((256, ), dtype='float32', name='a')
+        b = tvm.placeholder((256, ), dtype='float32', name='b')
+        c = tvm.placeholder((256, ), dtype='float32', name='c')
+        run_and_check(share_vec_add, [a, b, c], [c], target='cuda')
+    else:
+        print('[Warning] No GPU found! Skip shared mem test!')

 if __name__ == "__main__":
     test_outer_product()
...
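The new share_vec_add case is the GPU shared memory test named in the commit title: allocate((256, ), 'float32', 'shared') requests a buffer in CUDA shared memory, the 'local' allocation requests per-thread storage, and bind("threadIdx.x", 256) maps each loop onto the thread index, so run_and_check(..., target='cuda') builds and runs the kernel on tvm.gpu(0). One way to eyeball the generated storage scopes (an illustrative check, not part of the commit) is to call the script with placeholders, which returns its IR, and print it:

    a = tvm.placeholder((256, ), dtype='float32', name='a')
    b = tvm.placeholder((256, ), dtype='float32', name='b')
    c = tvm.placeholder((256, ), dtype='float32', name='c')
    ir = share_vec_add(a, b, c)
    print(ir)  # the dump should show 'shared' and 'local' storage scopes on the allocations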