Unverified commit 6d1f4c0b by Tianqi Chen, committed by GitHub

[RELAY][EXPR] Make const numpy consistent (#2349)

parent 3516cbe0
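In short, as the hunks below show, tvm.const(value) with an inferred default dtype becomes tvm.const(value, dtype), and relay.const no longer downcasts numpy's int64/float64 defaults to int32/float32. A minimal before/after sketch (a hedged illustration, not taken from the diff):

    import tvm
    x = tvm.const(1, "int32")   # previously tvm.const(1), which defaulted to int32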
......@@ -26,22 +26,53 @@ handle = "handle"
def min_value(dtype):
"""minimum value of dtype"""
"""minimum value of dtype
Parameters
----------
dtype : str
The data type.
Returns
-------
value : tvm.Expr
The minimum value of dtype.
"""
return _api_internal._min_value(dtype)
def max_value(dtype):
"""maximum value of dtype"""
"""maximum value of dtype
Parameters
----------
dtype : str
The data type.
Returns
-------
value : tvm.Expr
The maximum value of dtype.
"""
return _api_internal._max_value(dtype)
def const(value, dtype=None):
"""construct a constant"""
if dtype is None:
if isinstance(value, _Integral):
dtype = 'int32'
else:
dtype = 'float32'
def const(value, dtype):
"""construct a constant
Parameters
----------
value : number
The content of the constant number.
dtype : str
The data type.
Returns
-------
const_val: tvm.Expr
The result expression.
"""
return _api_internal._const(value, dtype)
......
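A short usage sketch of the tvm.api helpers documented above (assuming the top-level tvm namespace re-exports them, as it does for const):

    import tvm

    lo = tvm.min_value("float32")     # smallest float32 value as a tvm.Expr
    hi = tvm.max_value("int8")        # largest int8 value as a tvm.Expr
    c = tvm.const(3.14, "float32")    # dtype is now a required argument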
......@@ -43,7 +43,7 @@ def bind(func_id, args):
_internal_assert(isinstance(args[0], str), \
"A loop bind's first argument should be a string!")
iter_var = _api.thread_axis(args[0])
low, ext = _api.const(0), args[1]
low, ext = _api.const(0, "int32"), args[1]
for_type = None
return iter_var, low, ext, for_type
......
......@@ -4,6 +4,8 @@ import ast
import operator
import logging
import sys
from numbers import Integral
from .util import _internal_assert
from . import calls
from . import util
......@@ -137,6 +139,15 @@ class HybridParser(ast.NodeVisitor):
return self._args[s]
return self.alloc_buffers[s][0]
def _const(self, value, dtype=None):
if dtype is None:
if isinstance(value, bool):
dtype = "bool"
elif isinstance(value, Integral):
dtype = "int32"
else:
dtype = "float32"
return _api.const(value, dtype)
#pylint: disable=invalid-name, missing-docstring
def visit_Module(self, node):
......@@ -172,9 +183,9 @@ class HybridParser(ast.NodeVisitor):
if isinstance(res, tuple):
buf = res[0]
if isinstance(node.ctx, ast.Load):
return _make.Call(buf.dtype, buf.name, [_api.const(0)], \
return _make.Call(buf.dtype, buf.name, [self._const(0)], \
_expr.Call.Halide, buf.op, buf.value_index)
return buf, [_api.const(0)]
return buf, [self._const(0)]
if isinstance(node.ctx, ast.Load):
return res
return None
......@@ -183,7 +194,7 @@ class HybridParser(ast.NodeVisitor):
def visit_Num(self, node):
return _api.const(node.n)
return self._const(node.n)
def visit_AugAssign(self, node):
......@@ -193,7 +204,7 @@ class HybridParser(ast.NodeVisitor):
_internal_assert(len(buf) == 2, "LHS is supposed to be (buf, args)!")
buf, args = buf
else:
args = [_api.const(0)]
args = [self._const(0)]
_internal_assert(isinstance(buf, Tensor), "LHS is supposed to be Tensor!")
read = _make.Call(buf.dtype, buf.name, args, _expr.Call.Halide, buf.op, buf.value_index)
......@@ -378,7 +389,7 @@ class HybridParser(ast.NodeVisitor):
if iter_var is None:
_internal_assert(for_type is not None, "The loop bind function parse error!")
offset = iter_var = _api.var(_name)
if not _ir_pass.Equal(low, _api.const(0)):
if not _ir_pass.Equal(low, self._const(0)):
offset = iter_var + low
self.loops_above[_name] = offset
else:
......@@ -389,7 +400,7 @@ class HybridParser(ast.NodeVisitor):
if for_type is None:
res = _make.AttrStmt(iter_var, 'thread_extent', ext, _body)
else:
res = _make.For(iter_var, _api.const(0), ext, for_type, 0, _body)
res = _make.For(iter_var, self._const(0), ext, for_type, 0, _body)
self.loops_above.pop(_name)
return res
......
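The _const helper above infers a dtype for bare Python literals in hybrid scripts. A standalone sketch of the same rule (infer_const is a hypothetical name used only for illustration):

    from numbers import Integral
    import tvm

    def infer_const(value):
        # mirrors HybridParser._const: bool -> "bool", integer -> "int32", otherwise "float32"
        if isinstance(value, bool):        # bool must be checked before Integral
            dtype = "bool"
        elif isinstance(value, Integral):
            dtype = "int32"
        else:
            dtype = "float32"
        return tvm.const(value, dtype)

    assert infer_const(1).dtype == "int32"
    assert infer_const(2.5).dtype == "float32"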
......@@ -465,12 +465,6 @@ def const(value, dtype=None):
"""
if isinstance(value, (_base.numeric_types, (bool, list))):
value = _np.array(value, dtype=dtype)
# convert default to int32 and float32
if dtype is None:
if value.dtype == "float64":
value = value.astype("float32")
elif value.dtype == "int64":
value = value.astype("int32")
if isinstance(value, (_np.ndarray, _np.generic)):
value = _nd.array(value)
......
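With the implicit float64 -> float32 and int64 -> int32 conversion removed, relay.const keeps the numpy dtype of its argument unless a dtype is passed, which is why the updated tests below supply the dtype (or a typed numpy array) explicitly. A hedged sketch:

    import numpy as np
    from tvm import relay

    i_data = relay.const(0, "int32")              # explicit dtype, as in the tests
    one = relay.const(np.array(1.0, "float32"))   # dtype taken from the numpy array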
......@@ -37,7 +37,8 @@ def test_tuple_value():
def test_id():
x = relay.var('x', 'float32')
ident = relay.Function([x], x)
check_eval(ident, [1.0], 1.0)
one = np.array(1.0, 'float32')
check_eval(ident, [one], one)
def test_add_const():
......@@ -60,8 +61,8 @@ def test_equal():
j = relay.var('i', shape=[], dtype='int32')
z = relay.equal(i, j)
func = relay.Function([i, j], z, ret_type=relay.TensorType([], 'bool'))
i_data = relay.const(0)
j_data = relay.const(0)
i_data = relay.const(0, 'int32')
j_data = relay.const(0, 'int32')
check_eval(func, [i_data, j_data], True)
......@@ -96,10 +97,10 @@ def test_loop():
i = relay.var('i', shape=[], dtype='int32')
accum = relay.var('accum', shape=[], dtype='int32')
sb = ScopeBuilder()
with sb.if_scope(relay.equal(i, relay.const(0))):
with sb.if_scope(relay.equal(i, relay.const(0, 'int32'))):
sb.ret(accum)
with sb.else_scope():
one_less = relay.subtract(i, relay.const(1))
one_less = relay.subtract(i, relay.const(1, 'int32'))
new_accum = relay.add(accum, i)
sb.ret(relay.Call(sum_up, [one_less, new_accum]))
func = relay.Function([i, accum], sb.get())
......
......@@ -13,10 +13,11 @@ def test_debug():
global _test_debug_hit
_test_debug_hit = True
prog = debug(x, debug_func=did_exec)
result = ex.evaluate(prog, { x: const(1) })
result = ex.evaluate(prog, { x: const(1, 'int32') })
assert _test_debug_hit
assert result.asnumpy() == 1
def test_debug_with_expr():
global _test_debug_hit
_test_debug_hit = False
......@@ -27,6 +28,6 @@ def test_debug_with_expr():
global _test_debug_hit
_test_debug_hit = True
prog = debug(x + x * x, debug_func=did_exec)
result = ex.evaluate(prog, { x: const(2) })
result = ex.evaluate(prog, { x: const(2, 'int32') })
assert _test_debug_hit
assert result.asnumpy() == 6
......@@ -329,7 +329,7 @@ def test_full():
for target, ctx in ctx_list():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(fill_value)
op_res = intrp.evaluate(func)(np.array(fill_value, dtype))
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
verify_full(4, (1, 3, 4, 4), "int32")
verify_full(4.0, (1, 4), "float32")
......@@ -365,7 +365,7 @@ def test_full_like():
for target, ctx in ctx_list():
for kind in ["graph", "debug"]:
intrp = relay.create_executor(kind, ctx=ctx, target=target)
op_res = intrp.evaluate(func)(x_data, fill_value)
op_res = intrp.evaluate(func)(x_data, np.array(fill_value, dtype))
tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
verify_full_like((1, 3, 4, 4), 4, "int32")
verify_full_like((1, 1), 44.0, "float32")
......
......@@ -20,13 +20,13 @@ def test_alter_op():
@register_alter_op_layout("nn.conv2d", level=100)
def alter_conv2d(attrs, inputs, tinfos):
data, weight = inputs
weight = relay.multiply(weight, relay.const(2.0))
weight = relay.multiply(weight, relay.const(2.0, "float32"))
return relay.nn.conv2d(data, weight, **attrs)
def expected():
x = relay.var("x", shape=(1, 64, 56, 56))
weight = relay.var('weight', shape=(64, 64, 3, 3))
y = relay.nn.conv2d(x, relay.multiply(weight, relay.const(2.0)),
y = relay.nn.conv2d(x, relay.multiply(weight, relay.const(2.0, "float32")),
channels=64,
kernel_size=(3, 3),
padding=(1, 1))
......@@ -313,4 +313,3 @@ if __name__ == "__main__":
test_alter_layout_dual_path()
test_alter_layout_resnet()
test_alter_layout_broadcast_op()
......@@ -21,8 +21,8 @@ def test_simplify():
assert zz.a == x and zz.b.value == 4
n = tvm.var('n')
assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % (-1)), tvm.const(0))
assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % 1), tvm.const(0))
assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % (-1)), tvm.const(0, "int32"))
assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n % 1), tvm.const(0, "int32"))
assert tvm.ir_pass.Equal(tvm.ir_pass.CanonicalSimplify(n / 1), n)
tvm.ir_pass.CanonicalSimplify(n / (-1))
# This is not true in the current implementation
......@@ -67,10 +67,11 @@ def test_modular():
ry = tvm.var("ry")
y = tvm.var("y")
x = tvm.var("x")
vmap = {rx: tvm.Range(tvm.const(0), tvm.const(3)),
ry: tvm.Range(tvm.const(0), tvm.const(3)),
y: tvm.Range(tvm.const(0), tvm.const(2)),
x: tvm.Range(tvm.const(0), tvm.const(14))}
i32_const = lambda x: tvm.const(x, "int32")
vmap = {rx: tvm.Range(i32_const(0), i32_const(3)),
ry: tvm.Range(i32_const(0), i32_const(3)),
y: tvm.Range(i32_const(0), i32_const(2)),
x: tvm.Range(i32_const(0), i32_const(14))}
idx = ry * 16 + rx + y * 16 + x
z1 = tvm.ir_pass.CanonicalSimplify(idx // 16, vmap)
z2 = tvm.ir_pass.CanonicalSimplify(idx % 16, vmap)
......@@ -82,4 +83,4 @@ if __name__ == "__main__":
test_modular()
test_simplify()
test_mul()
test_simplify_minmax()
\ No newline at end of file
test_simplify_minmax()
import tvm
def test_const():
x = tvm.const(1)
x = tvm.const(1, "int32")
print(x.dtype)
assert x.dtype == tvm.int32
assert isinstance(x, tvm.expr.IntImm)
def test_make():
x = tvm.const(1)
x = tvm.const(1, "int32")
y = tvm.var("x")
z = x + y
assert isinstance(tvm.max(x, y), tvm.expr.Max)
assert isinstance(tvm.min(x, y), tvm.expr.Min)
def test_ir():
x = tvm.const(1)
x = tvm.const(1, "int32")
y = tvm.make.IntImm('int32', 1)
z = x + y
stmt = tvm.make.Evaluate(z)
......
......@@ -2,7 +2,7 @@ import tvm
def test_const_fold():
def check(f, *args):
x = f(*[tvm.const(x) for x in args])
x = f(*[tvm.const(x, "int32") for x in args])
y = f(*args)
if not isinstance(x, (tvm.expr.IntImm, tvm.expr.UIntImm)) or x.value != int(y):
raise ValueError("check error: %s vs %s " % (x, y))
......
......@@ -2,8 +2,8 @@ import tvm
def test_const_saveload_json():
# save load json
x = tvm.const(1)
y = tvm.const(10)
x = tvm.const(1, "int32")
y = tvm.const(10, "int32")
z = x + y
z = z + z
json_str = tvm.save_json(z)
......@@ -13,8 +13,8 @@ def test_const_saveload_json():
def test_make_smap():
# save load json
x = tvm.const(1)
y = tvm.const(10)
x = tvm.const(1, "int32")
y = tvm.const(10, "int32")
z = tvm.expr.Add(x, y)
smap = tvm.convert({"z": z, "x": x})
json_str = tvm.save_json(tvm.convert([smap]))
......
......@@ -29,13 +29,13 @@ def test_basic():
def test_bound():
m = tvm.var('m')
vrange = tvm.convert({m: tvm.Range(tvm.const(0), tvm.const(10))})
vrange = tvm.convert({m: tvm.Range(tvm.const(0, "int32"), tvm.const(10, "int32"))})
ret = tvm.ir_pass.Simplify(m % 10, vrange)
assert ret == m
def test_canonical():
x = tvm.var("x")
z = tvm.const(3)
z = tvm.const(3, "int32")
ret = tvm.ir_pass.CanonicalSimplify(x / (z*z) - x / (z*z))
assert(tvm.ir_pass.Equal(ret, 0))
......
......@@ -238,7 +238,8 @@ def test_parallel_alloc():
n = tvm.var("n")
with ib.for_range(0, n, name="t") as i:
ib.scope_attr(
tvm.const(1) , "pragma_scope", tvm.make.StringImm("parallel_launch_point"))
tvm.const(1, "int32") , "pragma_scope",
tvm.make.StringImm("parallel_launch_point"))
with ib.for_range(0, n, name="i", for_type="parallel") as i:
with ib.for_range(0, 10, name="j") as j:
A = ib.allocate("float32", n, name="A", scope="global")
......
......@@ -24,7 +24,7 @@ def test_unroll_loop():
assert ret.for_type == tvm.stmt.For.Unrolled
ib = tvm.ir_builder.create()
ib.scope_attr(tvm.const(0), "pragma_auto_unroll_max_step", 16)
ib.scope_attr(tvm.const(0, "int32"), "pragma_auto_unroll_max_step", 16)
ib.emit(stmt)
wrapped = ib.get()
wrapped = tvm.make.Block(wrapped, stmt)
......@@ -54,4 +54,4 @@ def test_unroll_fake_loop():
if __name__ == "__main__":
test_unroll_loop()
test_unroll_fake_loop()
\ No newline at end of file
test_unroll_fake_loop()
......@@ -272,7 +272,8 @@ def test_schedule_cache_relayout4():
def test_schedule_bound_condition():
A = tvm.placeholder((64,), name='A', dtype="float32")
Apad = tvm.compute((66,), lambda i: tvm.select(tvm.all(i>0, i < 65), A[i-1], tvm.const(0.)), name='Apad')
Apad = tvm.compute((66,), lambda i: tvm.select(
tvm.all(i>0, i < 65), A[i-1], tvm.const(0., "float32")), name='Apad')
Apad2 = tvm.compute((66,), lambda i: Apad[i]*2, name='Apad2')
s = tvm.create_schedule(Apad2.op)
AL1 = s.cache_read(A,"local",[Apad])
......
......@@ -320,7 +320,7 @@ def bitpack(data, bits, pack_axis, bit_axis, pack_type, name="QuantizeInput"):
element = data(*idx)
for b in range(bits):
extracted_bit = ((element & tvm.const(masks[b])) >> b).astype(pack_type)
extracted_bit = ((element & tvm.const(masks[b], "int32")) >> b).astype(pack_type)
packed_data[b] = (packed_data[b] | extracted_bit)
if k < data_width - 1:
packed_data[b] = packed_data[b] << 1
......
......@@ -4,7 +4,7 @@ from topi import util
def test_util():
x = tvm.const(100)
x = tvm.const(100, "int32")
assert util.get_const_int(x) == 100
assert util.get_const_tuple((x, x)) == (100, 100)
......
......@@ -6,7 +6,7 @@ from topi import util
def test_util():
x = tvm.const(100)
x = tvm.const(100, "int32")
assert util.get_const_int(x) == 100
assert util.get_const_tuple((x, x)) == (100, 100)
......
import tvm
import topi
from topi import util
def test_util():
x = tvm.const(100)
assert util.get_const_int(x) == 100
assert util.get_const_tuple((x, x)) == (100, 100)
def test_ewise():
m = tvm.var('m')
l = tvm.var('l')
A = tvm.placeholder((m, l), name='A')
def test_apply(func, name):
B = func(A)
assert tuple(B.shape) == tuple(A.shape)
assert B.op.body[0].name == name
test_apply(topi.cpp.exp, "exp")
test_apply(topi.cpp.tanh, "tanh")
test_apply(topi.cpp.sigmoid, "sigmoid")
test_apply(topi.cpp.log, "log")
test_apply(topi.cpp.sqrt, "sqrt")
def test_flatten_tag():
A = tvm.placeholder((3, 4), name='A')
B = topi.cpp.nn.flatten(A)
assert B.op.tag == topi.tag.INJECTIVE
if __name__ == "__main__":
test_util()
test_ewise()
test_flatten_tag()
"""Test code for binary neural network operators."""
import numpy as np
import tvm
import topi
from topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
def verify_binary_dense(batch, in_dim, out_dim):
A = tvm.placeholder((batch, in_dim), name='A')
B = tvm.placeholder((out_dim, in_dim), name='B')
bnn_A = topi.cpp.nn.binarize_pack(A, 1)
bnn_B = topi.cpp.nn.binarize_pack(B, 1)
# binary dense
bnn_A1 = tvm.placeholder(bnn_A.shape, dtype=bnn_A.dtype)
bnn_B1 = tvm.placeholder(bnn_B.shape, dtype=bnn_B.dtype)
bnn_C = topi.cpp.nn.binary_dense(bnn_A1, bnn_B1)
# schedule
target = topi.cpp.TEST_create_target("llvm")
s1 = topi.cpp.x86.schedule_binarize_pack(target, [bnn_A])
s2 = topi.cpp.x86.schedule_binarize_pack(target, [bnn_B])
s3 = topi.cpp.x86.schedule_binary_dense(target, [bnn_C])
dtype = A.dtype
@memoize("topi.tests.test_topi_binary_dense")
def get_ref_data():
# generate random matrix of +1 or -1 value
a_np = (np.random.randint(2, size=(batch, in_dim)) * 2 - 1).astype(dtype)
b_np = (np.random.randint(2, size=(out_dim, in_dim)) * 2 - 1).astype(dtype)
c_np = np.dot(a_np, b_np.T)
return (a_np, b_np, c_np)
a_np, b_np, c_np = get_ref_data()
ctx = tvm.cpu(0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(b_np, ctx)
bnn_a = tvm.nd.array(np.zeros(get_const_tuple(bnn_A.shape), dtype=bnn_A.dtype), ctx)
bnn_b = tvm.nd.array(np.zeros(get_const_tuple(bnn_B.shape), dtype=bnn_B.dtype), ctx)
bnn_c = tvm.nd.array(np.zeros(get_const_tuple(bnn_C.shape), dtype=bnn_C.dtype), ctx)
f1 = tvm.build(s1, [A, bnn_A], 'llvm')
f2 = tvm.build(s2, [B, bnn_B], 'llvm')
f3 = tvm.build(s3, [bnn_A1, bnn_B1, bnn_C], 'llvm')
f1(a, bnn_a)
f2(b, bnn_b)
f3(bnn_a, bnn_b, bnn_c)
tvm.testing.assert_allclose(bnn_c.asnumpy(), c_np, rtol=1e-5)
def test_binary_dense():
verify_binary_dense(1, 4096, 1024)
verify_binary_dense(1, 1024, 1000)
if __name__ == "__main__":
test_binary_dense()
"""Test code for clip operator"""
import numpy as np
import tvm
import topi
from topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
def verify_clip(N, a_min, a_max, dtype):
A = tvm.placeholder((N, N), dtype=dtype, name='A')
B = topi.cpp.clip(A, a_min, a_max)
# use memoize to pickle the test data for next time use
@memoize("topi.tests.test_topi_clip")
def get_ref_data():
a_np = np.random.uniform(a_min*2, a_max*2, size=(N, N)).astype(dtype)
b_np = np.clip(a_np, a_min, a_max)
return a_np, b_np
a_np, b_np = get_ref_data()
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
target = topi.cpp.TEST_create_target(device)
s = topi.cpp.generic.default_schedule(target, [B], False)
ctx = tvm.cpu(0) if device == "llvm" else tvm.gpu(0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
f = tvm.build(s, [A, B], device, name="clip")
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['llvm']:
check_device(device)
def test_clip():
verify_clip(1024, -127, 127, 'int8')
verify_clip(1024, -127, 127, 'int16')
verify_clip(1024, -127, 127, 'float32')
if __name__ == "__main__":
test_clip()
"""Test code for dense operator"""
import numpy as np
import tvm
import topi
from topi.util import get_const_tuple
from tvm.contrib.pickle_memoize import memoize
def verify_dense(batch, in_dim, out_dim, use_bias=True):
A = tvm.placeholder((batch, in_dim), name='A')
B = tvm.placeholder((out_dim, in_dim), name='B')
C = tvm.placeholder((out_dim,), name='C')
D = topi.cpp.nn.dense(A, B, C if use_bias else None)
D = topi.cpp.nn.relu(D)
dtype = A.dtype
# use memoize to pickle the test data for next time use
@memoize("topi.tests.test_topi_dense")
def get_ref_data():
a_np = np.random.uniform(size=(batch, in_dim)).astype(dtype)
b_np = np.random.uniform(size=(out_dim, in_dim)).astype(dtype)
c_np = np.random.uniform(size=(out_dim,)).astype(dtype)
if use_bias:
d_np = np.maximum(np.dot(a_np, b_np.T) + c_np, 0.0)
else:
d_np = np.maximum(np.dot(a_np, b_np.T), 0.0)
return (a_np, b_np, c_np, d_np)
# get the test data
a_np, b_np, c_np, d_np = get_ref_data()
def check_device(device):
ctx = tvm.context(device, 0)
if not ctx.exist:
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.schedule_dense(target, [D])
elif device == "rocm":
s = topi.cpp.rocm.schedule_dense(target, [D])
else:
s = topi.cpp.cuda.schedule_dense(target, [D])
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(b_np, ctx)
c = tvm.nd.array(c_np, ctx)
d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
f = tvm.build(s, [A, B, C, D], device, name="dense")
f(a, b, c, d)
tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm']:
check_device(device)
def test_dense():
verify_dense(1, 1024, 1000, use_bias=True)
verify_dense(1, 1024, 1000, use_bias=False)
if __name__ == "__main__":
test_dense()
import tvm
import topi
import topi.testing
import numpy as np
def test_dilate():
target = 'llvm'
ctx = tvm.cpu(0)
def _test_dilate(input_size, strides):
Input = tvm.placeholder((input_size))
Output = topi.cpp.nn.dilate(Input, strides)
tgt = topi.cpp.TEST_create_target(target)
schedule = topi.cpp.generic.default_schedule(tgt, [Output], True)
input_np = np.random.uniform(size=input_size).astype(Input.dtype)
output_np = topi.testing.dilate_python(input_np, strides)
input_tvm = tvm.nd.array(input_np, ctx=ctx)
output_size = topi.util.get_const_tuple(Output.shape)
output_tvm = tvm.nd.array(np.zeros(shape=output_size).astype(Output.dtype), ctx=ctx)
f = tvm.build(schedule, [Input, Output], target)
f(input_tvm, output_tvm)
tvm.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)
_test_dilate((32,), (2,))
_test_dilate((32,32), (2,2))
_test_dilate((1,3,32,32), (1,1,1,1))
_test_dilate((1,3,32,32), (2,2,2,2))
_test_dilate((1,32,32,3,3), (1,1,1,1,1))
_test_dilate((1,32,32,3,3), (2,2,2,2,2))
_test_dilate((1,32,32,32,3,3), (1,1,1,2,2,2))
_test_dilate((1,32,32,32,3,3), (2,2,2,1,1,1))
if __name__ == "__main__":
test_dilate()
"""Test code for l2 normalization"""
import numpy as np
import tvm
import topi
import logging
from topi.util import get_const_tuple
import topi.testing
def verify_l2_normalize(shape, eps, axis=None):
'''Verify l2 normalization operator by comparing outputs from tvm and numpy implementation'''
A = tvm.placeholder(shape, name='A')
B = topi.cpp.nn.l2_normalize(A, eps, axis)
dtype = A.dtype
a_np = np.random.uniform(size=shape).astype(dtype)
b_np = topi.testing.l2_normalize_python(a_np, eps, axis)
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.cuda.schedule_l2_normalize(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
func = tvm.build(s, [A, B], device, name="l2_normalize")
func(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']:
check_device(device)
def test_l2_normalize():
verify_l2_normalize((1, 3, 20, 20), 0.001)
verify_l2_normalize((1, 3, 20, 20), 0.001, (1,))
verify_l2_normalize((1, 3, 20, 20), 0.001, (1, 2))
verify_l2_normalize((1, 3, 20, 20), 0.001, (2, 3))
verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 3))
verify_l2_normalize((1, 3, 20, 20), 0.001, (0, 2, 3))
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
test_l2_normalize()
"""Test code for LRN"""
import numpy as np
import tvm
import topi
import logging
from topi.util import get_const_tuple
import topi.testing
def verify_lrn(shape, size, axis, bias, alpha, beta):
'''Verify Local response normalization operator by comparing outputs from tvm and numpy implementation'''
A = tvm.placeholder(shape, name='A')
B = topi.cpp.nn.lrn(A, size, axis, alpha, beta, bias)
dtype = A.dtype
a_np = np.random.uniform(size=shape).astype(dtype)
b_np = topi.testing.lrn_python(a_np, size, axis, bias, alpha, beta)
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.cuda.schedule_lrn(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
f = tvm.build(s, [A, B], device)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-1)
for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']:
check_device(device)
def test_lrn():
verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5)
verify_lrn((1, 3, 5, 5), 3, 3, 1.0, 1.0, 0.5)
verify_lrn((1, 3, 20, 20), 3, 1, 2.0, 1.0, 0.75)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
test_lrn()
"""Test code for pooling"""
import numpy as np
import tvm
import topi
import math
from topi.util import get_const_tuple
pool_code = {
"avg": 0,
"max": 1
}
def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_pad=True):
iw = ih
kw = kh
sw = sh
pt, pl, pb, pr = padding
A = tvm.placeholder((n, ic, ih, iw), name='A')
B = topi.cpp.nn.pool(A, [kh, kw], [sh, sw], padding,
pool_code[pool_type], ceil_mode, "NCHW", count_include_pad)
B = topi.cpp.nn.relu(B)
dtype = A.dtype
bshape = get_const_tuple(B.shape)
ashape = get_const_tuple(A.shape)
if ceil_mode:
assert bshape[2] == int(math.ceil(float(ashape[2] - kh + pt + pb) / sh) + 1)
assert bshape[3] == int(math.ceil(float(ashape[3] - kw + pl + pr) / sw) + 1)
else:
assert bshape[2] == int(math.floor(float(ashape[2] - kh + pt + pb) / sh) + 1)
assert bshape[3] == int(math.floor(float(ashape[3] - kw + pl + pr) / sw) + 1)
a_np = np.random.uniform(size=(n, ic, ih, iw)).astype(dtype)
pad_np = np.zeros(shape=(n, ic, ih+pt+pb, iw+pl+pr)).astype(dtype)
no_zero = (range(n), range(ic), (range(pt, ih+pt)), (range(pl, iw+pl)))
pad_np[np.ix_(*no_zero)] = a_np
_, oc, oh, ow = get_const_tuple(B.shape)
b_np = np.zeros(shape=(n, oc, oh, ow)).astype(dtype)
if pool_type == 'avg':
for i in range(oh):
for j in range(ow):
if count_include_pad:
b_np[:,:,i,j] = np.mean(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3))
else:
pad_count = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw] > 0, axis=(2,3))
b_np[:,:,i,j] = np.sum(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3)) / np.maximum(pad_count, 1)
elif pool_type =='max':
for i in range(oh):
for j in range(ow):
b_np[:,:,i,j] = np.max(pad_np[:, :, i*sh:i*sh+kh, j*sw:j*sw+kw], axis=(2,3))
b_np = np.maximum(b_np, 0.0)
def check_device(device):
ctx = tvm.context(device, 0)
if not ctx.exist:
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.cuda.schedule_pool(target, [B])
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
f = tvm.build(s, [A, B], device)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm']:
check_device(device)
def test_pool():
verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'avg', False, True)
verify_pool(1, 256, 31, 3, 3, [1, 2, 1, 2], 'avg', False, True)
verify_pool(1, 256, 32, 2, 2, [1, 2, 1, 2], 'avg', False, False)
verify_pool(1, 256, 31, 4, 4, [3, 3, 3, 3], 'avg', False, False)
verify_pool(1, 256, 31, 4, 4, [0, 0, 0, 0], 'avg', False, False)
verify_pool(1, 256, 32, 2, 2, [0, 0, 0, 0], 'max', False)
verify_pool(1, 256, 31, 3, 3, [2, 1, 2, 1], 'max', False)
verify_pool(1, 256, 31, 3, 3, [2, 1, 2, 1], 'max', True)
verify_pool(1, 256, 31, 3, 3, [2, 1, 0, 3], 'avg', False, True)
verify_pool(1, 256, 32, 2, 2, [0, 3, 2, 1], 'avg', False, False)
verify_pool(1, 256, 31, 3, 3, [1, 0, 3, 2], 'max', False)
verify_pool(1, 256, 31, 3, 3, [3, 2, 1, 0], 'max', True)
def verify_global_pool(n, c, h, w, pool_type):
A = tvm.placeholder((n, c, h, w), name='A')
B = topi.cpp.nn.global_pool(A, pool_code[pool_type])
B = topi.cpp.nn.relu(B)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
if pool_type == 'avg':
b_np = np.mean(a_np, axis=(2,3), keepdims=True)
elif pool_type =='max':
b_np = np.max(a_np, axis=(2,3), keepdims=True)
b_np = np.maximum(b_np, 0.0)
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.cuda.schedule_global_pool(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
f = tvm.build(s, [A, B], device)
f(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm']:
check_device(device)
def test_global_pool():
verify_global_pool(1, 1024, 7, 7, 'avg')
verify_global_pool(4, 1024, 7, 7, 'avg')
verify_global_pool(1, 1024, 7, 7, 'max')
verify_global_pool(4, 1024, 7, 7, 'max')
if __name__ == "__main__":
test_pool()
test_global_pool()
"""Test code for reduce."""
import os
import numpy as np
import tvm
import topi
def _my_npy_argmax(arr, axis, keepdims):
if not keepdims:
return arr.argmax(axis=axis)
else:
if axis is not None:
out_shape = list(arr.shape)
out_shape[axis] = 1
else:
out_shape = [1 for _ in range(len(arr.shape))]
return arr.argmax(axis=axis).reshape(out_shape)
def _my_npy_argmin(arr, axis, keepdims):
if not keepdims:
return arr.argmin(axis=axis)
else:
out_shape = list(arr.shape)
out_shape[axis] = 1
return arr.argmin(axis=axis).reshape(out_shape)
def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
# Build the logic and compile the function
dat_dtype = "float32"
A = tvm.placeholder(shape=in_shape, name="A", dtype=dat_dtype)
A1 = topi.cpp.sqrt(topi.cpp.exp(A))
out_dtype = "float32"
if type == "sum":
B = topi.cpp.sum(A1, axis, keepdims)
elif type == "max":
B = topi.cpp.max(A1, axis, keepdims)
elif type == "min":
B = topi.cpp.min(A1, axis, keepdims)
elif type == "argmax":
B = topi.cpp.argmax(A1, axis, keepdims)
out_dtype = "int32"
elif type == "argmin":
B = topi.cpp.argmin(A1, axis, keepdims)
out_dtype = "int32"
elif type == "prod":
B = topi.cpp.prod(A1, axis, keepdims)
else:
raise NotImplementedError
def check_device(device):
ctx = tvm.context(device, 0)
if not ctx.exist:
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], True)
else:
s = topi.cpp.cuda.schedule_reduce(target, [B])
foo = tvm.build(s, [A, B], device, name=type)
# Test
in_npy = np.random.uniform(size=in_shape).astype(np.float32)
in_npy_map = np.sqrt(np.exp(in_npy)).astype(np.float32)
if type == "sum":
out_npy = in_npy_map.sum(axis=axis, keepdims=keepdims)
elif type == "max":
out_npy = in_npy_map.max(axis=axis, keepdims=keepdims)
elif type == "min":
out_npy = in_npy_map.min(axis=axis, keepdims=keepdims)
elif type == "argmax":
out_npy = _my_npy_argmax(in_npy_map, axis=axis, keepdims=keepdims)
elif type == "argmin":
out_npy = _my_npy_argmin(in_npy_map, axis=axis, keepdims=keepdims)
elif type == "prod":
out_npy = in_npy_map.prod(axis=axis, keepdims=keepdims)
else:
raise NotImplementedError
data_tvm = tvm.nd.array(in_npy, ctx=ctx)
out_tvm = tvm.nd.empty(shape=out_npy.shape, ctx=ctx, dtype=out_dtype)
for _ in range(1):
foo(data_tvm, out_tvm)
if type == "argmax" or type == "argmin":
out_tvm_indices = out_tvm.asnumpy()
if keepdims:
out_tvm_indices = np.take(out_tvm_indices, indices=0, axis=axis)
if axis is None:
out_tvm_val = in_npy_map.ravel()[out_tvm_indices]
else:
other_indices = tuple(np.indices(in_shape[0:axis] + in_shape[(axis+1):]))
sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
out_tvm_val = in_npy_map[sel_indices]
if type == "argmax":
tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
elif type == "argmin":
tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
else:
tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
for device in ["cuda", "opencl", "metal", "llvm", "rocm"]:
check_device(device)
def test_reduce_map():
verify_reduce_map_ele(in_shape=(128, 24, 128, 24),
axis=(1, 2, 3),
keepdims=True,
type="sum")
verify_reduce_map_ele(in_shape=(128, 24 * 128 * 24),
axis=(1,),
keepdims=False,
type="max")
verify_reduce_map_ele(in_shape=(32, 128, 24),
axis=None,
keepdims=True,
type="sum")
verify_reduce_map_ele(in_shape=(128, 24, 128, 24),
axis=(0, 2),
keepdims=False,
type="min")
verify_reduce_map_ele(in_shape=(128, 4, 4, 128),
axis=(1, ),
keepdims=True,
type="prod")
verify_reduce_map_ele(in_shape=(4, 4),
axis=(0, 1),
keepdims=False,
type="prod")
verify_reduce_map_ele(in_shape=(32, 128),
axis=1,
keepdims=True,
type="argmax")
verify_reduce_map_ele(in_shape=(32, 24, 32, 24),
axis=2,
keepdims=False,
type="argmin")
verify_reduce_map_ele(in_shape=(31, 21, 15),
axis=None,
keepdims=True,
type="argmax")
verify_reduce_map_ele(in_shape=(31, 21, 15),
axis=None,
keepdims=False,
type="sum")
if __name__ == "__main__":
test_reduce_map()
"""Test code for region"""
import logging
import numpy as np
import tvm
import topi
import topi.testing
from topi.util import get_const_tuple
def verify_region(batch, in_size, in_channel, n, classes, coords, background, l_softmax):
'''Verify region operator by comparing outputs from tvm and numpy implementation'''
in_height = in_width = in_size
A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
B = topi.cpp.yolo.region(A, n, classes, coords, background, l_softmax)
a_shape = get_const_tuple(A.shape)
dtype = A.dtype
def get_ref_data_region():
'''Randomly initialize the data variables and get reference output for the region operation'''
a_np = np.random.uniform(size=a_shape).astype(dtype)
b_np = topi.testing.region_python(a_np, n, classes, coords, background, l_softmax)
return a_np, b_np
a_np, b_np = get_ref_data_region()
def check_device(device):
'''Check the device is available and if so, build and run the program'''
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.rocm.schedule_region(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
func = tvm.build(s, [A, B], device, name="region")
func(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
check_device(device)
def test_region():
verify_region(1, 19, 425, 5, 80, 4, 0, 1)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
test_region()
"""Test code for relu activation"""
import os
import numpy as np
import tvm
import topi
from topi.util import get_const_tuple
def verify_relu(m, n, dtype):
A = tvm.placeholder((m, n), name='A', dtype=dtype)
B = topi.cpp.nn.relu(A)
assert B.dtype == dtype
a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = a_np * (a_np > 0)
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.schedule_injective(target, [B])
else:
s = topi.cpp.cuda.schedule_injective(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
foo = tvm.build(s, [A, B], device, name="relu")
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm']:
check_device(device)
def verify_leaky_relu(m, alpha):
A = tvm.placeholder((m,), name='A')
B = topi.cpp.nn.leaky_relu(A, alpha)
device = "llvm"
target = topi.cpp.TEST_create_target(device)
s = topi.cpp.generic.schedule_injective(target, [B])
a_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = a_np * (a_np > 0) + a_np * (a_np < 0) * alpha
ctx = tvm.cpu(0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
foo = tvm.build(s, [A, B], device, name="leaky_relu")
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
def verify_prelu(x, w, axis, weight_reshape):
X = tvm.placeholder((x), name='X')
W = tvm.placeholder((w), name='W')
x_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(X.shape)).astype(X.dtype)
w_np = np.random.uniform(low=-1.0, high=1.0, size=get_const_tuple(W.shape)).astype(W.dtype)
def _prelu_numpy(x, W):
return (x < 0) * (x *W.reshape(weight_reshape)) + (x>=0) * x
out_np = _prelu_numpy(x_np, w_np)
B = topi.cpp.nn.prelu(X, W, axis)
device = "llvm"
target = topi.cpp.TEST_create_target(device)
s = topi.cpp.generic.schedule_injective(target, [B])
ctx = tvm.cpu(0)
x_tvm = tvm.nd.array(x_np, ctx)
w_tvm = tvm.nd.array(w_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(X.shape), dtype=B.dtype), ctx)
foo = tvm.build(s, [X, W, B], "llvm", name="prelu")
foo(x_tvm, w_tvm, b)
tvm.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)
def test_relu():
for dtype in ['float32', 'float64', 'int32', 'int16', 'int8', 'int64']:
verify_relu(10, 128, dtype)
def test_leaky_relu():
verify_leaky_relu(100, 0.5)
def test_prelu():
verify_prelu((1, 3, 2, 2), (3,), 1, (3, 1, 1))
verify_prelu((1, 3, 2, 2), (2,), 2, (2, 1))
if __name__ == "__main__":
test_relu()
test_leaky_relu()
test_prelu()
"""Test code for reorg"""
import logging
import numpy as np
import tvm
import topi
import topi.testing
from topi.util import get_const_tuple
def verify_reorg(batch, in_size, in_channel, stride):
'''Verify reorg operator by comparing outputs from tvm and numpy implementation'''
in_height = in_width = in_size
A = tvm.placeholder((batch, in_channel, in_height, in_width), name='A')
B = topi.cpp.vision.reorg(A, stride)
a_shape = get_const_tuple(A.shape)
dtype = A.dtype
def get_ref_data_reorg():
'''Randomly initialize the data variables and get reference output for the reorg operation'''
a_np = np.random.uniform(size=a_shape).astype(dtype)
b_np = topi.testing.reorg_python(a_np, stride)
return a_np, b_np
a_np, b_np = get_ref_data_reorg()
def check_device(device):
'''Check the device is available and if so, build and run the program'''
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.cuda.schedule_injective(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
func = tvm.build(s, [A, B], device, name="reorg")
func(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
check_device(device)
def test_reorg():
verify_reorg(1, 38, 64, 2)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
test_reorg()
"""Test code for softmax"""
import os
import numpy as np
import tvm
import topi
import logging
import topi.testing
from topi.util import get_const_tuple
def verify_softmax(m, n):
A = tvm.placeholder((m, n), name='A')
B = topi.cpp.nn.softmax(A, 1)
# confirm lower works
s = tvm.create_schedule([B.op])
tvm.lower(s, [A, B], simple_mode=True)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = topi.testing.softmax_python(a_np)
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.cuda.schedule_softmax(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
foo = tvm.build(s, [A, B], device, name="softmax")
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ['cuda', 'opencl', 'metal', 'rocm']:
check_device(device)
def test_softmax():
verify_softmax(32, 10)
verify_softmax(3, 4)
def verify_log_softmax(m, n):
A = tvm.placeholder((m, n), name='A')
B = topi.cpp.nn.log_softmax(A)
# confirm lower works
s = tvm.create_schedule([B.op])
tvm.lower(s, [A, B], simple_mode=True)
a_np = np.random.uniform(size=get_const_tuple(A.shape)).astype(A.dtype)
b_np = topi.testing.log_softmax_python(a_np)
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
print("Running on target: %s" % device)
target = topi.cpp.TEST_create_target(device)
if device == "llvm":
s = topi.cpp.generic.default_schedule(target, [B], False)
else:
s = topi.cpp.cuda.schedule_softmax(target, [B])
ctx = tvm.context(device, 0)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
foo = tvm.build(s, [A, B], device, name="log_softmax")
foo(a, b)
tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
for device in ["cuda", "opencl", "metal", "rocm"]:
check_device(device)
def test_log_softmax():
verify_log_softmax(32, 10)
verify_log_softmax(3, 4)
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG)
test_softmax()
test_log_softmax()
"""Test code for tensor operator"""
import numpy as np
import tvm
import topi
def verify_elemwise_sum(num_args, dtype):
shape = (3,5,4)
tvm_placeholders = []
for i in range(num_args):
tvm_placeholders.append(
tvm.placeholder(shape, name="data"+str(i), dtype=dtype))
esum = topi.cpp.elemwise_sum(tvm_placeholders)
s = tvm.create_schedule([esum.op])
def get_ref_data():
np_nd = [np.random.uniform(0, 10, size=shape).astype(dtype)
for i in range(num_args)]
return np_nd
np_nd = get_ref_data()
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
ctx = tvm.context(device, 0)
out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
f = tvm.build(s, tvm_placeholders + [esum], device, name="elemwise_sum")
tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out]
f(*tvm_nd)
np_out = np.sum(np.array(np_nd), axis=0)
tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)
for device in ["llvm"]:
check_device(device)
def verify_full(shape, dtype, fill_value):
A = tvm.placeholder(shape, dtype=dtype, name="A")
B = topi.cpp.full_like(A, fill_value)
C = topi.cpp.full(shape, dtype, fill_value)
s1 = tvm.create_schedule([B.op])
s2 = tvm.create_schedule([C.op])
def get_ref_data():
return np.full(shape, fill_value, dtype)
np_nd = get_ref_data()
def check_device(device):
if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return
target = topi.cpp.TEST_create_target(device)
ctx = tvm.context(device, 0)
out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
f = tvm.build(s1, [A, B], device, name="full_like")
f(tvm.nd.array(np.zeros(shape, dtype), ctx), out)
tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
f = tvm.build(s2, [C], device, name="full")
f(out)
tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
for device in ["llvm"]:
check_device(device)
def test_elemwise_sum():
verify_elemwise_sum(1, "float32")
verify_elemwise_sum(5, "float32")
verify_elemwise_sum(4, "int32")
def test_full():
verify_full((3,4,5), "float32", 3.14)
verify_full((10,), "int32", 7)
if __name__ == "__main__":
test_elemwise_sum()
test_full()
......@@ -34,7 +34,7 @@ import numpy as np
# our customized lowering pass to manipulate the IR directly instead of using schedule primitives.
#
n = tvm.const(128)
n = tvm.const(128, "int32")
a = tvm.placeholder((n, ), name="a")
b = tvm.placeholder((n, ), name="b")
c = tvm.compute((n, ), lambda i: a[i] + b[i], name='c')
......
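For context on this tutorial hunk: a customized lowering pass in this version of TVM is usually registered through build_config's add_lower_pass option. A minimal self-contained sketch with a hypothetical no-op pass (the pass name and phase number are illustrative only):

    import tvm

    def my_pass(stmt):
        # a no-op pass; a real pass would rewrite stmt using tvm.ir_pass utilities
        return stmt

    n = tvm.const(128, "int32")
    a = tvm.placeholder((n,), name="a")
    b = tvm.compute((n,), lambda i: a[i] + 1, name="b")
    s = tvm.create_schedule(b.op)

    # hook the pass in at lowering phase 1
    with tvm.build_config(add_lower_pass=[(1, my_pass)]):
        print(tvm.lower(s, [a, b], simple_mode=True))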
......@@ -46,7 +46,7 @@ Apad = tvm.compute(
lambda yy, xx, cc, nn: tvm.select(
tvm.all(yy >= pad, yy - pad < in_size,
xx >= pad, xx - pad < in_size),
A[yy - pad, xx - pad, cc, nn], tvm.const(0.)),
A[yy - pad, xx - pad, cc, nn], tvm.const(0., "float32")),
name='Apad')
# Create reduction variables
rc = tvm.reduce_axis((0, in_channel), name='rc')
......@@ -64,7 +64,7 @@ B = tvm.compute(
###############################################################################
# Memory Hierarchy
# ----------------
#
#
# We first specify the memory hierarchy for buffers. The figure below shows the
# GPU memory hierarchy. One important difference from CPU memory hierarchy is
# that GPU provides a cache buffer called shared memory, which is managed by
......
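As a hedged sketch of the memory-hierarchy step described above (a small standalone matmul rather than the tutorial's convolution; tensor names are illustrative):

    import tvm

    n = 64
    A = tvm.placeholder((n, n), name="A")
    W = tvm.placeholder((n, n), name="W")
    k = tvm.reduce_axis((0, n), name="k")
    B = tvm.compute((n, n), lambda i, j: tvm.sum(A[i, k] * W[j, k], axis=k), name="B")
    s = tvm.create_schedule(B.op)

    # stage the inputs in GPU shared memory (the cache buffer mentioned above),
    # then in per-thread registers ("local"), and accumulate B locally
    AA = s.cache_read(A, "shared", [B])
    WW = s.cache_read(W, "shared", [B])
    AL = s.cache_read(AA, "local", [B])
    WL = s.cache_read(WW, "local", [B])
    BL = s.cache_write(B, "local")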
......@@ -700,7 +700,7 @@ def inject_alu_intrin(stmt_in):
elif isinstance(loop_body.value, tvm.expr.Load):
alu_opcode = env.dev.ALU_OPCODE_SHR
lhs = loop_body.value
rhs = tvm.const(0)
rhs = tvm.const(0, "int32")
else:
raise RuntimeError(
"Expression not recognized %s, %s, %s" % (
......