wenyuanbo / tic / Commits / 39c8bc2a

Commit 39c8bc2a, authored Oct 21, 2018 by Sergey Mironov, committed Oct 20, 2018 by Tianqi Chen
[TOPI] Specify non-zero absolute tolerance in tests (#1925)
Parent: be9784cc
Showing 125 changed files with 329 additions and 315 deletions (+329, -315)
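Why a non-zero absolute tolerance matters (an illustrative sketch, not part of the diff below): `np.testing.assert_allclose` defaults to `atol=0`, so its check `|actual - desired| <= atol + rtol * |desired|` can never accept a nonzero error against a desired value of exactly zero. The helper introduced by this commit, `tvm.testing.assert_allclose`, defaults both tolerances to `1e-7`.

```python
import numpy as np

desired = np.array([0.0, 1.0], dtype="float32")
actual = desired + 1e-8  # tiny absolute error, e.g. accumulated rounding

# NumPy's default atol=0 makes the near-zero entry fail:
# |1e-8 - 0| <= 1e-7 * |0| does not hold.
try:
    np.testing.assert_allclose(actual, desired, rtol=1e-7, atol=0)
except AssertionError:
    print("fails with atol=0")

# With a non-zero atol=1e-7 the same comparison passes:
# |1e-8 - 0| <= 1e-7 + 1e-7 * |0|.
np.testing.assert_allclose(actual, desired, rtol=1e-7, atol=1e-7)
```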
apps/extension/tests/test_ext.py  +1 -1
docs/deploy/aocl_fpga.md  +1 -1
docs/deploy/aws_fpga.md  +1 -1
nnvm/python/nnvm/testing/check_computation.py  +4 -4
nnvm/tests/python/compiler/test_build.py  +9 -9
nnvm/tests/python/compiler/test_compiler_cache.py  +1 -1
nnvm/tests/python/compiler/test_fold_axis.py  +2 -1
nnvm/tests/python/compiler/test_nhwc_layout.py  +1 -1
nnvm/tests/python/compiler/test_op_fusion.py  +7 -7
nnvm/tests/python/compiler/test_optimizer.py  +1 -1
nnvm/tests/python/compiler/test_param_dict.py  +1 -1
nnvm/tests/python/compiler/test_rpc_exec.py  +1 -1
nnvm/tests/python/compiler/test_top_assign.py  +2 -2
nnvm/tests/python/compiler/test_top_level2.py  +14 -14
nnvm/tests/python/compiler/test_top_level4.py  +16 -16
nnvm/tests/python/frontend/coreml/test_forward.py  +9 -9
nnvm/tests/python/frontend/darknet/test_forward.py  +2 -2
nnvm/tests/python/frontend/keras/test_forward.py  +1 -1
nnvm/tests/python/frontend/mxnet/test_forward.py  +2 -2
nnvm/tests/python/frontend/onnx/test_forward.py  +21 -21
nnvm/tests/python/frontend/tensorflow/test_forward.py  +7 -7
python/tvm/__init__.py  +1 -0
python/tvm/testing.py  +12 -0
tests/python/contrib/test_cblas.py  +1 -1
tests/python/contrib/test_cublas.py  +1 -1
tests/python/contrib/test_dlpack.py  +1 -1
tests/python/contrib/test_miopen.py  +1 -1
tests/python/contrib/test_mps.py  +1 -1
tests/python/contrib/test_mxnet_bridge.py  +1 -1
tests/python/contrib/test_nnpack.py  +4 -4
tests/python/contrib/test_rocblas.py  +1 -1
tests/python/contrib/test_sort.py  +2 -2
tests/python/contrib/test_sparse.py  +3 -3
tests/python/integration/test_dot.py  +1 -1
tests/python/integration/test_ewise.py  +6 -6
tests/python/integration/test_ewise_fpga.py  +2 -2
tests/python/integration/test_gemm.py  +1 -1
tests/python/integration/test_reduce.py  +7 -7
tests/python/integration/test_scan.py  +1 -1
tests/python/unittest/test_codegen_cross_llvm.py  +1 -1
tests/python/unittest/test_codegen_cuda.py  +3 -3
tests/python/unittest/test_codegen_device.py  +2 -2
tests/python/unittest/test_codegen_extern.py  +4 -4
tests/python/unittest/test_codegen_llvm.py  +12 -12
tests/python/unittest/test_hybrid_script.py  +2 -2
tests/python/unittest/test_ir_builder.py  +2 -2
tests/python/unittest/test_lang_tensor_overload_op.py  +4 -4
tests/python/unittest/test_runtime_ndarray.py  +1 -1
tests/python/unittest/test_runtime_rpc.py  +1 -1
tests/verilog/integration/test_codegen_verilog.py  +1 -1
tests/webgl/test_local_gemm.py  +1 -1
tests/webgl/test_local_multi_stage.py  +1 -1
tests/webgl/test_local_save_load.py  +1 -1
tests/webgl/test_local_topi_conv2d_nchw.py  +2 -2
tests/webgl/test_local_topi_dense.py  +1 -1
tests/webgl/test_local_topi_pooling.py  +2 -2
tests/webgl/test_local_topi_softmax.py  +2 -2
tests/webgl/test_remote_save_load.py  +1 -1
topi/recipe/broadcast/test_broadcast_map.py  +2 -2
topi/recipe/conv/depthwise_conv2d_test.py  +6 -6
topi/recipe/conv/test_conv2d_hwcn_map.py  +2 -2
topi/recipe/gemm/cuda_gemm_square.py  +1 -1
topi/recipe/gemm/gemm_int8.py  +1 -1
topi/recipe/reduce/test_reduce_map.py  +1 -1
topi/recipe/rnn/matexp.py  +1 -1
topi/tests/python/test_topi_bitserial_conv2d.py  +2 -2
topi/tests/python/test_topi_bnn.py  +1 -1
topi/tests/python/test_topi_broadcast.py  +2 -2
topi/tests/python/test_topi_clip.py  +1 -1
topi/tests/python/test_topi_conv2d_hwcn.py  +2 -2
topi/tests/python/test_topi_conv2d_int8.py  +1 -1
topi/tests/python/test_topi_conv2d_nchw.py  +1 -1
topi/tests/python/test_topi_conv2d_nhwc.py  +1 -1
topi/tests/python/test_topi_conv2d_transpose_nchw.py  +2 -2
topi/tests/python/test_topi_conv2d_winograd.py  +1 -1
topi/tests/python/test_topi_dense.py  +1 -1
topi/tests/python/test_topi_depthwise_conv2d.py  +6 -6
topi/tests/python/test_topi_depthwise_conv2d_back_input.py  +1 -1
topi/tests/python/test_topi_depthwise_conv2d_back_weight.py  +1 -1
topi/tests/python/test_topi_dilate.py  +1 -1
topi/tests/python/test_topi_l2norm.py  +1 -1
topi/tests/python/test_topi_lrn.py  +1 -1
topi/tests/python/test_topi_math.py  +1 -1
topi/tests/python/test_topi_matmul.py  +1 -1
topi/tests/python/test_topi_pooling.py  +2 -2
topi/tests/python/test_topi_reduce.py  +3 -3
topi/tests/python/test_topi_region.py  +1 -1
topi/tests/python/test_topi_relu.py  +3 -3
topi/tests/python/test_topi_reorg.py  +1 -1
topi/tests/python/test_topi_resize.py  +1 -1
topi/tests/python/test_topi_shortcut.py  +1 -1
topi/tests/python/test_topi_softmax.py  +2 -2
topi/tests/python/test_topi_sparse.py  +4 -4
topi/tests/python/test_topi_tensor.py  +3 -3
topi/tests/python/test_topi_transform.py  +10 -10
topi/tests/python/test_topi_upsampling.py  +1 -1
topi/tests/python/test_topi_vision.py  +3 -3
topi/tests/python_cpp/test_topi_bnn.py  +1 -1
topi/tests/python_cpp/test_topi_clip.py  +1 -1
topi/tests/python_cpp/test_topi_dense.py  +1 -1
topi/tests/python_cpp/test_topi_dilate.py  +1 -1
topi/tests/python_cpp/test_topi_l2norm.py  +1 -1
topi/tests/python_cpp/test_topi_lrn.py  +1 -1
topi/tests/python_cpp/test_topi_pooling.py  +2 -2
topi/tests/python_cpp/test_topi_reduce.py  +3 -3
topi/tests/python_cpp/test_topi_region.py  +1 -1
topi/tests/python_cpp/test_topi_relu.py  +3 -3
topi/tests/python_cpp/test_topi_reorg.py  +1 -1
topi/tests/python_cpp/test_topi_softmax.py  +2 -2
topi/tests/python_cpp/test_topi_tensor.py  +3 -3
topi/tests/python_cpp/test_topi_transform.py  +10 -10
topi/tests/python_cpp/test_topi_yolo.py  +1 -1
tutorials/autotvm/tune_conv2d_cuda.py  +1 -1
tutorials/autotvm/tune_simple_template.py  +1 -1
tutorials/get_started.py  +4 -4
tutorials/language/extern_op.py  +2 -2
tutorials/language/reduction.py  +1 -1
tutorials/language/scan.py  +1 -1
tutorials/language/tensorize.py  +2 -2
tutorials/nnvm/using_external_lib.py  +1 -1
tutorials/optimize/opt_gemm.py  +7 -7
tutorials/topi/intro_topi.py  +1 -1
vta/tests/python/integration/test_benchmark_gemm.py  +1 -1
vta/tests/python/integration/test_benchmark_topi_conv2d.py  +2 -2
vta/tutorials/convolution_opt.py  +1 -1
apps/extension/tests/test_ext.py
...
@@ -22,7 +22,7 @@ def test_ext_dev():
         a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
         b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
         f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1)
+        tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1)
     check_llvm()
...
docs/deploy/aocl_fpga.md
...
@@ -52,7 +52,7 @@ b = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx)
 c = tvm.nd.array(np.zeros(n, dtype="float32"), ctx)
 fadd(a, b, c)
-np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 ```
 Setup
...
docs/deploy/aws_fpga.md
...
@@ -55,7 +55,7 @@ b = tvm.nd.array(np.random.uniform(size=n).astype("float32"), ctx)
 c = tvm.nd.array(np.zeros(n, dtype="float32"), ctx)
 fadd(a, b, c)
-np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
 ```
 Setup
...
nnvm/python/nnvm/testing/check_computation.py
...
@@ -281,10 +281,10 @@ def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
         Additional parameters for `check_numerical_grads`.
     atol : float, optional
-        Absolute tolerance for `np.testing.assert_allclose`. NOT used for numerical gradients.
+        Absolute tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.
     rtol : float, optional
-        Relative tolerance for `np.testing.assert_allclose`. NOT used for numerical gradients.
+        Relative tolerance for `tvm.testing.assert_allclose`. NOT used for numerical gradients.
     quiet : bool, optional
         Don't dump additional information to stdout on failure.
...
@@ -466,7 +466,7 @@ def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
                     .format(len(numpy_res), out_len))
             for i in range(out_len):
-                np.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol)
+                tvm.testing.assert_allclose(nnvm_res[i], numpy_res[i], atol=atol, rtol=rtol)
         if backward is not None:
             nothing_was_done = False
...
@@ -495,7 +495,7 @@ def check_function(symbol, forward=None, backward=None, grad_input_vars=None,
                     .format(set(grad_var_names) - set(numpy_grads)))
             for x_name in numpy_grads:
-                np.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name],
+                tvm.testing.assert_allclose(nnvm_grads[x_name], numpy_grads[x_name],
                                            atol=atol, rtol=rtol)
         if numerical_grads:
...
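A hypothetical sketch of how the `atol`/`rtol` parameters documented above flow through `check_function` (the function name and keyword arguments come from this file's diff; the symbols and the reference lambda are invented for illustration):

```python
import nnvm.symbol as sym
from nnvm.testing.check_computation import check_function

x = sym.Variable("x")
y = sym.Variable("y")

# Both tolerances are forwarded to tvm.testing.assert_allclose, which checks
# |nnvm_res - numpy_res| <= atol + rtol * |numpy_res| for every output.
check_function(x + y, forward=lambda x, y: x + y, atol=1e-5, rtol=1e-5)
```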
nnvm/tests/python/compiler/test_build.py
...
@@ -27,7 +27,7 @@ def test_compile():
     # get outputs
     out = tvm.nd.empty(shape, dtype)
     get_output(0, out)
-    np.testing.assert_allclose(out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
+    tvm.testing.assert_allclose(out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
     graph, lib, _ = nnvm.compiler.build(z, "llvm", shape_dict)
...
@@ -49,7 +49,7 @@ def test_run():
     nx = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
     ny = tvm.nd.array(np.random.uniform(size=shape).astype(dtype))
     res = _run_graph(z, {"x": nx, "y": ny})
-    np.testing.assert_allclose(res[0].asnumpy(), np.exp(nx.asnumpy() + ny.asnumpy()))
+    tvm.testing.assert_allclose(res[0].asnumpy(), np.exp(nx.asnumpy() + ny.asnumpy()))
...
@@ -73,7 +73,7 @@ def test_precompute_prune():
     m["load_params"](nnvm.compiler.save_param_dict(params))
     m.run()
     out = m.get_output(0, out=res)
-    np.testing.assert_allclose(res.asnumpy(), nx.asnumpy() + 1 + ny.asnumpy() + na.asnumpy())
+    tvm.testing.assert_allclose(res.asnumpy(), nx.asnumpy() + 1 + ny.asnumpy() + na.asnumpy())
...
@@ -92,7 +92,7 @@ def test_dtypes():
         m.run(x=data)
         data = (data > 0) * data
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
-        np.testing.assert_allclose(out.asnumpy(), data, atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), data, atol=1e-5, rtol=1e-5)

 def test_ndarray_output():
     x = sym.Variable("x")
...
@@ -110,7 +110,7 @@ def test_ndarray_output():
     m.set_input("y", ny)
     m.run()
     out = m.get_output(0)
-    np.testing.assert_allclose(out.asnumpy(), nx.asnumpy() + ny.asnumpy())
+    tvm.testing.assert_allclose(out.asnumpy(), nx.asnumpy() + ny.asnumpy())

 def test_ndarray_input():
...
@@ -131,12 +131,12 @@ def test_ndarray_input():
     in_y = tvm.nd.empty(shape, dtype)
     m.get_input("x", in_x)
     m.get_input("y", in_y)
-    np.testing.assert_allclose(nx.asnumpy(), in_x.asnumpy())
-    np.testing.assert_allclose(ny.asnumpy(), in_y.asnumpy())
+    tvm.testing.assert_allclose(nx.asnumpy(), in_x.asnumpy())
+    tvm.testing.assert_allclose(ny.asnumpy(), in_y.asnumpy())
     in_nx = m.get_input("x")
     in_ny = m.get_input("y")
-    np.testing.assert_allclose(nx.asnumpy(), in_nx.asnumpy())
-    np.testing.assert_allclose(ny.asnumpy(), in_ny.asnumpy())
+    tvm.testing.assert_allclose(nx.asnumpy(), in_nx.asnumpy())
+    tvm.testing.assert_allclose(ny.asnumpy(), in_ny.asnumpy())

 def test_num_outputs():
     x = sym.Variable('x')
...
nnvm/tests/python/compiler/test_compiler_cache.py
...
@@ -19,7 +19,7 @@ def test_compile_cache():
         m.run(x=na, y=nb)
         # get outputs
         out = m.get_output(0, tvm.nd.empty(shape, dtype))
-        np.testing.assert_allclose(out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
+        tvm.testing.assert_allclose(out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
     engine = nnvm.compiler.engine
...
nnvm/tests/python/compiler/test_fold_axis.py
 """Unittest cases for fold_axis"""
+import tvm
 import nnvm
 import nnvm.testing.resnet
 import numpy as np
...
@@ -147,7 +148,7 @@ def test_fold_resnet():
     x = run_prune(graph, params, 0)
     y = run_prune(graph, params, 3)
-    np.testing.assert_allclose(y[0].asnumpy(), x[0].asnumpy())
+    tvm.testing.assert_allclose(y[0].asnumpy(), x[0].asnumpy())

 if __name__ == "__main__":
...
nnvm/tests/python/compiler/test_nhwc_layout.py
...
@@ -50,7 +50,7 @@ def test_nhwc():
     oshape_nhwc = (1, 224, 224, out_channel)
     nchw_output = build_and_run(nchw_sym, nchw_params, data, oshape)
     nhwc_output = build_and_run(nhwc_sym, nhwc_params, data.transpose(0, 2, 3, 1), oshape_nhwc)
-    np.testing.assert_allclose(nchw_output, nhwc_output.transpose(0, 3, 1, 2), rtol=1e-5, atol=1e-5)
+    tvm.testing.assert_allclose(nchw_output, nhwc_output.transpose(0, 3, 1, 2), rtol=1e-5, atol=1e-5)

 if __name__ == "__main__":
...
nnvm/tests/python/compiler/test_op_fusion.py
...
@@ -22,7 +22,7 @@ def test_ewise_injective():
         x_np = np.random.uniform(size=dshape).astype(dtype)
         m.run(x=x_np)
         out = m.get_output(0, tvm.nd.empty((10, 6)))
-        np.testing.assert_allclose(out.asnumpy(), x_np.reshape(out.shape) * 2 + 1, atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), x_np.reshape(out.shape) * 2 + 1, atol=1e-5, rtol=1e-5)
...
@@ -54,7 +54,7 @@ def test_conv_ewise_injective():
             data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
         c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1) + 1
         c_np = c_np.reshape(c_np.shape[0], np.prod(c_np.shape[1:])) + 1
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

 def test_injective_reduce_injective():
...
@@ -74,7 +74,7 @@ def test_injective_reduce_injective():
         c_np = np.sum(data.reshape(32, 18 * 18) + 1, axis=1)
         # get output
         out = m.get_output(0, tvm.nd.empty(c_np.shape, dtype))
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

 def test_injective_conv2d():
...
@@ -107,7 +107,7 @@ def test_injective_conv2d():
             data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
         weight = np.mean(data.asnumpy(), axis=(2, 3))
         c_np = weight[:, :, np.newaxis, np.newaxis] * data.asnumpy() + residual
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

 def test_concatenate_conv2d():
...
@@ -140,7 +140,7 @@ def test_concatenate_conv2d():
         conv = topi.testing.conv2d_nchw_python(concat, kernel.asnumpy(), (1, 1), 'SAME')
         ref = concat + conv
-        np.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)

 def test_residual_block_layout_transform():
...
@@ -178,7 +178,7 @@ def test_residual_block_layout_transform():
     conv2 = topi.testing.conv2d_nchw_python(conv1, kernel2.asnumpy(), (1, 1), 'SAME')
     ref = np.maximum(conv1 + conv2, 0)
-    np.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), ref, rtol=1e-5)

 def build_and_run(sym, params, data, out_shape, target, ctx, opt_level=2):
...
@@ -218,7 +218,7 @@ def test_fuse_conv2d_elu():
     _, params2 = utils.create_workload(sym2, 1, dshape[1:], seed=0)
     output1, g1 = build_and_run(sym1, params1, data, oshape, target, ctx, opt_level=2)
     output2, g2 = build_and_run(sym2, params2, data, oshape, target, ctx, opt_level=0)
-    np.testing.assert_allclose(output1, output2, rtol=1e-5, atol=1e-5)
+    tvm.testing.assert_allclose(output1, output2, rtol=1e-5, atol=1e-5)
     # data, conv weight, bias, batch norm gamma, batch norm beta, conv op
     assert g1.index.num_nodes == 6
...
nnvm/tests/python/compiler/test_optimizer.py
...
@@ -27,7 +27,7 @@ def helper(symbol, inputs, params, update_func, run_times, target, ctx, dtype="f
         m.run()
         y_np = update_func(**np_inputs)
         out = m.get_output(0, tvm.nd.empty(y_np.shape, dtype))
-        np.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), y_np, atol=1e-5, rtol=1e-5)

 def test_sgd():
...
nnvm/tests/python/compiler/test_param_dict.py
...
@@ -68,7 +68,7 @@ def test_bigendian_rpc_param():
         m.load_params(nnvm.compiler.save_param_dict(params))
         m.run()
         out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
-        np.testing.assert_allclose(a + 1, out.asnumpy())
+        tvm.testing.assert_allclose(a + 1, out.asnumpy())
     print("Test RPC connection to PowerPC...")
     remote = rpc.connect(host, port)
...
nnvm/tests/python/compiler/test_rpc_exec.py
...
@@ -43,7 +43,7 @@ def test_rpc_executor():
     # get outputs
     out = tvm.nd.empty(shape, dtype, ctx)
     get_output(0, out)
-    np.testing.assert_allclose(out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
+    tvm.testing.assert_allclose(out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()))
     server.terminate()
...
nnvm/tests/python/compiler/test_top_assign.py
...
@@ -27,11 +27,11 @@ def test_update():
         m.set_input("w", data)
         m.run()
         out = m.get_input("w2", tvm.nd.empty(dshape, dtype))
-        np.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 2, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 2, rtol=1e-5)
         m.run()
         out = m.get_input("w2", tvm.nd.empty(dshape, dtype))
-        np.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 3, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), data.asnumpy() + 3, rtol=1e-5)
     for target, ctx in ctx_list():
         check(target, ctx)
...
nnvm/tests/python/compiler/test_top_level2.py
...
@@ -22,7 +22,7 @@ def test_conv2d():
         c_np = topi.testing.conv2d_nchw_python(data.asnumpy(), kernel.asnumpy(), 1, padding)
         c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
     x = sym.Variable("x")
     y = sym.conv2d(x, channels=10, kernel_size=(3, 3),
...
@@ -71,7 +71,7 @@ def test_mixed_precision():
         c_np = topi.testing.conv2d_nchw_python(
             data.asnumpy().astype(out_dtype), kernel.asnumpy().astype(out_dtype), 1, 1)
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

 def test_dilated_conv2d():
...
@@ -97,7 +97,7 @@ def test_dilated_conv2d():
         c_np = topi.testing.conv2d_nchw_python(data.asnumpy(), dkernel_np, 1, 1)
         c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

 def test_grouped_conv2d_nchw():
...
@@ -120,7 +120,7 @@ def test_grouped_conv2d_nchw():
         c_np = topi.testing.depthwise_conv2d_python_nchw(data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
         c_np = c_np + bias.asnumpy().reshape(kshape[0], 1, 1)
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

 def test_grouped_conv2d_nhwc():
     x = sym.Variable("x")
...
@@ -142,7 +142,7 @@ def test_grouped_conv2d_nhwc():
         c_np = topi.testing.depthwise_conv2d_python_nhwc(data.asnumpy(), kernel.asnumpy(), (1, 1), 'SAME')
         c_np = c_np + bias.asnumpy().reshape(1, 1, kshape[2])
-        np.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), c_np, rtol=1e-5)

 def test_conv2d_transpose():
...
@@ -167,7 +167,7 @@ def test_conv2d_transpose():
         c_np = c_np + bias.asnumpy().reshape(kshape[1], 1, 1)
         d_np = np.zeros(shape=oshape)
         d_np[:,:,0:c_np.shape[2],0:c_np.shape[3]] = c_np
-        np.testing.assert_allclose(out.asnumpy(), d_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), d_np, rtol=1e-5)

 def test_max_pool2d():
...
@@ -185,7 +185,7 @@ def test_max_pool2d():
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
         b_np = np.max(data.asnumpy().reshape(1, 3, 14, 2, 14, 2), axis=(3, 5))
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)

 def test_avg_pool2d():
...
@@ -202,7 +202,7 @@ def test_avg_pool2d():
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
         b_np = np.mean(data.asnumpy().reshape(1, 3, 14, 2, 14, 2), axis=(3, 5))
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)

 def test_avg_pool2d_no_count_pad():
...
@@ -237,7 +237,7 @@ def test_avg_pool2d_no_count_pad():
         data = tvm.nd.array(a_np)
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty((n, oc, oh, ow), dtype))
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)

 def test_global_max_pool2d():
...
@@ -254,7 +254,7 @@ def test_global_max_pool2d():
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
         b_np = np.max(data.asnumpy(), axis=(2, 3), keepdims=True)
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)

 def test_global_avg_pool2d():
...
@@ -271,7 +271,7 @@ def test_global_avg_pool2d():
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
         b_np = np.mean(data.asnumpy(), axis=(2, 3), keepdims=True)
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)

 def test_upsampling_nearest_neighbor():
...
@@ -290,7 +290,7 @@ def test_upsampling_nearest_neighbor():
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
         b_np = topi.testing.upsampling_python(a_np, scale, "NCHW")
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5)

 def test_upsampling_bilinear():
     x = sym.Variable("x")
...
@@ -309,7 +309,7 @@ def test_upsampling_bilinear():
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
         b_np = topi.testing.bilinear_resize_python(a_np, (32 * scale, 32 * scale), "NCHW")
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

 def test_resize_bilinear():
     x = sym.Variable("x")
...
@@ -327,7 +327,7 @@ def test_resize_bilinear():
         m.run(x=data)
         out = m.get_output(0, tvm.nd.empty(oshape, dtype))
         b_np = topi.testing.bilinear_resize_python(a_np, (60, 60), "NHWC")
-        np.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

 if __name__ == "__main__":
     test_mixed_precision()
...
nnvm/tests/python/compiler/test_top_level4.py
...
@@ -24,7 +24,7 @@ def verify_transpose(dshape, axes):
     m.run(x=data)
     out_np = np.transpose(data.asnumpy(), axes=axes) + 1
     out = m.get_output(0, tvm.nd.empty(out_np.shape))
-    np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)

 def verify_reduce_explicit(dshape, data, result, fsym, oshape=None, otype='float32', **kwargs):
     """ Verify reduce operations by comparign its result with `result` """
...
@@ -43,7 +43,7 @@ def verify_reduce_explicit(dshape, data, result, fsym, oshape=None, otype='float
     out = m.get_output(0, tvm.nd.empty(oshape, dtype=otype))
     if isinstance(result, np.ndarray):
         np.testing.assert_equal(out.asnumpy().shape, result.shape)
-        np.testing.assert_allclose(out.asnumpy(), result, atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), result, atol=1e-5, rtol=1e-5)
     else:
         tvm_out = out.asnumpy()
         assert abs(result - tvm_out) <= (1e-5 + 1e-5 * abs(tvm_out))
...
@@ -68,7 +68,7 @@ def verify_collapse(dshape, target_shape, fnp):
     m.run(x=data)
     out = m.get_output(0, tvm.nd.empty(target_shape))
     out_np = fnp(data)
-    np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)

 def test_transpose():
...
@@ -149,7 +149,7 @@ def verify_flip(ishape, axis):
     m = graph_runtime.create(graph, lib, ctx)
     m.run(x=x_np)
     out = m.get_output(0, tvm.nd.empty(res.shape))
-    np.testing.assert_allclose(out.asnumpy(), res, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), res, atol=1e-5, rtol=1e-5)

 def test_flip():
...
@@ -174,7 +174,7 @@ def verify_reshape(dshape, oshape):
     m.run(x=data)
     out_np = data.asnumpy().reshape(oshape) + 1
     out = m.get_output(0, tvm.nd.empty(out_np.shape))
-    np.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), out_np, atol=1e-5, rtol=1e-5)

 def test_reshape():
...
@@ -435,7 +435,7 @@ def test_full():
         m = graph_runtime.create(graph, lib, ctx)
         m.run(data=np.random.uniform(size=shape).astype(dtype))
         out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
-        np.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=value, dtype=dtype), atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=value, dtype=dtype), atol=1e-5, rtol=1e-5)
...
@@ -445,7 +445,7 @@ def test_full():
         m = graph_runtime.create(graph, lib, ctx)
         m.run(data=np.random.uniform(size=shape).astype(dtype))
         out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
-        np.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=1, dtype=dtype), atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=1, dtype=dtype), atol=1e-5, rtol=1e-5)
...
@@ -455,7 +455,7 @@ def test_full():
         m = graph_runtime.create(graph, lib, ctx)
         m.run(data=np.random.uniform(size=shape).astype(dtype))
         out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
-        np.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=0, dtype=dtype), atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=0, dtype=dtype), atol=1e-5, rtol=1e-5)
...
@@ -465,7 +465,7 @@ def test_full():
         m = graph_runtime.create(graph, lib, ctx)
         m.run()
         out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
-        np.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=value, dtype=dtype), atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=value, dtype=dtype), atol=1e-5, rtol=1e-5)
...
@@ -475,7 +475,7 @@ def test_full():
         m = graph_runtime.create(graph, lib, ctx)
         m.run()
         out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
-        np.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=1, dtype=dtype), atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=1, dtype=dtype), atol=1e-5, rtol=1e-5)
...
@@ -485,7 +485,7 @@ def test_full():
         m = graph_runtime.create(graph, lib, ctx)
         m.run()
         out = m.get_output(0, tvm.nd.empty(shape, dtype=dtype))
-        np.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=0, dtype=dtype), atol=1e-5, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np.full(shape, fill_value=0, dtype=dtype), atol=1e-5, rtol=1e-5)
...
@@ -534,7 +534,7 @@ def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1),
     m.set_input("data", np.random.uniform(size=dshape).astype(dtype))
     m.run()
     out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype))
-    np.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5)

 def test_multibox_prior():
     verify_multibox_prior((1, 3, 50, 50))
...
@@ -571,7 +571,7 @@ def test_multibox_transform_loc():
     m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)})
     m.run()
     out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype))
-    np.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5)

 def test_nms():
     dshape = (1, 5, 6)
...
@@ -599,7 +599,7 @@ def test_nms():
     m.set_input(**{"data": np_data, "valid_count": np_valid_count})
     m.run()
     out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32"))
-    np.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)

 def np_slice_like(np_data, np_shape_like, axis=[]):
     begin_idx = [0 for _ in np_data.shape]
...
@@ -634,7 +634,7 @@ def verify_slice_like(np_data, np_shape_like, axis=[]):
     m.set_input(**{"data1": np_data, "data2": np_shape_like})
     m.run()
     out = m.get_output(0, tvm.nd.empty(np_result.shape, dtype))
-    np.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5)

 def test_slice_like():
     np_data = np.random.uniform(size=(3, 4, 5))
...
@@ -673,7 +673,7 @@ def verify_where(condition, x, y):
     m.set_input(**{"condition": condition, "x": x, "y": y})
     m.run()
     out = m.get_output(0, tvm.nd.empty(x.shape, dtype))
-    np.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5)
+    tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5)

 def test_where():
     shape = (13, 8, 224, 224, 6)
...
nnvm/tests/python/frontend/coreml/test_forward.py
...
@@ -109,7 +109,7 @@ def verify_AddLayerParams(input_dim, alpha=2):
                             ['input1', 'input2'], b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_AddLayerParams():
     verify_AddLayerParams((1, 2, 2), 0)
...
@@ -139,7 +139,7 @@ def verify_MultiplyLayerParams(input_dim, alpha):
                             ['input1', 'input2'], b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_MultiplyLayerParams():
     verify_MultiplyLayerParams((1, 2, 2), 0)
...
@@ -168,7 +168,7 @@ def verify_ConcatLayerParams(input1_dim, input2_dim):
                             ['input1', 'input2'], b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_ConcatLayerParams():
     verify_ConcatLayerParams((1, 1, 2, 2), (1, 2, 2, 2))
...
@@ -198,7 +198,7 @@ def verify_UpsampleLayerParams(input_dim, scale, mode):
     model = cm.models.MLModel(builder.spec)
     for target, ctx in ctx_list():
         out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_UpsampleLayerParams():
     verify_UpsampleLayerParams((1, 16, 32, 32), 2, 'NN')
...
@@ -218,7 +218,7 @@ def verify_l2_normalize(input_dim, eps):
     model = cm.models.MLModel(builder.spec)
     for target, ctx in ctx_list():
         out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_l2_normalize():
     verify_l2_normalize((1, 3, 20, 20), 0.001)
...
@@ -243,7 +243,7 @@ def verify_lrn(input_dim, size, bias, alpha, beta):
     model = cm.models.MLModel(builder.spec)
     for target, ctx in ctx_list():
         out = run_tvm_graph(model, a_np, 'input', b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_lrn():
     verify_lrn((1, 3, 10, 20), 3, 1.0, 1.0, 0.5)
...
@@ -271,7 +271,7 @@ def verify_average(input_dim1, input_dim2, axis=0):
                             ['input1', 'input2'], b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_average():
     verify_average((1, 3, 20, 20), (1, 3, 20, 20))
...
@@ -303,7 +303,7 @@ def verify_max(input_dim):
                             ['input1', 'input2', 'input3'], b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_max():
     verify_max((1, 3, 20, 20))
...
@@ -334,7 +334,7 @@ def verify_min(input_dim):
                             ['input1', 'input2', 'input3'], b_np.shape, dtype)
-        np.testing.assert_allclose(out, b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(out, b_np, rtol=1e-5)

 def test_forward_min():
     verify_min((1, 3, 20, 20))
...
nnvm/tests/python/frontend/darknet/test_forward.py
...
@@ -139,7 +139,7 @@ def test_forward(net, build_dtype='float32'):
     tvm_out = _get_tvm_output(net, data, build_dtype)
     for tvm_outs, darknet_out in zip(tvm_out, darknet_output):
-        np.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3)
+        tvm.testing.assert_allclose(darknet_out, tvm_outs, rtol=1e-3, atol=1e-3)

 def test_rnn_forward(net):
     '''Test network with given input data on both darknet and tvm'''
...
@@ -158,7 +158,7 @@ def test_rnn_forward(net):
     last_layer = net.layers[net.n - 1]
     darknet_outshape = (last_layer.batch, last_layer.outputs)
     darknet_out = darknet_out.reshape(darknet_outshape)
-    np.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4)
+    tvm.testing.assert_allclose(darknet_out, tvm_out, rtol=1e-4, atol=1e-4)

 def test_forward_extraction():
     '''test extraction model'''
...
nnvm/tests/python/frontend/keras/test_forward.py
...
@@ -52,7 +52,7 @@ def verify_keras_frontend(keras_model, need_transpose=True):
     for kout, tout in zip(keras_out, tvm_out):
         if need_transpose:
             tout = to_channels_last(tout)
-        np.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(kout, tout, rtol=1e-5, atol=1e-5)

 def test_forward_elemwise_add():
     r = []
...
nnvm/tests/python/frontend/mxnet/test_forward.py
...
@@ -62,13 +62,13 @@ def verify_mxnet_frontend_impl(mx_symbol, data_shape=(1, 3, 224, 224), out_shape
         gluon_out, gluon_sym = get_gluon_output(name, x)
         for target, ctx in ctx_list():
             tvm_out = get_tvm_output(gluon_sym, x, None, None, target, ctx, dtype)
-            np.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5)
+            tvm.testing.assert_allclose(gluon_out, tvm_out, rtol=1e-5, atol=1e-5)
     else:
         mx_out, args, auxs = get_mxnet_output(mx_symbol, x, dtype)
         assert "data" not in args
         for target, ctx in ctx_list():
             tvm_out = get_tvm_output(mx_symbol, x, args, auxs, target, ctx, dtype)
-            np.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)
+            tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5)

 def test_forward_mlp():
     mlp = model_zoo.mx_mlp
...
nnvm/tests/python/frontend/onnx/test_forward.py
...
@@ -70,7 +70,7 @@ def verify_onnx_forward_impl(graph_file, data_shape, out_shape):
     c2_out = get_caffe2_output(model, x, dtype)
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, x, target, ctx, out_shape, dtype)
-        np.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(c2_out, tvm_out, rtol=1e-5, atol=1e-5)

 def verify_super_resolution_example():
     verify_onnx_forward_impl(super_resolution, (1, 1, 224, 224), (1, 1, 672, 672))
...
@@ -112,7 +112,7 @@ def test_reshape():
         x = np.random.uniform(size=in_shape).astype('int32')
         tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
-        np.testing.assert_allclose(ref_shape, tvm_out.shape)
+        tvm.testing.assert_allclose(ref_shape, tvm_out.shape)

 def test_reshape_like():
     in_shape = (4, 3, 3, 4)
...
@@ -142,7 +142,7 @@ def test_reshape_like():
         x = np.random.uniform(size=in_shape).astype('float32')
         tvm_out = get_tvm_output(model, x, target, ctx, ref_shape, 'float32')
-        np.testing.assert_allclose(ref_shape, tvm_out.shape)
+        tvm.testing.assert_allclose(ref_shape, tvm_out.shape)

 def _test_power_iteration(x_shape, y_shape):
     if isinstance(y_shape, int):
...
@@ -168,7 +168,7 @@ def _test_power_iteration(x_shape, y_shape):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [x, y], target, ctx, np_res.shape)
-        np.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(np_res, tvm_out, rtol=1e-5, atol=1e-5)

 def test_power():
     _test_power_iteration((1, 3), (1))
...
@@ -193,7 +193,7 @@ def test_squeeze():
         x = np.random.uniform(size=in_shape).astype('float32')
         tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
-        np.testing.assert_allclose(out_shape, tvm_out.shape)
+        tvm.testing.assert_allclose(out_shape, tvm_out.shape)

 def test_unsqueeze():
     in_shape = (3, 3)
...
@@ -214,7 +214,7 @@ def test_unsqueeze():
         x = np.random.uniform(size=in_shape).astype('float32')
         tvm_out = get_tvm_output(model, x, target, ctx, out_shape, 'float32')
-        np.testing.assert_allclose(out_shape, tvm_out.shape)
+        tvm.testing.assert_allclose(out_shape, tvm_out.shape)

 def verify_gather(in_shape, indices, axis, dtype):
     x = np.random.uniform(size=in_shape).astype(dtype)
...
@@ -235,7 +235,7 @@ def verify_gather(in_shape, indices, axis, dtype):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [x, indices], target, ctx, out_np.shape)
-        np.testing.assert_allclose(out_np, tvm_out)
+        tvm.testing.assert_allclose(out_np, tvm_out)

 def test_gather():
     verify_gather((4,), [1], 0, 'int32')
...
@@ -263,7 +263,7 @@ def _test_slice_iteration(indata, outdata, starts, ends, axes=None):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, 'float32')
-        np.testing.assert_allclose(outdata, tvm_out)
+        tvm.testing.assert_allclose(outdata, tvm_out)

 def test_slice():
     x = np.random.randn(20, 10, 5).astype(np.float32)
...
@@ -290,7 +290,7 @@ def _test_onnx_op_elementwise(inshape, outfunc, npargs, dtype, opname, kwargs):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, indata, target, ctx, outdata.shape, dtype)
-        np.testing.assert_allclose(outdata, tvm_out)
+        tvm.testing.assert_allclose(outdata, tvm_out)

 def test_floor():
     _test_onnx_op_elementwise((2, 4, 5, 6), np.floor, {}, 'float32', 'Floor', {})
...
@@ -329,7 +329,7 @@ def test_matmul():
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [a_array, b_array], target, ctx, out_np.shape)
-        np.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(out_np, tvm_out, rtol=1e-5, atol=1e-5)

 def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None):
     in_array = np.random.uniform(size=shape).astype(dtype)
...
@@ -376,7 +376,7 @@ def verify_lrn(shape, nsize, dtype, alpha=None, beta=None, bias=None):
     # get outputs
     tvm_out = m.get_output(0, tvm.nd.empty(shape, dtype))
     py_out = _get_python_lrn()
-    np.testing.assert_allclose(py_out, tvm_out.asnumpy(), rtol=1e-5, atol=1e-5)
+    tvm.testing.assert_allclose(py_out, tvm_out.asnumpy(), rtol=1e-5, atol=1e-5)

 def test_lrn():
     verify_lrn((5, 5, 5, 5), 3, 'float32')
...
@@ -400,7 +400,7 @@ def _test_upsample_nearest():
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32')
-        np.testing.assert_allclose(out_array, tvm_out)
+        tvm.testing.assert_allclose(out_array, tvm_out)

 def _test_upsample_bilinear():
     scale = 2
...
@@ -420,7 +420,7 @@ def _test_upsample_bilinear():
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, in_array, target, ctx, out_shape, 'float32')
-        np.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(out_array, tvm_out, rtol=1e-5, atol=1e-5)

 def test_upsample():
     _test_upsample_nearest()
...
@@ -447,7 +447,7 @@ def _test_softmax(inshape, axis):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, indata, target, ctx, outshape, 'float32')
-        np.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(outdata, tvm_out, rtol=1e-5, atol=1e-5)

 def test_softmax():
     _test_softmax((1, 10), None)
...
@@ -479,7 +479,7 @@ def verify_min(input_dim):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
-        np.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)

 def test_forward_min():
     verify_min((1, 3, 20, 20))
...
@@ -511,7 +511,7 @@ def verify_max(input_dim):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
-        np.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)

 def test_forward_max():
     verify_max((1, 3, 20, 20))
...
@@ -543,7 +543,7 @@ def verify_mean(input_dim):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [a_np1, a_np2, a_np3], target, ctx, b_np.shape)
-        np.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)

 def test_forward_mean():
     verify_mean((1, 3, 20, 20))
...
@@ -569,7 +569,7 @@ def verify_hardsigmoid(input_dim, alpha, beta):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape)
-        np.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)

 def test_forward_hardsigmoid():
     verify_hardsigmoid((1, 3, 20, 20), 0.5, 0.6)
...
@@ -618,7 +618,7 @@ def verify_argmin(input_dim, axis=None, keepdims=None):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
-        np.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)

 def verify_argmax(input_dim, axis=None, keepdims=None):
     def _argmax_numpy(data, axis=0, keepdims=True):
...
@@ -665,7 +665,7 @@ def verify_argmax(input_dim, axis=None, keepdims=None):
     for target, ctx in ctx_list():
         tvm_out = get_tvm_output(model, [a_np1], target, ctx, b_np.shape, b_np.dtype)
-        np.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b_np, tvm_out, rtol=1e-5, atol=1e-5)

 def test_forward_arg_min_max():
     '''Verify argmin and argmax'''
...
@@ -705,7 +705,7 @@ def verify_constantfill(is_shape, input_dim, out_dim, value, dtype, **kwargs):
         else:
             tvm_out = get_tvm_output(model, [input_a], target, ctx, out.shape)
-        np.testing.assert_allclose(out, tvm_out, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(out, tvm_out, rtol=1e-5, atol=1e-5)

 def test_constantfill():
     verify_constantfill(True, (2, 3, 4, 5), (2, 3, 4, 5), 10, 'float32')
...
nnvm/tests/python/frontend/tensorflow/test_forward.py
...
@@ -120,7 +120,7 @@ def compare_tf_with_tvm(in_data, in_name, out_name, init_global_variables=False,
                 continue
             tvm_output = run_tvm_graph(final_graph_def, in_data, in_node, target=device)
-            np.testing.assert_allclose(tf_output, tvm_output, atol=1e-5, rtol=1e-5)
+            tvm.testing.assert_allclose(tf_output, tvm_output, atol=1e-5, rtol=1e-5)
         sess.close()
...
@@ -580,7 +580,7 @@ def _test_lstm_cell(batch_size, num_hidden, num_layers, forget_bias, dtype):
     out_state_c = np.reshape(out_state_tup[0], (batch_size, num_hidden))
     out_state_h = np.reshape(out_state_tup[1], (batch_size, num_hidden))
     tvm_out = [out, out_state_c, out_state_h]
-    np.testing.assert_allclose(tf_out, tvm_out, rtol=1e-3, atol=1e-3)
+    tvm.testing.assert_allclose(tf_out, tvm_out, rtol=1e-3, atol=1e-3)

 def test_forward_lstm():
     '''test LSTM block cell'''
...
@@ -653,7 +653,7 @@ def test_forward_inception_v3():
     with tf.Session() as sess:
         tf_output = run_tf_graph(sess, data, 'input:0', 'InceptionV3/Predictions/Reshape_1:0')
         tvm_output = run_tvm_graph(graph_def, data, 'input')
-        np.testing.assert_allclose(tf_output, tvm_output, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(tf_output, tvm_output, rtol=1e-5, atol=1e-5)

 #######################################################################
 # Inception V1
...
@@ -689,7 +689,7 @@ def test_forward_inception_v1():
     with tf.Session() as sess:
         tf_output = run_tf_graph(sess, data, 'DecodeJpeg/contents:0', 'softmax:0')
         tvm_output = run_tvm_graph(graph_def, tvm_data, 'DecodeJpeg/contents')
-        np.testing.assert_allclose(tf_output, tvm_output, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(tf_output, tvm_output, rtol=1e-5, atol=1e-5)

 #######################################################################
 # Mobilenet
...
@@ -707,7 +707,7 @@ def test_forward_mobilenet():
     with tf.Session() as sess:
         tf_output = run_tf_graph(sess, data, 'input:0', out_node + ':0')
         tvm_output = run_tvm_graph(graph_def, data, 'input')
-        np.testing.assert_allclose(np.squeeze(tvm_output), np.squeeze(tf_output), rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(np.squeeze(tvm_output), np.squeeze(tf_output), rtol=1e-5, atol=1e-5)

 #######################################################################
 # ResnetV2
...
@@ -726,7 +726,7 @@ def test_forward_resnetv2():
     with tf.Session() as sess:
         tf_output = run_tf_graph(sess, data, 'input_tensor:0', out_node + ':0')
         tvm_output = run_tvm_graph(graph_def, data, 'input_tensor', tf_output.shape, 'float32')
-        np.testing.assert_allclose(np.squeeze(tvm_output), np.squeeze(tf_output), rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(np.squeeze(tvm_output), np.squeeze(tf_output), rtol=1e-5, atol=1e-5)

 #######################################################################
 # PTB
...
@@ -834,7 +834,7 @@ def test_forward_ptb():
                                           in_state, cnt_sample)
         tf_sample_str = _pretty_print(tf_samples, False, id_to_word)
         inpt = tvm_sample_str
-        np.testing.assert_allclose(tf_samples, tvm_samples, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(tf_samples, tvm_samples, rtol=1e-5, atol=1e-5)
         assert (tvm_sample_str == tf_sample_str)

 #######################################################################
...
python/tvm/__init__.py
...
@@ -17,6 +17,7 @@ from . import ir_builder
 from . import target
 from . import generic
 from . import hybrid
+from . import testing
 from . import ndarray as nd
 from .ndarray import context, cpu, gpu, opencl, cl, vulkan, metal, mtl
...
python/tvm/testing.py (new file, mode 0 → 100644)
+""" TVM testing utilities """
+import numpy as np
+
+def assert_allclose(actual, desired, rtol=1e-7, atol=1e-7):
+    """ Version of np.testing.assert_allclose with `atol` and `rtol` fields set
+    in reasonable defaults.
+
+    Arguments `actual` and `desired` are not interchangable, since the function
+    compares the `abs(actual-desired)` with `atol+rtol*abs(desired)`. Since we
+    often allow `desired` to be close to zero, we generally want non-zero `atol`.
+    """
+    np.testing.assert_allclose(actual, desired, rtol=rtol, atol=atol, verbose=True)
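With the `from . import testing` line added to `python/tvm/__init__.py` above, the helper is reachable as `tvm.testing.assert_allclose`. A small usage sketch under the new defaults (illustrative values, not from the diff):

```python
import numpy as np
import tvm

desired = np.zeros(4, dtype="float32")
actual = desired + 1e-8  # small absolute error near zero

# Passes under the new defaults: |1e-8 - 0| <= atol + rtol * |0| = 1e-7.
# The same call through np.testing.assert_allclose with its atol=0 default
# would fail for these near-zero values.
tvm.testing.assert_allclose(actual, desired)
```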
tests/python/contrib/test_cblas.py
...
@@ -27,7 +27,7 @@ def test_matmul_add():
         d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
         bb = 10.0
         f(a, b, d, bb)
-        np.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + bb, rtol=1e-5)
+        tvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + bb, rtol=1e-5)
     verify()
...
tests/python/contrib/test_cublas.py
...
@@ -24,7 +24,7 @@ def test_matmul_add():
         b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
         c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
         f(a, b, c)
-        np.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
     verify()
...
tests/python/contrib/test_dlpack.py
...
@@ -34,7 +34,7 @@ def test():
         f_pytorch = to_pytorch_func(f)
         zz2 = torch.empty(137, 137)
         f_pytorch(xx, yy, zz2)
-        np.testing.assert_allclose(zz.numpy(), zz2.numpy(), rtol=1e-6)
+        tvm.testing.assert_allclose(zz.numpy(), zz2.numpy(), rtol=1e-6)
     except ImportError:
         pass
...
tests/python/contrib/test_miopen.py
...
@@ -56,7 +56,7 @@ def test_conv2d():
     y_ref = tvm.nd.array(np.random.uniform(-1, 1, yshape).astype(np.float32), ctx)
     f_ref(x, w, y_ref)
     print("Max abs diff:", np.max(np.abs(y.asnumpy() - y_ref.asnumpy())))
-    np.testing.assert_allclose(y.asnumpy(), y_ref.asnumpy(), atol=1e-3)
+    tvm.testing.assert_allclose(y.asnumpy(), y_ref.asnumpy(), atol=1e-3)
     verify()
...
tests/python/contrib/test_mps.py
...
@@ -41,7 +41,7 @@ def test_matmul():
         b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
         c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
         f(a, b, c)
-        np.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 1, rtol=1e-5)
     verify(A, B, D, s)
...
tests/python/contrib/test_mxnet_bridge.py
...
@@ -40,7 +40,7 @@ def mxnet_check():
     mxf(xx, yy, zz, 10.0)
-    np.testing.assert_allclose(zz.asnumpy(), (xx.asnumpy() + yy.asnumpy()) * 10)
+    tvm.testing.assert_allclose(zz.asnumpy(), (xx.asnumpy() + yy.asnumpy()) * 10)
...
tests/python/contrib/test_nnpack.py
...
@@ -28,7 +28,7 @@ def test_fully_connected_output():
         d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
         bb = 10.0
         f(a, b, d, bb)
-        np.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy().T) + bb, rtol=1e-5)
+        tvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy().T) + bb, rtol=1e-5)
     verify()
...
@@ -58,7 +58,7 @@ def test_fully_connected_inference():
         d = tvm.nd.array(np.zeros((m, ), dtype=D.dtype), ctx)
         bb = 10.0
         f(a, b, d, bb)
-        np.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy().T) + bb, rtol=1e-5)
+        tvm.testing.assert_allclose(d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy().T) + bb, rtol=1e-5)
     verify()
...
@@ -142,7 +142,7 @@ def test_convolution_inference():
         td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), ctx)
         f(ta, tb, tc, td)
         nd = np_conv(np.reshape(na, (1, IC, IH, IW)), nb, PAD, STRIDE)
-        np.testing.assert_allclose(td.asnumpy(), nd.reshape(IC, IH, IW), rtol=1e-5)
+        tvm.testing.assert_allclose(td.asnumpy(), nd.reshape(IC, IH, IW), rtol=1e-5)
     verify()
...
@@ -187,7 +187,7 @@ def test_convolution_output():
         td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), ctx)
         f(ta, tb, tc, td)
         nd = np_conv(na, nb, PAD)
-        np.testing.assert_allclose(td.asnumpy(), nd, rtol=1e-5)
+        tvm.testing.assert_allclose(td.asnumpy(), nd, rtol=1e-5)
     verify()
...
tests/python/contrib/test_rocblas.py
...
@@ -24,7 +24,7 @@ def test_matmul_add():
         b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
         c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
         f(a, b, c)
-        np.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-5)
     verify()
...
tests/python/contrib/test_sort.py
...
@@ -28,7 +28,7 @@ def test_sort():
     b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx)
     c = tvm.nd.array(np.zeros(a.shape, dtype=out.dtype), ctx)
     f(a, b, c)
-    np.testing.assert_allclose(c.asnumpy(), np.array(sorted_index).astype(out.dtype), rtol=1e-5)
+    tvm.testing.assert_allclose(c.asnumpy(), np.array(sorted_index).astype(out.dtype), rtol=1e-5)

 def test_sort_np():
     dshape = (1, 2, 3, 4, 5, 6)
...
@@ -55,7 +55,7 @@ def test_sort_np():
     b = tvm.nd.array(np.array(sort_num_input).astype(sort_num.dtype), ctx)
     c = tvm.nd.array(np.zeros(a.shape, dtype=out.dtype), ctx)
     f(a, b, c)
-    np.testing.assert_allclose(c.asnumpy(), np_out, rtol=1e-5)
+    tvm.testing.assert_allclose(c.asnumpy(), np_out, rtol=1e-5)

 if __name__ == "__main__":
     test_sort()
...
tests/python/contrib/test_sparse.py
...
@@ -27,7 +27,7 @@ def test_static_tensor():
     c.indices = a.indices
     c.indptr = a.indptr
     f(a.data, c.data)
-    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)
+    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)

 def test_dynamic_tensor():
     dtype = 'float32'
...
@@ -53,7 +53,7 @@ def test_dynamic_tensor():
     c.indices = a.indices
     c.indptr = a.indptr
     f(a.data.shape[0], a.data, c.data)
-    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)
+    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)

 def test_sparse_array_tuple():
     dtype, itype = 'float32', 'int32'
...
@@ -91,7 +91,7 @@ def test_sparse_array_tuple():
     c.indices = a.indices
     c.indptr = a.indptr
     f(a.data.shape[0], a.data, c.data)
-    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)
+    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() * 2., rtol=1e-5)

 if __name__ == "__main__":
     test_static_tensor()
...
tests/python/integration/test_dot.py
@@ -46,7 +46,7 @@ def test_dot():
        b = tvm.nd.array(np.random.uniform(size=(nn,)).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros((1,), dtype=C.dtype), ctx)
        f(a, b, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()), rtol=1e-4)

    verify("llvm")
tests/python/integration/test_ewise.py
@@ -31,7 +31,7 @@ def test_exp():
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        fexp(a, b)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)

    check_device("opencl -device=intel_graphics")

@@ -75,7 +75,7 @@ def test_multiple_cache_write():
        a1 = tvm.nd.array(np.random.uniform(size=n).astype(A1.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        func(a0, a1, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a0.asnumpy() + a1.asnumpy() + (a0.asnumpy() * a1.asnumpy()),
            rtol=1e-5)

@@ -106,7 +106,7 @@ def test_log_pow_llvm():
    ftimer = flog.time_evaluator(flog.entry_name, ctx, number=1, repeat=repeat)
    res = ftimer(a, b)
    assert(len(res.results) == repeat)
-    np.testing.assert_allclose(
+    tvm.testing.assert_allclose(
        b.asnumpy(), np.power(np.log(a.asnumpy()), 2.0), rtol=1e-5)

@@ -136,7 +136,7 @@ def test_popcount():
        a = tvm.nd.array(np.random.randint(low=0, high=1000, size=n, dtype=A.dtype), ctx)
        b = tvm.nd.array(np.zeros(shape=n, dtype=B.dtype), ctx)
        func(a, b)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), list(map(lambda x: bin(x).count('1'), a.asnumpy())), rtol=1e-5)

    check_device("llvm")

@@ -186,7 +186,7 @@ def test_add():
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        ftimer = fadd.time_evaluator(fadd.entry_name, ctx, number=1)
        tcost = ftimer(a, b, c).mean
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy(), rtol=1e-6)

    check_device("opencl")

@@ -233,7 +233,7 @@ def try_warp_memory():
        a = tvm.nd.array((np.random.uniform(size=m) * 256).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(m, dtype=B.dtype), ctx)
        f(a, b)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), a.asnumpy() + 3, rtol=1e-6)

    check_device("cuda")
tests/python/integration/test_ewise_fpga.py
@@ -37,7 +37,7 @@ def test_exp():
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), ctx)
        fexp(a, b)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)

    check_device("sdaccel")

@@ -78,7 +78,7 @@ def test_multi_kernel():
        c = tvm.nd.array(np.random.uniform(size=n).astype(C.dtype), ctx)
        d = tvm.nd.array(np.random.uniform(size=n).astype(D.dtype), ctx)
        fadd(a, b, c, d)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            d.asnumpy(), a.asnumpy() * 2 + b.asnumpy(), rtol=1e-5)

    check_device("sdaccel")
tests/python/integration/test_gemm.py
@@ -85,7 +85,7 @@ def test_gemm():
        ftimer = f.time_evaluator(f.entry_name, ctx, number=1)
        tcost = ftimer(a, b, c).mean
        print("%s: exec=%g sec/op" % (ctx, tcost))
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(a_np, b_np.T), rtol=1e-5)

    check_device("vulkan")
tests/python/integration/test_reduce.py
@@ -42,7 +42,7 @@ def test_reduce_prims():
        npy[:2] = 0
        res = np_reducer(x.asnumpy(), axis=1)
        res[:2] = 0
-        np.testing.assert_allclose(npy, res, rtol=1e-4)
+        tvm.testing.assert_allclose(npy, res, rtol=1e-4)

    check_device("metal")
    check_device("vulkan")

@@ -78,7 +78,7 @@ def test_rfactor():
        b = tvm.nd.array(np.zeros(1, dtype=B.dtype), ctx)
        fsum(a, b)
        res = np.sum(a.asnumpy(), axis=0)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), res, rtol=1e-4)

    check_target()

@@ -108,7 +108,7 @@ def test_rfactor_factor_axis():
        b = tvm.nd.array(np.zeros(1, dtype=B.dtype), ctx)
        fsum(a, b)
        res = np.sum(a.asnumpy(), axis=0)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), res, rtol=1e-4)

    check_target()

@@ -155,7 +155,7 @@ def test_rfactor_threads():
        fsum(a, b)
        res = np.sum(a.asnumpy(), axis=1)
        res[:2] = 0
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), res, rtol=1e-4)

    check_target("vulkan")

@@ -206,7 +206,7 @@ def test_rfactor_elemwise_threads():
        b = tvm.nd.array(np.zeros(m, dtype=B.dtype), ctx)
        fsum(a, b)
        res = np.sum(a.asnumpy(), axis=1) + 2
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), res, rtol=1e-4)

    check_target("vulkan")

@@ -256,7 +256,7 @@ def test_argmax():
        nd_res0 = tvm.nd.array(np.zeros(mm, dtype='int32'), ctx)
        nd_res1 = tvm.nd.array(np.zeros(mm, dtype='float32'), ctx)
        fargmax(nd_idx, nd_val, nd_res0, nd_res1)
-        np.testing.assert_allclose(np_res, nd_res0.asnumpy())
+        tvm.testing.assert_allclose(np_res, nd_res0.asnumpy())

    check_target()

@@ -316,7 +316,7 @@ def test_rfactor_argmax():
        nd_res0 = tvm.nd.array(np.zeros(mm, dtype='int32'), ctx)
        nd_res1 = tvm.nd.array(np.zeros(mm, dtype='float32'), ctx)
        fargmax(nd_idx, nd_val, nd_res0, nd_res1)
-        np.testing.assert_allclose(np_res, nd_res0.asnumpy())
+        tvm.testing.assert_allclose(np_res, nd_res0.asnumpy())

    check_target("cuda")
    check_target("vulkan")
tests/python/integration/test_scan.py
@@ -38,7 +38,7 @@ def test_scan():
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros((m, n), dtype=res.dtype), ctx)
        fscan(a, b)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), np.cumsum(a_np, axis=0))

    check_device("vulkan")
tests/python/unittest/test_codegen_cross_llvm.py
@@ -67,7 +67,7 @@ def test_llvm_add_pipeline():
        b = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        farm(a, b, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())
        print("Verification finish on remote..")
tests/python/unittest/test_codegen_cuda.py
@@ -27,7 +27,7 @@ def test_cuda_vectorize_add():
            np.random.uniform(size=(n, lanes)))
        c = tvm.nd.empty((n,), B.dtype, ctx)
        fun(a, c)
-        np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
+        tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)

    check_cuda("float32", 64, 2)
    check_cuda("float16", 64, 2)

@@ -62,7 +62,7 @@ def test_cuda_multiply_add():
        c = tvm.nd.empty((n,), C.dtype, ctx).copyfrom(np_c)
        d = tvm.nd.empty((n,), D.dtype, ctx)
        fun(a, b, c, d)
-        np.testing.assert_allclose(d.asnumpy(), np_d)
+        tvm.testing.assert_allclose(d.asnumpy(), np_d)

    check_cuda("int8", 64, 4)

def test_cuda_vectorize_load():

@@ -83,7 +83,7 @@ def test_cuda_vectorize_load():
        a = tvm.nd.empty((n,), A.dtype, ctx).copyfrom(np_a)
        b = tvm.nd.empty((n,), B.dtype, ctx)
        fun(a, b)
-        np.testing.assert_allclose(a.asnumpy(), b.asnumpy())
+        tvm.testing.assert_allclose(a.asnumpy(), b.asnumpy())

    check_cuda("int8", 64, 8)
    check_cuda("int8", 64, 16)
tests/python/unittest/test_codegen_device.py
@@ -51,7 +51,7 @@ def test_add_pipeline():
        b = tvm.nd.array(np.random.uniform(size=()).astype(Bb.dtype), ctx)
        d = tvm.nd.array(np.zeros(n, dtype=Db.dtype), ctx)
        f(a, b, d)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            d.asnumpy(), a.asnumpy() + b.asnumpy() + 1)

    def check_module_save(device, host="stackvm"):

@@ -75,7 +75,7 @@ def test_add_pipeline():
        b = tvm.nd.array(np.random.uniform(size=()).astype(Bb.dtype), ctx)
        d = tvm.nd.array(np.zeros(n, dtype=Db.dtype), ctx)
        f(a, b, d)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            d.asnumpy(), a.asnumpy() + b.asnumpy() + 1)

    check_target("cuda", host="stackvm")
tests/python/unittest/test_codegen_extern.py
@@ -46,7 +46,7 @@ def test_add_pipeline():
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
-        np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)
+        tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + 1)

    check_target("llvm")
    check_target("opencl")

@@ -80,7 +80,7 @@ def test_pack_buffer_simple():
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy())

    check_target("stackvm")
    check_target("llvm")

@@ -112,12 +112,12 @@ def test_pack_buffer_intermediate():
        @tvm.register_func
        def my_extern_array_func2(aa, bb):
            assert aa.shape == a.shape
-            np.testing.assert_allclose(
+            tvm.testing.assert_allclose(
                aa.asnumpy(), a.asnumpy() + 1)
            aa.copyto(bb)

        f(a, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + 1)

    check_target("llvm")
tests/python/unittest/test_codegen_llvm.py
@@ -52,7 +52,7 @@ def test_llvm_import():
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        f(a, b)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            b.asnumpy(), a.asnumpy() + 1.0)

    check_llvm(use_file=True)
    check_llvm(use_file=False)

@@ -106,7 +106,7 @@ def test_llvm_add_pipeline():
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, b, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())

    with tvm.build_config(offset_factor=4):

@@ -138,7 +138,7 @@ def test_llvm_persist_parallel():
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
-        np.testing.assert_allclose(c.asnumpy(),
+        tvm.testing.assert_allclose(c.asnumpy(),
                                    np.sqrt(a.asnumpy() + 1) * 2 + 2,
                                    rtol=1e-5)

@@ -164,7 +164,7 @@ def test_llvm_flip_pipeline():
        a = tvm.nd.array(np.random.uniform(size=(n + base)).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy()[::-1][:n])

    check_llvm(4, 0)
    check_llvm(128, 8)

@@ -195,7 +195,7 @@ def test_llvm_vadd_pipeline():
            np.random.uniform(size=(n, lanes)))
        c = tvm.nd.empty((n,), C.dtype, ctx)
        f(a, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + 1)

    check_llvm(64, 2)
    check_llvm(512, 2)

@@ -220,7 +220,7 @@ def test_llvm_madd_pipeline():
        a = tvm.nd.array(np.random.uniform(size=(n + base, stride)).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros((n, stride), dtype=C.dtype), ctx)
        f(a, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy()[base:] + 1)

    check_llvm(64, 0, 2)
    check_llvm(4, 0, 1)

@@ -247,7 +247,7 @@ def test_llvm_temp_space():
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + 1 + 1)

    check_llvm()

@@ -277,10 +277,10 @@ def test_multiple_func():
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        fadd1(a, b, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())
        fadd2(a, b, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())

    check_llvm()

@@ -302,7 +302,7 @@ def test_llvm_select():
        f(a, c)
        c_np = a.asnumpy()
        c_np[:offset] = 0
-        np.testing.assert_allclose(c.asnumpy(), c_np)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np)

    check_llvm(64, 8)

@@ -321,7 +321,7 @@ def test_llvm_bool():
        c = tvm.nd.empty((n,), C.dtype, ctx)
        f(a, c)
        c_np = a.asnumpy() == 1
-        np.testing.assert_allclose(c.asnumpy(), c_np)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np)

    check_llvm(64)

@@ -345,7 +345,7 @@ def test_rank_zero():
        d = tvm.nd.empty((), D.dtype, ctx)
        f(a, sc, d)
        d_np = np.sum(a.asnumpy()) * sc.asnumpy() + 1
-        np.testing.assert_allclose(d.asnumpy(), d_np)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np)

    check_llvm(64)
tests/python/unittest/test_hybrid_script.py
@@ -38,7 +38,7 @@ def run_and_check(func, args, outs, var_dict={}, target='llvm'):
    module(*nd_args)
    for nd, np in to_check:
-        numpy.testing.assert_allclose(nd.asnumpy(), np, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(nd.asnumpy(), np, rtol=1e-5, atol=1e-5)

@script

@@ -257,7 +257,7 @@ def test_math_intrin():
    tvm_a = tvm.ndarray.array(a)
    func(tvm_a)
    intrin_real(a)
-    numpy.testing.assert_allclose(a, tvm_a.asnumpy(), rtol=1e-5)
+    tvm.testing.assert_allclose(a, tvm_a.asnumpy(), rtol=1e-5)

    @script
    def intrin_int(a):
tests/python/unittest/test_ir_builder.py
@@ -84,7 +84,7 @@ def test_cpu():
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        fadd(a, b, c)
-        np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+        tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

    check_target("llvm")

def test_gpu():

@@ -125,7 +125,7 @@ def test_gpu():
        b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        fadd(a, b, c)
-        np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+        tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

    check_target("opencl")
    check_target("cuda")
tests/python/unittest/test_lang_tensor_overload_op.py
@@ -66,7 +66,7 @@ def test_combination():
    c = tvm.nd.array(np.random.uniform(size=(n, m)).astype(C.dtype), ctx)
    d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
    foo(x, a, b, c, d)
-    np.testing.assert_allclose(d.asnumpy(), k + a.asnumpy() - b.asnumpy() * c.asnumpy() / x)
+    tvm.testing.assert_allclose(d.asnumpy(), k + a.asnumpy() - b.asnumpy() * c.asnumpy() / x)

def verify_tensor_scalar_bop(shape, typ="add"):

@@ -111,7 +111,7 @@ def verify_tensor_scalar_bop(shape, typ="add"):
        a_nd = tvm.nd.array(a_npy, ctx)
        b_nd = tvm.nd.array(np.empty(b_npy.shape).astype(B.dtype), ctx)
        foo(a_nd, b_nd, k_, *shape)
-        np.testing.assert_allclose(b_nd.asnumpy(), b_npy, rtol=1e-5)
+        tvm.testing.assert_allclose(b_nd.asnumpy(), b_npy, rtol=1e-5)

    for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan']:
        check_device(device)

@@ -160,7 +160,7 @@ def verify_broadcast_bop(lhs_shape, rhs_shape, typ="add"):
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(lhs_nd, rhs_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)

    for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan']:
        check_device(device)

@@ -213,7 +213,7 @@ def verify_conv2d_scalar_bop(batch, in_size, in_channel, num_filter, kernel, str
        b_nd = tvm.nd.array(np.empty(b_npy.shape).astype(B.dtype), ctx)
        c_nd = tvm.nd.array(np.empty(c_npy.shape).astype(C.dtype), ctx)
        foo(a_nd, w_nd, b_nd, c_nd)
-        np.testing.assert_allclose(c_nd.asnumpy(), c_npy, rtol=1E-4, atol=1E-4)
+        tvm.testing.assert_allclose(c_nd.asnumpy(), c_npy, rtol=1E-4, atol=1E-4)

    for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan']:
        check_device(device)
tests/python/unittest/test_runtime_ndarray.py
@@ -53,7 +53,7 @@ def test_fp16_conversion():
        expected = x_tvm.asnumpy().astype(dst)
        real = y_tvm.asnumpy()
-        np.testing.assert_allclose(expected, real)
+        tvm.testing.assert_allclose(expected, real)

if __name__ == "__main__":
    test_nd_create()
tests/python/unittest/test_runtime_rpc.py
@@ -31,7 +31,7 @@ def test_bigendian_rpc():
        remote.upload(path_dso)
        f = remote.load_module("dev_lib.o")
        f(a, b)
-        np.testing.assert_allclose(a.asnumpy() + 1, b.asnumpy())
+        tvm.testing.assert_allclose(a.asnumpy() + 1, b.asnumpy())

    print("Test RPC connection to PowerPC...")
    remote = rpc.connect(host, port)
tests/verilog/integration/test_codegen_verilog.py
@@ -60,7 +60,7 @@ def test_add_pipeline():
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
        f(a, b, c)
        print("Check correctness...")
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), a.asnumpy() + b.asnumpy())

    check_target("verilog")
tests/webgl/test_local_gemm.py
@@ -35,7 +35,7 @@ def test_local_gemm():
    c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
    f(a, b, c)
-    np.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T))
+    tvm.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T))

if __name__ == "__main__":
    test_local_gemm()
tests/webgl/test_local_multi_stage.py
@@ -24,7 +24,7 @@ def test_local_multi_stage():
    c = tvm.nd.array(np.random.uniform(size=(n,)).astype(B.dtype), ctx)
    f(a, c)
-    np.testing.assert_allclose(c.asnumpy(), (a.asnumpy() + 1) * 2)
+    tvm.testing.assert_allclose(c.asnumpy(), (a.asnumpy() + 1) * 2)

if __name__ == "__main__":
    test_local_multi_stage()
tests/webgl/test_local_save_load.py
@@ -30,7 +30,7 @@ def test_local_save_load():
    f.export_library(path_so)
    f1 = tvm.module.load(path_so)
    f1(a, b, c)
-    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

if __name__ == "__main__":
    test_local_save_load()
tests/webgl/test_local_topi_conv2d_nchw.py
@@ -49,8 +49,8 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
        func2 = tvm.build(s2, [A, W, C], device,
                          name="relu_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding))
        func1(a, w, b)
        func2(a, w, c)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-        np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['opengl']:
        check_device(device)
tests/webgl/test_local_topi_dense.py
@@ -45,7 +45,7 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B, C, D], device, name="dense")
        f(a, b, c, d)
-        np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)

    for device in ['opengl']:
        check_device(device)
tests/webgl/test_local_topi_pooling.py
@@ -60,7 +60,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode):
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['opengl']:
        check_device(device)

@@ -98,7 +98,7 @@ def verify_global_pool(n, c, h, w, pool_type):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['opengl']:
        check_device(device)
tests/webgl/test_local_topi_softmax.py
@@ -32,7 +32,7 @@ def verify_softmax(m, n):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ["opengl"]:
        check_device(device)

@@ -63,7 +63,7 @@ def verify_log_softmax(m, n):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ["opengl"]:
        check_device(device)
tests/webgl/test_remote_save_load.py
@@ -73,7 +73,7 @@ def try_remote_save_load():
    b = tvm.nd.array(np.zeros(16, dtype=A.dtype), ctx)
    c = tvm.nd.array(np.zeros(16, dtype=C.dtype), ctx)
    fhost(a, b, c)
-    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

if __name__ == "__main__":
    try_remote_save_load()
topi/recipe/broadcast/test_broadcast_map.py
@@ -48,7 +48,7 @@ def test_broadcast_to(in_shape, out_shape):
    out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(data_nd, out_nd)
-    np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+    tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"):

@@ -95,7 +95,7 @@ def test_broadcast_binary_op(lhs_shape, rhs_shape, typ="add"):
    out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), tvm.gpu())
    for _ in range(2):
        fcuda(lhs_nd, rhs_nd, out_nd)
-    np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+    tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

if __name__ == "__main__":
topi/recipe/conv/depthwise_conv2d_test.py
@@ -106,9 +106,9 @@ def test_depthwise_conv2d_nchw():
        for c in range(in_channel * channel_multiplier):
            scale_shift_scipy[:,c,:,:] = depthwise_conv2d_scipy[:,c,:,:] * scale_np[c] + shift_np[c]
        relu_scipy = np.maximum(scale_shift_scipy, 0)
-        np.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
-        np.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
-        np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
        print("success")

    for device in ['cuda', 'opencl', 'rocm']:

@@ -195,9 +195,9 @@ def test_depthwise_conv2d_nhwc():
        for c in range(in_channel * channel_multiplier):
            scale_shift_scipy[:,:,:,c] = depthwise_conv2d_scipy[:,:,:,c] * scale_np[c] + shift_np[c]
        relu_scipy = np.maximum(scale_shift_scipy, 0)
-        np.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
-        np.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
-        np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
        print("success")

    for device in ['cuda', 'opencl', 'rocm']:
topi/recipe/conv/test_conv2d_hwcn_map.py
@@ -64,10 +64,10 @@ def test_conv2d_hwcn_map():
                              unroll_explicit=device == 'rocm'):
            func1 = tvm.build(s1, [A, W, B], device)
            func1(a, w, b)
-            np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+            tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
            func2 = tvm.build(s2, [A, W, C], device)
            func2(a, w, c)
-            np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
+            tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'rocm']:
        check_device(device)
topi/recipe/gemm/cuda_gemm_square.py
@@ -118,7 +118,7 @@ def test_gemm():
        c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
        for i in range(2):
            f(a, b, c)
-        np.testing.assert_allclose(
+        tvm.testing.assert_allclose(
            c.asnumpy(), np.dot(b_np.T, a_np), rtol=1e-5)

        num_flops = 2 * nn * nn * nn
topi/recipe/gemm/gemm_int8.py
@@ -140,7 +140,7 @@ if __name__ == '__main__':
    c = tvm.nd.array(np.zeros((n, m), dtype='int32'), ctx)
    f(a, b, c)
-    np.testing.assert_allclose(
+    tvm.testing.assert_allclose(
        c.asnumpy(), np.dot(a_np.astype('int32'),
topi/recipe/reduce/test_reduce_map.py
@@ -67,7 +67,7 @@ def test_reduce_map(in_shape, axis, keepdims, type="sum", test_id=0):
    for _ in range(2):
        fcuda(data_tvm, out_tvm)
-    np.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 4E-4, 4E-4)
+    tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, rtol=4e-4, atol=4e-4)

if __name__ == "__main__":
    test_reduce_map(in_shape=(128, 24, 128, 24),
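Note that the test_reduce_map hunk above also rewrites the positional tolerances 4E-4, 4E-4 as explicit keywords rtol=4e-4, atol=4e-4. The absolute tolerance is the point of the whole commit: with NumPy's default atol=0, a purely relative check can never pass when the expected value is exactly zero. A hypothetical illustration of the failure mode, using only NumPy (the array values are made up for the example):

import numpy as np

expected = np.array([0.0, 1.0])
actual = np.array([1e-9, 1.0 + 1e-9])   # tiny floating-point noise

# With the default atol=0 this raises:
# |1e-9 - 0| <= rtol * |0| is false for any rtol.
# np.testing.assert_allclose(actual, expected, rtol=1e-5)

# A small non-zero atol absorbs the noise near zero, so this passes.
np.testing.assert_allclose(actual, expected, rtol=1e-5, atol=1e-7)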
topi/recipe/rnn/matexp.py
@@ -145,7 +145,7 @@ def rnn_matexp():
            for j in range(n_num_hidden):
                if abs(res_cmp[i,0,j] - res_gpu[i,0,j]) > 1e-5:
                    print("%d, %d: %g vs %g" % (i, j, res_cmp[i,0,j], res_gpu[i,0,j]))
-        np.testing.assert_allclose(res_gpu, res_cmp, rtol=1e-3)
+        tvm.testing.assert_allclose(res_gpu, res_cmp, rtol=1e-3)

    check_device("cuda")

if __name__ == "__main__":
topi/tests/python/test_topi_bitserial_conv2d.py
@@ -46,7 +46,7 @@ def verify_bitserial_conv2d_nchw(batch, in_size, in_channel, num_filter, kernel,
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    func = tvm.build(s, [A, W, B], "llvm")
    func(a, w, b)
-    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel, stride, padding,
                                 activation_bits, weight_bits, dorefa):

@@ -85,7 +85,7 @@ def verify_bitserial_conv2d_nhwc(batch, in_size, in_channel, num_filter, kernel,
    func = tvm.build(s, [A, W, B], 'llvm')
    func(a, w, b)
-    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

def test_bitserial_conv2d():
    in_size = 56
topi/tests/python/test_topi_bnn.py
@@ -44,7 +44,7 @@ def verify_binary_dense(batch, in_dim, out_dim):
        f1(a, bnn_a)
        f2(b, bnn_b)
        f3(bnn_a, bnn_b, bnn_c)
-        np.testing.assert_allclose(bnn_c.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(bnn_c.asnumpy(), c_np, rtol=1e-5)

def test_binary_dense():
    verify_binary_dense(1, 4096, 1024)
topi/tests/python/test_topi_broadcast.py
@@ -23,7 +23,7 @@ def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for target in get_all_backend():
        check_device(target)

@@ -77,7 +77,7 @@ def verify_broadcast_binary_ele(lhs_shape, rhs_shape,
        out_npy = fnumpy(lhs_npy, rhs_npy)
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(C.dtype), ctx)
        foo(lhs_nd, rhs_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)

    for target in get_all_backend():
        check_device(target)
topi/tests/python/test_topi_clip.py
@@ -33,7 +33,7 @@ def verify_clip(N, a_min, a_max, dtype):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device, name="clip")
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
topi/tests/python/test_topi_conv2d_hwcn.py
@@ -47,8 +47,8 @@ def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, p
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-        np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)
topi/tests/python/test_topi_conv2d_int8.py
@@ -83,7 +83,7 @@ def verify_conv2d_NCHWc_int8(batch, in_channel, in_size, num_filter, kernel, str
        else:
            func = tvm.build(s, [A, W, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
            func(a, w, c)
-        np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ["cuda"]:
        check_device(device)
topi/tests/python/test_topi_conv2d_nchw.py
@@ -66,7 +66,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
        else:
            func = tvm.build(s, [A, W, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
            func(a, w, c)
-        np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
topi/tests/python/test_topi_conv2d_nhwc.py
@@ -42,7 +42,7 @@ def verify_conv2d_nhwc(batch, in_channel, in_size, num_filter, kernel, stride, p
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, W, B], device)
        func(a, w, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm']:
        check_device(device)
topi/tests/python/test_topi_conv2d_transpose_nchw.py
@@ -48,8 +48,8 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
        func2 = tvm.build(s2, [A, W, C], device)
        func1(a, w, b)
        func2(a, w, c)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
-        np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
topi/tests/python/test_topi_conv2d_winograd.py
@@ -65,7 +65,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
        else:
            func = tvm.build(s, [A, W, C], device,
                             name="relu_%d_%d_%d_%d_%d_%d_%d_%d" % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation))
            func(a, w, c)
-        np.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
+        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)

    for device in ['cuda', 'llvm -device=arm_cpu', 'opencl -device=mali']:
topi/tests/python/test_topi_dense.py
@@ -44,7 +44,7 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B, C, D], device, name="dense")
        f(a, b, c, d)
-        np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
topi/tests/python/test_topi_depthwise_conv2d.py
@@ -97,9 +97,9 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu
        # launch kernel 3 (depthwise_conv2d + scale_shift + relu)
        timer_3 = f3.time_evaluator(f3.entry_name, ctx, number=1)
        tcost_3 = timer_3(input_tvm, filter_tvm, scale_tvm, shift_tvm, relu_tvm).mean
-        np.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
-        np.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
-        np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)

    for device in get_all_backend():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters

@@ -197,9 +197,9 @@ def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_mu
        timer_3 = f3.time_evaluator(f3.entry_name, ctx, number=1)
        tcost_3 = timer_3(input_tvm, filter_tvm, scale_tvm, shift_tvm, relu_tvm).mean
        relu_scipy = np.maximum(scale_shift_scipy, 0)
-        np.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
-        np.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
-        np.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(depthwise_conv2d_tvm.asnumpy(), depthwise_conv2d_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(scale_shift_tvm.asnumpy(), scale_shift_scipy, rtol=1e-5)
+        tvm.testing.assert_allclose(relu_tvm.asnumpy(), relu_scipy, rtol=1e-5)

    for device in get_all_backend():
        with autotvm.tophub.context(device):  # load tophub pre-tuned parameters
topi/tests/python/test_topi_depthwise_conv2d_back_input.py
@@ -80,7 +80,7 @@ def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multipli
        # launch the kernel
        timer = f.time_evaluator(f.entry_name, ctx, number=1)
        tcost = timer(filter_tvm, out_grad_tvm, in_grad_tvm).mean
-        np.testing.assert_allclose(in_grad_np, in_grad_tvm.asnumpy(), rtol=1e-5)
+        tvm.testing.assert_allclose(in_grad_np, in_grad_tvm.asnumpy(), rtol=1e-5)

    check_device("opencl")
    check_device("cuda")
topi/tests/python/test_topi_depthwise_conv2d_back_weight.py
@@ -73,7 +73,7 @@ def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multipl
        # launch the kernel
        timer = f.time_evaluator(f.entry_name, ctx, number=1)
        tcost = timer(input_tvm, out_grad_tvm, weight_grad_tvm).mean
-        np.testing.assert_allclose(weight_grad_np, weight_grad_tvm.asnumpy(), rtol=1e-4)
+        tvm.testing.assert_allclose(weight_grad_np, weight_grad_tvm.asnumpy(), rtol=1e-4)

    check_device("opencl")
    check_device("cuda")
topi/tests/python/test_topi_dilate.py
@@ -19,7 +19,7 @@ def test_dilate():
        output_tvm = tvm.nd.array(np.zeros(shape=output_size).astype(Output.dtype), ctx=ctx)
        f = tvm.build(schedule, [Input, Output], target)
        f(input_tvm, output_tvm)
-        np.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)
+        tvm.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)

    _test_dilate((32,), (2,))
    _test_dilate((32,32), (2,2))
topi/tests/python/test_topi_l2norm.py
@@ -29,7 +29,7 @@ def verify_l2_normalize(ishape, eps, axis=None):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)
topi/tests/python/test_topi_lrn.py
@@ -28,7 +28,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)
topi/tests/python/test_topi_math.py
@@ -37,7 +37,7 @@ def test_ewise():
        a = tvm.nd.array(a_np, ctx)
        b = tvm.nd.array(np.zeros_like(b_np), ctx)
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'llvm', 'nvptx', 'sdaccel', 'aocl_sw_emu']:
topi/tests/python/test_topi_matmul.py
@@ -27,7 +27,7 @@ def verify_matmul(sa, sb, transp_a, transp_b):
    c1 = np.matmul(np.transpose(a) if transp_a else a,
                   np.transpose(b) if transp_b else b)
    c2 = with_tvm(lambda A, B: topi.matmul(A, B, transp_a, transp_b), a, b)
-    np.testing.assert_allclose(c1, c2, rtol=1e-5, atol=1e-5)
+    tvm.testing.assert_allclose(c1, c2, rtol=1e-5, atol=1e-5)

def test_matmul():
    verify_matmul((1,1),(1,1), False, False)
topi/tests/python/test_topi_pooling.py
@@ -64,7 +64,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

@@ -109,7 +109,7 @@ def verify_global_pool(n, c, h, w, pool_type):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
topi/tests/python/test_topi_reduce.py
@@ -87,11 +87,11 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum", dtype="float32")
            sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
            out_tvm_val = in_npy_map[sel_indices]
            if type == "argmax":
-                np.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
+                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
            elif type == "argmin":
-                np.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
+                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
        else:
-            np.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
+            tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)

    for device in get_all_backend():
        check_device(device)
topi/tests/python/test_topi_region.py
@@ -37,7 +37,7 @@ def verify_region(batch, in_size, in_channel, n, classes, coords, background, l_
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, B], device)
        func(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
topi/tests/python/test_topi_relu.py
@@ -27,7 +27,7 @@ def verify_relu(m, n):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)

@@ -45,7 +45,7 @@ def verify_leaky_relu(m, alpha):
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    foo = tvm.build(s, [A, B], "llvm", name="leaky_relu")
    foo(a, b)
-    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

def verify_prelu(x, w, axis, weight_reshape):

@@ -68,7 +68,7 @@ def verify_prelu(x, w, axis, weight_reshape):
    foo = tvm.build(s, [X, W, B], "llvm", name="prelu")
    foo(x_tvm, w_tvm, b)
    out_np = _prelu_numpy(x_np, w_np)
-    np.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)
+    tvm.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)

def test_relu():
    verify_relu(10, 128)
topi/tests/python/test_topi_reorg.py
@@ -38,7 +38,7 @@ def verify_reorg(batch, in_size, in_channel, stride):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, B], device)
        func(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
topi/tests/python/test_topi_resize.py
@@ -38,7 +38,7 @@ def verify_bilinear_scale(batch, in_channel, in_height, in_width, out_height, ou
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-3, atol=1e-3)

    for device in ['llvm', 'cuda', 'vulkan', 'nvptx']:
        check_device(device)
topi/tests/python/test_topi_shortcut.py
@@ -36,7 +36,7 @@ def verify_shortcut(batch, in_size, in_channel):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A1, A2, B], device)
        func(a1, a2, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['llvm', 'cuda']:
        check_device(device)
topi/tests/python/test_topi_softmax.py
@@ -32,7 +32,7 @@ def verify_softmax(m, n, dtype="float32"):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in ['cuda', 'opencl', 'metal', 'rocm', 'vulkan', 'nvptx']:
        check_device(device)

@@ -63,7 +63,7 @@ def verify_log_softmax(m, n, dtype="float32"):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

    for device in get_all_backend():
        check_device(device)
topi/tests/python/test_topi_sparse.py
@@ -47,7 +47,7 @@ def verify_dynamic_csrmv(batch, in_dim, out_dim, use_bias=True):
        assert a.indptr.dtype == A.indptr.dtype
        f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmv")
        f(_nr, a.data, a.indices, a.indptr, b, c, d)
-        np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4, atol=1e-4)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4, atol=1e-4)

    for device in ["llvm"]:
        check_device(device)

@@ -89,7 +89,7 @@ def verify_dynamic_csrmm(batch, in_dim, out_dim, use_bias=True):
        f = tvm.build(s, [nr, A.data, A.indices, A.indptr, B, C, D], device, name="csrmm")
        f(_nr, a.data, a.indices, a.indptr, b, c, d)
-        np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-2, atol=1e-2)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-2, atol=1e-2)

    for device in ["llvm"]:
        check_device(device)

@@ -127,7 +127,7 @@ def verify_dense_si(batch, in_dim, out_dim, use_bias=True, dtype='float32'):
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A.data, A.indices, A.indptr, B, C, D], device, name="dense")
        f(a.data, a.indices, a.indptr, b, c, d)
-        np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4, atol=1e-4)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4, atol=1e-4)

    check_device('llvm')

@@ -164,7 +164,7 @@ def verify_dense_sw(batch, in_dim, out_dim, use_bias=True, dtype='float32'):
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B.data, B.indices, B.indptr, C, D], device, name="dense")
        f(a, b.data, b.indices, b.indptr, c, d)
-        np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4, atol=1e-4)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-4, atol=1e-4)

    check_device('llvm')
topi/tests/python/test_topi_tensor.py
@@ -32,7 +32,7 @@ def verify_elemwise_sum(num_args, dtype):
        tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out]
        f(*tvm_nd)
        np_out = np.sum(np.array(np_nd), axis=0)
-        np.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)

    for device in ["llvm"]:
        check_device(device)

@@ -59,11 +59,11 @@ def verify_full(shape, dtype, fill_value):
        out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        f = tvm.build(s1, [A, B], device, name="full_like")
        f(tvm.nd.array(np.zeros(shape, dtype), ctx), out)
-        np.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)

        f = tvm.build(s2, [C], device, name="full")
        f(out)
-        np.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)

    for device in ["llvm"]:
        check_device(device)
topi/tests/python/test_topi_transform.py
@@ -22,7 +22,7 @@ def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

@@ -45,7 +45,7 @@ def verify_tranpose(in_shape, axes):
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

@@ -68,7 +68,7 @@ def verify_reshape(src_shape, dst_shape):
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

@@ -96,7 +96,7 @@ def verify_squeeze(src_shape, axis):
        out_nd_shape = out_npy.shape
        out_nd = tvm.nd.empty(out_nd_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

@@ -121,7 +121,7 @@ def verify_concatenate(shapes, axis):
        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
        foo(*(data_nds + [out_nd]))
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

@@ -146,7 +146,7 @@ def verify_split(src_shape, indices_or_sections, axis):
        out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype) for out_npy in out_npys]
        foo(*([data_nd] + out_nds))
        for out_nd, out_npy in zip(out_nds, out_npys):
-            np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+            tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in get_all_backend():
        check_device(device)

@@ -181,7 +181,7 @@ def verify_expand_like(in_shape, out_shape, axis):
        tvm_shape_like = tvm.nd.array(np.zeros(out_shape).astype(B.dtype), ctx)
        out = tvm.nd.array(np.zeros(out_shape).astype(A.dtype), ctx)
        f(tvm_input, tvm_shape_like, out)
-        np.testing.assert_allclose(out.asnumpy(), input)
+        tvm.testing.assert_allclose(out.asnumpy(), input)

    for device in ["llvm"]:
        check_device(device)

@@ -204,7 +204,7 @@ def verify_flip(in_shape, axis):
        data_nd = tvm.nd.array(x_np, ctx)
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=A.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ["llvm", "cuda", "opencl", "sdaccel", "aocl_sw_emu"]:
        check_device(device)

@@ -243,7 +243,7 @@ def verify_take(src_shape, indices_src, axis=None):
        indices_nd = tvm.nd.array(indices_src, ctx)
        out_nd = tvm.nd.empty(out_npys.shape, ctx=ctx, dtype=src_dtype)
        foo(data_nd, indices_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npys)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)

    for device in ["llvm", "opencl", "sdaccel", "aocl_sw_emu"]:
        check_device(device)

@@ -270,7 +270,7 @@ def verify_strided_slice(in_shape, begin, end, stride=None):
        data_nd = tvm.nd.array(x_np, ctx)
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=A.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)

    for device in ["llvm", "opencl", "sdaccel", "aocl_sw_emu"]:
        check_device(device)
topi/tests/python/test_topi_upsampling.py
@@ -43,7 +43,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale, layout='NCH
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5, atol=1e-5)

    for device in ['llvm', 'cuda', 'vulkan', 'nvptx']:
        check_device(device)
topi/tests/python/test_topi_vision.py
@@ -41,7 +41,7 @@ def test_nms():
        tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx)
        f = tvm.build(s, [data, valid_count, out], device)
        f(tvm_data, tvm_valid_count, tvm_out)
-        np.testing.assert_allclose(tvm_out.asnumpy(), np_result, rtol=1e-4)
+        tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, rtol=1e-4)
    for device in ['llvm', 'opencl']:
        check_device(device)

@@ -100,7 +100,7 @@ def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), offse
        tvm_out = tvm.nd.array(np.zeros(oshape, dtype=dtype), ctx)
        f = tvm.build(s, [data, out], device)
        f(tvm_input_data, tvm_out)
-        np.testing.assert_allclose(tvm_out.asnumpy(), np_out, rtol=1e-3)
+        tvm.testing.assert_allclose(tvm_out.asnumpy(), np_out, rtol=1e-3)
    for device in ['llvm', 'opencl']:
        check_device(device)

@@ -148,7 +148,7 @@ def test_multibox_detection():
        tvm_out = tvm.nd.array(np.zeros((batch_size, num_anchors, 6)).astype(out.dtype), ctx)
        f = tvm.build(s, [cls_prob, loc_preds, anchors, out], device)
        f(tvm_cls_prob, tvm_loc_preds, tvm_anchors, tvm_out)
-        np.testing.assert_allclose(tvm_out.asnumpy(), expected_np_out, rtol=1e-4)
+        tvm.testing.assert_allclose(tvm_out.asnumpy(), expected_np_out, rtol=1e-4)
    for device in ['llvm', 'opencl']:
        check_device(device)
topi/tests/python_cpp/test_topi_bnn.py
@@ -44,7 +44,7 @@ def verify_binary_dense(batch, in_dim, out_dim):
    f1(a, bnn_a)
    f2(b, bnn_b)
    f3(bnn_a, bnn_b, bnn_c)
-    np.testing.assert_allclose(bnn_c.asnumpy(), c_np, rtol=1e-5)
+    tvm.testing.assert_allclose(bnn_c.asnumpy(), c_np, rtol=1e-5)

def test_binary_dense():
    verify_binary_dense(1, 4096, 1024)
topi/tests/python_cpp/test_topi_clip.py
@@ -29,7 +29,7 @@ def verify_clip(N, a_min, a_max, dtype):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device, name="clip")
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['llvm']:
        check_device(device)
topi/tests/python_cpp/test_topi_dense.py
@@ -47,7 +47,7 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B, C, D], device, name="dense")
        f(a, b, c, d)
-        np.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
+        tvm.testing.assert_allclose(d.asnumpy(), d_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)
topi/tests/python_cpp/test_topi_dilate.py
@@ -19,7 +19,7 @@ def test_dilate():
        output_tvm = tvm.nd.array(np.zeros(shape=output_size).astype(Output.dtype), ctx=ctx)
        f = tvm.build(schedule, [Input, Output], target)
        f(input_tvm, output_tvm)
-        np.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)
+        tvm.testing.assert_allclose(output_tvm.asnumpy(), output_np, rtol=1e-5)

    _test_dilate((32,), (2,))
    _test_dilate((32, 32), (2, 2))
topi/tests/python_cpp/test_topi_l2norm.py
@@ -30,7 +30,7 @@ def verify_l2_normalize(shape, eps, axis=None):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, B], device, name="l2_normalize")
        func(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']:
        check_device(device)
topi/tests/python_cpp/test_topi_lrn.py
@@ -29,7 +29,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-1)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-1)
    for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm']:
        check_device(device)
topi/tests/python_cpp/test_topi_pooling.py
@@ -67,7 +67,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)

@@ -115,7 +115,7 @@ def verify_global_pool(n, c, h, w, pool_type):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        f = tvm.build(s, [A, B], device)
        f(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)
topi/tests/python_cpp/test_topi_reduce.py
@@ -92,11 +92,11 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
            sel_indices = other_indices[0:axis] + (out_tvm_indices,) + other_indices[axis:]
            out_tvm_val = in_npy_map[sel_indices]
            if type == "argmax":
-                np.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
+                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.max(axis=axis), 1E-3, 1E-3)
            elif type == "argmin":
-                np.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
+                tvm.testing.assert_allclose(out_tvm_val, in_npy_map.min(axis=axis), 1E-3, 1E-3)
        else:
-            np.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
+            tvm.testing.assert_allclose(out_tvm.asnumpy(), out_npy, 1E-3, 1E-3)
    for device in ["cuda", "opencl", "metal", "llvm", "rocm"]:
        check_device(device)
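The argmax/argmin branch above checks the returned indices indirectly: it gathers the input values that the indices select (via sel_indices) and compares them with the true per-axis extrema, so an index that points at a tied maximum still passes. A self-contained sketch of the same gather-and-compare idea, with hypothetical data and names, for a 2-D input reduced along axis 1:

import numpy as np

data = np.random.uniform(size=(4, 8)).astype("float32")
idx = data.argmax(axis=1)          # indices produced by the kernel under test
rows = np.arange(data.shape[0])
gathered = data[rows, idx]         # values the returned indices point at
# The positional arguments after `desired` are rtol and atol,
# mirroring the 1E-3, 1E-3 in the hunk above.
np.testing.assert_allclose(gathered, data.max(axis=1), 1e-3, 1e-3)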
topi/tests/python_cpp/test_topi_region.py
@@ -39,7 +39,7 @@ def verify_region(batch, in_size, in_channel, n, classes, coords, background, l_
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, B], device, name="region")
        func(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
        check_device(device)
topi/tests/python_cpp/test_topi_relu.py
@@ -28,7 +28,7 @@ def verify_relu(m, n, dtype):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="relu")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)

@@ -48,7 +48,7 @@ def verify_leaky_relu(m, alpha):
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
    foo = tvm.build(s, [A, B], device, name="leaky_relu")
    foo(a, b)
-    np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+    tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)

def verify_prelu(x, w, axis, weight_reshape):
    X = tvm.placeholder((x), name='X')

@@ -71,7 +71,7 @@ def verify_prelu(x, w, axis, weight_reshape):
    b = tvm.nd.array(np.zeros(get_const_tuple(X.shape), dtype=B.dtype), ctx)
    foo = tvm.build(s, [X, W, B], "llvm", name="prelu")
    foo(x_tvm, w_tvm, b)
-    np.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)
+    tvm.testing.assert_allclose(b.asnumpy(), out_np, rtol=1e-5)

def test_relu():
    for dtype in ['float32', 'float64', 'int32', 'int16', 'int8', 'int64']:
topi/tests/python_cpp/test_topi_reorg.py
@@ -39,7 +39,7 @@ def verify_reorg(batch, in_size, in_channel, stride):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, B], device, name="reorg")
        func(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
        check_device(device)
topi/tests/python_cpp/test_topi_softmax.py
@@ -32,7 +32,7 @@ def verify_softmax(m, n):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="softmax")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm']:
        check_device(device)

@@ -66,7 +66,7 @@ def verify_log_softmax(m, n):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        foo = tvm.build(s, [A, B], device, name="log_softmax")
        foo(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ["cuda", "opencl", "metal", "rocm"]:
        check_device(device)
topi/tests/python_cpp/test_topi_tensor.py
@@ -30,7 +30,7 @@ def verify_elemwise_sum(num_args, dtype):
        tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out]
        f(*tvm_nd)
        np_out = np.sum(np.array(np_nd), axis=0)
-        np.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)
    for device in ["llvm"]:
        check_device(device)

@@ -56,11 +56,11 @@ def verify_full(shape, dtype, fill_value):
        out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        f = tvm.build(s1, [A, B], device, name="full_like")
        f(tvm.nd.array(np.zeros(shape, dtype), ctx), out)
-        np.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
        f = tvm.build(s2, [C], device, name="full")
        f(out)
-        np.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
+        tvm.testing.assert_allclose(out.asnumpy(), np_nd, rtol=1e-5)
    for device in ["llvm"]:
        check_device(device)
topi/tests/python_cpp/test_topi_transform.py
@@ -23,7 +23,7 @@ def verify_expand_dims(in_shape, out_shape, axis, num_newaxis):
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.array(np.empty(out_shape).astype(B.dtype), ctx)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -50,7 +50,7 @@ def verify_tranpose(in_shape, axes):
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -76,7 +76,7 @@ def verify_reshape(src_shape, dst_shape):
        data_nd = tvm.nd.array(data_npy, ctx)
        out_nd = tvm.nd.empty(dst_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -106,7 +106,7 @@ def verify_squeeze(src_shape, axis):
        out_nd_shape = out_npy.shape
        out_nd = tvm.nd.empty(out_nd_shape, ctx=ctx, dtype=B.dtype)
        foo(data_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -133,7 +133,7 @@ def verify_concatenate(shapes, axis):
        data_nds = [tvm.nd.array(data_npy, ctx) for data_npy in data_npys]
        out_nd = tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=out_tensor.dtype)
        foo(*(data_nds + [out_nd]))
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -162,7 +162,7 @@ def verify_split(src_shape, indices_or_sections, axis):
        out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype) for out_npy in out_npys]
        foo(*([data_nd] + out_nds))
        for out_nd, out_npy in zip(out_nds, out_npys):
-            np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+            tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -201,7 +201,7 @@ def verify_take(src_shape, indices_src, axis=None):
        indices_nd = tvm.nd.array(indices_src, ctx)
        out_nd = tvm.nd.empty(out_npys.shape, ctx=ctx, dtype=src_dtype)
        foo(data_nd, indices_nd, out_nd)
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npys)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npys)
    for device in ["llvm", "opencl"]:
        check_device(device)

@@ -230,7 +230,7 @@ def verify_where(condition, x, y):
        tvm_out = tvm.nd.empty(x.shape, ctx=ctx, dtype=dtype)
        foo(tvm.nd.array(condition, ctx), tvm.nd.array(x, ctx), tvm.nd.array(y, ctx), tvm_out)
-        np.testing.assert_allclose(tvm_out.asnumpy(), np_out)
+        tvm.testing.assert_allclose(tvm_out.asnumpy(), np_out)
    for device in ["llvm", "nvptx", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -261,7 +261,7 @@ def verify_concatenate_split(shapes, axis, indices_or_sections):
        out_nds = [tvm.nd.empty(out_npy.shape, ctx=ctx, dtype=tensor_l[0].dtype) for out_npy in out_npys_split]
        foo(*(data_nds + out_nds))
        for out_nd, out_npy in zip(out_nds, out_npys_split):
-            np.testing.assert_allclose(out_nd.asnumpy(), out_npy)
+            tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy)
    for device in ["llvm", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)

@@ -295,7 +295,7 @@ def verify_concatenate_broadcast(shapes, axis, rhs_shape):
        out_nd = tvm.nd.array(np.empty(out_npy.shape).astype(B.dtype), ctx)
        for _ in range(1):
            foo(*(data_nds + [rhs_nd] + [out_nd]))
-        np.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
+        tvm.testing.assert_allclose(out_nd.asnumpy(), out_npy, rtol=1E-4, atol=1E-4)
    for device in ["llvm", "cuda", "opencl", "metal", "rocm"]:
        check_device(device)
topi/tests/python_cpp/test_topi_yolo.py
@@ -36,7 +36,7 @@ def verify_yolo(ishape, n, classes):
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), ctx)
        func = tvm.build(s, [A, B], device, name="yolo")
        func(a, b)
-        np.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
+        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
    for device in ['cuda', 'opencl', 'metal', 'rocm', 'llvm', 'vulkan']:
        check_device(device)
tutorials/autotvm/tune_conv2d_cuda.py
@@ -211,7 +211,7 @@ w_tvm = tvm.nd.array(w_np, ctx=ctx)
c_tvm = tvm.nd.empty(c_np.shape, ctx=ctx)
func(a_tvm, w_tvm, c_tvm)
-np.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)
+tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)

# Evaluate running time. Here we choose a large repeat number (400) to reduce the noise
# and the overhead of kernel launch. You can also use nvprof to validate the result.
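A minimal sketch of the measurement that comment describes, assuming the func, ctx, and a_tvm/w_tvm/c_tvm arrays from the surrounding tutorial code:

# Average over 400 invocations so kernel-launch overhead and timer noise
# are amortized; the evaluator result exposes the mean runtime in seconds.
evaluator = func.time_evaluator(func.entry_name, ctx, number=400)
print('Time cost of this operator: %f' % evaluator(a_tvm, w_tvm, c_tvm).mean)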
tutorials/autotvm/tune_simple_template.py
@@ -305,4 +305,4 @@ c_np = a_np.dot(b_np)
c_tvm = tvm.nd.empty(c_np.shape)
func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)
-np.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)
+tvm.testing.assert_allclose(c_np, c_tvm.asnumpy(), rtol=1e-2)
tutorials/get_started.py
@@ -138,7 +138,7 @@ a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), ctx)
b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
fadd(a, b, c)
-np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

######################################################################
# Inspect the Generated Code

@@ -217,7 +217,7 @@ if tgt == "cuda":
    fadd1_dev = tvm.module.load(temp.relpath("myadd.ptx"))
    fadd1.import_module(fadd1_dev)
fadd1(a, b, c)
-np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

######################################################################
# Pack Everything into One Library

@@ -231,7 +231,7 @@ np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
fadd.export_library(temp.relpath("myadd_pack.so"))
fadd2 = tvm.module.load(temp.relpath("myadd_pack.so"))
fadd2(a, b, c)
-np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

######################################################################
# .. note:: Runtime API and Thread-Safety

@@ -264,7 +264,7 @@ if tgt == "opencl":
    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), ctx)
    c = tvm.nd.array(np.zeros(n, dtype=C.dtype), ctx)
    fadd_cl(a, b, c)
-    np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
+    tvm.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())

######################################################################
# Summary
tutorials/language/extern_op.py
@@ -59,7 +59,7 @@ b = tvm.nd.array(np.random.uniform(size=(l, m)).astype(B.dtype), ctx)
d = tvm.nd.array(np.zeros((n, m), dtype=D.dtype), ctx)
bb = 10.0
f(a, b, d, bb)
-np.testing.assert_allclose(
+tvm.testing.assert_allclose(
    d.asnumpy(), np.dot(a.asnumpy(), b.asnumpy()) + 10, rtol=1e-5)

######################################################################

@@ -98,7 +98,7 @@ f = tvm.build(s, [A, B], "llvm")
a = tvm.nd.array(np.random.uniform(size=(n,)).astype(A.dtype), ctx)
b = tvm.nd.array(np.random.uniform(size=(n,)).astype(B.dtype), ctx)
f(a, b)
-np.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1, rtol=1e-5)
+tvm.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1, rtol=1e-5)

######################################################################
# Summary
tutorials/language/reduction.py
@@ -123,7 +123,7 @@ ctx = tvm.gpu(0)
a = tvm.nd.array(np.random.uniform(size=(nn, nn)).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(nn, dtype=B.dtype), ctx)
fcuda(a, b)
-np.testing.assert_allclose(
+tvm.testing.assert_allclose(
    b.asnumpy(), np.sum(a.asnumpy(), axis=1), rtol=1e-4)

######################################################################
tutorials/language/scan.py
@@ -72,7 +72,7 @@ a_np = np.random.uniform(size=(m, n)).astype(s_scan.dtype)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(np.zeros((m, n), dtype=s_scan.dtype), ctx)
fscan(a, b)
-np.testing.assert_allclose(b.asnumpy(), np.cumsum(a_np, axis=0))
+tvm.testing.assert_allclose(b.asnumpy(), np.cumsum(a_np, axis=0))

######################################################################
# Multi-Stage Scan Cell
tutorials/language/tensorize.py
@@ -163,7 +163,7 @@ a = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)
b = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)
c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), ctx)
func(tvm.nd.array(a, ctx), tvm.nd.array(b, ctx), c)
-np.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)
+tvm.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)

######################################################################
# We compare the tensorize version with that :code:`numpy.dot` produces,

@@ -270,7 +270,7 @@ a = np.random.uniform(size=get_const_tuple(A.shape)).astype(dtype)
b = np.random.uniform(size=get_const_tuple(B.shape)).astype(dtype)
c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=dtype), ctx)
func(tvm.nd.array(a, ctx), tvm.nd.array(b, ctx), c)
-np.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)
+tvm.testing.assert_allclose(c.asnumpy(), np.dot(a, b.T), rtol=1e-3)

######################################################################
# Summary
tutorials/nnvm/using_external_lib.py
@@ -195,7 +195,7 @@ out_cudnn = out.asnumpy()
# -----------------
# We can check that the results of two runs match.
-np.testing.assert_allclose(out_cuda, out_cudnn, rtol=1e-5)
+tvm.testing.assert_allclose(out_cuda, out_cudnn, rtol=1e-5)

#####################################################################
# Conclusion
tutorials/optimize/opt_gemm.py
@@ -93,7 +93,7 @@ assert func
c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
func(a, b, c)
-numpy.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

evaluator = func.time_evaluator(func.entry_name, ctx, number=1)
print('Baseline: %f' % evaluator(a, b, c).mean)

@@ -128,7 +128,7 @@ assert func
c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
func(a, b, c)
-numpy.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

# By simply tiling the loop 32x32, and hoisting ko, ki outside the blocking loops,
# we can see big speedup compared with the baseline.

@@ -164,7 +164,7 @@ assert func
c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
func(a, b, c)
-numpy.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
print('Opt2: %f' % evaluator(a, b, c).mean)

@@ -197,7 +197,7 @@ assert func
c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
func(a, b, c)
-numpy.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
print('Opt3: %f' % evaluator(a, b, c).mean)

@@ -252,7 +252,7 @@ assert func
c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
func(a, b, c)
-numpy.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
print('Opt4: %f' % evaluator(a, b, c).mean)

@@ -298,7 +298,7 @@ assert func
c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
func(a, b, c)
-numpy.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

evaluator = func.time_evaluator(func.entry_name, ctx, number=10)
print('Opt5: %f' % evaluator(a, b, c).mean)

@@ -341,7 +341,7 @@ assert func
c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
func(a, b, c)
-numpy.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
+tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

evaluator = func.time_evaluator(func.entry_name, ctx, number=50)
opt6_time = evaluator(a, b, c).mean
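Each optimization level in this tutorial repeats the same verify-then-benchmark boilerplate. A hypothetical consolidation (not part of the tutorial) that assumes tvm, numpy, M, N, dtype, answer, a, b, and ctx from the surrounding code:

def verify_and_time(func, label, number=10):
    # Run once and check correctness before trusting any timing numbers.
    c = tvm.nd.array(numpy.zeros((M, N), dtype=dtype), ctx)
    func(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)
    # Then time the kernel over `number` runs and report the mean in seconds.
    evaluator = func.time_evaluator(func.entry_name, ctx, number=number)
    print('%s: %f' % (label, evaluator(a, b, c).mean))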
tutorials/topi/intro_topi.py
@@ -89,7 +89,7 @@ a_nd = tvm.nd.array(a_np, ctx)
b_nd = tvm.nd.array(b_np, ctx)
g_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), ctx)
func(a_nd, b_nd, g_nd)
-np.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5)
+tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5)

######################################################################
# TOPI also provides common neural nets operations such as _softmax_ with optimized schedule
vta/tests/python/integration/test_benchmark_gemm.py
@@ -94,7 +94,7 @@ def test_gemm():
                env.BATCH, env.BLOCK_OUT)
            if check_correctness:
-                np.testing.assert_allclose(res_unpack, res_ref)
+                tvm.testing.assert_allclose(res_unpack, res_ref)
            return cost

        def run_schedule(load_inp,
vta/tests/python/integration/test_benchmark_topi_conv2d.py
@@ -87,7 +87,7 @@ def test_cpu_conv2d():
                padding=wl.hpad)
            res_ref = res_ref >> 8
            res_ref = np.clip(res_ref, 0, 127).astype("int8")
-            np.testing.assert_allclose(res_unpack, res_ref)
+            tvm.testing.assert_allclose(res_unpack, res_ref)
            return cost

        def conv_normal(print_ir):

@@ -219,7 +219,7 @@ def test_vta_conv2d():
            res_ref = res_ref >> 8
            res_ref += bias_orig.reshape(wl.out_filter, 1, 1)
            res_ref = np.clip(res_ref, 0, 127).astype("int8")
-            np.testing.assert_allclose(res_unpack, res_ref)
+            tvm.testing.assert_allclose(res_unpack, res_ref)
            return cost

        def conv_normal(print_ir):
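Both reference paths above requantize a wide accumulator the same way: an arithmetic right shift by 8 drops the fixed-point fraction, and clipping into [0, 127] acts as a saturating cast to int8 with a ReLU-style lower bound. A standalone sketch of that step on a hypothetical int32 accumulator:

import numpy as np

acc = np.random.randint(-2**20, 2**20, size=(16, 16)).astype("int32")
res = acc >> 8                               # arithmetic shift: divide by 256, rounding toward -inf
res = np.clip(res, 0, 127).astype("int8")    # saturate into int8, clamping negatives to 0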
vta/tutorials/convolution_opt.py
@@ -413,7 +413,7 @@ res_ref = res_ref.reshape((batch_size // env.BATCH,
                          env.BLOCK_OUT,
                          fout_height,
                          fout_width)).transpose((0, 2, 4, 5, 1, 3))
-np.testing.assert_allclose(res_ref, res_nd.asnumpy())
+tvm.testing.assert_allclose(res_ref, res_nd.asnumpy())
print("Successful 2D convolution test!")

######################################################################