[Fix] Fix get_valid_count flaky test for cuda (#4901)

* get_valid_count accuracy issue fixed for individual tests but not for all tests running together
* minor fix
* initialize valid_count and PrefixSum buffers
* test updated
* update relay test as well
* update document
* fix lint
* address comment
* fix lint
* correct atomicAdd identifier name

[Fix] Fix get_valid_count flaky test for cuda (#4901)
* get_valid_count accuracy issue fixed for individual tests but not for all tests running together
* minor fix
* initialize valid_count and PrefixSum buffers
* test updated
* update relay test as well
* update document
* fix lint
* address comment
* fix lint
* correct atomicAdd identifier name
c4c61cb7 · Leyuan Wang · GitHub · 8290eaba · c4c61cb7 · c4c61cb7
Unverified Commit c4c61cb7 authored Feb 21, 2020 by Leyuan Wang Committed by GitHub Feb 21, 2020
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 4 deletions

tests/python/relay/test_op_level5.py
+0 -2

topi/python/topi/cuda/nms.py
+0 -0

topi/tests/python/test_topi_vision.py
+2 -2

No files found.
--- a/tests/python/relay/test_op_level5.py
+++ b/tests/python/relay/test_op_level5.py
@@ -221,8 +221,6 @@ def test_get_valid_counts():
        func = relay.Function([x], z.astuple())
        func = run_infer_type(func)
        for target, ctx in ctx_list():
-            if target == 'cuda':
-                return
            intrp = relay.create_executor("debug", ctx=ctx, target=target)
            out = intrp.evaluate(func)(np_data)
            tvm.testing.assert_allclose(out[0].asnumpy(), np_out1, rtol=1e-3, atol=1e-04)

--- a/topi/python/topi/cuda/nms.py
+++ b/topi/python/topi/cuda/nms.py
--- a/topi/tests/python/test_topi_vision.py
+++ b/topi/tests/python/test_topi_vision.py
@@ -67,8 +67,8 @@ def verify_get_valid_counts(dshape, score_threshold, id_index, score_index):
        tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3)

    for device in ['llvm', 'cuda', 'opencl']:
-        # Disable gpu test for now
-        if device != "llvm":
+        # Disable opencl test for now
+        if device != "llvm" and device != "cuda":
            continue
        check_device(device)