Commit 9089e196 by Leyuan Wang Committed by Tianqi Chen

add more syncs (#3151)

parent 4332b0aa
......@@ -133,6 +133,9 @@ def get_valid_counts_upsweep(data, idx_in, idx, partial):
idx[bx * num_anchors + tx * elem_per_thread + i] = \
idx[bx * num_anchors + tx * elem_per_thread + i - 1] + \
idx_in[bx * num_anchors + tx * elem_per_thread + i]
ib.emit(tvm.make.Call(None, 'tvm_storage_sync',
tvm.convert(['shared']),
tvm.expr.Call.Intrinsic, None, 0))
return ib.get()
def get_valid_counts_scan(data, partial_in, partial):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment