Commit 685f78d0 by Yuwei Hu Committed by Tianqi Chen

[INTRIN] Enable popcount (#606)

* enable popcount intrin

* fix lint

* add test

* fix python3
parent 3bb2eef5
......@@ -225,6 +225,22 @@ def power(x, y):
return call_pure_intrin(x.dtype, "pow", x, y)
def popcount(x):
    """Return the population count of x, i.e. how many of its bits are 1.

    Parameters
    ----------
    x : Expr
        The expression whose set bits are counted.

    Returns
    -------
    y : Expr
        The pure "popcount" intrinsic call, created with the dtype of x.
    """
    dtype = x.dtype
    return call_pure_intrin(dtype, "popcount", x)
# Intrinsic rule related code
def register_intrin_rule(target, intrin, f=None, override=False):
"""Register an intrinsic function generation rule.
......
......@@ -28,6 +28,9 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.sqrt")
// Register the LLVM intrinsic rule for "pow"; its body dispatches to
// ::llvm::Intrinsic::pow.
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.pow")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>);
// Register the LLVM intrinsic rule for "popcount"; its body dispatches to
// ::llvm::Intrinsic::ctpop (LLVM's count-population intrinsic).
// NOTE(review): the second template argument (1) is presumably the number of
// overloaded signature types -- confirm against DispatchLLVMPureIntrin.
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.popcount")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::ctpop, 1>);
} // namespace llvm
} // namespace codegen
} // namespace tvm
......
......@@ -60,6 +60,26 @@ def test_log_pow_llvm():
b.asnumpy(), np.power(np.log(a.asnumpy()), 2.0), rtol=1e-5)
def test_popcount_llvm():
    """Build a popcount kernel with the LLVM target and compare it to a Python bit count."""
    # Describe the computation: B[i] = popcount(A[i]) over a symbolic length.
    size_var = tvm.var('n')
    A = tvm.placeholder((size_var,), name='A', dtype="uint32")
    B = tvm.compute(A.shape, lambda *idx: tvm.popcount(A(*idx)), name='B')
    sched = tvm.create_schedule(B.op)
    # Nothing to run when the LLVM target is not enabled in this build.
    if not tvm.module.enabled("llvm"):
        return
    kernel = tvm.build(sched, [A, B], "llvm")
    cpu = tvm.cpu(0)
    # Run the kernel on 1024 random values drawn from [0, 1000).
    length = 1024
    samples = np.random.randint(low=0, high=1000, size=length, dtype=A.dtype)
    a = tvm.nd.array(samples, cpu)
    b = tvm.nd.array(np.zeros(shape=length, dtype=B.dtype), cpu)
    kernel(a, b)
    # Reference result: count the '1' characters in each value's binary string.
    expected = [bin(value).count('1') for value in a.asnumpy()]
    np.testing.assert_allclose(b.asnumpy(), expected, rtol=1e-5)
def test_add():
# graph
n = tvm.var('n')
......@@ -107,5 +127,6 @@ def test_add():
if __name__ == "__main__":
    # Run every test in this file, in order, when executed as a script.
    for run_test in (test_log_pow_llvm, test_popcount_llvm, test_exp, test_add):
        run_test()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment