Commit 4d4e19ce by Tianqi Chen Committed by GitHub

[TESTCASE] Add a mock test workflow of CUDA codegen (#19)

parent 110c9bec
......@@ -117,11 +117,13 @@ def compute(shape, fcompute, name="compute"):
The created tensor
"""
shape = (shape,) if isinstance(shape, _expr.Expr) else shape
ndim = len(shape)
arg_names = fcompute.__code__.co_varnames
if fcompute.__code__.co_argcount == 0 and len(arg_names) == 1:
arg_names = ["i%d" % i for i in range(ndim)]
if ndim != len(arg_names):
raise ValueError("fcompute do not match dimension")
raise ValueError("fcompute do not match dimension, ndim=%d" % ndim)
dim_var = [IterVar((0, s), x) for x, s in zip(arg_names, shape)]
body = fcompute(*[v.var for v in dim_var])
......@@ -170,7 +172,7 @@ def Buffer(shape, dtype=None,
name, ptr, shape, strides, dtype)
def IterVar(dom, name='iter', thread_tag=''):
def IterVar(dom=None, name=None, thread_tag=''):
"""Create a iteration variable
Parameters
......@@ -189,14 +191,17 @@ def IterVar(dom, name='iter', thread_tag=''):
iter_var : IterVar
The result itervar
"""
if isinstance(dom, (list, tuple)):
if len(dom) != 2:
raise ValueError("need to list of ranges")
dom = Range(dom[0], dom[1])
if not isinstance(dom, _collections.Range):
raise ValueError("dom need to be Range")
if dom is not None:
if isinstance(dom, (list, tuple)):
if len(dom) != 2:
raise ValueError("need to list of ranges")
dom = Range(dom[0], dom[1])
if not isinstance(dom, _collections.Range):
raise ValueError("dom need to be Range")
if name is None:
name = thread_tag if thread_tag else name
name = name if name else 'iter'
return _function_internal._IterVar(dom, name, thread_tag)
......
......@@ -56,8 +56,6 @@ class Stage(NodeBase):
if outer is not None:
if outer.thread_tag == '':
raise ValueError("split by outer must have special thread_tag")
if outer.dom is None:
raise ValueError("split by outer must have specified domain")
inner = _function_internal._StageSplitByOuter(self, parent, outer, factor)
else:
if factor is None:
......
......@@ -5,6 +5,7 @@
*/
#include <tvm/ir.h>
#include <tvm/ir_visitor.h>
#include <tvm/ir_pass.h>
#include <tvm/schedule_pass.h>
#include "./int_set.h"
#include "./graph.h"
......@@ -14,7 +15,7 @@ namespace schedule {
// result = ceil((a / b)), both a and b are positive integer
inline Expr DivCeil(Expr a, Expr b) {
return (a + b - 1) / b;
return ir::Simplify((a + b - 1) / b);
}
// Downward message passing algorithm on stage schedule s,
......
import tvm
import numpy
def mock_test_add():
    """Mock workflow for CUDA code generation of an element-wise add.

    Not yet working; this is a mock design. The function declares
    ``C = A + B`` over shape ``(n,)`` where ``n`` is a ``tvm.Var``,
    splits the single axis of ``C`` by a ``gridIdx.x`` and a
    ``threadIdx.x`` iteration variable, and then runs bound inference and
    ``ScheduleOps`` on the resulting schedule.

    The code generation and execution steps are only sketched in the
    nested ``codegen`` helper, which is defined but intentionally never
    invoked here.
    """
    n = tvm.Var('n')
    A = tvm.placeholder((n,), name='A')
    B = tvm.placeholder((n,), name='B')
    C = tvm.compute(A.shape, lambda *i: A(*i) + B(*i), name='C')
    s = tvm.Schedule(C.op)

    # A GPU schedule has to be split by gridIdx and threadIdx.
    num_thread = 256
    # grid_x is created without a domain; thread_x spans (0, num_thread).
    grid_x = tvm.IterVar(thread_tag="gridIdx.x")
    thread_x = tvm.IterVar((0, num_thread), thread_tag="threadIdx.x")
    _, x = s[C].split(C.op.axis[0], factor=num_thread, outer=grid_x)
    _, x = s[C].split(x, outer=thread_x)

    # Compile to IR. The result is not inspected; the calls themselves
    # are what this mock exercises.
    bounds = tvm.schedule.InferBound(s)
    stmt = tvm.ir_pass.ScheduleOps(s, bounds)

    Ab = tvm.Buffer(A.shape, A.dtype, name='A')
    Bb = tvm.Buffer(B.shape, B.dtype, name='B')
    Cb = tvm.Buffer(C.shape, C.dtype, name='C')

    def codegen():
        """Sketch of the intended codegen/run flow (never called)."""
        # Generate host/device code.
        host_code, device_code = tvm.codegen.GenCUDA(
            s,
            inputs={A: Ab, B: Bb},
            outputs={C: Cb},
            args=[A, B, C])
        # Generate a function based on the code.
        f = tvm.cuda.build_function(host_code, device_code)
        # Create arrays. The module is imported as ``numpy`` in this
        # file, so it is referenced by that name rather than ``np``.
        a = tvm.nd.array(numpy.ones(10), ctx=tvm.gpu(0))
        b = tvm.nd.array(numpy.ones(10), ctx=tvm.gpu(0))
        c = tvm.nd.array(numpy.zeros(10), ctx=tvm.gpu(0))
        # Call the generated code.
        f(a, b, c)
        # Sync the result and compare against the expected sum.
        numpy.testing.assert_equal(c.asnumpy(), numpy.ones(10) * 2)
# Run the mock workflow when this file is executed directly as a script.
if __name__ == "__main__":
    mock_test_add()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment