Commit d29b1c9e by Jian Weng, committed by Tianqi Chen

[FRONTEND] [HYBRID] Non-zero starting supported; Buffer AttrStmt add! (#1330)

parent 9b8cb1b6
......@@ -29,7 +29,7 @@ class bind(_range): #pylint: disable=invalid-name
unroll = vectorize = parallel = _range #pylint: disable=invalid-name
def allocate(shape, dtype='float32'):
def allocate(shape, dtype='float32', scope='global'): #pylint: disable=unused-argument
"""Allocate a buffer with given shape
Parameters
......@@ -38,6 +38,8 @@ def allocate(shape, dtype='float32'):
The shape of the tensor to be allocated
dtype: string
The data type of the tensor
scope: string
The storage scope of the tensor
Returns
-------
......
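For reference, a minimal hybrid-script sketch using the extended signature; the function name, shapes, and the explicit 'global' scope below are illustrative only (scope defaults to 'global'), and the access patterns follow the ones exercised by the new tests further down:

from tvm.hybrid import script

@script
def smooth(a, b):                                   # illustrative: a is (10,), b is (8,), both float32
    for i in range(8):
        tmp = allocate((1, ), 'float32', 'global')  # shape, dtype, storage scope
        tmp[0] = a[i] + a[i + 1] + a[i + 2]
        b[i] = tmp[0] / 3.0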
......@@ -3,7 +3,7 @@
import ast
import operator
import sys
from .util import make_nop, make_const_true, make_range_one, halide_imm_types
from .util import make_nop, halide_imm_types
from .intrin import LOOP_INTRIN, MATH_INTRIN
from .var_decl import determine_variable_usage
from ..api import thread_axis
......@@ -75,7 +75,8 @@ class HybridParser(ast.NodeVisitor):
self.args = args[:]
self.usage = usage.copy()
self._args = {} # Dict maps arg name to actual arg instance (either a var or a buffer)
self.buffers = {}
self.var_buffers = {} # Buffers formed by mutable variables
self.alloc_buffers = {} # Buffers formed by allocate instructions
self.loops_above = {} # State variable that indicates loop levels above the current node
self.var_consts = {} # Variables that are determined as readonly in previous stage
self.func_name = func_name # The name of the function to be lowered
......@@ -87,19 +88,30 @@ class HybridParser(ast.NodeVisitor):
for key, val in self.usage.items():
if key in self.var_consts.keys():
continue
_, scope, _ = val
if scope == node:
_buf = self.buffers[key]
_, level, _ = val
if level == node:
if key in self.var_buffers.keys():
_buf = self.var_buffers[key]
_scope = 'global'
else:
_buf, _scope = self.alloc_buffers[key]
_domain = [_make.range_by_min_extent(0, i) for i in _buf.shape]
_dtype = _buf.dtype
_one = make_range_one()
_true = make_const_true()
body = _make.Realize(_buf.op, 0, _dtype, [_one], _true, body)
_true = _api.convert(True)
body = _make.Realize(_buf.op, 0, _dtype, _domain, _true, body)
body = _make.AttrStmt(_buf.op, 'realize_scope', _api.convert(_scope), body)
return body
def _check_id_a_buffer(self, s):
if s not in self._args.keys():
def _get_buffer_from_id(self, s):
if s not in self._args.keys() and s not in self.alloc_buffers.keys():
raise ValueError("This %s is expected to be in argument list or allocated buffer!" % s)
if s in self._args.keys() and s in self.alloc_buffers.keys():
raise ValueError("%s, a buffer cannot be both argument and allocated!" % s)
if s in self._args.keys():
return self._args[s]
return self.alloc_buffers[s][0]
#pylint: disable=invalid-name, missing-docstring
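Condensed, this is the wrapping the hunk above now emits for each allocated buffer; `buf`, `scope`, and `body` stand in for the values pulled out of `alloc_buffers`, and the calls mirror the ones in the diff rather than adding any new API:

# buf: the placeholder registered by allocate(); scope: its storage scope string
domain = [_make.range_by_min_extent(0, extent) for extent in buf.shape]
body = _make.Realize(buf.op, 0, buf.dtype, domain, _api.convert(True), body)
body = _make.AttrStmt(buf.op, 'realize_scope', _api.convert(scope), body)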
......@@ -138,8 +150,8 @@ class HybridParser(ast.NodeVisitor):
if _id not in self.usage.keys():
raise ValueError("This id %s is expected to be a defined variable!" % _id)
# Buffer
if _id in self.buffers.keys():
_buf = self.buffers[_id]
if _id in self.var_buffers.keys():
_buf = self.var_buffers[_id]
return _make.Call(_buf.dtype, _id, [_api.const(0)], _expr.Call.Halide, _buf.op, 0)
# Compilation time constant
if _id not in self.var_consts.keys():
......@@ -155,7 +167,9 @@ class HybridParser(ast.NodeVisitor):
if len(node.targets) != 1:
raise ValueError("So far only one-valued assignment is supported!")
lhs = node.targets[0]
rhs = _ir_pass.Simplify(self.visit(node.value))
rhs = self.visit(node.value)
if isinstance(rhs, _expr.Expr):
rhs = _ir_pass.Simplify(rhs)
if isinstance(lhs, ast.Name):
#TODO: support defined intermediate buffer later
lhs_ = lhs
......@@ -166,25 +180,31 @@ class HybridParser(ast.NodeVisitor):
if decl == lhs_:
if lhs in self.var_consts.keys():
raise ValueError("BUG: A constant cannot be overwritten!")
if lhs in self.buffers.keys():
if lhs in self.var_buffers.keys() or lhs in self.alloc_buffers.keys():
raise ValueError("BUG: This value should not be defined before this point!")
if isinstance(rhs, tuple):
shape, dtype, scope = rhs
ph = _api.placeholder(shape, dtype=dtype, name=lhs)
self.alloc_buffers[lhs] = (ph, scope)
return make_nop()
if isinstance(rhs, halide_imm_types) and ast.Store not in rw:
self.var_consts[lhs] = rhs
else:
self.buffers[lhs] = _api.placeholder((1, ), dtype=rhs.dtype, name=lhs)
self.var_buffers[lhs] = _api.placeholder((1, ), dtype=rhs.dtype, name=lhs)
if lhs in self.var_consts.keys():
return make_nop()
else:
if lhs not in self.buffers.keys():
raise ValueError("BUG: This value should be defined before!")
return _make.Provide(self.buffers[lhs].op, 0, rhs, [_api.const(0, dtype=rhs.dtype)])
if lhs not in self.var_buffers.keys():
raise ValueError("BUG: This variable should be defined before!")
tgt = self.var_buffers[lhs]
return _make.Provide(tgt.op, 0, rhs, [_api.const(0, dtype=rhs.dtype)])
else:
lhs = self.visit(lhs)
if not isinstance(lhs, _expr.Call):
raise ValueError("An array access's LHS is expected to be a expr.Call!")
#TODO: support slice later
self._check_id_a_buffer(lhs.name)
return _make.Provide(self._args[lhs.name].op, 0, rhs, lhs.args)
buf = self._get_buffer_from_id(lhs.name)
return _make.Provide(buf.op, 0, rhs, lhs.args)
def visit_Index(self, node):
......@@ -197,8 +217,7 @@ class HybridParser(ast.NodeVisitor):
args = self.visit(node.slice)
if isinstance(node.value, ast.Name):
array = node.value.id
self._check_id_a_buffer(array)
_buf = self._args[array]
_buf = self._get_buffer_from_id(array)
return _make.Call(_buf.dtype, array, args, _expr.Call.Halide, _buf.op, 0)
elif isinstance(node.value, ast.Attribute):
if not isinstance(node.value.value, ast.Name):
......@@ -211,8 +230,8 @@ class HybridParser(ast.NodeVisitor):
#TODO: maybe support non-constant value later?
if not isinstance(args, (_expr.IntImm, _expr.UIntImm)):
raise ValueError("So far only constant shape access supported!")
self._check_id_a_buffer(node.value.value.id)
return self._args[node.value.value.id].shape[args.value]
buf = self._get_buffer_from_id(node.value.value.id)
return buf.shape[args.value]
else:
raise ValueError("Not supported yet!")
......@@ -303,8 +322,30 @@ class HybridParser(ast.NodeVisitor):
elif func_id in MATH_INTRIN:
return getattr(intrin, func_id)(*[self.visit(arg) for arg in node.args])
elif func_id == 'allocate':
#TODO: Support it later!
return make_nop()
if not isinstance(node.args[0], ast.Tuple):
raise ValueError("allocate's first argument should be a tuple of shape!")
shape = tuple(self.visit(i) for i in node.args[0].elts)
for i in shape:
if not isinstance(i, _expr.Expr):
raise ValueError("The shape should be an expression")
if n > 1:
if not isinstance(node.args[1], ast.Str):
raise ValueError("The data type should be an string")
dtype = node.args[1].s
else:
dtype = 'float32'
if n > 2:
if not isinstance(node.args[2], ast.Str):
raise ValueError("The data type should be an string")
scope = node.args[2].s
else:
scope = 'global'
return (shape, dtype, scope)
elif func_id == 'max' or func_id == 'min':
if n != 2:
raise ValueError("Max/Min function should have 2 elements")
a, b = self.visit(node.args[0]), self.visit(node.args[1])
return getattr(_make, func_id.title())(a, b)
else:
raise ValueError("Function call not supported yet!")
......@@ -317,8 +358,10 @@ class HybridParser(ast.NodeVisitor):
if iter_var is None:
if for_type is None:
raise ValueError("The loop bind function parse error!")
iter_var = _api.var(_name)
self.loops_above[_name] = iter_var
offset = iter_var = _api.var(_name)
if not _ir_pass.Equal(low, _api.const(0, dtype='int32')):
offset = iter_var + low
self.loops_above[_name] = offset
else:
if for_type is not None:
raise ValueError("The loop iterating function parse error!")
......@@ -328,7 +371,7 @@ class HybridParser(ast.NodeVisitor):
if for_type is None:
res = _make.AttrStmt(iter_var, 'thread_extent', ext, _body)
else:
res = _make.For(iter_var, low, ext, for_type, 0, _body)
res = _make.For(iter_var, _api.const(0, dtype='int32'), ext, for_type, 0, _body)
self.loops_above.pop(_name)
return res
......
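The net effect of the two hunks above, sketched for a loop that starts at a non-zero bound and using the same `script` decorator as earlier: the emitted For node always starts at 0, and every use of the loop variable in the body is rewritten to the offset expression (this mirrors what test_non_zero below checks end to end):

@script
def shift(a, b):                    # illustrative: a is (32, ), b is (30, ), both float32
    for i in range(2, 32):          # lowered as for (i, 0, 30); the body sees i as (i + 2)
        b[i - 2] = a[i]             # i.e. b[(i + 2) - 2] = a[(i + 2)] in the generated IR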
......@@ -22,16 +22,6 @@ def make_nop():
return _make.Evaluate(_api.const(0, dtype='int32'))
def make_range_one():
"""Returns a [0, 1] range node in HalideIR."""
return _make.range_by_min_extent(0, 1)
def make_const_true():
"""Returns a constant True node in HalideIR."""
return _api.convert(True)
def _pruned_source(func):
"""Prune source code's extra leading spaces"""
lines = inspect.getsource(func).split('\n')
......
......@@ -41,7 +41,8 @@ class PyVariableUsage(ast.NodeVisitor):
#No function pointer supported so far
if not isinstance(node.func, ast.Name):
raise ValueError("Function call should be an id")
if (node.func.id not in HYBRID_GLOBALS.keys()) and node.func.id != 'range':
func_id = node.func.id
if func_id not in list(HYBRID_GLOBALS.keys()) + ['range', 'max', 'min']:
raise ValueError("Function call id not in intrinsics' list")
for elem in node.args:
self.visit(elem)
......@@ -64,7 +65,6 @@ class PyVariableUsage(ast.NodeVisitor):
self.status[node.id] = (node, self.scope_level[-1], set())
else:
decl, loop, usage = self.status[node.id]
loop = self.scope_level[-1]
usage.add(type(node.ctx))
self.status[node.id] = (decl, loop, usage)
......
......@@ -2,6 +2,7 @@ import tvm, inspect, sys, traceback, numpy
from tvm.hybrid import script
from tvm.hybrid.intrin import HYBRID_GLOBALS
@script
def outer_product(n, m, a, b, c):
for i in range(n):
......@@ -56,6 +57,7 @@ def test_outer_product():
tvm_c = tvm.ndarray.array(numpy.zeros((_n, _m), dtype='float32'))
func(_n, _m, tvm_a, tvm_b, tvm_c)
numpy.testing.assert_allclose(tvm_c.asnumpy(), c_python, rtol=1e-5)
for key, _ in HYBRID_GLOBALS.items():
assert key not in globals().keys()
assert key not in outer_product.__globals__.keys()
......@@ -74,8 +76,8 @@ def test_fanout():
b[i] = sigma
n = tvm.var('n')
a = tvm.placeholder((n, ), name='a')
b = tvm.placeholder((n-3, ), name='b')
a = tvm.placeholder((n, ), 'float32', name='a')
b = tvm.placeholder((n-3, ), 'float32', name='b')
ir = fanout(n, a, b)
#Check for i in (0, n-3)
......@@ -85,12 +87,14 @@ def test_fanout():
assert tvm.ir_pass.Equal(ir.extent, n - 3)
#Check loopbody
ibody = ir.body
assert isinstance(ibody, tvm.stmt.Realize)
assert ibody.bounds[0].min.value == 0
assert ibody.bounds[0].extent.value == 1
assert ibody.func.name == 'sigma'
assert isinstance(ibody, tvm.stmt.AttrStmt)
abody = ibody.body
assert isinstance(abody, tvm.stmt.Realize)
assert abody.bounds[0].min.value == 0
assert abody.bounds[0].extent.value == 1
assert abody.func.name == 'sigma'
#Check i loop body
rbody = ibody.body
rbody = abody.body
assert isinstance(rbody.first, tvm.stmt.Provide)
assert rbody.first.func.name == 'sigma'
assert len(rbody.first.args) == 1
......@@ -131,6 +135,21 @@ def test_fanout():
assert len(write.value.args) == 1
assert write.value.args[0].value == 0
func = tvm.build(tvm.lower(ir, [n, a, b]))
assert func
np_a = numpy.random.randn(10).astype('float32')
np_b = numpy.zeros(7).astype('float32')
nd_a = tvm.ndarray.array(np_a)
nd_b = tvm.ndarray.array(np_b)
fanout(10, np_a, np_b)
func(10, nd_a, nd_b)
numpy.testing.assert_allclose(nd_b.asnumpy(), np_b, rtol=1e-5, atol=1e-5)
@script
def failure():
for i in range(1, 100):
......@@ -148,15 +167,18 @@ def test_failure():
def test_looptype():
@script
def looptype(a):
for i in parallel(6):
def looptype(a, b, c):
for i in parallel(8):
a[i] = i
for j in vectorize(6):
a[j] = j
for k in unroll(6):
a[k] = k
a = tvm.placeholder((6, ), name='a')
ir = looptype(a)
for j in vectorize(8):
b[j] = j
for k in unroll(8):
c[k] = k
a = tvm.placeholder((8, ), name='a', dtype='int32')
b = tvm.placeholder((8, ), name='b', dtype='int32')
c = tvm.placeholder((8, ), name='c', dtype='int32')
ir = looptype(a, b, c)
iloop = ir.first
jloop = ir.rest.first
kloop = ir.rest.rest
......@@ -164,6 +186,24 @@ def test_looptype():
assert jloop.for_type == tvm.stmt.For.Vectorized
assert kloop.for_type == tvm.stmt.For.Unrolled
func = tvm.build(tvm.lower(ir, [a, b, c]))
np_a = numpy.zeros((8, )).astype('int32')
np_b = numpy.zeros((8, )).astype('int32')
np_c = numpy.zeros((8, )).astype('int32')
nd_a = tvm.ndarray.array(np_a)
nd_b = tvm.ndarray.array(np_b)
nd_c = tvm.ndarray.array(np_c)
looptype(np_a, np_b, np_c)
func(nd_a, nd_b, nd_c)
numpy.testing.assert_allclose(np_a, nd_a.asnumpy())
numpy.testing.assert_allclose(np_b, nd_b.asnumpy())
numpy.testing.assert_allclose(np_c, nd_c.asnumpy())
def test_if():
@script
def if_then_else(a, b):
......@@ -234,12 +274,14 @@ def test_math_intrin():
a[3] = sigmoid(a[3])
a[4] = power(a[4], a[5])
a[5] = tanh(a[5])
a[6] = min(a[4], a[5])
a[7] = max(a[5], a[6])
a6 = tvm.placeholder((6, ), dtype='float32', name='a')
a6 = tvm.placeholder((8, ), dtype='float32', name='a')
ir = intrin_real(a6)
func = tvm.build(tvm.lower(ir, [a6]))
assert func
a = numpy.arange(2, 8).astype('float32')
a = numpy.arange(2, 10).astype('float32')
tvm_a = tvm.ndarray.array(a)
func(tvm_a)
intrin_real(a)
......@@ -259,22 +301,87 @@ def test_math_intrin():
func(tvm_a)
assert tvm_a.asnumpy()[0] == a[0]
def test_allocate_buffer():
def blur(a):
for i in serail(32):
h_blur = allocate((4, 36))
for j in serail(4):
for k in serail(36):
s = allocate((1, ), 'float32')
for dj in serail(4):
s[0] = s[0] + a[i, j + dj]
h_blur[j, k] = s[0] / 4.
for j in serail(32):
s = 0.
for di in serail(4):
s = s + h_blur[di, j]
h_blur[i, j] = s / 4.
def test_non_zero():
@tvm.hybrid.script
def blur(a, b):
for i in range(2, 32):
for j in range(2, 32):
s = 0.0
for di in range(3):
for dj in range(3):
s = s + a[i-di, j-dj]
b[i-2, j-2] = s / 9.0
try:
np_a = numpy.random.randn(32, 32).astype('float32')
np_b = numpy.zeros((30, 30), dtype='float32')
blur(np_a, np_b)
ph_a = tvm.placeholder((32, 32), 'float32', 'a')
ph_b = tvm.placeholder((30, 30), 'float32', 'b')
ir = tvm.hybrid.parse(blur, [ph_a, ph_b])
func = tvm.lower(ir, [ph_a, ph_b])
func = tvm.build(func)
nd_a = tvm.ndarray.array(np_a)
nd_b = tvm.ndarray.array(np_b)
func(nd_a, nd_b)
numpy.testing.assert_allclose(np_b, nd_b.asnumpy(), atol=1e-5, rtol=1e-5)
except IOError:
print('[Warning] Non-zero start test skipped under Python 2')
@tvm.hybrid.script
def triangle(a, b, c):
for i in range(10):
for j in range(i, 10):
c[i, j] = a[i] * b[j]
a = tvm.placeholder((10, ), dtype='float32', name='a')
b = tvm.placeholder((10, ), dtype='float32', name='b')
c = tvm.placeholder((10, 10), dtype='float32', name='c')
np_a = numpy.random.randn(10).astype('float32')
np_b = numpy.random.randn(10).astype('float32')
np_c = numpy.zeros((10, 10)).astype('float32')
nd_a = tvm.ndarray.array(np_a)
nd_b = tvm.ndarray.array(np_b)
nd_c = tvm.ndarray.array(np_c)
triangle(np_a, np_b, np_c)
func = tvm.build(tvm.lower(triangle(a, b, c), [a, b, c]))
assert func
func(nd_a, nd_b, nd_c)
numpy.testing.assert_allclose(nd_c.asnumpy(), np_c)
def test_allocate():
@tvm.hybrid.script
def blur2d(a, b):
for i in range(30):
ha = allocate((3, 30), 'float32')
for j in range(3):
for k in range(30):
ha[j, k] = a[i+j, k] + a[i+j, k+1] + a[i+j, k+2]
for j in range(30):
b[i, j] = (ha[0, j] + ha[1, j] + ha[2, j]) / 9.0
a = tvm.placeholder((32, 32), 'float32', 'a')
b = tvm.placeholder((30, 30), 'float32', 'b')
func = tvm.build(tvm.lower(blur2d(a, b), [a, b]))
assert func
np_a = numpy.random.randn(32, 32).astype('float32')
np_b = numpy.zeros((30, 30)).astype('float32')
nd_a = tvm.ndarray.array(np_a)
nd_b = tvm.ndarray.array(np_b)
func(nd_a, nd_b)
blur2d(np_a, np_b)
numpy.testing.assert_allclose(nd_b.asnumpy(), np_b, atol=1e-5, rtol=1e-5)
if __name__ == "__main__":
test_outer_product()
......@@ -284,4 +391,6 @@ if __name__ == "__main__":
test_if()
test_bind()
test_math_intrin()
test_non_zero()
test_allocate()