Unverified Commit 9f441d81 by Tianqi Chen Committed by GitHub

[RELAY] CompileEngine update, nn conv2d, fix dense, pool. (#2082)

parent 933aeba2
......@@ -72,7 +72,8 @@ using FTVMCompute = runtime::TypedPackedFunc<
* \return schedule The computation schedule.
*/
using FTVMSchedule = runtime::TypedPackedFunc<
Schedule(const Array<Tensor>& outs,
Schedule(const Attrs& attrs,
const Array<Tensor>& outs,
const Target& target)>;
} // namespace relay
} // namespace tvm
......
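Note on the interface change above: every FTVMSchedule callback now receives the call attributes in addition to the output tensors and the target. A minimal Python sketch of a callback matching the new signature (the op name and the fallback schedule are illustrative only, not part of this change):

import topi

def schedule_example(attrs, outs, target):
    # attrs lets the schedule inspect properties such as layout or groups
    # before picking a TOPI schedule; here we simply fall back to injective.
    with target:
        return topi.generic.schedule_injective(outs)

# Such a callback is attached to an operator through the registration helpers
# used below, e.g. reg.register_schedule("some.op", schedule_example).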
......@@ -2,13 +2,8 @@
"""Backend compiler related feature registration"""
from __future__ import absolute_import
import topi
import topi.cuda
from .op import register_compute, register_schedule, register_pattern, OpPattern
def schedule_injective(outputs, target):
    """Generic schedule for binary broadcast."""
    with target:
        return topi.generic.schedule_injective(outputs)
from .op import register_compute, register_schedule, register_pattern
from .op import schedule_injective, OpPattern
schedule_broadcast = schedule_injective
schedule_elemwise = schedule_injective
......
......@@ -2,3 +2,4 @@
"""Neural network related operators."""
from __future__ import absolute_import as _abs
from .nn import *
from . import _nn
#pylint: disable=invalid-name, unused-argument
"""Backend compiler related feature registration"""
import tvm
import topi
from .. import register
from topi.util import get_const_int, get_const_tuple
from .. import op as reg
from ..op import OpPattern, schedule_injective
def dense_compiler(attrs, inputs, output_type):
    assert len(inputs) == 2

# dense
@reg.register_compute("nn.dense")
def compute_dense(attrs, inputs, out_type, target):
    """Compute definition of dense"""
    return [topi.nn.dense(inputs[0], inputs[1])]

def dense_schedule(outputs, target):
    assert len(outputs) == 1
    return tvm.create_schedule(outputs[0].op)

@reg.register_schedule("nn.dense")
def schedule_dense(attrs, outputs, target):
    """Schedule definition of dense"""
    with target:
        return topi.generic.schedule_dense(outputs)

register("nn.dense", "FTVMCompute", dense_compiler)
register("nn.dense", "FTVMSchedule", dense_schedule)
reg.register_pattern("nn.dense", reg.OpPattern.OUT_ELEMWISE_FUSABLE)
# conv2d
@reg.register_compute("nn.conv2d")
def compute_conv2d(attrs, inputs, out_type, target):
    """Compute definition of conv2d"""
    padding = get_const_tuple(attrs.padding)
    strides = get_const_tuple(attrs.strides)
    dilation = get_const_tuple(attrs.dilation)
    groups = attrs.groups
    layout = attrs.data_layout
    weight_layout = attrs.weight_layout
    out_dtype = attrs.out_dtype
    out_dtype = (inputs[0].dtype if (out_dtype == "same" or out_dtype == "")
                 else out_dtype)
    assert layout in ["NCHW", "NHWC", "NCHW4c"]
    (dilation_h, dilation_w) = dilation
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be a positive value")

    if groups == 1:
        out = topi.nn.conv2d(
            inputs[0], inputs[1], strides, padding,
            dilation, layout, out_dtype=out_dtype)
    elif layout == "NCHW" and \
         weight_layout == "OIHW" and \
         get_const_int(inputs[1].shape[0]) == groups and \
         get_const_int(inputs[1].shape[1]) == 1:
        out = topi.nn.depthwise_conv2d_nchw(
            inputs[0], inputs[1], strides, padding, dilation, out_dtype=out_dtype)
    elif layout == "NHWC" and \
         weight_layout == "HWOI" and \
         get_const_int(inputs[1].shape[2]) == groups and \
         get_const_int(inputs[1].shape[3]) == 1:
        out = topi.nn.depthwise_conv2d_nhwc(
            inputs[0], inputs[1], strides, padding, dilation, out_dtype=out_dtype)
    else:
        raise ValueError("arbitrary group number is not supported for now")
    return [out]
@reg.register_schedule("nn.conv2d")
def schedule_conv2d(attrs, outs, target):
    """Schedule definition of conv2d"""
    groups = attrs.groups
    layout = attrs.data_layout
    kernel_layout = attrs.weight_layout
    with target:
        if groups == 1 and layout == "NCHW":
            return topi.generic.schedule_conv2d_nchw(outs)
        elif groups == 1 and layout == "NCHW4c":
            return topi.generic.schedule_conv2d_nchw(outs)
        elif groups == 1 and layout == "NHWC":
            return topi.generic.schedule_conv2d_nhwc(outs)
        elif groups != 1:
            if layout == "NCHW":
                # TODO(leyuan, merrymercy, Huyuwei): fold depthwise topi into conv2d.
                return topi.generic.schedule_depthwise_conv2d_nchw(outs)
            elif layout == "NHWC" and kernel_layout == "HWOI":
                return topi.generic.schedule_depthwise_conv2d_nhwc(outs)
    raise ValueError("No compatible schedule")

reg.register_pattern("nn.conv2d", OpPattern.OUT_ELEMWISE_FUSABLE)
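Note on the dispatch above: groups == 1 goes to the generic topi.nn.conv2d, while a weight whose leading dimensions match (groups, 1, ...) is routed to the depthwise implementations. A small sketch, with assumed shapes that are not part of this diff, of a Relay call that would take the NCHW depthwise branch:

import numpy as np
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 8, 32, 32))      # NCHW
weight = relay.var("weight", shape=(8, 1, 3, 3))     # (groups, 1, kH, kW) in OIHW
out = relay.nn.conv2d(data, weight, kernel_size=(3, 3),
                      padding=(1, 1), groups=8, channels=8)
func = relay.Function([data, weight], out)
print(relay.ir_pass.infer_type(func))                # output stays (1, 8, 32, 32)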
# conv2d_transpose
@reg.register_compute("nn.conv2d_transpose")
def compute_conv2d_transpose(attrs, inputs, out_dtype, target):
    """Compute definition of conv2d_transpose"""
    padding = get_const_tuple(attrs.padding)
    strides = get_const_tuple(attrs.strides)
    dilation = get_const_tuple(attrs.dilation)
    groups = attrs.groups
    layout = attrs.data_layout
    out_dtype = attrs.out_dtype
    out_dtype = (inputs[0].dtype if (out_dtype == "same" or out_dtype == "")
                 else out_dtype)
    assert layout == "NCHW", "only the NCHW layout is supported for now"
    assert dilation == (1, 1), "dilation is not supported for now"
    assert groups == 1, "only groups == 1 is supported for now"
    out = topi.nn.conv2d_transpose_nchw(inputs[0], inputs[1], strides, padding, out_dtype)
    output_padding = get_const_tuple(attrs.output_padding)
    out = topi.nn.pad(out,
                      [0, 0, 0, 0], [0, 0, output_padding[0], output_padding[1]])
    return [out]

@reg.register_schedule("nn.conv2d_transpose")
def schedule_conv2d_transpose(attrs, outs, target):
    """Schedule definition of conv2d_transpose"""
    with target:
        return topi.generic.schedule_conv2d_transpose_nchw(outs)

reg.register_pattern("nn.conv2d_transpose", OpPattern.OUT_ELEMWISE_FUSABLE)
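For reference, the output_padding handling above pads only the bottom and right of the transposed-convolution result. Rough shape arithmetic for the NCHW case, using the standard conv2d_transpose formula with no dilation:

def transposed_out_size(in_size, kernel, stride, pad, output_padding):
    # (in - 1) * stride - 2 * pad + kernel, with output_padding then
    # appended on the bottom/right by the topi.nn.pad call above.
    return (in_size - 1) * stride - 2 * pad + kernel + output_padding

assert transposed_out_size(4, 3, 2, 1, 1) == 8   # e.g. a 4x4 map becomes 8x8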
# bias_add
@reg.register_compute("nn.bias_add")
def compute_bias_add(attrs, inputs, out_dtype, target):
    """Compute definition of bias_add"""
    axis = attrs.axis
    bias = inputs[1]
    data_ndim = len(inputs[0].shape)
    if axis < 0:
        axis = axis + data_ndim
    num_newaxis = data_ndim - axis - 1
    if num_newaxis:
        bias = topi.expand_dims(bias, axis=1, num_newaxis=num_newaxis)
    return [topi.add(inputs[0], bias)]

reg.register_schedule("nn.bias_add", schedule_injective)
reg.register_pattern("nn.bias_add", OpPattern.BROADCAST)
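The bias_add compute broadcasts a 1-D bias against the chosen axis by appending trailing unit axes: for 4-D data and the default axis = 1, num_newaxis = 4 - 1 - 1 = 2. A plain numpy illustration of the same broadcasting rule:

import numpy as np

x = np.zeros((10, 2, 3, 4), dtype="float32")
bias = np.array([1.0, 2.0], dtype="float32")
expanded = bias.reshape((2, 1, 1))     # what expand_dims produces here
y = x + expanded                       # broadcasts over N, H, W
assert y.shape == (10, 2, 3, 4)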
# max_pool2d
@reg.register_schedule("nn.max_pool2d")
def schedule_max_pool2d(attrs, outs, target):
    """Schedule definition of max_pool2d"""
    layout = attrs.layout
    with target:
        return topi.generic.schedule_pool(outs, layout)

reg.register_pattern("nn.max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)

# avg_pool2d
@reg.register_schedule("nn.avg_pool2d")
def schedule_avg_pool2d(attrs, outs, target):
    """Schedule definition of avg_pool2d"""
    layout = attrs.layout
    with target:
        return topi.generic.schedule_pool(outs, layout)

reg.register_pattern("nn.avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)

# global_max_pool2d
@reg.register_schedule("nn.global_max_pool2d")
def schedule_global_max_pool2d(_, outs, target):
    """Schedule definition of global_max_pool2d"""
    with target:
        return topi.generic.schedule_global_pool(outs)

reg.register_pattern("nn.global_max_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)

# global_avg_pool2d
@reg.register_schedule("nn.global_avg_pool2d")
def schedule_global_avg_pool2d(_, outs, target):
    """Schedule definition of global_avg_pool2d"""
    with target:
        return topi.generic.schedule_global_pool(outs)

reg.register_pattern("nn.global_avg_pool2d", OpPattern.OUT_ELEMWISE_FUSABLE)
#pylint: disable=unused-argument
"""The base node types for the Relay language."""
import topi
from ..._ffi.function import _init_api
from ..base import register_relay_node
......@@ -156,3 +159,9 @@ def _lower(name, schedule, inputs, outputs):
@register_func("relay.op.compiler._build")
def _build(lowered_funcs):
    return build(lowered_funcs, target="llvm")


def schedule_injective(attrs, outputs, target):
    """Generic schedule for binary broadcast."""
    with target:
        return topi.generic.schedule_injective(outputs)
......@@ -89,7 +89,7 @@ class ScheduleGetter :
CachedFunc cfunc(cache_node);
CHECK(master_op_.defined());
Schedule schedule = fschedule[master_op_](
cache_node->outputs, target_);
master_attrs_, cache_node->outputs, target_);
return std::make_pair(schedule, cfunc);
}
......@@ -145,6 +145,7 @@ class ScheduleGetter :
}
if (op_pattern >= master_op_pattern_) {
master_op_ = op;
master_attrs_ = call_node->attrs;
master_op_pattern_ = op_pattern;
}
if (outputs.size() != 1) {
......@@ -193,6 +194,7 @@ class ScheduleGetter :
private:
tvm::Target target_;
Op master_op_;
Attrs master_attrs_;
int master_op_pattern_{0};
std::ostringstream readable_name_stream_;
std::unordered_map<Expr, Array<Tensor>, NodeHash, NodeEqual> memo_;
......@@ -285,6 +287,9 @@ class CompileEngineImpl : public CompileEngineNode {
* \return Updated name which is unique.
*/
std::string GetUniqueName(std::string name) {
for (size_t i = 0; i < name.length(); ++i) {
if (name[i] == '.') name[i] = '_';
}
while (true) {
auto it = name_map_.find(name);
if (it == name_map_.end()) {
......
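The loop added to GetUniqueName rewrites dots to underscores so that function names derived from operator names (for example "nn.conv2d") become valid symbols in the generated module. A simplified Python sketch of the intent; the numeric-suffix part corresponds to the elided remainder of the C++ method and is an assumption here:

def get_unique_name(name, taken):
    name = name.replace(".", "_")        # "nn.conv2d" -> "nn_conv2d"
    candidate, i = name, 1
    while candidate in taken:            # keep names unique across the cache
        candidate = "%s_%d" % (name, i)
        i += 1
    taken.add(candidate)
    return candidate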
......@@ -91,16 +91,15 @@ bool DenseRel(const Array<Type>& types,
Array<tvm::Expr> oshape = data->shape;
if (param->units.defined()) {
Array<tvm::Expr> dshape = data->shape;
// validate the weight shape is proper if defined
// Assign weight type
Array<IndexExpr> wshape({dshape[dshape.size() - 1], param->units});
Array<IndexExpr> wshape({param->units, dshape[dshape.size() - 1]});
reporter->Assign(types[1], TensorTypeNode::make(wshape, data->dtype));
oshape.Set((oshape.size() - 1), param->units);
} else {
if (weight == nullptr) return false;
Array<tvm::Expr> wshape = weight->shape;
oshape.Set((oshape.size() - 1), wshape[wshape.size() - 1]);
oshape.Set((oshape.size() - 1), wshape[0]);
}
// assign output type
......
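The DenseRel fix switches the inferred weight shape to (units, input_dim), so dense now computes x @ w.T in numpy terms, which is what the updated test later in this diff checks:

import numpy as np

x = np.random.rand(10, 5).astype("float32")
w = np.random.rand(2, 5).astype("float32")   # (units=2, K=5)
y = x @ w.T
assert y.shape == (10, 2)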
......@@ -4,7 +4,9 @@
* \brief Pooling operators
*/
#include <tvm/relay/op.h>
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/attrs/nn.h>
#include <topi/nn/pooling.h>
#include <vector>
#include "layout.h"
......@@ -14,7 +16,7 @@ namespace relay {
TVM_REGISTER_NODE_TYPE(MaxPool2DAttrs);
TVM_REGISTER_NODE_TYPE(AvgPool2DAttrs);
template <typename AttrTtype>
template <typename AttrType>
bool Pool2DRel(const Array<Type>& types,
int num_inputs,
const Attrs& attrs,
......@@ -27,7 +29,7 @@ bool Pool2DRel(const Array<Type>& types,
CHECK_NE(dshape.size(), 0);
CHECK_GE(dshape.size(), 2U)
<< "Pool2D only support input >= 2-D: input must have height and width";
const auto param = attrs.as<AttrTtype>();
const auto param = attrs.as<AttrType>();
CHECK(param != nullptr);
Layout layout(param->layout);
......@@ -88,6 +90,46 @@ Expr MakeMaxPool2D(Expr data,
return CallNode::make(op, {data}, Attrs(attrs), {});
}
template<typename AttrType, topi::nn::PoolType mode>
Array<Tensor> Pool2DCompute(const Attrs& attrs,
                            const Array<Tensor>& inputs,
                            const Type& out_type,
                            const Target& target) {
  const auto* param = attrs.as<AttrType>();
  CHECK(param != nullptr);
  auto pool_size = param->pool_size;
  auto strides = param->strides;
  auto padding = param->padding;
  auto ceil_mode = param->ceil_mode;
  Layout layout(param->layout);
  CHECK(layout.convertible(Layout("NCHW")))
      << "max_pool2d currently only supports layouts that are convertible from NCHW";
  CHECK_EQ(layout.indexof('h'), -1) << "max_pool2d does not support input split on height";
  CHECK_EQ(layout.indexof('w'), -1) << "max_pool2d does not support input split on width";
  CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
      << "Pool2D only support 4-D input (e.g., NCHW)"
      << " or 5-D input (last dimension is a split of channel)";

  if (param->padding.size() == 1) {
    padding.push_back(padding[0]);
    padding.push_back(padding[0]);
    padding.push_back(padding[0]);
  } else if (param->padding.size() == 2) {
    padding.push_back(padding[0]);
    padding.push_back(padding[1]);
  }
  if (mode == topi::nn::kAvgPool) {
    bool count_include_pad = reinterpret_cast<const AvgPool2DAttrs*>(param)->count_include_pad;
    return Array<Tensor>{
      topi::nn::pool(inputs[0], pool_size, strides, padding,
                     mode, ceil_mode, layout.name(), count_include_pad)};
  } else {
    return Array<Tensor>{
      topi::nn::pool(inputs[0], pool_size, strides, padding,
                     mode, ceil_mode, layout.name())};
  }
}
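Pool2DCompute normalizes the padding attribute before calling into TOPI: a single value or an (h, w) pair is expanded to four values, which topi::nn::pool interprets as (top, left, bottom, right); that interpretation is the assumption in this small Python sketch of the normalization:

def normalize_padding(padding):
    if len(padding) == 1:
        return [padding[0]] * 4
    if len(padding) == 2:
        return [padding[0], padding[1], padding[0], padding[1]]
    return list(padding)

assert normalize_padding([1]) == [1, 1, 1, 1]
assert normalize_padding([1, 2]) == [1, 2, 1, 2]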
TVM_REGISTER_API("relay.op.nn._make.max_pool2d")
.set_body([](const TVMArgs& args, TVMRetValue* rv) {
......@@ -120,7 +162,8 @@ RELAY_REGISTER_OP("nn.max_pool2d")
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(2)
.add_type_rel("MaxPool2D", Pool2DRel<MaxPool2DAttrs>);
.add_type_rel("MaxPool2D", Pool2DRel<MaxPool2DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", Pool2DCompute<MaxPool2DAttrs, topi::nn::kMaxPool>);
// AvgPool2D
......@@ -175,7 +218,8 @@ Average pooling operation for one dimensional data.
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(2)
.add_type_rel("AvgPool2D", Pool2DRel<AvgPool2DAttrs>);
.add_type_rel("AvgPool2D", Pool2DRel<AvgPool2DAttrs>)
.set_attr<FTVMCompute>("FTVMCompute", Pool2DCompute<AvgPool2DAttrs, topi::nn::kAvgPool>);
// Global Pool
TVM_REGISTER_NODE_TYPE(GlobalPool2DAttrs);
......@@ -211,6 +255,29 @@ bool GlobalPool2DRel(const Array<Type>& types,
return true;
}
template<topi::nn::PoolType mode>
Array<Tensor> GlobalPool2DCompute(const Attrs& attrs,
                                  const Array<Tensor>& inputs,
                                  const Type& out_type,
                                  const Target& target) {
  const auto* param = attrs.as<GlobalPool2DAttrs>();
  CHECK(param != nullptr);
  Layout layout(param->layout);
  CHECK(layout.convertible(Layout("NCHW")))
      << "global_avg_pool2d currently only supports layouts that are convertible from NCHW";
  CHECK_EQ(layout.indexof('h'), -1)
      << "global_avg_pool2d does not support input split on height";
  CHECK_EQ(layout.indexof('w'), -1)
      << "global_avg_pool2d does not support input split on width";
  CHECK(inputs[0].ndim() == 4U || inputs[0].ndim() == 5U)
      << "Pool2D only support 4-D input (e.g., NCHW)"
      << " or 5-D input (last dimension is a split of channel)";
  return Array<Tensor>{
    topi::nn::global_pool(inputs[0], mode, layout.name()) };
}
Expr MakeGlobalAvgPool2D(Expr data,
std::string layout) {
auto attrs = make_node<GlobalPool2DAttrs>();
......@@ -239,7 +306,8 @@ RELAY_REGISTER_OP("nn.global_avg_pool2d")
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(2)
.add_type_rel("GlobalAvgPool2D", GlobalPool2DRel);
.add_type_rel("GlobalAvgPool2D", GlobalPool2DRel)
.set_attr<FTVMCompute>("FTVMCompute", GlobalPool2DCompute<topi::nn::kAvgPool>);
// GlobalMaxPool
Expr MakeGlobalMaxPool2D(Expr data,
......@@ -269,7 +337,8 @@ RELAY_REGISTER_OP("nn.global_max_pool2d")
.set_num_inputs(1)
.add_argument("data", "Tensor", "The input tensor.")
.set_support_level(2)
.add_type_rel("GlobalMaxPool2D", GlobalPool2DRel);
.add_type_rel("GlobalMaxPool2D", GlobalPool2DRel)
.set_attr<FTVMCompute>("FTVMCompute", GlobalPool2DCompute<topi::nn::kMaxPool>);
} // namespace relay
} // namespace tvm
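With FTVMCompute now attached to the pooling operators, a pooling expression can be compiled and run end to end through the compile engine. A usage sketch, with shapes and target chosen arbitrarily, mirroring the test style used later in this diff:

import numpy as np
import tvm
from tvm import relay

x = relay.var("x", shape=(1, 3, 8, 8))
y = relay.nn.max_pool2d(x, pool_size=(2, 2), strides=(2, 2))
func = relay.Function([x], y)
intrp = relay.create_executor("graph", ctx=tvm.cpu(0), target="llvm")
out = intrp.evaluate(func)(np.random.rand(1, 3, 8, 8).astype("float32"))
assert out.asnumpy().shape == (1, 3, 4, 4)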
......@@ -55,28 +55,6 @@ def test_mul_param():
check_eval(func, [x_data, y_data], x_data * y_data)
# failing due to numeric issues
# def test_dense():
# x = relay.var('x', shape=(10, 10))
# w = relay.var('w', shape=(10, 10))
# y = relay.nn.dense(x, w)
# func = relay.Function([x, w], y)
# x_data = np.random.rand(10, 10).astype('float32')
# w_data = np.random.rand(10, 10).astype('float32')
# check_eval(func, [x_data, w_data], x_data @ w_data, rtol=0.1)
# def test_linear():
# x = relay.var('x', shape=(10, 10))
# w = relay.var('w', shape=(10, 10))
# b = relay.var('b', shape=(10,))
# y = relay.add(relay.nn.dense(x, w), b)
# func = relay.Function([x, w, b], y)
# x_data = np.random.rand(10, 10).astype('float32')
# w_data = np.random.rand(10, 10).astype('float32')
# b_data = np.random.rand(10).astype('float32')
# check_eval(func, [x_data, w_data, b_data], x_data @ w_data + b_data)
def test_equal():
    i = relay.var('i', shape=[], dtype='int32')
    j = relay.var('i', shape=[], dtype='int32')
......
......@@ -74,6 +74,7 @@ def test_binary_op():
y_data = np.random.rand(5, 10, 5).astype(t2.dtype)
ref_res = ref(x_data, y_data)
func = relay.Function([x, y], z)
for target, ctx in ctx_list():
# use the graph executor by default for testing, as we need to
# create the function explicitly to avoid constant folding.
......@@ -89,12 +90,24 @@ def test_binary_op():
def test_bias_add():
    x = relay.var("x", shape=(10, 2, 3, 4))
    xshape = (10, 2, 3, 4)
    bshape = (2,)
    dtype = "float32"
    x = relay.var("x", shape=xshape)
    bias = relay.var("bias")
    z = relay.nn.bias_add(x, bias)
    zz = relay.ir_pass.infer_type(z)
    assert "axis=" not in zz.astext()
    assert zz.args[1].checked_type == relay.TensorType((2,))
    assert zz.args[1].checked_type == relay.TensorType(bshape)

    func = relay.Function([x, bias], z)
    x_data = np.random.uniform(size=xshape).astype(dtype)
    y_data = np.random.uniform(size=bshape).astype(dtype)
    ref_res = x_data + y_data.reshape((2, 1, 1))
    for target, ctx in ctx_list():
        intrp = relay.create_executor("graph", ctx=ctx, target=target)
        op_res = intrp.evaluate(func)(x_data, y_data)
        np.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
def test_expand_dims_infer_type():
......@@ -217,6 +230,50 @@ def test_batch_norm():
]))
def test_dense():
    n, c, h, w = tvm.var("n"), tvm.var("c"), tvm.var("h"), tvm.var("w")
    x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
    w = relay.var("w", relay.TensorType((2, w), "float32"))
    y = relay.nn.dense(x, w, units=2)
    assert "units=2" in y.astext()
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.TensorType((n, c, h, 2), "float32")

    n, c, h, w = tvm.var("n"), tvm.var("c"), tvm.var("h"), 2
    x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
    wh, ww = tvm.var("wh"), tvm.var("ww")
    w = relay.var("w", relay.TensorType((ww, wh), "float32"))
    y = relay.nn.dense(x, w)
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.TensorType((n, c, h, ww), "float32")

    n, c, h, w = tvm.var("n"), tvm.var("c"), tvm.var("h"), 2
    x = relay.var("x", relay.TensorType((n, c, h, w), "float32"))
    w = relay.var("w", relay.IncompleteType())
    y = relay.nn.dense(x, w, units=2)
    yy = relay.ir_pass.infer_type(y)
    assert yy.checked_type == relay.TensorType((n, c, h, 2), "float32")

    x = relay.var("x", shape=(10, 5))
    w = relay.var("w", shape=(2, 5))
    z = relay.nn.dense(x, w)

    # Check result.
    func = relay.Function([x, w], z)
    x_data = np.random.rand(10, 5).astype('float32')
    w_data = np.random.rand(2, 5).astype('float32')
    ref_res = np.dot(x_data, w_data.T)

    for target, ctx in ctx_list():
        intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
        intrp2 = relay.create_executor("debug", ctx=ctx, target=target)
        op_res1 = intrp1.evaluate(func)(x_data, w_data)
        tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5)
        op_res2 = intrp2.evaluate(func)(x_data, w_data)
        tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5)
if __name__ == "__main__":
    test_bias_add()
    test_unary_op()
......@@ -227,3 +284,4 @@ if __name__ == "__main__":
    test_log_softmax()
    test_dropout()
    test_batch_norm()
    test_dense()
# pylint: disable=invalid-name
"""Common topi utilities"""
from __future__ import absolute_import as _abs
import tvm
from numbers import Integral
import tvm
from . import tag
def traverse_inline(s, final_op, callback):
......@@ -68,13 +69,13 @@ def get_const_int(expr):
    out_value : int
        The output.
    """
    if isinstance(expr, int):
    if isinstance(expr, Integral):
        return expr
    if not isinstance(expr, (tvm.expr.IntImm, tvm.expr.UIntImm)):
        expr = tvm.ir_pass.Simplify(expr)
    if not isinstance(expr, (tvm.expr.IntImm, tvm.expr.UIntImm)):
        raise ValueError("Expect value to be constant int")
    return expr.value
    return int(expr.value)
def equal_const_int(expr, value):
......@@ -90,7 +91,7 @@ def equal_const_int(expr, value):
    equal : bool
        Whether they are equal.
    """
    if isinstance(expr, int):
    if isinstance(expr, Integral):
        return expr == value
    if not isinstance(expr, (tvm.expr.IntImm, tvm.expr.UIntImm)):
        expr = tvm.ir_pass.Simplify(expr)
......
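The switch from int to numbers.Integral matters because shape values frequently arrive as numpy integer scalars, which satisfy the Integral ABC but, on Python 3, are not instances of the builtin int; int(expr.value) likewise guarantees a plain int is returned. For illustration:

import numpy as np
from numbers import Integral

v = np.int64(3)                    # e.g. an element of a numpy shape tuple
assert isinstance(v, Integral)     # accepted by the new check
assert not isinstance(v, int)      # rejected by the old check on Python 3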