Commit 8d263e37 by Lianmin Zheng, committed by Tianqi Chen

[CODEGEN] fix vector conversion for opencl (#783)

* support more argument types in depthwise_conv2d

* mark all pointers as 'restrict' & fix vector conversion for OpenCL

parent 3ff2d958
@@ -38,14 +38,17 @@ void CodeGenC::AddFunction(LoweredFunc f) {
     if (i != 0) stream << ", ";
     if (v.type().is_handle()) {
       auto it = alloc_storage_scope_.find(v.get());
-      if (it != alloc_storage_scope_.end()) {
+      if (it != alloc_storage_scope_.end())
         PrintStorageScope(it->second, stream);
-        stream << ' ';
-      }
-      if (handle_data_type_.count(v.get())) {
-        PrintType(handle_data_type_.at(v.get()), stream);
-        stream << "*";
+      stream << ' ';
+      if (handle_data_type_.count(v.get())) {
+        PrintType(handle_data_type_.at(v.get()), stream);
+      } else {
+        stream << "void";
+      }
+      stream << "*";
       if (f->is_restricted && restrict_keyword_.length() != 0) {
         stream << ' ' << restrict_keyword_;
       }
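With this change the parameter printer treats every handle argument uniformly: the element type (or void when no type was registered) is printed first, then the *, and the restrict qualifier is applied to all pointer parameters instead of only the typed ones. A minimal sketch of how to observe the effect, assuming the TVM 0.x Python API this commit targets (names, shapes and split factors here are illustrative):

    import tvm

    n = tvm.var("n")
    A = tvm.placeholder((n,), name="A")
    B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")
    s = tvm.create_schedule(B.op)
    bx, tx = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
    f = tvm.build(s, [A, B], "opencl")
    # Every pointer parameter should now carry the restrict qualifier,
    # e.g. "__kernel void ...(__global float* restrict A, ...)".
    print(f.imported_modules[0].get_source())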
@@ -402,12 +405,9 @@ inline void PrintBinaryIntrinsitc(const Call* op,
   }
 }
 void CodeGenC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*)
-  os << "(";
-  this->PrintType(op->type, os);
-  os << ")";
-  os << '(';
-  this->PrintExpr(op->value, os);
-  os << ')';
+  std::stringstream value;
+  this->PrintExpr(op->value, value);
+  os << CastFromTo(value.str(), op->value.type(), op->type);
 }
 void CodeGenC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*)
   os << GetVarID(op);
......
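The Cast visitor no longer hard-codes a C-style "(type)(value)" cast. It renders the operand into a temporary stream and delegates the cast syntax to CastFromTo, which becomes a virtual hook (see the header change below) so backends with different cast syntax, such as OpenCL, can override it. A small sketch of an expression that exercises this path, assuming the Python API of this era (the dtype choice is illustrative):

    import tvm

    n = tvm.var("n")
    A = tvm.placeholder((n,), name="A", dtype="int8")
    # astype lowers to a Cast node; CodeGenC now prints it via CastFromTo.
    B = tvm.compute((n,), lambda i: A[i].astype("float32"), name="B")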
@@ -142,6 +142,8 @@ class CodeGenC :
   // print store of single element.
   virtual void PrintVecElemStore(
       const std::string& vec, Type t, int i, const std::string& value);
+  // Get a cast type from to
+  virtual std::string CastFromTo(std::string value, Type from, Type target);

 protected:
   // Print reference to struct location
@@ -150,8 +152,6 @@ class CodeGenC :
   // print reference to a buffer as type t in index.
   std::string GetBufferRef(
       Type t, const Variable* buffer, Expr index);
-  // Get a cast type from to
-  std::string CastFromTo(std::string value, Type from, Type target);
   /*!
    * \brief If buffer is allocated as type t.
    * \param buf_var The buffer variable.
......
@@ -175,6 +175,22 @@ void CodeGenOpenCL::PrintStorageScope(
   }
 }
+std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) {
+  if (from == target) return value;
+  std::ostringstream os;
+  if (target.lanes() == 1) {
+    os << "((";
+    this->PrintType(target, os);
+    os << ")" << value << ")";
+  } else {  // convert vector type
+    os << "(";
+    os << "convert_";
+    this->PrintType(target, os);
+    os << "(" << value << "))";
+  }
+  return os.str();
+}
+
 void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*)
   std::string v = PrintExpr(op->value);
   os << "((";
......
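OpenCL C does not allow C-style casts between vector types; conversions must go through the built-in convert_<type>() family, e.g. convert_float4 for a 4-lane conversion to float. The override above therefore keeps the ((type)value) form for scalars (lanes() == 1) and emits convert_<type>(value) for vectors, which is the actual fix behind the commit title. An end-to-end sketch, assuming the TVM 0.x API (shape and vector factor are illustrative):

    import tvm

    A = tvm.placeholder((64,), name="A", dtype="int8")
    B = tvm.compute((64,), lambda i: A[i].astype("float32"), name="B")
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=4)
    s[B].bind(xo, tvm.thread_axis("threadIdx.x"))
    s[B].vectorize(xi)
    f = tvm.build(s, [A, B], "opencl")
    # The 4-lane cast should now print as convert_float4(...) instead of
    # an invalid C-style vector cast such as ((float4)(...)).
    print(f.imported_modules[0].get_source())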
@@ -34,6 +34,8 @@ class CodeGenOpenCL final : public CodeGenC {
   // the address of load/store
   void PrintVecAddr(const Variable* buffer, Type t,
                     Expr base, std::ostream& os); // NOLINT(*)
+  std::string CastFromTo(std::string value, Type from, Type target); // NOLINT(*)
+
   // overload visitor
   void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*)
......
@@ -191,9 +191,6 @@ class HostDeviceSplitter : public IRMutator {
       auto it = handle_data_type_.find(v.get());
       if (it != handle_data_type_.end()) {
         n->handle_data_type.Set(v, it->second);
-      } else {
-        // int32 as a placeholder
-        n->handle_data_type.Set(v, make_const(UInt(32), 0));
       }
     }
   }
......
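This deletion works together with the void fallback added to CodeGenC::AddFunction in the first hunk: handles without a known element type used to be registered with a dummy placeholder (the comment says int32, though the code built a UInt(32) constant), which forced the generated signature to claim a concrete pointer type. With the fallback in place, unknown handles are simply left out of handle_data_type and are printed as void*.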
@@ -31,9 +31,14 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, out_dtype='float32'):
     Output : tvm.Tensor
         4-D with shape [batch, out_channel, out_height, out_width]
     """
+    out_dtype = Input.dtype
     batch, in_channel, in_height, in_width = Input.shape
     filter_channel, channel_multiplier, filter_height, filter_width = Filter.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
+
     pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
         padding, (filter_height, filter_width))
@@ -82,7 +87,10 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding):
     """
     batch, in_height, in_width, in_channel = Input.shape
     filter_height, filter_width, filter_channel, channel_multiplier = Filter.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
     pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
         padding, (filter_height, filter_width))
@@ -131,7 +139,10 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid
     batch, in_h, in_w, in_c = ishape
     _, out_h, out_w, out_c = oshape
     filter_h, filter_w, _, channel_multiplier = Filter.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
     dilated_out_grad = dilate(Out_grad, [1, stride_h, stride_w, 1], name='dilated_out_grad')
@@ -186,7 +197,10 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid
     batch, out_h, out_w, out_c = oshape
     filter_h, filter_w, _, channel_multiplier = fshape
     in_c = Input.shape[3].value
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
     pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (filter_h, filter_w))
......
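All four depthwise_conv2d declarations above now normalize stride the same way, so callers may pass either a single int or an explicit (stride_h, stride_w) pair. An illustrative usage sketch (the shapes are made up; assumes the TOPI API of this commit):

    import tvm
    import topi

    Input = tvm.placeholder((1, 32, 112, 112), name='Input')
    Filter = tvm.placeholder((32, 1, 3, 3), name='Filter')
    # Equivalent: an int stride is broadcast to both spatial axes.
    Out1 = topi.nn.depthwise_conv2d_nchw(Input, Filter, stride=2, padding='SAME')
    Out2 = topi.nn.depthwise_conv2d_nchw(Input, Filter, stride=(2, 2), padding='SAME')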
@@ -27,7 +27,11 @@ def depthwise_conv2d_python_nchw(input_np, filter_np, stride, padding):
     """
     batch, in_channel, in_height, in_width = input_np.shape
     _, channel_multiplier, filter_height, filter_width = filter_np.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
+
     # calculate output shape
     if padding == 'VALID':
         out_channel = in_channel * channel_multiplier
@@ -84,7 +88,11 @@ def depthwise_conv2d_python_nhwc(input_np, filter_np, stride, padding):
     """
     batch, in_height, in_width, in_channel = input_np.shape
     filter_height, filter_width, _, channel_multiplier = filter_np.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
+
    # calculate output shape
    if padding == 'VALID':
        out_channel = in_channel * channel_multiplier
......
@@ -7,18 +7,17 @@ from tvm.contrib.pickle_memoize import memoize
 from topi.cuda.depthwise_conv2d import schedule_depthwise_conv2d_nhwc

-def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding):
+def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_multiplier, filter_height, stride, padding):
     in_width = in_height
     filter_channel = in_channel
     filter_width = filter_height
-    stride_w = stride_h
     # placeholder
     Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input')
     Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
     Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale')
     Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift')
     # declare
-    DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, stride=[stride_h, stride_w], padding=padding)
+    DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, stride=stride, padding=padding)
     ScaleShift = topi.nn.scale_shift_nchw(DepthwiseConv2d, Scale, Shift)
     Relu = topi.nn.relu(ScaleShift)
@@ -56,7 +55,7 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu
     shift_np = np.random.uniform(size=shift_shape).astype(dtype)
     # correctness with scipy
     depthwise_conv2d_scipy = topi.testing.depthwise_conv2d_python_nchw(
-        input_np, filter_np, stride=[stride_h, stride_w], padding=padding)
+        input_np, filter_np, stride=stride, padding=padding)
     scale_shift_scipy = np.zeros(shape=scale_shift_shape)
     for c in range(in_channel * channel_multiplier):
         scale_shift_scipy[:,c,:,:] = depthwise_conv2d_scipy[:,c,:,:] * scale_np[c] + shift_np[c]
......
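Since the test helper now forwards stride verbatim to both the TOPI declaration and the scipy reference, it can exercise both calling conventions. Hypothetical invocations (the workload numbers below are illustrative, not taken from the test suite):

    # An int stride and a pair stride both hit the new normalization path.
    depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(4, 256, 64, 2, 5, (2, 2), "VALID")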