Commit 8d263e37 authored by Lianmin Zheng, committed by Tianqi Chen

[CODEGEN] fix vector conversion for opencl (#783)

* support more argument type in depthwise_conv2d

* mark all pointer as 'restrict' & fix vector conversion for opencl
parent 3ff2d958
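
Background for the fix: OpenCL C does not allow C-style casts between vector types. "(float4)(x)" is vector-literal syntax when x is a scalar, and a compile error when x is a vector, so element-wise conversions have to go through the convert_<type>() builtins. A minimal repro sketch, assuming an OpenCL-enabled TVM build of this era (schedule and names are illustrative, not taken from the commit):

    import tvm

    # A vectorized int32 -> float32 cast: the emitted OpenCL kernel must
    # use convert_float4(...) rather than a C-style (float4)(...) cast.
    n = 64
    A = tvm.placeholder((n,), dtype='int32', name='A')
    B = tvm.compute((n,), lambda i: A[i].astype('float32'), name='B')

    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=4)
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
    s[B].vectorize(xi)

    f = tvm.build(s, [A, B], 'opencl')
    print(f.imported_modules[0].get_source())  # inspect the generated kernel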
@@ -38,14 +38,17 @@ void CodeGenC::AddFunction(LoweredFunc f) {
     if (i != 0) stream << ", ";
     if (v.type().is_handle()) {
       auto it = alloc_storage_scope_.find(v.get());
-      if (it != alloc_storage_scope_.end()) {
+      if (it != alloc_storage_scope_.end())
         PrintStorageScope(it->second, stream);
-        stream << ' ';
-      }
-      if (handle_data_type_.count(v.get())) {
-        PrintType(handle_data_type_.at(v.get()), stream);
+      stream << ' ';
+
+      if (handle_data_type_.count(v.get())) {
+        PrintType(handle_data_type_.at(v.get()), stream);
+      } else {
+        stream << "void";
+      }
       stream << "*";
       if (f->is_restricted && restrict_keyword_.length() != 0) {
         stream << ' ' << restrict_keyword_;
       }
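
Net effect of the hunk above: a handle argument now always gets an element type ("void" when none was recorded), a "*", and the restrict qualifier when the target defines one. A toy Python mirror of the printing logic (hypothetical helper, not TVM code):

    def print_handle_arg(scope, elem_type, restrict_kw):
        # Storage scope (if any), element type with "void" fallback,
        # then "*" and the codegen's restrict keyword, if defined.
        out = (scope + ' ') if scope else ''
        out += (elem_type or 'void') + '*'
        if restrict_kw:
            out += ' ' + restrict_kw
        return out

    print(print_handle_arg('__global', 'float', 'restrict'))  # __global float* restrict
    print(print_handle_arg('', None, 'restrict'))             # void* restrict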
@@ -402,12 +405,9 @@ inline void PrintBinaryIntrinsitc(const Call* op,
   }
 }
 void CodeGenC::VisitExpr_(const Cast *op, std::ostream& os) { // NOLINT(*)
-  os << "(";
-  this->PrintType(op->type, os);
-  os << ")";
-  os << '(';
-  this->PrintExpr(op->value, os);
-  os << ')';
+  std::stringstream value;
+  this->PrintExpr(op->value, value);
+  os << CastFromTo(value.str(), op->value.type(), op->type);
 }
 void CodeGenC::VisitExpr_(const Variable *op, std::ostream& os) { // NOLINT(*)
   os << GetVarID(op);
......
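
The Cast visitor above now routes every cast through a virtual CastFromTo hook (declared in the next hunk), so a target backend only has to override the cast syntax, not the whole visitor. A Python sketch of the dispatch, assuming the base method keeps the C-style form the old visitor printed inline:

    class CodeGenC(object):
        def cast_from_to(self, value, from_type, target_type):
            # Base behavior (sketch): plain C-style cast.
            if from_type == target_type:
                return value
            return '((%s)(%s))' % (target_type, value)

        def visit_cast(self, value_str, from_type, target_type):
            # Subclasses such as CodeGenOpenCL override cast_from_to.
            return self.cast_from_to(value_str, from_type, target_type)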
@@ -142,6 +142,8 @@ class CodeGenC :
   // print store of single element.
   virtual void PrintVecElemStore(
       const std::string& vec, Type t, int i, const std::string& value);
+  // Get a cast type from to
+  virtual std::string CastFromTo(std::string value, Type from, Type target);
  protected:
   // Print reference to struct location
@@ -150,8 +152,6 @@ class CodeGenC :
   // print reference to a buffer as type t in index.
   std::string GetBufferRef(
       Type t, const Variable* buffer, Expr index);
-  // Get a cast type from to
-  std::string CastFromTo(std::string value, Type from, Type target);
   /*!
    * \brief If buffer is allocated as type t.
    * \param buf_var The buffer variable.
......
@@ -175,6 +175,22 @@ void CodeGenOpenCL::PrintStorageScope(
   }
 }
 
+std::string CodeGenOpenCL::CastFromTo(std::string value, Type from, Type target) {
+  if (from == target) return value;
+  std::ostringstream os;
+  if (target.lanes() == 1) {
+    os << "((";
+    this->PrintType(target, os);
+    os << ")" << value << ")";
+  } else {  // convert vector type
+    os << "(";
+    os << "convert_";
+    this->PrintType(target, os);
+    os << "(" << value << "))";
+  }
+  return os.str();
+}
+
 void CodeGenOpenCL::VisitExpr_(const Broadcast* op, std::ostream& os) { // NOLINT(*)
   std::string v = PrintExpr(op->value);
   os << "((";
......
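
The OpenCL override above switches on the lane count: scalar targets keep the C-style cast, vector targets use the convert_ builtins that OpenCL C requires. A string-level Python mirror of what gets emitted (illustrative only):

    def opencl_cast_from_to(value, from_type, target_type, lanes):
        # Mirrors CodeGenOpenCL::CastFromTo from the hunk above.
        if from_type == target_type:
            return value
        if lanes == 1:
            return '((%s)%s)' % (target_type, value)
        return '(convert_%s(%s))' % (target_type, value)

    print(opencl_cast_from_to('x', 'int', 'float', 1))    # ((float)x)
    print(opencl_cast_from_to('v', 'int4', 'float4', 4))  # (convert_float4(v))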
@@ -34,6 +34,8 @@ class CodeGenOpenCL final : public CodeGenC {
   // the address of load/store
   void PrintVecAddr(const Variable* buffer, Type t,
                     Expr base, std::ostream& os); // NOLINT(*)
+  std::string CastFromTo(std::string value, Type from, Type target); // NOLINT(*)
   // overload visitor
   void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*)
......
@@ -191,9 +191,6 @@ class HostDeviceSplitter : public IRMutator {
       auto it = handle_data_type_.find(v.get());
       if (it != handle_data_type_.end()) {
         n->handle_data_type.Set(v, it->second);
-      } else {
-        // int32 as a placeholder
-        n->handle_data_type.Set(v, make_const(UInt(32), 0));
       }
     }
   }
......
@@ -31,9 +31,14 @@ def depthwise_conv2d_nchw(Input, Filter, stride, padding, out_dtype='float32'):
     Output : tvm.Tensor
         4-D with shape [batch, out_channel, out_height, out_width]
     """
     out_dtype = Input.dtype
     batch, in_channel, in_height, in_width = Input.shape
     filter_channel, channel_multiplier, filter_height, filter_width = Filter.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
+
     pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
         padding, (filter_height, filter_width))
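
The same normalization is repeated at each depthwise_conv2d entry point below: stride may now be a single int or an (h, w) pair. As a standalone helper it would look like this (hypothetical; the commit inlines the if/else at every call site):

    def _normalize_stride(stride):
        # Accept a scalar stride or an explicit (h, w) pair.
        if isinstance(stride, int):
            return stride, stride
        stride_h, stride_w = stride
        return stride_h, stride_w

    assert _normalize_stride(2) == (2, 2)
    assert _normalize_stride((2, 1)) == (2, 1)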
@@ -82,7 +87,10 @@ def depthwise_conv2d_nhwc(Input, Filter, stride, padding):
     """
     batch, in_height, in_width, in_channel = Input.shape
     filter_height, filter_width, filter_channel, channel_multiplier = Filter.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
     pad_top, pad_left, pad_down, pad_right = get_pad_tuple(
         padding, (filter_height, filter_width))
@@ -131,7 +139,10 @@ def depthwise_conv2d_backward_input_nhwc(Filter, Out_grad, oshape, ishape, strid
     batch, in_h, in_w, in_c = ishape
     _, out_h, out_w, out_c = oshape
     filter_h, filter_w, _, channel_multiplier = Filter.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
     dilated_out_grad = dilate(Out_grad, [1, stride_h, stride_w, 1], name='dilated_out_grad')
@@ -186,7 +197,10 @@ def depthwise_conv2d_backward_weight_nhwc(Input, Out_grad, oshape, fshape, strid
     batch, out_h, out_w, out_c = oshape
     filter_h, filter_w, _, channel_multiplier = fshape
     in_c = Input.shape[3].value
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
     pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (filter_h, filter_w))
......
@@ -27,7 +27,11 @@ def depthwise_conv2d_python_nchw(input_np, filter_np, stride, padding):
     """
    batch, in_channel, in_height, in_width = input_np.shape
    _, channel_multiplier, filter_height, filter_width = filter_np.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
+
     # calculate output shape
     if padding == 'VALID':
         out_channel = in_channel * channel_multiplier
@@ -84,7 +88,11 @@ def depthwise_conv2d_python_nhwc(input_np, filter_np, stride, padding):
     """
     batch, in_height, in_width, in_channel = input_np.shape
     filter_height, filter_width, _, channel_multiplier = filter_np.shape
-    stride_h, stride_w = stride
+    if isinstance(stride, int):
+        stride_h = stride_w = stride
+    else:
+        stride_h, stride_w = stride
+
     # calculate output shape
     if padding == 'VALID':
         out_channel = in_channel * channel_multiplier
......
@@ -7,18 +7,17 @@ from tvm.contrib.pickle_memoize import memoize
 from topi.cuda.depthwise_conv2d import schedule_depthwise_conv2d_nhwc
 
-def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_multiplier, filter_height, stride_h, padding):
+def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_multiplier, filter_height, stride, padding):
     in_width = in_height
     filter_channel = in_channel
     filter_width = filter_height
-    stride_w = stride_h
     # placeholder
     Input = tvm.placeholder((batch, in_channel, in_height, in_width), name='Input')
     Filter = tvm.placeholder((filter_channel, channel_multiplier, filter_height, filter_width), name='Filter')
     Scale = tvm.placeholder((in_channel * channel_multiplier,), name='Scale')
     Shift = tvm.placeholder((in_channel * channel_multiplier,), name='Shift')
     # declare
-    DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, stride=[stride_h, stride_w], padding=padding)
+    DepthwiseConv2d = topi.nn.depthwise_conv2d_nchw(Input, Filter, stride=stride, padding=padding)
     ScaleShift = topi.nn.scale_shift_nchw(DepthwiseConv2d, Scale, Shift)
     Relu = topi.nn.relu(ScaleShift)
@@ -56,7 +55,7 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu
     shift_np = np.random.uniform(size=shift_shape).astype(dtype)
     # correctness with scipy
     depthwise_conv2d_scipy = topi.testing.depthwise_conv2d_python_nchw(
-        input_np, filter_np, stride=[stride_h, stride_w], padding=padding)
+        input_np, filter_np, stride=stride, padding=padding)
     scale_shift_scipy = np.zeros(shape=scale_shift_shape)
     for c in range(in_channel * channel_multiplier):
         scale_shift_scipy[:,c,:,:] = depthwise_conv2d_scipy[:,c,:,:] * scale_np[c] + shift_np[c]
......
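
With the workload helper taking stride directly, both call styles now exercise the topi argument handling (hypothetical invocations; the parameter values are illustrative, not from the test file):

    # stride as a bare int and as an explicit [h, w] pair should both work:
    depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, 1, "SAME")
    depthwise_conv2d_with_workload_nchw(1, 32, 112, 1, 3, [1, 1], "SAME")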