Commit 0d4870cc by Animesh Jain, committed by Zhi

[QNN] Convolution 2D Implementation. (#3580)

Rebasing. Empty commit.

Clang-format styling.
parent df7cc5db
@@ -211,6 +211,7 @@ This level supports dialect operators.
:nosignatures:
tvm.relay.qnn.op.requantize
tvm.relay.qnn.op.conv2d
Level 1 Definitions
@@ -357,3 +358,4 @@ Level 10 Definitions
Level 11 Definitions
--------------------
.. autofunction:: tvm.relay.qnn.op.requantize
.. autofunction:: tvm.relay.qnn.op.conv2d
@@ -125,6 +125,68 @@ struct QnnConcatenateAttrs : public tvm::AttrsNode<QnnConcatenateAttrs> {
}
}; // struct QnnConcatenateAttrs
/*! \brief Attribute for QNN Conv2d operator */
struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
// Traditional conv2d attributes.
Array<IndexExpr> strides;
Array<IndexExpr> padding;
Array<IndexExpr> dilation;
int groups;
IndexExpr channels;
Array<IndexExpr> kernel_size;
std::string data_layout;
std::string kernel_layout;
std::string out_layout;
DataType out_dtype;
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
TVM_DECLARE_ATTRS(QnnConv2DAttrs, "relay.attrs.QnnConv2DAttrs") {
TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
.describe("Specifies the strides of the convolution.");
TVM_ATTR_FIELD(padding).set_default(Array<IndexExpr>({0, 0}))
.describe("If padding is non-zero, then the input is implicitly zero-padded"
"on both sides for padding number of points");
TVM_ATTR_FIELD(dilation).set_default(Array<IndexExpr>({1, 1}))
.describe("Specifies the dilation rate to use for dilated convolution.");
TVM_ATTR_FIELD(groups).set_default(1)
.describe("Controls the connections between inputs and outputs."
"At groups=1, all inputs are convolved to all outputs."
"At groups=2, the operation becomes equivalent to having two convolution"
"layers side by side, each seeing half the input channels, and producing"
"half the output channels, and both subsequently concatenated.");
TVM_ATTR_FIELD(channels)
.describe("The number of output channels in the convolution."
" If it is not set, inferred by shape of the weight.")
.set_default(NullValue<IndexExpr>());
TVM_ATTR_FIELD(kernel_size)
.describe("Specifies the dimensions of the convolution window.")
.set_default(NullValue<Array<IndexExpr> >());
TVM_ATTR_FIELD(data_layout).set_default("NCHW")
.describe("Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
"'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
"dimensions respectively. Convolution is applied on the 'H' and"
"'W' dimensions.");
TVM_ATTR_FIELD(kernel_layout).set_default("OIHW")
.describe("Dimension ordering of weight. Can be 'OIHW', 'OIHW16o16i', etc."
"'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
"dimensions respectively.");
TVM_ATTR_FIELD(out_layout).set_default("")
.describe("Dimension ordering of output. Can be 'NCHW', 'NHWC', etc."
"'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
"dimensions respectively. Default to be same as input layout.");
TVM_ATTR_FIELD(out_dtype)
.set_default(NullValue<DataType>())
.describe("Output data type, set to explicit type under mixed precision setting");
TVM_ATTR_FIELD(input_zero_point)
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
}
};
} // namespace qnn
} // namespace relay
} // namespace tvm
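
For intuition, the two zero-point attributes declare that each quantized operand must be shifted before the convolution: the op computes conv2d(data - input_zero_point, kernel - kernel_zero_point) with int32 accumulation. A minimal NumPy sketch of that reference semantics (illustrative only, not the lowering in this patch):

# Reference semantics implied by input_zero_point / kernel_zero_point:
# shift both operands, then convolve with int32 accumulation.
# Single channel, stride 1, no padding; shapes are illustrative.
import numpy as np

def qnn_conv2d_ref(data, kernel, input_zero_point, kernel_zero_point):
    d = data.astype(np.int32) - input_zero_point     # (H, W)
    k = kernel.astype(np.int32) - kernel_zero_point  # (KH, KW)
    kh, kw = k.shape
    oh, ow = d.shape[0] - kh + 1, d.shape[1] - kw + 1
    out = np.zeros((oh, ow), dtype=np.int32)
    for i in range(oh):
        for j in range(ow):
            out[i, j] = np.sum(d[i:i+kh, j:j+kw] * k)
    return out
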
@@ -183,3 +183,83 @@ def concatenate(data,
output_scale,
output_zero_point,
axis)
def conv2d(data,
kernel,
input_zero_point,
kernel_zero_point,
strides=(1, 1),
padding=(0, 0),
dilation=(1, 1),
groups=1,
channels=None,
kernel_size=None,
data_layout="NCHW",
kernel_layout="OIHW",
out_layout="",
out_dtype="int32"):
r"""Quantized 2D convolution.
This operator convolves quantized data with quantized kernel. The scale of
the output quantized tensor is the product of the kernel_scale and
input_scale of the input quantized tensors. The zero point of the output
quantized tensor is 0. By default, the dtype of output is int32. Please also
refer to Requantize operator to understand how to scale back the int32
output to (u)int8.
Parameters
----------
data : tvm.relay.Expr
The input data to the operator.
kernel : tvm.relay.Expr
The kernel expression.
input_zero_point : int
The zero point of the data distribution.
kernel_zero_point : int
The zero point of the quantized kernel distribution.
strides : tuple of int, optional
The strides of convolution.
padding : tuple of int, optional
The padding applied to both sides of the input before convolution.
dilation : tuple of int, optional
Specifies the dilation rate to be used for dilated convolution.
groups : int, optional
Number of groups for grouped convolution.
channels : int, optional
Number of output channels of this convolution.
kernel_size : tuple of int, optional
The spatial dimensions of the convolution kernel.
data_layout : str, optional
Layout of the input.
kernel_layout : str, optional
Layout of the kernel.
out_layout : str, optional
Layout of the output. By default, out_layout is the same as data_layout.
out_dtype : str, optional
Specifies the output data type for mixed precision conv2d.
Returns
-------
result : tvm.relay.Expr
The computed result.
"""
return _make.conv2d(data, kernel,
input_zero_point, kernel_zero_point,
strides, padding, dilation,
groups, channels, kernel_size,
data_layout, kernel_layout, out_layout, out_dtype)
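
A usage sketch for the op defined above (not part of this patch; the shapes, dtypes, and zero points are made-up illustrative values):

# Build a small QNN graph with the new op and print its IR.
from tvm import relay

data = relay.var("data", shape=(1, 4, 16, 16), dtype="uint8")
kernel = relay.var("kernel", shape=(8, 4, 3, 3), dtype="uint8")

out = relay.qnn.op.conv2d(data, kernel,
                          input_zero_point=128,
                          kernel_zero_point=127,
                          channels=8,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          out_dtype="int32")
func = relay.Function([data, kernel], out)
print(func)

As the docstring notes, the int32 result would typically be fed through the requantize operator to scale it back to (u)int8.
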
@@ -415,6 +415,71 @@ static inline Expr Full(Expr fill_value,
return CallNode::make(op, {fill_value}, Attrs(attrs), {});
}
static inline Expr Conv2D(Expr data, Expr weight, Array<IndexExpr> strides,
Array<IndexExpr> padding, Array<IndexExpr> dilation, int groups,
IndexExpr channels, Array<IndexExpr> kernel_size, std::string data_layout,
std::string kernel_layout, std::string out_layout, DataType out_dtype) {
auto attrs = make_node<Conv2DAttrs>();
attrs->strides = std::move(strides);
attrs->padding = std::move(padding);
attrs->dilation = std::move(dilation);
attrs->groups = groups;
attrs->channels = std::move(channels);
attrs->kernel_size = std::move(kernel_size);
attrs->data_layout = std::move(data_layout);
attrs->kernel_layout = std::move(kernel_layout);
attrs->out_layout = std::move(out_layout);
attrs->out_dtype = std::move(out_dtype);
static const Op& op = Op::Get("nn.conv2d");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
static inline Expr Sum(Expr data, Array<Integer> axis, bool keepdims, bool exclude) {
auto attrs = make_node<ReduceAttrs>();
attrs->axis = std::move(axis);
attrs->keepdims = keepdims;
attrs->exclude = exclude;
static const Op& op = Op::Get("sum");
return CallNode::make(op, {data}, Attrs(attrs), {});
}
static inline Expr Reshape(Expr data, Array<Integer> newshape) {
auto attrs = make_node<ReshapeAttrs>();
attrs->newshape = std::move(newshape);
attrs->reverse = false;
static const Op& op = Op::Get("reshape");
return CallNode::make(op, {data}, Attrs(attrs), {});
}
static inline Expr AvgPool2D(Expr data, Array<IndexExpr> pool_size, Array<IndexExpr> strides,
Array<IndexExpr> padding, std::string layout, bool ceil_mode,
bool count_include_pad) {
auto attrs = make_node<AvgPool2DAttrs>();
attrs->pool_size = std::move(pool_size);
attrs->strides = std::move(strides);
attrs->padding = std::move(padding);
attrs->layout = std::move(layout);
attrs->ceil_mode = ceil_mode;
attrs->count_include_pad = count_include_pad;
static const Op& op = Op::Get("nn.avg_pool2d");
return CallNode::make(op, {data}, Attrs(attrs), {});
}
static inline Expr Pad(Expr data, Array<Array<IndexExpr>> pad_width, double pad_value) {
auto attrs = make_node<PadAttrs>();
attrs->pad_value = pad_value;
attrs->pad_width = std::move(pad_width);
static const Op& op = Op::Get("nn.pad");
return CallNode::make(op, {data}, Attrs(attrs), {});
}
static inline Expr Tile(Expr data, Array<Integer> reps) {
auto attrs = make_node<TileAttrs>();
attrs->reps = reps;
static const Op& op = Op::Get("tile");
return CallNode::make(op, {data}, Attrs(attrs), {});
}
Expr MakeConcatenate(Expr data, int axis);
Expr MakeStridedSlice(Expr data, Array<Integer> begin, Array<Integer> end, Array<Integer> strides);
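
These helpers give the QNN lowering the vocabulary to expand the zero-point-shifted convolution into plain Relay ops: multiplying out (data - input_zero_point) * (kernel - kernel_zero_point) yields four terms, and the cross terms are sliding-window sums that ops like sum, avg_pool2d, reshape, and tile can express. A NumPy check of that algebraic identity for a single receptive field (illustrative; the actual lowering builds the analogous Relay expression with the helpers above):

# For one receptive field with K kernel elements:
#   sum((a - za) * (w - zw))
#     = sum(a*w) - zw*sum(a) - za*sum(w) + za*zw*K
import numpy as np

rng = np.random.default_rng(0)
a = rng.integers(0, 256, size=9).astype(np.int32)  # flattened input patch
w = rng.integers(0, 256, size=9).astype(np.int32)  # flattened 3x3 kernel
za, zw = 128, 127                                  # zero points
K = w.size

lhs = np.sum((a - za) * (w - zw))
rhs = np.sum(a * w) - zw * np.sum(a) - za * np.sum(w) + za * zw * K
assert lhs == rhs
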
@@ -86,6 +86,12 @@ static inline Expr Requantize(const Expr& data, const Array<IndexExpr>& input_sh
return RequantizeLower(data, attrs.operator->(), input_shape, out_dtype);
}
/*! \brief Extracts the value of a constant integer Expr, checking that it is constant. */
static inline int64_t get_const_int(const tvm::Expr& x) {
auto* value_ptr = as_const_int(x);
CHECK(value_ptr) << "Expr is not a constant int";
return value_ptr[0];
}
} // namespace qnn
} // namespace relay
} // namespace tvm