Commit 43f54a58 by shoubhik, committed by Zhi

Qnn fully connected (#3910)

* Qnn Dense layer.

* Reformatting code.

* Reformatting code and making the test case more readable.

* Fixing lint issues.

* Fixing test method names to pass the nose-related configurations.

* Aligning the code for code style.
parent 16d4da4d
......@@ -74,10 +74,8 @@ struct QuantizeAttrs : public tvm::AttrsNode<QuantizeAttrs> {
TVM_DECLARE_ATTRS(QuantizeAttrs, "relay.attrs.QuantizeAttrs") {
TVM_ATTR_FIELD(out_dtype)
.describe("Output data type, can be one of [int8 or uint8].");
TVM_ATTR_FIELD(output_zero_point)
.describe("The zero_point for the activation of this op.");
TVM_ATTR_FIELD(output_scale)
.describe("The scale for the activation of this op.");
}
......@@ -91,7 +89,6 @@ struct DequantizeAttrs : public tvm::AttrsNode<DequantizeAttrs> {
TVM_DECLARE_ATTRS(DequantizeAttrs, "relay.attrs.DequantizeAttrs") {
TVM_ATTR_FIELD(input_zero_point)
.describe("The zero_point for the input tensor of this op.");
TVM_ATTR_FIELD(input_scale)
.describe("The scale for the input tensor of this op.");
}
......@@ -108,16 +105,12 @@ struct QnnConcatenateAttrs : public tvm::AttrsNode<QnnConcatenateAttrs> {
TVM_DECLARE_ATTRS(QnnConcatenateAttrs, "relay.attrs.QnnConcatenateAttrs") {
TVM_ATTR_FIELD(input_scales)
.describe("The list of scales of input quantized tensors.");
TVM_ATTR_FIELD(input_zero_points)
.describe("The list of zero points of input quantized tensors.");
TVM_ATTR_FIELD(output_zero_point)
.describe("The zero_point for the output tensor.");
TVM_ATTR_FIELD(output_scale)
.describe("The scale for the output tensor.");
TVM_ATTR_FIELD(axis)
.describe("The axis at which the input arrays are concatenated."
"Should lie in range `[-ndim, ndim)`.")
......@@ -199,24 +192,39 @@ struct QnnBinaryOpAttrs : public tvm::AttrsNode<QnnBinaryOpAttrs> {
TVM_DECLARE_ATTRS(QnnBinaryOpAttrs, "relay.attrs.QnnBinaryOpAttrs") {
TVM_ATTR_FIELD(lhs_zero_point)
.describe("The zero_point for the lhs input tensor of this op.");
TVM_ATTR_FIELD(lhs_scale)
.describe("The scale for the lhs input tensor of this op.");
TVM_ATTR_FIELD(rhs_zero_point)
.describe("The zero_point for the rhs input tensor of this op.");
TVM_ATTR_FIELD(rhs_scale)
.describe("The scale for the rhs input tensor of this op.");
TVM_ATTR_FIELD(output_zero_point)
.describe("The zero_point for the activation of this op.");
TVM_ATTR_FIELD(output_scale)
.describe("The scale for the activation of this op.");
}
};
/*! \brief Attributes for qnn dense operator */
struct QnnDenseAttrs : public tvm::AttrsNode<QnnDenseAttrs> {
IndexExpr units;
DataType out_dtype;
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
TVM_DECLARE_ATTRS(QnnDenseAttrs, "relay.attrs.qnn.QnnDenseAttrs") {
TVM_ATTR_FIELD(units)
.describe("Number of hidden units of the dense transformation.");
TVM_ATTR_FIELD(out_dtype)
.describe("Output data type, set to explicit type under mixed precision setting");
TVM_ATTR_FIELD(input_zero_point)
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
}
};
} // namespace qnn
} // namespace relay
} // namespace tvm
......
......@@ -96,7 +96,7 @@ def quantize(data,
The output zero_point.
output_scale : float
The output scale.
input_dtype : str, optional
out_dtype : str, optional
The data type of the output tensor. Can be [int8, uint8]
Returns
-------
......@@ -265,7 +265,13 @@ def conv2d(data,
data_layout, kernel_layout, out_layout, out_dtype)
def add(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale,
def add(lhs,
rhs,
lhs_scale,
lhs_zero_point,
rhs_scale,
rhs_zero_point,
output_scale,
output_zero_point):
"""Quantized addition with numpy-style broadcasting.
......@@ -305,3 +311,41 @@ def add(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_s
lhs_scale, lhs_zero_point,
rhs_scale, rhs_zero_point,
output_scale, output_zero_point)
def quantized_dense(data,
weight,
input_zero_point,
kernel_zero_point,
units=None,
out_dtype="int32"):
"""Qnn Dense operator.
Applies a quantized linear transformation
.. math::
`Y = X * W`
Parameters
----------
data : tvm.relay.Expr
The quantized input data to the operator.
weight : tvm.relay.Expr
The quantized weight expressions.
units : int, optional
Number of hidden units of the dense transformation.
out_dtype : str, optional
Specifies the output data type for mixed-precision dense; can be int32 or int16.
Returns
-------
result : tvm.relay.Expr
The computed result.
"""
return _make.dense(data,
weight,
units,
input_zero_point,
kernel_zero_point,
out_dtype)
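A minimal usage sketch (not part of the patch; shapes and zero points are illustrative) showing how the new operator is built and then lowered by the canonicalization pass:

from tvm import relay

data = relay.var("data", shape=(2, 10), dtype="uint8")
weight = relay.var("weight", shape=(3, 10), dtype="uint8")
# Build the qnn.dense call with the zero points of the quantized tensors.
dense = relay.qnn.op.quantized_dense(data, weight,
                                     input_zero_point=127,
                                     kernel_zero_point=127,
                                     units=3,
                                     out_dtype="int32")
func = relay.Function([data, weight], dense)
mod = relay.Module.from_expr(func)
# Lower the QNN op into core Relay ops (cast, subtract, nn.dense).
mod = relay.qnn.transform.CanonicalizeOps()(mod)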
......@@ -22,7 +22,7 @@ from tvm import relay
def CanonicalizeOps():
"""Converts/Lowers an expression containing QNN ops to an expression containing only core
(non-Dialect) Relay ops. Each QNN op is lowered to a sequence of exisiting Relay ops. This is a
(non-Dialect) Relay ops. Each QNN op is lowered to a sequence of existing Relay ops. This is a
target-independent pass. One can register the lowering/transformation function for this op using
FTVMQnnCanonicalize attr_name for FTVMLegalize op attribute. An example of this transformation
is below
......@@ -40,7 +40,7 @@ def CanonicalizeOps():
output_zero_point=0,
out_dtype='int8')
# We want to utilize all the existing Relay infrastucture. So, instead of supporting this
# We want to utilize all the existing Relay infrastructure. So, instead of supporting this
# QNN requantize op, we convert it into a sequence of existing Relay operators.
mod = relay.Module.from_expr(qnn_expr)
mod = relay.qnn.transform.CanonicalizeOps()(mod)
......
......@@ -25,6 +25,7 @@
#ifndef TVM_RELAY_OP_NN_CONVOLUTION_H_
#define TVM_RELAY_OP_NN_CONVOLUTION_H_
#include <tvm/ir_pass.h>
#include <string>
#include <utility>
......
......@@ -434,6 +434,17 @@ static inline Expr Conv2D(Expr data, Expr weight, Array<IndexExpr> strides,
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
static inline Expr Dense(Expr data,
Expr weight,
IndexExpr units,
DataType out_dtype) {
auto attrs = make_node<DenseAttrs>();
attrs->units = units;
attrs->out_dtype = out_dtype;
static const Op& op = Op::Get("nn.dense");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
static inline Expr Sum(Expr data, Array<Integer> axis, bool keepdims, bool exclude) {
auto attrs = make_node<ReduceAttrs>();
attrs->axis = std::move(axis);
......
......@@ -23,7 +23,6 @@
* \brief Property def of qnn convolution operator.
*/
#include <tvm/data_layout.h>
#include <tvm/ir_pass.h>
#include <tvm/relay/analysis.h>
#include <tvm/relay/base.h>
#include <tvm/relay/op.h>
......@@ -178,7 +177,7 @@ Expr Conv2DPadInput(const Expr& data, const QnnConv2DAttrs* param) {
* \param data The input expr.
* \param weight The weight expr.
* \param param The qnn conv2d attributes.
* \return The sequence of Relay operatos for term1.
* \return The sequence of Relay operators for term1.
* \note The term1 is
* Sigma(c,r,s) QW(k, c, r, s) * QA(n, c, h + r, w + s)
* This is just conv2d on int tensors.
......@@ -198,12 +197,12 @@ Expr Conv2DFirstTerm(const Expr& padded_data, const Expr& weight, const QnnConv2
* \param param The qnn conv2d attributes.
* \param kernel_h The height of kernel.
* \param kernel_w The width of kernel.
* \return The sequence of Relay operatos for term2.
* \return The sequence of Relay operators for term2.
* \note The term2 looks like this
*
* Sigma(c,r,s) zp_w * QA(n, c, h + r, w + s)
*
* Second term is not directly represetable by one Relay operator.
* Second term is not directly representable by one Relay operator.
* However, deeper analysis shows that we can reduce r,s using avg_pool2d,
* followed by a reduce on the C axis. Using avg_pool2d also gives an
* opportunity to reuse alter_op_layout infrastructure.
......@@ -313,7 +312,7 @@ Expr Conv2DThirdTerm(const Expr& weight, const Expr& zp_data, const QnnConv2DAtt
* \param in_channels The number of input channels.
* \param kernel_h The height of kernel.
* \param kernel_w The width of kernel.
* \return The sequence of Relay operatos for term4.
* \return The sequence of Relay operators for term4.
* \note The term4 looks like this
*
* Sigma(c,r,s) zp_a * zp_w
......@@ -373,7 +372,7 @@ Expr Conv2DCombineTerms(const Expr& term1, const Expr& term2, const Expr& term3,
* where QA is quantized tensor, scale_a and zp_A are quantizations
* params.
*
* Quantized convlution convolves two quantized tensors and returns a
* Quantized convolution will convolve two quantized tensors and return a
* quantized tensor of default dtype of int32, with scale equaling to the
* product of scales of input tensors, and a zero point of zero.
*
......@@ -399,7 +398,7 @@ Expr Conv2DCombineTerms(const Expr& term1, const Expr& term2, const Expr& term3,
* zero point. This might leave some performance opportunity at the
* table. Can be avoided by modifying conv2d API to accept the
* pad_const_value.
* 2) Second term is not directly represetable by one Relay operator.
* 2) Second term is not directly representable by one Relay operator.
* However, deeper analysis shows that we can reduce r,s using
* avg_pool2d, followed by a reduce on the C axis. Using avg_pool2d also
* gives an opportunity to reuse alter_op_layout infrastructure.
......@@ -408,7 +407,7 @@ Expr Conv2DCombineTerms(const Expr& term1, const Expr& term2, const Expr& term3,
* the conv is dilated. We fallback also in case of depthwise conv.
*
* The whole process can be broken down into following steps
* * Assertion checks for exisiting support, fallback if necessary
* * Assertion checks for existing support, fallback if necessary
* * Pad the input.
* * Get Term1.
* * Get Term2.
......
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* Copyright (c) 2019 by Contributors
* \file src/relay/qnn/op/dense.cc
* \brief Property def of qnn dense operator.
*/
#include <tvm/relay/base.h>
#include <tvm/relay/op.h>
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/qnn/attrs.h>
#include "../../op/nn/nn.h"
#include "../../pass/pattern_util.h"
namespace tvm {
namespace relay {
namespace qnn {
// relay.op.qnn.dense
TVM_REGISTER_NODE_TYPE(QnnDenseAttrs);
bool QnnDenseRel(const Array<Type>& types,
int num_inputs,
const Attrs& attrs,
const TypeReporter& reporter) {
CHECK_EQ(types.size(), 3);
const auto* data = types[0].as<TensorTypeNode>();
const auto* weight = types[1].as<TensorTypeNode>();
if (data == nullptr || weight == nullptr) return false;
const auto* param = attrs.as<QnnDenseAttrs>();
CHECK(param != nullptr) << "QnnDenseAttrs cannot be nullptr.";
CHECK(data->dtype == Int(8) || data->dtype == UInt(8))
<< "Expected quantized dense type(int8, uint8) for input but was " << data->dtype;
CHECK(weight->dtype == Int(8) || weight->dtype == UInt(8))
<< "Expected quantized dense type(int8, uint8) for weight but was " << weight->dtype;
CHECK(param->out_dtype == Int(32))
<< "Expected quantized dense type(int32) for output but was " << param->out_dtype;
CHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0.";
return DenseRel<QnnDenseAttrs>(types, num_inputs, attrs, reporter);
}
// Positional relay function to create quantized dense operator used by frontend FFI.
Expr MakeQuantizedDense(Expr data,
Expr weight,
IndexExpr units,
int32_t input_zero_point,
int32_t kernel_zero_point,
DataType out_dtype) {
auto attrs = make_node<QnnDenseAttrs>();
attrs->units = std::move(units);
attrs->out_dtype = out_dtype;
attrs->input_zero_point = input_zero_point;
attrs->kernel_zero_point = kernel_zero_point;
static const Op& op = Op::Get("qnn.dense");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
/**
* \brief Lowers Qnn dense in terms of core operators in relay.
* Mathematically it is equal to -
* Dense((quantized_input - input_zero_point;int32), (quantized_kernel - kernel_zero_point; int32))
*
* \param attrs QnnDenseAttrs for Qnn Dense layer.
* \param new_args The new mutated args to the call node.
* \param arg_types The data types of input and output.
* \return The sequence of Relay ops for the qnn dense op.
*/
Expr QnnDenseCanonicalize(const Attrs& attrs,
const Array<Expr>& new_args,
const Array<tvm::relay::Type>& arg_types) {
CHECK_EQ(new_args.size(), 2);
Expr quantized_data = new_args[0];
Expr quantized_kernel = new_args[1];
const auto* qnn_dense_attrs = attrs.as<QnnDenseAttrs>();
Expr quantized_data_int32 = Cast(quantized_data, Int(32));
if (qnn_dense_attrs->input_zero_point != 0) {
quantized_data_int32 = Subtract(quantized_data_int32,
MakeConstantScalar(Int(32),
qnn_dense_attrs->input_zero_point));
}
Expr quantized_kernel_int32 = Cast(quantized_kernel, Int(32));
if (qnn_dense_attrs->kernel_zero_point != 0) {
quantized_kernel_int32 = Subtract(quantized_kernel_int32,
MakeConstantScalar(Int(32),
qnn_dense_attrs->kernel_zero_point));
}
Expr int32_dense = Dense(quantized_data_int32,
quantized_kernel_int32,
qnn_dense_attrs->units,
qnn_dense_attrs->out_dtype);
return int32_dense;
}
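The lowering above relies on the algebraic identity that dense on zero-point-shifted int32 tensors equals the four-term expansion discussed in the conv2d lowering (term1..term4). A small NumPy sketch, with made-up values and not part of the patch, checks that identity:

import numpy as np

q_x = np.array([[1, 3, 5, 7]], dtype=np.int32)       # quantized data, shape (1, 4)
q_w = np.array([[2, 4, 6, 8],
                [1, 1, 1, 1]], dtype=np.int32)        # quantized kernel, shape (units, 4)
zp_x, zp_w = -1, 3                                    # input/kernel zero points

# Form produced by QnnDenseCanonicalize: subtract zero points, then int32 dense.
lowered = (q_x - zp_x) @ (q_w - zp_w).T

# Equivalent four-term expansion (cf. the term1..term4 notes in conv2d.cc).
k = q_x.shape[1]
expanded = (q_x @ q_w.T
            - zp_w * q_x.sum(axis=1, keepdims=True)
            - zp_x * q_w.sum(axis=1, keepdims=True).T
            + k * zp_x * zp_w)

assert np.array_equal(lowered, expanded)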
RELAY_REGISTER_OP("qnn.dense")
.describe(R"code(Applies a linear transformation: :math:`Y = XW^T`.
- **data**: quantized(int8, uint8) `(x1, x2, ..., xn, input_dim)`
- **weight**: quantized(int8, uint8) `(units, input_dim)`
- **out**: quantized(int32) `(x1, x2, ..., xn, units)`.
)code" TVM_ADD_FILELINE)
.set_attrs_type_key("relay.attrs.qnn.QnnDenseAttrs")
.set_num_inputs(2)
.add_argument("data", "quantized nD Tensor", "Input data.")
.add_argument("weight", "quantized 2D Tensor", "Weight matrix.")
.set_support_level(11)
.add_type_rel("QDense", DenseRel<QnnDenseAttrs>)
.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnDenseCanonicalize);
TVM_REGISTER_API("relay.qnn.op._make.dense")
.set_body_typed(MakeQuantizedDense);
} // namespace qnn
} // namespace relay
} // namespace tvm
......@@ -19,7 +19,6 @@ import tvm
import numpy as np
from tvm import relay
from tvm.relay import transform
from tvm.relay.testing import create_workload
from tvm.relay.testing import run_infer_type
from tvm.contrib import graph_runtime
......
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import tvm
import numpy as np
from tvm import relay
from tvm.contrib import graph_runtime
def make_requantize_params(input_scale, output_scale, output_zero_point, out_dtype):
config = {
'input_scale': input_scale,
'output_scale': output_scale,
'output_zero_point': output_zero_point,
'out_dtype': out_dtype
}
return config
def make_configuration(quantized_data,
quantized_kernel,
dtype,
input_shape,
kernel_shape,
input_zero_point,
kernel_zero_point,
units,
output,
out_dtype='int32',
bias=None,
requantize=None):
if requantize is not None:
assert bias is not None
config = {
'quantized_data': quantized_data,
'quantized_kernel': quantized_kernel,
'dtype': dtype,
'input_shape': input_shape,
'kernel_shape': kernel_shape,
'input_zero_point': input_zero_point,
'kernel_zero_point': kernel_zero_point,
'units': units,
'output': output,
'out_dtype': out_dtype,
'bias': bias,
'requantize': requantize
}
return config
def make_uint_configuration(use_bias=False, requantize_output=False):
input_shape, kernel_shape, output_shape = (2, 10), (3,10), (2, 3)
input_zero_point, kernel_zero_point = 127, 127
in_dtype = 'uint8'
out_dtype = 'int32' if not requantize_output else 'uint8'
units = 3
quantized_data_np = np.array([129, 131, 133, 135, 137, 139, 141, 143, 109, 107,
129, 131, 133, 135, 137, 139, 141, 111, 145, 107]) \
.astype(in_dtype) \
.reshape(input_shape)
quantized_kernel_np = np.array([129, 131, 133, 135, 137, 139, 141, 143, 145, 147,
129, 131, 133, 135, 137, 139, 141, 143, 145, 147,
129, 131, 133, 135, 137, 139, 141, 143, 145, 147]) \
.astype(in_dtype) \
.reshape(kernel_shape)
bias = np.array([4, 8, 12]).astype(out_dtype).reshape((units, )) if use_bias else None
requant_params = make_requantize_params(0.25, 1.0, 127, 'uint8') if requantize_output else None
if requantize_output:
assert use_bias
output = np.array([151, 152, 153, 185, 186, 187])
elif use_bias:
output = np.array([96, 100, 104, 232, 236, 240 ])
else:
output = np.array([92, 92, 92, 228, 228, 228 ])
output = output.astype(out_dtype).reshape(output_shape)
return make_configuration(quantized_data=quantized_data_np,
quantized_kernel=quantized_kernel_np,
dtype=in_dtype,
input_shape=input_shape,
kernel_shape=kernel_shape,
input_zero_point=input_zero_point,
kernel_zero_point=kernel_zero_point,
units=units,
output=output,
bias=bias,
requantize=requant_params)
def make_int_configuration(use_bias=False, requantize_output=False):
input_shape, kernel_shape, output_shape = (2, 10), (3,10), (2, 3)
input_zero_point, kernel_zero_point = -1, -1
in_dtype = 'int8'
out_dtype = 'int32' if not requantize_output else 'int8'
units = 3
quantized_data_np = np.array([1, 3, 5, 7, 9, 11, 13, 15, -19, -21,
1, 3, 5, 7, 9, 11, 13, -17, 17, -21]) \
.astype(in_dtype) \
.reshape(input_shape)
quantized_kernel_np = np.array([1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
1, 3, 5, 7, 9, 11, 13, 15, 17, 19,
1, 3, 5, 7, 9, 11, 13, 15, 17, 19]) \
.astype(in_dtype) \
.reshape(kernel_shape)
bias = np.array([4, 8, 12]).astype(out_dtype).reshape((units, )) if use_bias else None
requant_params = make_requantize_params(0.25, 1.0, -1, 'int8') if requantize_output else None
if requantize_output:
assert use_bias
output = np.array([23, 24, 25, 57, 58, 59])
elif use_bias:
output = np.array([96, 100, 104, 232, 236, 240 ])
else:
output = np.array([92, 92, 92, 228, 228, 228 ])
output = output.astype(out_dtype).reshape(output_shape)
return make_configuration(quantized_data=quantized_data_np,
quantized_kernel=quantized_kernel_np,
dtype=in_dtype,
input_shape=input_shape,
kernel_shape=kernel_shape,
input_zero_point=input_zero_point,
kernel_zero_point=kernel_zero_point,
units=units,
output=output,
bias=bias,
requantize=requant_params)
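The hard-coded expected outputs above follow directly from the same lowering: shift both tensors by their zero points and take an ordinary int32 matrix product. A quick NumPy check (not part of the test) for the int8 configuration without bias:

import numpy as np

data = np.array([[1, 3, 5, 7, 9, 11, 13, 15, -19, -21],
                 [1, 3, 5, 7, 9, 11, 13, -17, 17, -21]], dtype=np.int32)
kernel = np.array([[1, 3, 5, 7, 9, 11, 13, 15, 17, 19]] * 3, dtype=np.int32)
zp = -1  # both input_zero_point and kernel_zero_point in this configuration

out = (data - zp) @ (kernel - zp).T
print(out)
# [[ 92  92  92]
#  [228 228 228]]  -> matches the 'output' array for the no-bias case;
#                     adding the bias [4, 8, 12] gives [96, 100, 104] and [232, 236, 240].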
def qnn_dense_driver(test_configuration):
in_dtype = test_configuration['dtype']
out_dtype = test_configuration['out_dtype']
quantized_data_name = "quantized_data"
quantized_kernel_name = "quantized_kernel"
expected_out_dtype = test_configuration['out_dtype']
bias_name = 'bias'
quantized_data = relay.var(quantized_data_name,
shape=test_configuration['input_shape'],
dtype=in_dtype)
quantized_kernel = relay.var(quantized_kernel_name,
shape=test_configuration['kernel_shape'],
dtype=in_dtype)
mod = relay.qnn.op.quantized_dense(
quantized_data,
quantized_kernel,
test_configuration['input_zero_point'],
test_configuration['kernel_zero_point'],
test_configuration['units'])
if test_configuration[bias_name] is not None:
bias = relay.var(bias_name,
shape=test_configuration['bias'].shape,
dtype=out_dtype)
mod = relay.nn.bias_add(mod, bias)
if test_configuration['requantize'] is not None:
requantize_config = test_configuration['requantize']
mod = relay.qnn.op.requantize(
mod,
input_scale=requantize_config['input_scale'],
input_zero_point=0,
output_scale=requantize_config['output_scale'],
output_zero_point=requantize_config['output_zero_point'],
out_dtype=requantize_config['out_dtype'])
expected_out_dtype = requantize_config['out_dtype']
mod = relay.Function(relay.analysis.free_vars(mod), mod)
mod = relay.Module.from_expr(mod)
mod = relay.qnn.transform.CanonicalizeOps()(mod)
with relay.build_config(opt_level=2):
graph, lib, params = relay.build(mod, "llvm", params=None)
mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
mod.set_input(quantized_data_name, test_configuration[quantized_data_name])
mod.set_input(quantized_kernel_name, test_configuration[quantized_kernel_name])
if test_configuration[bias_name] is not None:
mod.set_input(bias_name, test_configuration[bias_name])
mod.set_input(**params)
mod.run()
res = mod.get_output(0).asnumpy()
np.testing.assert_equal(res, test_configuration['output'])
assert res.dtype == expected_out_dtype
def test_qnn_dense_without_bias():
uint32_output_without_bias_params = \
make_uint_configuration(use_bias=False)
int32_output_without_bias_params = \
make_int_configuration(use_bias=False)
qnn_dense_driver(uint32_output_without_bias_params)
qnn_dense_driver(int32_output_without_bias_params)
def test_qnn_dense_with_bias():
uint32_output_with_bias_params = \
make_uint_configuration(use_bias=True)
int32_output_with_bias_params = \
make_int_configuration(use_bias=True)
qnn_dense_driver(uint32_output_with_bias_params)
qnn_dense_driver(int32_output_with_bias_params)
def test_qnn_dense_with_requantized_output():
uint8_requantized_output_with_bias_params = \
make_uint_configuration(use_bias=True, requantize_output=True)
int8_requantized_output_with_bias_params = \
make_int_configuration(use_bias=True, requantize_output=True)
qnn_dense_driver(uint8_requantized_output_with_bias_params)
qnn_dense_driver(int8_requantized_output_with_bias_params)
if __name__ == "__main__":
test_qnn_dense_without_bias()
test_qnn_dense_with_bias()
test_qnn_dense_with_requantized_output()