Unverified Commit 7d263c31 by shoubhik Committed by GitHub

Mxnet parser for Qnn dialect (#4714)

* - Additional util methods needed for mxnet frontend for qnn dialect.

* - Fixing call to quantize.

* [QNN] MxNet-MKLDNN parser support for QNN

* [QNN] Relax conv check.

* - Merge from origin

* [QNN] Channel wise changes

* [QNN] Dense changes

* Dense fix for QNN ops.

* - Removed non-mkl code from utils.

- Small refactoring

- Remove "with_sum" from conv

- Simplified code

* - Fixing ring buffer name.

* - Fixing pylint issues.

* - Fixing lint
- Removing redundant commented code.

* - Adding test cases
- Removing unused methods.

* [WIP] end to end test case for mxnet qnn parser

* Changes to parse large CV models.

* Pylint issues.

* Fix Conv2D with sum and quantized pooling.

* Reverting the changes made for the mxnet-mkldnn test cases because, due to #4753, mxnet could not be updated to mxnet-mkldnn.

Co-authored-by: Animesh Jain <anijain@umich.edu>
parent 3e7bd703
...@@ -25,6 +25,10 @@ from __future__ import absolute_import ...@@ -25,6 +25,10 @@ from __future__ import absolute_import
from .mxnet import from_mxnet from .mxnet import from_mxnet
from .mxnet_qnn_op_utils import dequantize_mxnet_min_max from .mxnet_qnn_op_utils import dequantize_mxnet_min_max
from .mxnet_qnn_op_utils import quantize_mxnet_min_max
from .mxnet_qnn_op_utils import get_mkldnn_int8_scale
from .mxnet_qnn_op_utils import get_mkldnn_uint8_scale
from .mxnet_qnn_op_utils import quantize_conv_bias_mkldnn_from_var
from .keras import from_keras from .keras import from_keras
from .onnx import from_onnx from .onnx import from_onnx
from .tflite import from_tflite from .tflite import from_tflite
......
...@@ -988,4 +988,4 @@ if __name__ == '__main__': ...@@ -988,4 +988,4 @@ if __name__ == '__main__':
test_forward_one_hot() test_forward_one_hot()
test_forward_convolution() test_forward_convolution()
test_forward_deconvolution() test_forward_deconvolution()
test_forward_cond() test_forward_cond()
\ No newline at end of file
...@@ -21,21 +21,20 @@ from tvm import relay ...@@ -21,21 +21,20 @@ from tvm import relay
from tvm.contrib import graph_runtime from tvm.contrib import graph_runtime
def test_mxnet_dequantize_op(): def test_mkldnn_dequantize():
def quantize_test_driver(in_dtype, quant_args, in_data, verify_output_data): def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
shape = in_data.shape shape = in_data.shape
input_data = relay.var("input_data", shape=shape, dtype=in_dtype) input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
min_range = quant_args['min_range'] min_range = quant_args['min_range']
max_range = quant_args['max_range'] max_range = quant_args['max_range']
quantized_output = \ dequantized_output = \
relay.frontend.dequantize_mxnet_min_max(input_data, relay.frontend.dequantize_mxnet_min_max(input_data,
min_range=min_range, min_range=min_range,
max_range=max_range, max_range=max_range,
in_dtype=in_dtype) in_dtype=in_dtype)
mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output) mod = relay.Function(relay.analysis.free_vars(dequantized_output), dequantized_output)
mod = relay.Module.from_expr(mod) mod = relay.Module.from_expr(mod)
mod = relay.qnn.transform.CanonicalizeOps()(mod)
with relay.build_config(opt_level=3): with relay.build_config(opt_level=3):
graph, lib, params = relay.build(mod, "llvm", params=None) graph, lib, params = relay.build(mod, "llvm", params=None)
rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
...@@ -43,56 +42,55 @@ def test_mxnet_dequantize_op(): ...@@ -43,56 +42,55 @@ def test_mxnet_dequantize_op():
rt_mod.set_input(**params) rt_mod.set_input(**params)
rt_mod.run() rt_mod.run()
res = rt_mod.get_output(0).asnumpy() res = rt_mod.get_output(0).asnumpy()
assert np.allclose(res, verify_output_data, ) assert np.allclose(res, verify_output_data)
assert res.dtype == np.float32 assert res.dtype == np.float32
def test_uint8_to_float32(): def test_uint8_to_float32():
data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \ data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \
.astype('uint8') \ .astype('uint8') \
.reshape((2, 5)) .reshape((2, 5))
output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \ output = np.array([0., 0.25048923, 0.50097847, 0.7514677, 1.0019569, 62.8728, 63.123287,
63.373775, 63.624268, 63.874756]) \
.astype('float32') \ .astype('float32') \
.reshape((2, 5)) .reshape((2, 5))
quant_args = {"min_range": -63.5, "max_range": 64} quant_args = {"min_range": -63.5, "max_range": 64}
quantize_test_driver(in_dtype='uint8', dequantize_test_driver(in_dtype='uint8',
quant_args=quant_args, quant_args=quant_args,
in_data=data, in_data=data,
verify_output_data=output) verify_output_data=output)
def test_int8_to_float32(): def test_int8_to_float32():
data = np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127]) \ data = np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127]) \
.astype('int8') \ .astype('int8') \
.reshape((2, 5)) .reshape((2, 5))
output = np.array([-63.496063, -62.992126, -62.48819, -61.984253, -61.480316, output = np.array([-63.247063, -62.745102, -62.24314, -61.74118, -61.23922,
61.984253, 62.48819, 62.992126, 63.496063, 64.]) \ 61.74118, 62.24314, 62.745102, 63.247063, 63.749023]) \
.astype('float32') \ .astype('float32') \
.reshape((2, 5)) .reshape((2, 5))
quant_args = {"min_range": -63.5, "max_range": 64} dequantize_args = {"min_range": -63.5, "max_range": 64}
quantize_test_driver(in_dtype='int8', dequantize_test_driver(in_dtype='int8',
quant_args=quant_args, quant_args=dequantize_args,
in_data=data, in_data=data,
verify_output_data=output) verify_output_data=output)
test_uint8_to_float32() test_uint8_to_float32()
test_int8_to_float32() test_int8_to_float32()
def test_mkldnn_dequantize_op(): def test_mkldnn_quantize():
def quantize_test_driver(in_dtype, quant_args, in_data, verify_output_data): def quantize_test_driver(out_dtype, quant_args, in_data, verify_output_data):
shape = in_data.shape shape = in_data.shape
input_data = relay.var("input_data", shape=shape, dtype=in_dtype) input_data = relay.var("input_data", shape=shape, dtype='float32')
min_range = quant_args['min_range'] min_range = quant_args['min_range']
max_range = quant_args['max_range'] max_range = quant_args['max_range']
quantized_output = \ quantized_output, _, _ = \
relay.frontend.dequantize_mxnet_min_max(input_data, relay.frontend.quantize_mxnet_min_max(input_data,
min_range=min_range, min_range=min_range,
max_range=max_range, max_range=max_range,
in_dtype=in_dtype, out_dtype=out_dtype)
use_mkldnn=True)
mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output) mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output)
mod = relay.Module.from_expr(mod) mod = relay.Module.from_expr(mod)
mod = relay.qnn.transform.CanonicalizeOps()(mod)
with relay.build_config(opt_level=3): with relay.build_config(opt_level=3):
graph, lib, params = relay.build(mod, "llvm", params=None) graph, lib, params = relay.build(mod, "llvm", params=None)
rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
...@@ -100,43 +98,76 @@ def test_mkldnn_dequantize_op(): ...@@ -100,43 +98,76 @@ def test_mkldnn_dequantize_op():
rt_mod.set_input(**params) rt_mod.set_input(**params)
rt_mod.run() rt_mod.run()
res = rt_mod.get_output(0).asnumpy() res = rt_mod.get_output(0).asnumpy()
# print(res) assert np.allclose(res, verify_output_data)
# np.testing.assert_equal(res, verify_output_data) assert res.dtype == verify_output_data.dtype
assert np.allclose(res, verify_output_data, )
assert res.dtype == np.float32
def test_uint8_to_float32(): def test_float32_to_uint8():
data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \ data = np.array([0., 0.25048923, 0.50097847, 0.7514677, 1.0019569, 62.8728, 63.123287,
.astype('uint8') \ 63.373775, 63.624268, 63.874756]) \
.reshape((2, 5))
output = np.array([0., 0.2509804, 0.5019608, 0.75294125, 1.0039216,
62.996082, 63.247063, 63.498043, 63.749023, 64.]) \
.astype('float32') \ .astype('float32') \
.reshape((2, 5)) .reshape((2, 5))
output = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \
.astype('uint8') \
.reshape((2, 5))
quant_args = {"min_range": -63.5, "max_range": 64} quant_args = {"min_range": -63.5, "max_range": 64}
quantize_test_driver(in_dtype='uint8', quantize_test_driver(out_dtype='uint8',
quant_args=quant_args, quant_args=quant_args,
in_data=data, in_data=data,
verify_output_data=output) verify_output_data=output)
def test_int8_to_float32(): def test_float32_to_int8():
data = np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127]) \ data = np.array([-63.247063, -62.745102, -62.24314, -61.74118, -61.23922,
.astype('int8') \ 61.74118, 62.24314, 62.745102, 63.247063, 63.749023]) \
.reshape((2, 5))
output = np.array([-63.496063, -62.992126, -62.48819, -61.984253, -61.480316,
61.984253, 62.48819, 62.992126, 63.496063, 64.]) \
.astype('float32') \ .astype('float32') \
.reshape((2, 5)) .reshape((2, 5))
output = np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127]) \
.astype('int8') \
.reshape((2, 5))
quant_args = {"min_range": -63.5, "max_range": 64} quant_args = {"min_range": -63.5, "max_range": 64}
quantize_test_driver(in_dtype='int8', quantize_test_driver(out_dtype='int8',
quant_args=quant_args, quant_args=quant_args,
in_data=data, in_data=data,
verify_output_data=output) verify_output_data=output)
test_uint8_to_float32() test_float32_to_uint8()
test_int8_to_float32() test_float32_to_int8()
def test_get_mkldnn_int8_scale():
    """Check the int8 scale returned for a symmetric min/max range.

    Calls ``relay.frontend.get_mkldnn_int8_scale`` with the range
    [-3.904039, 3.904039] and compares the result against a hard-coded
    expected value using ``np.allclose``.
    """
    bounds = {"range_min": -3.904039, "range_max": 3.904039}
    reference_scale = 0.03061991354976495
    scale = relay.frontend.get_mkldnn_int8_scale(**bounds)
    assert np.allclose(scale, reference_scale)
def test_get_mkldnn_uint8_scale():
    """Check the uint8 scale returned for a non-negative min/max range.

    Calls ``relay.frontend.get_mkldnn_uint8_scale`` with the range
    [0.0, 55.77269] and compares the result against a hard-coded
    expected value using ``np.allclose``.
    """
    bounds = {"range_min": 0.0, "range_max": 55.77269}
    reference_scale = 0.21828841189047482
    scale = relay.frontend.get_mkldnn_uint8_scale(**bounds)
    assert np.allclose(scale, reference_scale)
def test_quantize_conv_bias_mkldnn_from_var():
    """Check the expression built when quantizing a conv bias variable.

    Passes a float32 relay variable of shape (3,) and a three-element scale
    array to ``relay.frontend.quantize_conv_bias_mkldnn_from_var`` and
    verifies that the result is a ``qnn.quantize`` call with ``axis == 0``,
    ``out_dtype == 'int32'``, and the scale array as the data of its second
    argument.
    """
    scale_nd = tvm.nd.array(np.array([0.5, 0.6, 0.7]))
    bias = relay.var('bias', shape=(3,), dtype='float32')
    quantized = relay.frontend.quantize_conv_bias_mkldnn_from_var(bias, scale_nd)

    # The helper must return a relay call node, not a constant or a var.
    assert isinstance(quantized, tvm.relay.expr.Call)

    quantize_attrs = quantized.attrs
    assert quantize_attrs.axis == 0
    assert quantize_attrs.out_dtype == 'int32'
    assert quantized.op.name == 'qnn.quantize'

    # args[1] is expected to wrap the very scale array handed in above.
    scale_arg = quantized.args[1]
    assert scale_arg.data == bias_scale_matches(scale_nd) if False else scale_arg.data == scale_nd
if __name__ == "__main__": if __name__ == "__main__":
test_mxnet_dequantize_op() test_mkldnn_dequantize()
test_mkldnn_dequantize_op() test_mkldnn_quantize()
test_get_mkldnn_int8_scale()
test_get_mkldnn_uint8_scale()
test_quantize_conv_bias_mkldnn_from_var()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment