Mxnet parser for Qnn dialect (#4714)

* - Additional util methods needed for mxnet frontend for qnn dialect.
* - Fixing call to quantize.
* [QNN] MxNet-MKLDNN parser support for QNN
* [QNN] Relax conv check.
* - Merge from origin
* [QNN] Channel wise changes
* [QNN] Dense changes
* Dense fix for QNN ops.
* - Removed non-mkl code from utils.
  - Small refactoring
  - Remove "with_sum" from conv
  - Simplified code
* - Fixing ring buffer name.
* - Fixing pylint issues.
* - Fixing lint
  - Removing redundant commented code.
* - Adding test cases
  - Removing unused methods.
* [WIP] end to end test case for mxnet qnn parser
* Changes to parse large CV models.
* Pylint issues.
* Fix Conv2D with sum and quantized pooling.
* Reverting the changes made for mxnet-mkldnn test cases. Because of #4753, mxnet could not be updated to mxnet-mkldnn.

Co-authored-by: Animesh Jain <anijain@umich.edu>

Mxnet parser for Qnn dialect (#4714)
* - Additional util methods needed for mxnet frontend for qnn dialect.
* - Fixing call to quantize.
* [QNN] MxNet-MKLDNN parser support for QNN
* [QNN] Relax conv check.
* - Merge from origin
* [QNN] Channel wise changes
* [QNN] Dense changes
* Dense fix for QNN ops.
* - Removed non-mkl code from utils.
  - Small refactoring
  - Remove "with_sum" from conv
  - Simplified code
* - Fixing ring buffer name.
* - Fixing pylint issues.
* - Fixing lint
  - Removing redundant commented code.
* - Adding test cases
  - Removing unused methods.
* [WIP] end to end test case for mxnet qnn parser
* Changes to parse large CV models.
* Pylint issues.
* Fix Conv2D with sum and quantized pooling.
* Reverting the changes made for mxnet-mkldnn test cases. Because of #4753, mxnet could not be updated to mxnet-mkldnn.

Co-authored-by: Animesh Jain <anijain@umich.edu>
7d263c31 · shoubhik · GitHub · 3e7bd703 · 7d263c31 · 7d263c31
Unverified commit 7d263c31, authored Feb 05, 2020 by shoubhik; committed by GitHub, Feb 05, 2020
5 changed files
--- a/python/tvm/relay/frontend/__init__.py
+++ b/python/tvm/relay/frontend/__init__.py
@@ -25,6 +25,10 @@ from __future__ import absolute_import

 from .mxnet import from_mxnet
 from .mxnet_qnn_op_utils import dequantize_mxnet_min_max
+from .mxnet_qnn_op_utils import quantize_mxnet_min_max
+from .mxnet_qnn_op_utils import get_mkldnn_int8_scale
+from .mxnet_qnn_op_utils import get_mkldnn_uint8_scale
+from .mxnet_qnn_op_utils import quantize_conv_bias_mkldnn_from_var
 from .keras import from_keras
 from .onnx import from_onnx
 from .tflite import from_tflite

--- a/python/tvm/relay/frontend/mxnet.py
+++ b/python/tvm/relay/frontend/mxnet.py
--- a/python/tvm/relay/frontend/mxnet_qnn_op_utils.py
+++ b/python/tvm/relay/frontend/mxnet_qnn_op_utils.py
--- a/tests/python/frontend/mxnet/test_forward.py
+++ b/tests/python/frontend/mxnet/test_forward.py
@@ -988,4 +988,4 @@ if __name__ == '__main__':
    test_forward_one_hot()
    test_forward_convolution()
    test_forward_deconvolution()
-    test_forward_cond()
+    test_forward_cond()
\ No newline at end of file
--- a/tests/python/frontend/mxnet/test_qnn_ops_utils.py
+++ b/tests/python/frontend/mxnet/test_qnn_ops_utils.py
@@ -21,21 +21,20 @@ from tvm import relay
 from tvm.contrib import graph_runtime


-def test_mxnet_dequantize_op():
+def test_mkldnn_dequantize():

-    def quantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
+    def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
        shape = in_data.shape
        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
        min_range = quant_args['min_range']
        max_range = quant_args['max_range']
-        quantized_output = \
+        dequantized_output = \
            relay.frontend.dequantize_mxnet_min_max(input_data,
                                                    min_range=min_range,
                                                    max_range=max_range,
                                                    in_dtype=in_dtype)
-        mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output)
+        mod = relay.Function(relay.analysis.free_vars(dequantized_output), dequantized_output)
        mod = relay.Module.from_expr(mod)
-        mod = relay.qnn.transform.CanonicalizeOps()(mod)
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(mod, "llvm", params=None)
            rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
@@ -43,56 +42,55 @@ def test_mxnet_dequantize_op():
            rt_mod.set_input(**params)
            rt_mod.run()
            res = rt_mod.get_output(0).asnumpy()
-            assert np.allclose(res, verify_output_data, )
+            assert np.allclose(res, verify_output_data)
            assert res.dtype == np.float32

    def test_uint8_to_float32():
        data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \
            .astype('uint8') \
            .reshape((2, 5))
-        output = np.array([-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64]) \
+        output = np.array([0., 0.25048923, 0.50097847, 0.7514677, 1.0019569, 62.8728, 63.123287,
+                           63.373775, 63.624268, 63.874756]) \
            .astype('float32') \
            .reshape((2, 5))
        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(in_dtype='uint8',
-                             quant_args=quant_args,
-                             in_data=data,
-                             verify_output_data=output)
+        dequantize_test_driver(in_dtype='uint8',
+                               quant_args=quant_args,
+                               in_data=data,
+                               verify_output_data=output)

    def test_int8_to_float32():
        data = np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127]) \
            .astype('int8') \
            .reshape((2, 5))
-        output = np.array([-63.496063, -62.992126, -62.48819, -61.984253, -61.480316,
-                           61.984253, 62.48819, 62.992126, 63.496063, 64.]) \
+        output = np.array([-63.247063, -62.745102, -62.24314, -61.74118, -61.23922,
+                           61.74118, 62.24314, 62.745102, 63.247063, 63.749023]) \
            .astype('float32') \
            .reshape((2, 5))
-        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(in_dtype='int8',
-                             quant_args=quant_args,
-                             in_data=data,
-                             verify_output_data=output)
+        dequantize_args = {"min_range": -63.5, "max_range": 64}
+        dequantize_test_driver(in_dtype='int8',
+                               quant_args=dequantize_args,
+                               in_data=data,
+                               verify_output_data=output)

    test_uint8_to_float32()
    test_int8_to_float32()


-def test_mkldnn_dequantize_op():
+def test_mkldnn_quantize():

-    def quantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
+    def quantize_test_driver(out_dtype, quant_args, in_data, verify_output_data):
        shape = in_data.shape
-        input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
+        input_data = relay.var("input_data", shape=shape, dtype='float32')
        min_range = quant_args['min_range']
        max_range = quant_args['max_range']
-        quantized_output = \
-            relay.frontend.dequantize_mxnet_min_max(input_data,
-                                                    min_range=min_range,
-                                                    max_range=max_range,
-                                                    in_dtype=in_dtype,
-                                                    use_mkldnn=True)
+        quantized_output, _, _ = \
+            relay.frontend.quantize_mxnet_min_max(input_data,
+                                                  min_range=min_range,
+                                                  max_range=max_range,
+                                                  out_dtype=out_dtype)
        mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output)
        mod = relay.Module.from_expr(mod)
-        mod = relay.qnn.transform.CanonicalizeOps()(mod)
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(mod, "llvm", params=None)
            rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0))
@@ -100,43 +98,76 @@ def test_mkldnn_dequantize_op():
            rt_mod.set_input(**params)
            rt_mod.run()
            res = rt_mod.get_output(0).asnumpy()
-            # print(res)
-            # np.testing.assert_equal(res, verify_output_data)
-            assert np.allclose(res, verify_output_data, )
-            assert res.dtype == np.float32
+            assert np.allclose(res, verify_output_data)
+            assert res.dtype == verify_output_data.dtype

-    def test_uint8_to_float32():
-        data = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \
-            .astype('uint8') \
-            .reshape((2, 5))
-        output = np.array([0., 0.2509804, 0.5019608, 0.75294125, 1.0039216,
-                           62.996082, 63.247063, 63.498043, 63.749023, 64.]) \
+    def test_float32_to_uint8():
+        data = np.array([0., 0.25048923, 0.50097847, 0.7514677, 1.0019569, 62.8728, 63.123287,
+                         63.373775, 63.624268, 63.874756]) \
            .astype('float32') \
            .reshape((2, 5))
+        output = np.array([0, 1, 2, 3, 4, 251, 252, 253, 254, 255]) \
+            .astype('uint8') \
+            .reshape((2, 5))
+
        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(in_dtype='uint8',
+        quantize_test_driver(out_dtype='uint8',
                             quant_args=quant_args,
                             in_data=data,
                             verify_output_data=output)

-    def test_int8_to_float32():
-        data = np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127]) \
-            .astype('int8') \
-            .reshape((2, 5))
-        output = np.array([-63.496063, -62.992126, -62.48819, -61.984253, -61.480316,
-                           61.984253, 62.48819, 62.992126, 63.496063, 64.]) \
+    def test_float32_to_int8():
+        data = np.array([-63.247063, -62.745102, -62.24314, -61.74118, -61.23922,
+                         61.74118, 62.24314, 62.745102, 63.247063, 63.749023]) \
            .astype('float32') \
            .reshape((2, 5))
+        output = np.array([-126, -125, -124, -123, -122, 123, 124, 125, 126, 127]) \
+            .astype('int8') \
+            .reshape((2, 5))
+
        quant_args = {"min_range": -63.5, "max_range": 64}
-        quantize_test_driver(in_dtype='int8',
+        quantize_test_driver(out_dtype='int8',
                             quant_args=quant_args,
                             in_data=data,
                             verify_output_data=output)

-    test_uint8_to_float32()
-    test_int8_to_float32()
+    test_float32_to_uint8()
+    test_float32_to_int8()
+
+
+def test_get_mkldnn_int8_scale():
+    range_min = -3.904039
+    range_max = 3.904039
+    expected = 0.03061991354976495
+    output = relay.frontend.get_mkldnn_int8_scale(range_max=range_max,
+                                                  range_min=range_min)
+    assert np.allclose(output, expected)
+
+
+def test_get_mkldnn_uint8_scale():
+    range_min = 0.0
+    range_max = 55.77269
+    expected = 0.21828841189047482
+    output = relay.frontend.get_mkldnn_uint8_scale(range_max=range_max,
+                                                   range_min=range_min)
+    assert np.allclose(output, expected)
+
+
+def test_quantize_conv_bias_mkldnn_from_var():
+    bias_var = relay.var('bias', shape=(3,), dtype='float32')
+    bias_scale = tvm.nd.array(np.array([0.5, 0.6, 0.7]))
+    output = relay.frontend.quantize_conv_bias_mkldnn_from_var(bias_var, bias_scale)
+    assert isinstance(output, tvm.relay.expr.Call)
+    attrs = output.attrs
+    assert attrs.axis == 0
+    assert attrs.out_dtype == 'int32'
+    assert output.op.name == 'qnn.quantize'
+    assert output.args[1].data == bias_scale


 if __name__ == "__main__":
-    test_mxnet_dequantize_op()
-    test_mkldnn_dequantize_op()
+    test_mkldnn_dequantize()
+    test_mkldnn_quantize()
+    test_get_mkldnn_int8_scale()
+    test_get_mkldnn_uint8_scale()
+    test_quantize_conv_bias_mkldnn_from_var()
\ No newline at end of file