[QNN] Legalization for Intel x86 QNN Conv2D (#3896)

* QNNLegalize for conv2d * [QNN] Legalization for Intel x86 QNN Conv2D

[QNN] Legalization for Intel x86 QNN Conv2D (#3896)
* QNNLegalize for conv2d * [QNN] Legalization for Intel x86 QNN Conv2D
26eaea4a · Animesh Jain · Zhi · 9e4f07b4 · 26eaea4a · 26eaea4a
Commit 26eaea4a authored Sep 16, 2019 by Animesh Jain Committed by Zhi Sep 16, 2019
Showing with 195 additions and 0 deletions

python/tvm/relay/qnn/op/__init__.py
+2 -0

python/tvm/relay/qnn/op/legalizations.py
+140 -0

python/tvm/relay/qnn/op/op_attrs.py
+24 -0

tests/python/relay/test_pass_qnn_legalize.py
+29 -0

No files found.
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -19,3 +19,5 @@
 from __future__ import absolute_import as _abs
 from .qnn import *
 from .op import register_qnn_legalize
+from . import legalizations
+from . import op_attrs
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""Backend QNN related feature registration"""
+from __future__ import absolute_import
+
+import tvm
+from tvm import relay
+from .. import op as reg
+
+# Registering QNN Conv2D legalization function.
+@reg.register_qnn_legalize("qnn.conv2d")
+def legalize_qnn_conv2d(attrs, inputs, types):
+    """Legalizes QNN conv2d op by delegating to the qnn_conv2d_legalize generic function.
+
+    Parameters
+    ----------
+    attrs : tvm.attrs.Attrs
+        Attributes of current convolution
+    inputs : list of tvm.relay.Expr
+        The args of the Relay expr to be legalized
+    types : list of types
+        List of input and output types
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The legalized expr, or None when the selected implementation does not transform the op
+    """
+    return qnn_conv2d_legalize(attrs, inputs, types)
+
+# Generic QNN Conv2D legalization function.
+@tvm.target.generic_func
+def qnn_conv2d_legalize(attrs, inputs, types):
+    """Default legalization is None; target-specific versions are registered on this func."""
+    return None
+
+# Intel x86 QNN Conv2D legalization function.
+@qnn_conv2d_legalize.register('cpu')
+def _qnn_conv2d_legalize(attrs, inputs, types):
+    """Legalizes QNN conv2d op. VNNI supports u8 x i8 fast conv/MM. If the dtypes are already good,
+    we don't transform. Else, we shift the tensor values and zero points to change the dtype.
+
+    Converting from int8 to uint8 can be done in following manner.
+
+    Original equation
+      scale * (QA - zp_a)
+      scale * (QA + 128 - 128 - zp_a)
+      scale * ( (QA + 128) - (zp_a + 128))
+
+    Replacing QA + 128 with QA' and (zp_a + 128) with zp_a'
+    We get our new quantized uint8 tensor - scale * (QA' - zp_a')
+
+    Similarly we can convert from uint8 to int8 (subtracting 128 instead of adding it).
+
+    Parameters
+    ----------
+    attrs : tvm.attrs.Attrs
+        Attributes of current convolution
+    inputs : list of tvm.relay.Expr
+        The args of the Relay expr to be legalized
+    types : list of types
+        List of input and output types
+
+    Returns
+    -------
+    result : tvm.relay.Expr
+        The legalized expr, or None if the target is unsupported or dtypes are already u8 x i8
+    """
+
+    def _shift(data, out_dtype):
+        """Shifts (adds/subtracts) the qnn tensor by +/-128, going through int32, then casts."""
+        if out_dtype == 'uint8':
+            shift = 128
+        elif out_dtype == 'int8':
+            shift = -128
+        else:
+            raise ValueError("Unsupport out dtype.")
+        data_modified = relay.cast(data, 'int32')
+        data_modified = relay.add(data_modified, relay.const(shift, 'int32'))
+        data_modified = relay.cast(data_modified, out_dtype)
+        return data_modified
+
+    def _is_int8_hw_support(target):
+        """
+        Checks to ensure that we can use Intel DLBoost instructions - the target options must
+        contain a supported arch (only -mcpu=skylake-avx512 for now). Returns the matching set.
+        """
+        supported_arches = {'-mcpu=skylake-avx512',}
+        return supported_arches.intersection(set(target.options))
+
+    # Collect the dtypes.
+    data_dtype = types[0].dtype
+    kernel_dtype = types[1].dtype
+
+    # Collect the input exprs.
+    data, kernel = inputs
+
+    # The VNNI transformations are applicable only on Skylake and above.
+    target = tvm.target.current_target(allow_none=False)
+    if not _is_int8_hw_support(target):
+        return None
+
+    # VNNI supports u8 x i8 fast conv/MM. Don't do anything if it is already satisfied.
+    if data_dtype == 'uint8' and kernel_dtype == 'int8':
+        return None
+
+    # Shift input if necessary.
+    input_zp = attrs['input_zero_point']
+    if data_dtype == 'int8':
+        # Compute (QA + 128) and (zp_a + 128)
+        data = _shift(data, 'uint8')
+        input_zp = input_zp + 128
+
+    # Shift kernel if necessary.
+    kernel_zp = attrs['kernel_zero_point']
+    if kernel_dtype == 'uint8':
+        # Compute (QA - 128) and (zp_a - 128)
+        kernel = _shift(kernel, 'int8')
+        kernel_zp = kernel_zp - 128
+
+    # Call qnn.conv2d with modified inputs and zero points; all other attrs are copied as-is.
+    new_attrs = {k : attrs[k] for k in attrs.keys()}
+    new_attrs['input_zero_point'] = input_zp
+    new_attrs['kernel_zero_point'] = kernel_zp
+    return relay.qnn.op.conv2d(data, kernel, **new_attrs)
--- a/python/tvm/relay/qnn/op/op_attrs.py
+++ b/python/tvm/relay/qnn/op/op_attrs.py
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""The attributes node used for QNN operators"""
+
+from ....attrs import Attrs
+from ...base import register_relay_attr_node
+
+@register_relay_attr_node
+class QnnConv2DAttrs(Attrs):
+    """Attributes node for the qnn.conv2d operator"""
--- a/tests/python/relay/test_pass_qnn_legalize.py
+++ b/tests/python/relay/test_pass_qnn_legalize.py
@@ -82,5 +82,34 @@ def test_qnn_legalize():
    b = run_opt_pass(expected(), transform.InferType())
    assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)

+def test_qnn_legalize_qnn_conv2d():
+    data_shape = (1, 64, 256, 256)
+    kernel_shape = (128, 64, 3, 3)
+    for dtype in ['uint8', 'int8']:  # same dtype for data and kernel, so never u8 x i8
+        data_dtype =  kernel_dtype = dtype
+        data = relay.var("data", shape=data_shape,
+                dtype=data_dtype)
+        kernel = relay.var("kernel", shape=kernel_shape,
+                dtype=kernel_dtype)
+        func = relay.qnn.op.conv2d(
+                data, kernel,
+                input_zero_point=1,
+                kernel_zero_point=1,
+                kernel_size=(3, 3),
+                strides=(1, 1),
+                dilation=(1, 1),
+                out_dtype='int32',
+                data_layout='NCHW',
+                kernel_layout='OIHW')
+
+        mod = relay.Function(relay.analysis.free_vars(func), func)
+        mod = relay.Module.from_expr(mod)
+
+        with tvm.target.create('llvm -mcpu=skylake-avx512'):  # arch the x86 legalization accepts
+            mod = relay.qnn.transform.Legalize()(mod)
+
+        assert 'cast' in mod.astext()  # legalization must have inserted the dtype casts
+
 if __name__ == "__main__":
    test_qnn_legalize()
+    test_qnn_legalize_qnn_conv2d()