# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#pylint: disable=invalid-name
"""QNN dialect operators."""

from __future__ import absolute_import as _abs
from tvm.relay.expr import Tuple
from . import _make

def requantize(data,
               input_scale,
               input_zero_point,
               output_scale,
               output_zero_point,
               axis=-1,
               rounding="UPWARD",
               out_dtype="int8"):
    r"""Requantized operator.

    Converts one quantized tensor representation into another. Given the
    scale and zero point of both representations, the output is computed as

    Q_output = zp_output +  (scale_input)/(scale_output) * (Q_input - zp_input)

    Parameters
    ----------
    data : tvm.relay.Expr
        The input data to the operator.

    input_scale: tvm.relay.Expr
        The quantization scale for the input tensor.

    input_zero_point: tvm.relay.Expr
        The zero point of the input tensor.

    output_scale: tvm.relay.Expr
        The quantization scale for the output tensor.

    output_zero_point: tvm.relay.Expr
        The zero point of the output tensor.

    axis : int
        The channel axis for quantization. Default value is -1 which corresponds to the last axis.

    rounding : string, optional
        Defines the rounding direction when the value is midway between two
        representable values.

    out_dtype : str, optional
        Specifies the output data type.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # Thin wrapper: all validation and lowering happen in the C++ op.
    return _make.requantize(data,
                            input_scale, input_zero_point,
                            output_scale, output_zero_point,
                            axis, rounding, out_dtype)


def quantize(data,
             output_scale,
             output_zero_point,
             axis=-1,
             out_dtype='int8'):
    r""" Quantize op
    This operator takes float32 as input and produces quantized int8 or uint8 as output.
    The input tensor can be of any shape. The output shape is the same as input shape.

    Q_output = clamp((round(input_tensor/output_scale) + output_zero_point),
                     out_dtype::min,
                     out_dtype::max)

    Parameters
    ----------
    data : tvm.relay.Expr
        The input tensor to be quantized. Can be of type float32.

    output_scale : tvm.relay.Expr
        The output scale.

    output_zero_point : tvm.relay.Expr
        The output zero_point.

    axis : int
        The channel axis for quantization. Default value is -1 which corresponds to the last axis.

    out_dtype : str, optional
        The data type of the output tensor. Can be [int8, uint8]

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.quantize(data,
                          output_scale,
                          output_zero_point,
                          axis,
                          out_dtype)


def dequantize(data,
               input_scale,
               input_zero_point):
    r""" Dequantize op
    This operator takes quantized int8 and uint8 as input and produces
    dequantized float32 as output. The output shape is the same as input shape. The input
    tensor can be of any shape.

    Parameters
    ----------
    data : tvm.relay.Expr
        The input tensor to be dequantized. Can be of type [int8, uint8].

    input_scale : tvm.relay.Expr
        The input scale.

    input_zero_point : tvm.relay.Expr
        The input zero_point.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.dequantize(data,
                            input_scale,
                            input_zero_point)


def concatenate(data,
                input_scales,
                input_zero_points,
                output_scale,
                output_zero_point,
                axis):
    """Concatenate the quantized input tensors along the given axis.

    Parameters
    ----------
    data : Union(List[relay.Expr], Tuple[relay.Expr])
        The list of quantized tensors.

    input_scales : List[relay.Expr]
        The list of scales of input quantized tensors.

    input_zero_points : List[relay.Expr]
        The list of zero points of input quantized tensors.

    output_scale : relay.Expr
        The scale of the output quantized tensor.

    output_zero_point : relay.Expr
        The zero point of the output quantized tensor.

    axis : int
        The axis along which the tensors are concatenated.

    Returns
    -------
    result: relay.Expr
        The concatenated quantized tensor.
    """
    # Materialize the inputs so emptiness can be checked and they can be
    # wrapped in relay Tuples below.
    tensors = list(data)
    if not tensors:
        raise ValueError("relay.concatenate requires data to be non-empty.")
    if not isinstance(axis, int):
        raise ValueError("For now, we only support integer axis")
    scales = list(input_scales)
    zero_points = list(input_zero_points)

    return _make.concatenate(Tuple(tensors),
                             Tuple(scales),
                             Tuple(zero_points),
                             output_scale,
                             output_zero_point,
                             axis)


def conv2d(data,
           kernel,
           input_zero_point,
           kernel_zero_point,
           input_scale,
           kernel_scale,
           kernel_size,
           strides=(1, 1),
           padding=(0, 0),
           dilation=(1, 1),
           groups=1,
           channels=None,
           data_layout="NCHW",
           kernel_layout="OIHW",
           out_layout="",
           out_dtype="int32"):
    r"""Quantized 2D convolution.

    Convolves quantized data with a quantized kernel. The scale of the
    output quantized tensor is the product of kernel_scale and input_scale.
    The zero point of the output quantized tensor is 0. By default the output
    dtype is int32. See the Requantize operator for scaling the int32 output
    back to (u)int8.

    Parameters
    ----------
    data : tvm.relay.Expr
        The input data to the operator.

    kernel : tvm.relay.Expr
        The kernel expressions.

    input_zero_point: tvm.relay.Expr
           The zero point of the data distribution.

    kernel_zero_point: tvm.relay.Expr
           The zero point of the quantized_kernel distribution.

    input_scale: tvm.relay.Expr
           The scale for the input tensor. The scale for the input tensor is
           stored purely for convenience here. See more commentary below.

    kernel_scale: tvm.relay.Expr
           The scale for the weight tensor. The scale for the weight tensor is
           stored for access to this during relay. This information is not
           needed in the pass pipeline after qnn.conv2d is lowered to the
           sequence of steps as in nn.conv2d. See also input_scale in Requantize.

    kernel_size : tuple of int
        The spatial width and height of the convolution kernel.

    strides : tuple of int, optional
        The strides of convolution.

    padding : tuple of int, optional
        The padding of convolution on both sides of inputs before convolution.

    dilation : tuple of int, optional
        Specifies the dilation rate to be used for dilated convolution.

    groups : int, optional
        Number of groups for grouped convolution.

    channels : int, optional
        Number of output channels of this convolution.

    data_layout : str, optional
        Layout of the input.

    kernel_layout : str, optional
        Layout of the kernel.

    out_layout : str, optional
        Layout of the output, by default, out_layout is the same as data_layout

    out_dtype : str, optional
        Specifies the output data type for mixed precision conv2d.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """
    # NOTE: the C++ maker takes kernel_size after groups/channels, unlike
    # the Python signature — keep this argument order intact.
    return _make.conv2d(data,
                        kernel,
                        input_zero_point,
                        kernel_zero_point,
                        input_scale,
                        kernel_scale,
                        strides,
                        padding,
                        dilation,
                        groups,
                        channels,
                        kernel_size,
                        data_layout,
                        kernel_layout,
                        out_layout,
                        out_dtype)


def add(lhs,
        rhs,
        lhs_scale,
        lhs_zero_point,
        rhs_scale,
        rhs_zero_point,
        output_scale,
        output_zero_point):
    """Quantized addition with numpy-style broadcasting.

    Parameters
    ----------
    lhs : relay.Expr
        The left hand side quantized input data.

    rhs : relay.Expr
        The right hand side quantized input data.

    lhs_scale: relay.Expr
        The scale of the lhs quantized expr.

    lhs_zero_point: relay.Expr
       The zero point of lhs quantized expr.

    rhs_scale: relay.Expr
        The scale of the rhs quantized expr.

    rhs_zero_point: relay.Expr
       The zero point of rhs quantized expr.

    output_scale: relay.Expr
        The scale of the output quantized expr.

    output_zero_point: relay.Expr
       The zero point of output quantized expr.

    Returns
    -------
    result : relay.Expr
        The computed result.

    """
    return _make.add(lhs, rhs,
                     lhs_scale, lhs_zero_point,
                     rhs_scale, rhs_zero_point,
                     output_scale, output_zero_point)


def dense(data,
          weight,
          input_zero_point,
          kernel_zero_point,
          input_scale,
          kernel_scale,
          units=None,
          out_dtype="int32"):
    """Qnn Dense operator.
    Applies a quantized linear transformation

     .. math::

     `Y = X * W`

    Parameters
    ----------
    data : tvm.relay.Expr
        The quantized input data to the operator.
    weight : tvm.relay.Expr
        The quantized weight expressions.
    input_zero_point: tvm.relay.Expr
        The input zero point.
    kernel_zero_point: tvm.relay.Expr
        The kernel zero point.
    input_scale: tvm.relay.Expr
        The scale for the input tensor.
    kernel_scale: tvm.relay.Expr
        The scale for the weight tensor. The scale for the weight tensor is
        stored for access to this during relay. This information is not
        needed in the pass pipeline after qnn.dense is lowered to the
        sequence of steps as in nn.dense. See also input_scale in Requantize.
    units : int, optional
        Number of hidden units of the dense transformation.
    out_dtype : str, optional
        Specifies the output data type for mixed precision dense can be int32 or int16.

    Returns
    -------
    result : tvm.relay.Expr
        The computed result.
    """

    return _make.dense(data,
                       weight,
                       input_zero_point,
                       kernel_zero_point,
                       input_scale,
                       kernel_scale,
                       units,
                       out_dtype)


def mul(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point,
        output_scale, output_zero_point):
    """Quantized multiplication with numpy-style broadcasting.

    Parameters
    ----------
    lhs : relay.Expr
        The left hand side quantized input data.

    rhs : relay.Expr
        The right hand side quantized input data.

    lhs_scale: relay.Expr
        The scale of the lhs quantized expr.

    lhs_zero_point: relay.Expr
       The zero point of lhs quantized expr.

    rhs_scale: relay.Expr
        The scale of the rhs quantized expr.

    rhs_zero_point: relay.Expr
       The zero point of rhs quantized expr.

    output_scale: relay.Expr
        The scale of the output quantized expr.

    output_zero_point: relay.Expr
       The zero point of output quantized expr.

    Returns
    -------
    result : relay.Expr
        The computed result.

    """
    # Delegate straight to the C++ maker; argument order mirrors qnn.add.
    return _make.mul(lhs,
                     rhs,
                     lhs_scale,
                     lhs_zero_point,
                     rhs_scale,
                     rhs_zero_point,
                     output_scale,
                     output_zero_point)