/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 *  Copyright (c) 2019 by Contributors
 * \file src/relay/qnn/op/dense.cc
 * \brief Property def of qnn dense operator.
 */

#include <tvm/relay/base.h>
#include <tvm/relay/op.h>
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/qnn/attrs.h>

#include <cstdint>
#include <limits>

#include "../../op/nn/nn.h"
#include "../../pass/pattern_util.h"
#include "../util.h"

namespace tvm {
namespace relay {
namespace qnn {

// relay.op.qnn.dense
// Register QnnDenseAttrs with TVM's node system so the attrs can be
// created/inspected through reflection and the FFI.
TVM_REGISTER_NODE_TYPE(QnnDenseAttrs);

bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
shoubhik committed
42 43 44 45 46 47
                 const TypeReporter& reporter) {
  CHECK_EQ(types.size(), 3);
  const auto* data = types[0].as<TensorTypeNode>();
  const auto* weight = types[1].as<TensorTypeNode>();
  if (data == nullptr || weight == nullptr) return false;
  const auto* param = attrs.as<QnnDenseAttrs>();
48
  CHECK(param != nullptr) << "QnnDenseAttrs cannot be nullptr.";
shoubhik committed
49
  CHECK(data->dtype == Int(8) || data->dtype == UInt(8))
50
      << "Expected quantized dense type(int8, uint8) for input but was " << data->dtype;
shoubhik committed
51
  CHECK(weight->dtype == Int(8) || weight->dtype == UInt(8))
52
      << "Expected quantized dense type(int8, uint8) for weight but was " << weight->dtype;
shoubhik committed
53
  CHECK(param->out_dtype == Int(32))
54
      << "Expected quantized dense type(int32) for output but was " << param->out_dtype;
shoubhik committed
55 56 57 58 59
  CHECK(param->out_dtype.bits() > 0) << "Output dtype bits should be greater than 0.";
  return DenseRel<QnnDenseAttrs>(types, num_inputs, attrs, reporter);
}

// Positional relay function to create quantized dense operator used by frontend FFI.
60 61
Expr MakeQuantizedDense(Expr data, Expr weight, IndexExpr units, int32_t input_zero_point,
                        int32_t kernel_zero_point, DataType out_dtype) {
shoubhik committed
62 63 64 65 66 67 68 69 70
  auto attrs = make_node<QnnDenseAttrs>();
  attrs->units = std::move(units);
  attrs->out_dtype = out_dtype;
  attrs->input_zero_point = input_zero_point;
  attrs->kernel_zero_point = kernel_zero_point;
  static const Op& op = Op::Get("qnn.dense");
  return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}

// Term1: Sigma(k) QA(m, k) * QW(n, k) — the plain dense product of the
// quantized operands, accumulated in the attrs' output dtype.
Expr DenseFirstTerm(const Expr& quantized_data, const Expr& quantized_kernel,
                    const QnnDenseAttrs* attrs) {
  const auto& out_units = attrs->units;
  return Dense(quantized_data, quantized_kernel, out_units, attrs->out_dtype);
}

// Term2: Sigma(k) zp_w * QA(m, k) — kernel zero point times the per-row sum
// of the data; keepdims=true keeps an (m, 1) column so it broadcasts.
Expr DenseSecondTerm(const Expr& quantized_data, const Expr& zp_kernel) {
  const Array<Integer> reduce_axes = {1};
  auto data_row_sum = Sum(Cast(quantized_data, Int(32)), reduce_axes, true, false);
  return Multiply(zp_kernel, data_row_sum);
}

// Term3: Sigma(k) zp_a * QW(n, k) — data zero point times the per-unit sum of
// the kernel; reduced without keepdims so it broadcasts across output rows.
Expr DenseThirdTerm(const Expr& quantized_kernel, const Expr& zp_data) {
  const Array<Integer> reduce_axes = {1};
  auto kernel_row_sum = Sum(Cast(quantized_kernel, Int(32)), reduce_axes, false, false);
  return Multiply(zp_data, kernel_row_sum);
}

// Term4: Sigma(k) zp_a * zp_w == zp_a * zp_w * K. Every factor is known at
// compile time, so the whole term folds to a single int32 constant.
Expr DenseFourthTerm(const QnnDenseAttrs* attrs, int reduction_dim_size) {
  // Accumulate in 64 bits first: int32 * int32 * int can overflow, and signed
  // overflow is UB. Then verify the product still fits the int32 constant.
  int64_t scalar_term = static_cast<int64_t>(attrs->input_zero_point) *
                        attrs->kernel_zero_point * reduction_dim_size;
  CHECK(scalar_term >= std::numeric_limits<int32_t>::min() &&
        scalar_term <= std::numeric_limits<int32_t>::max())
      << "Dense constant term does not fit into int32.";
  return MakeConstantScalar(Int(32), static_cast<int32_t>(scalar_term));
}

/*
 * \brief Forward rewrite the qnn dense op.
 * \param attrs The QNN dense attrs.
shoubhik committed
94
 * \param new_args The new mutated args to the call node.
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121
 * \param arg_types The types of input and output.
 * \return The sequence of Relay ops for qnn cov2d op.
 * \note Lowering of the qnn.dense operator
 *       A quantized tensor is represented in following manner
 *          A = scale_a x (QA - zp_A)
 *       where QA is quantized tensor, scale_a and zp_A are quantization
 *       params.
 *
 *       Quantized dense multiplies two quantized tensors and returns a
 *       quantized tensor of default dtype of int32, with scale equaling to the
 *       product of scales of input tensors, and a zero point of zero.
 *
 *       The lowering for asymmetric quantized dense looks as follows. More details at
 *       https://discuss.tvm.ai/t/tf-lite-quantized-conv2d-operator-conversion/2651/8
 *       The computation gets unrolled into following 4 terms
 *          C(m, n) = Sigma(k) (A(m, k) * W(n, k))
 *
 *          RHS becomes
 *            Sigma(k) ([QA(m, k) - zp_a] * [QW(n, k) - zp_w])
 *
 *          Unrolling leads to following sequence
 *            Sigma(k) QA(m, k) * QW(n, k)                         // Term1
 *          - Sigma(k) zp_w * QA(m, k)                             // Term2
 *          - Sigma(k) zp_a * QW(n, k)                             // Term3
 *          - Sigma(k) * zp_a * zp_w                               // Term4
 *
 *       Term3 and Term4 can be computed at compile time.
shoubhik committed
122
 */
123
Expr QnnDenseCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
shoubhik committed
124 125 126 127
                          const Array<tvm::relay::Type>& arg_types) {
  CHECK_EQ(new_args.size(), 2);
  Expr quantized_data = new_args[0];
  Expr quantized_kernel = new_args[1];
128 129 130 131

  const auto in_shape = get_shape(arg_types[0]);
  const int reduction_dim_size = get_const_int(in_shape[1]);

shoubhik committed
132
  const auto* qnn_dense_attrs = attrs.as<QnnDenseAttrs>();
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
  auto zp_kernel = MakeConstantScalar(Int(32), qnn_dense_attrs->kernel_zero_point);
  auto zp_data = MakeConstantScalar(Int(32), qnn_dense_attrs->input_zero_point);

  // Get all the terms as described in the comments.
  auto term1 = DenseFirstTerm(quantized_data, quantized_kernel, qnn_dense_attrs);
  auto term2 = DenseSecondTerm(quantized_data, zp_kernel);
  auto term3 = DenseThirdTerm(quantized_kernel, zp_data);
  auto term4 = DenseFourthTerm(qnn_dense_attrs, reduction_dim_size);

  // Combine those 4 terms depending on the zero points to get the best lowering.
  if (qnn_dense_attrs->input_zero_point == 0 && qnn_dense_attrs->kernel_zero_point == 0) {
    // term 2, 3 and 4 become zero.
    return term1;
  } else if (qnn_dense_attrs->input_zero_point == 0 && qnn_dense_attrs->kernel_zero_point != 0) {
    // term 3 and term 4 become zero.
    return Subtract(term1, term2);
  } else if (qnn_dense_attrs->input_zero_point != 0 && qnn_dense_attrs->kernel_zero_point == 0) {
    // term 2 and term 4 become zero.
    return Subtract(term1, term3);
  } else {
    auto data_term = Subtract(term1, term2);
    // Putting constant terms together, so that constant folding can fold it.
    auto const_term = Subtract(term4, term3);
    return Add(data_term, const_term);
shoubhik committed
157 158 159 160 161 162 163 164 165
  }
}

// Operator registration: documentation string, arguments, type relation and
// the canonicalization (lowering) hook for qnn.dense.
RELAY_REGISTER_OP("qnn.dense")
.describe(R"code(Applies a linear transformation: :math:`Y = XW^T`.
- **data**: quantized(int8, uint8) `(x1, x2, ..., xn, input_dim)`
- **weight**: quantized(int8, uint8) `(units, input_dim)`
- **out**: quantized(int32) `(x1, x2, ..., xn, units)`.
)code" TVM_ADD_FILELINE)
.set_attrs_type<QnnDenseAttrs>()
.set_num_inputs(2)
.add_argument("data", "quantized nD Tensor", "Input data.")
.add_argument("weight", "quantized 2D Tensor", "Weight matrix.")
.set_support_level(11)
.add_type_rel("QDense", DenseRel<QnnDenseAttrs>)
.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnDenseCanonicalize);

// Expose the qnn.dense constructor to the frontend (Python) via the FFI.
TVM_REGISTER_API("relay.qnn.op._make.dense")
.set_body_typed(MakeQuantizedDense);

}  // namespace qnn
}  // namespace relay
}  // namespace tvm