# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""External function interface to NNPACK libraries."""
from __future__ import absolute_import as _abs

from .. import api as _api
from .. import intrin as _intrin
from .._ffi.function import _init_api

def is_available():
    """Return True if NNPACK can be used on this machine.

    Equivalent to checking that `nnp_initialize()` reports
    `nnp_status_success` (status code 0).
    """
    status = _initialize()
    return status == 0

def fully_connected_inference(lhs, rhs, nthreads=1):
    """Create an extern op that compute fully connected of 1D tensor lhs and
    2D tensor rhs with nnpack.

    Parameters
    ----------
    lhs : Tensor
        lhs 1D array input[input_channels] of FP32 elements
    rhs : Tensor
        rhs 2D matrix kernel[output_channels][input_channels] of FP32 elements
    nthreads : int, optional
        Number of NNPACK worker threads to use (default 1).

    Returns
    -------
    C : Tensor
        1D array out[output_channels] of FP32 elements.
    """
    # Output length is the number of rows of the kernel matrix.
    m = rhs.shape[0]
    return _api.extern(
        (m, ), [lhs, rhs],
        lambda ins, outs: _intrin.call_packed(
            "tvm.contrib.nnpack.fully_connected_inference",
            ins[0], ins[1], outs[0], nthreads), name="C")


class ConvolutionAlgorithm:
    """Algorithm selectors forwarded verbatim to the NNPACK packed calls.

    Values mirror NNPACK's convolution-algorithm enumeration; AUTO lets
    NNPACK pick an algorithm itself.
    """
    AUTO = 0
    FFT_8x8 = 1
    FFT_16x16 = 2
    WT_8x8 = 3
    IMPLICIT_GEMM = 4
    DIRECT = 5
    WT_8x8_FP16 = 6


class ConvolutionTransformStrategy:
    """Kernel-transform strategies: COMPUTE transforms the kernel at call
    time, PRECOMPUTE reuses an already-transformed kernel."""
    COMPUTE = 1
    PRECOMPUTE = 2


def convolution_inference(
        data, kernel, bias, padding, stride, nthreads=1,
        algorithm=ConvolutionAlgorithm.AUTO):
    """Create an extern op to do inference convolution of 4D tensor data and
    4D tensor kernel and 1D tensor bias with nnpack.

    Parameters
    ----------
    data : Tensor
        data 4D tensor input[batch][input_channels][input_height][input_width] of
        FP32 elements.
    kernel : Tensor
        kernel 4D tensor kernel[output_channels][input_channels][kernel_height]
        [kernel_width] of FP32 elements.
    bias : Tensor or None
        bias 1D array bias[output_channels] of FP32 elements, or None to
        skip the bias addition.
    padding : list
        padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right],
        which indicates the padding around the feature map.
    stride : list
        stride A 2-dim list of [stride_height, stride_width], which indicates
        the stride.
    nthreads : int, optional
        Number of NNPACK worker threads to use (default 1).
    algorithm : int, optional
        A ConvolutionAlgorithm value; AUTO lets NNPACK choose.

    Returns
    -------
    output : Tensor
        output 4D tensor output[batch][output_channels][output_height][output_width]
        of FP32 elements.
    """
    assert isinstance(padding, list) and len(padding) == 4
    assert isinstance(stride, list) and len(stride) == 2
    batch, _, input_height, input_width = data.shape
    output_channels, _, kernel_height, kernel_width = kernel.shape
    idxdiv = _api.indexdiv
    # Height uses the top/bottom pads; width must use the left/right pads
    # (the original code reused padding[0]/padding[1] for the width, which
    # is wrong for asymmetric padding).
    output_height = idxdiv(
        input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1
    output_width = idxdiv(
        input_width + padding[2] + padding[3] - kernel_width, stride[1]) + 1

    return _api.extern(
        (batch, output_channels, output_height, output_width),
        [data, kernel, bias] if bias is not None else [data, kernel],
        lambda ins, outs: _intrin.call_packed(
            "tvm.contrib.nnpack.convolution_inference",
            ins[0],
            ins[1],
            # NNPACK side treats 0 as "no bias".
            ins[2] if bias is not None else 0,
            outs[0], padding[0], padding[1], padding[2], padding[3],
            stride[0], stride[1], nthreads, algorithm), name="C")

def convolution_inference_without_weight_transform(
        data, transformed_kernel, bias, padding, stride, nthreads=1,
        algorithm=ConvolutionAlgorithm.AUTO):
    """Create an extern op to do inference convolution of 4D tensor data and
    4D pre-transformed tensor kernel and 1D tensor bias with nnpack.

    Parameters
    ----------
    data : Tensor
        data 4D tensor input[batch][input_channels][input_height][input_width] of
        FP32 elements.
    transformed_kernel : Tensor
        transformed_kernel 4D tensor kernel[output_channels][input_channels][tile]
        [tile] of FP32 elements.
    bias : Tensor or None
        bias 1D array bias[output_channels] of FP32 elements, or None to
        skip the bias addition.
    padding : list
        padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right],
        which indicates the padding around the feature map.
    stride : list
        stride A 2-dim list of [stride_height, stride_width], which indicates
        the stride.
    nthreads : int, optional
        Number of NNPACK worker threads to use (default 1).
    algorithm : int
        Must be one of the Winograd variants (WT_8x8 or WT_8x8_FP16); only
        those operate on a pre-transformed kernel.

    Returns
    -------
    output : Tensor
        output 4D tensor output[batch][output_channels][output_height][output_width]
        of FP32 elements.
    """

    assert algorithm in (ConvolutionAlgorithm.WT_8x8,
                         ConvolutionAlgorithm.WT_8x8_FP16)
    assert isinstance(padding, list) and len(padding) == 4
    assert isinstance(stride, list) and len(stride) == 2
    batch, _, input_height, input_width = data.shape
    output_channels, _, _, _ = transformed_kernel.shape
    # The kernel spatial extent is not recoverable from the transformed
    # tiles; the Winograd path assumes 3x3 kernels.
    kernel_height, kernel_width = (3, 3)
    idxdiv = _api.indexdiv
    # Height uses the top/bottom pads; width must use the left/right pads
    # (the original code reused padding[0]/padding[1] for the width, which
    # is wrong for asymmetric padding).
    output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1
    output_width = idxdiv(input_width + padding[2] + padding[3] - kernel_width, stride[1]) + 1

    return _api.extern(
        (batch, output_channels, output_height, output_width),
        [data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel],
        lambda ins, outs: _intrin.call_packed(
            "tvm.contrib.nnpack.convolution_inference_without_weight_transform",
            ins[0],
            ins[1],
            # NNPACK side treats 0 as "no bias".
            ins[2] if bias is not None else 0,
            outs[0], padding[0], padding[1], padding[2], padding[3],
            stride[0], stride[1], nthreads, algorithm), name="C", dtype='float32')

def convolution_inference_weight_transform(
        kernel, nthreads=1,
        algorithm=ConvolutionAlgorithm.AUTO,
        dtype='float32'):
    """Create an extern op that pre-transforms a 4D convolution kernel with
    nnpack, for use with convolution_inference_without_weight_transform.

    Parameters
    ----------
    kernel : Tensor
        kernel 4D tensor kernel[output_channels][input_channels][kernel_height]
        [kernel_width] of FP32 elements.
    nthreads : int, optional
        Number of NNPACK worker threads to use (default 1).
    algorithm : int
        Must be one of the Winograd variants (WT_8x8 or WT_8x8_FP16); only
        those use a pre-transformed kernel.
    dtype : str or object with a `dtype` attribute, optional
        Output dtype of the transformed kernel (default 'float32').

    Returns
    -------
    output : Tensor
        output 4D tensor output[output_channels][input_channels][tile][tile]
        of FP32 elements.
    """
    assert algorithm in (ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16)
    output_channels, input_channels, _, _ = kernel.shape
    # Winograd transform of a 3x3 kernel yields 8x8 tiles on the NNPACK side.
    transform_tile_size = 8
    # Accept either a dtype string or an object carrying a `.dtype` field.
    if not isinstance(dtype, str):
        dtype = dtype.dtype
    return _api.extern(
        (output_channels, input_channels, transform_tile_size, transform_tile_size),
        [kernel],
        lambda ins, outs: _intrin.call_packed(
            "tvm.contrib.nnpack.convolution_inference_weight_transform",
            ins[0], outs[0], nthreads, algorithm), name="transform_kernel", dtype=dtype)
# Register the tvm.contrib.nnpack.* packed functions as globals of this
# module — presumably this is what provides `_initialize` used above.
_init_api("tvm.contrib.nnpack")