deploy_model_on_mali_gpu.py 8.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
17
"""
.. _tutorial-deploy-model-on-mali-gpu:

Deploy the Pretrained Model on ARM Mali GPU
===========================================
**Author**: `Lianmin Zheng <https://lmzheng.net/>`_, `Ziheng Jiang <https://ziheng.org/>`_

This is an example of using NNVM to compile a ResNet model and
deploy it on Firefly-RK3399 with ARM Mali GPU. We will use the
Mali-T860 MP4 GPU on this board to accelerate the inference.
"""
28

29 30 31
import tvm
import nnvm.compiler
import nnvm.testing
32 33
from tvm import rpc
from tvm.contrib import util, graph_runtime as runtime
34
from tvm.contrib.download import download_testdata
35 36 37 38 39

######################################################################
# Build TVM Runtime on Device
# ---------------------------
#
40
# The first step is to build tvm runtime on the remote device.
41 42 43
#
# .. note::
#
44 45 46
#   All instructions in both this section and the next section should be
#   executed on the target device, e.g. RK3399. We assume it is
#   running Linux.
47
#
48 49 50 51 52
# Since we do compilation on the local machine, the remote device is only used
# for running the generated code. We only need to build the TVM runtime on
# the remote device. Make sure you have an OpenCL driver on your board.
# You can refer to this `tutorial <https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2>`_
# to set up the OS and the OpenCL driver for the RK3399.
53
#
54
# .. code-block:: bash
55
#
56 57 58
#   git clone --recursive https://github.com/dmlc/tvm
#   cd tvm
#   cp cmake/config.cmake .
59
#   sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake
60
#   make runtime -j4
61
#
62 63
# After building the runtime successfully, we need to set environment variables
# in the :code:`~/.bashrc` file. We can edit :code:`~/.bashrc`
64
# using :code:`vi ~/.bashrc` and add the line below (Assuming your TVM
65
# directory is in :code:`~/tvm`):
66
#
67
# .. code-block:: bash
68
#
69
#   export PYTHONPATH=$PYTHONPATH:~/tvm/python
70
#
71
# To update the environment variables, execute :code:`source ~/.bashrc`.
72 73 74 75

######################################################################
# Set Up RPC Server on Device
# ---------------------------
76 77
# To start an RPC server, run the following command on your remote device
# (which is the RK3399 in our example).
78 79 80 81 82
#
#   .. code-block:: bash
#
#     python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
#
83 84
# If you see the line below, it means the RPC server started
# successfully on your device.
85 86 87 88 89 90 91
#
#    .. code-block:: bash
#
#      INFO:root:RPCServer: bind to 0.0.0.0:9090
#

######################################################################
92 93 94
# Prepare the Pre-trained Model
# -----------------------------
# Back to the host machine, which should have a full TVM installed (with LLVM).
95
#
96 97 98
# We will use a pre-trained model from the
# `MXNet Gluon model zoo <https://mxnet.incubator.apache.org/api/python/gluon/model_zoo.html>`_.
# You can find more details about this part in the tutorial :ref:`tutorial-from-mxnet`.
99 100 101 102 103 104 105 106 107 108 109

from mxnet.gluon.model_zoo.vision import get_model
from PIL import Image
import numpy as np

# only one line to get the model:
# ResNet-18 (v1) from the Gluon model zoo, loaded with pretrained weights
block = get_model('resnet18_v1', pretrained=True)

######################################################################
# In order to test our model, here we download an image of a cat and
# convert it into the input format of the model.
110
# Fetch the sample cat picture through `download_testdata` and scale it to
# 224x224 pixels.
img_name = 'cat.png'
img_url = 'https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true'
img_path = download_testdata(img_url, img_name, module='data')
image = Image.open(img_path).resize((224, 224))
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130

def transform_image(image):
    """Normalize an image and lay it out as a single-item, channel-first batch.

    The input is converted to a NumPy array whose last axis holds three
    channels. Each channel is shifted and scaled by fixed constants
    (presumably the per-channel mean and standard deviation the model was
    trained with -- confirm against the model zoo documentation), the
    channel axis is moved to the front, and a leading axis of length one
    is added.

    Parameters
    ----------
    image : array-like
        Anything `np.array` accepts, with shape (height, width, 3).

    Returns
    -------
    numpy.ndarray
        Floating point array of shape (1, 3, height, width).
    """
    channel_offset = np.array([123., 117., 104.])
    channel_scale = np.array([58.395, 57.12, 57.375])

    normalized = (np.array(image) - channel_offset) / channel_scale
    # (H, W, C) -> (C, H, W)
    channel_first = normalized.transpose((2, 0, 1))
    # prepend the batch axis
    return channel_first[np.newaxis, :]

# Preprocess the resized image; `x` is later fed to the model as the 'data' input.
x = transform_image(image)

######################################################################
# synset is used to map the numeric ImageNet class label to
# a name that humans can understand.
# Address of the ImageNet class-id to human-readable-name table.
# Adjacent string literals are concatenated by the parser, so the URL can be
# wrapped across lines without a runtime join.
synset_url = ('https://gist.githubusercontent.com/zhreshold/'
              '4d0b62f3d01426887599d4f7ede23ee5/raw/'
              '596b27d23537e5a1b5751d2b0481ef172f58b539/'
              'imagenet1000_clsid_to_human.txt')
131

132 133 134
import ast

# Download the label table and parse it into `synset`, which is indexed by
# class id when the prediction is printed.
synset_name = 'imagenet1000_clsid_to_human.txt'
synset_path = download_testdata(synset_url, synset_name, module='data')
with open(synset_path) as f:
    # The file is expected to contain a single Python literal. It comes from
    # the network, so parse it with `ast.literal_eval`, which only accepts
    # literals and cannot execute arbitrary code the way `eval` can.
    synset = ast.literal_eval(f.read())

######################################################################
# Now we would like to port the Gluon model to a portable computational graph.
# It's as easy as several lines.

# Convert the Gluon block into an NNVM symbol (`net`) and its parameters.
# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
net, params = nnvm.frontend.from_mxnet(block)
# we want a probability so add a softmax operator
# (`net` is rebound to the symbol that ends with the softmax)
net = nnvm.sym.softmax(net)

######################################################################
# Here are some basic data workload configurations.
# Workload geometry: a batch of one (3, 224, 224) image, classified into
# 1000 categories. `data_shape` is the image shape with the batch size
# prepended.
num_classes = 1000
image_shape = (3, 224, 224)
batch_size = 1
data_shape = (batch_size,) + tuple(image_shape)

######################################################################
# Compile The Graph
# -----------------
# To compile the graph, we call the :any:`nnvm.compiler.build` function
157
# with the graph configuration and parameters. As we use OpenCL for
158 159 160 161 162 163
# GPU computing, the tvm will generate both OpenCL kernel code and ARM
# CPU host code. The CPU host code is used for calling OpenCL kernels.
# In order to generate correct CPU code, we need to specify the target
# triplet for host ARM device by setting the parameter :code:`target_host`.

######################################################################
164 165 166 167
# If we run the example on our x86 server for demonstration, we can simply
# set it as :code:`llvm`. If running it on the RK3399, we need to
# specify its instruction set. Set :code:`local_demo` to False if you
# want to run this tutorial with a real device.
168

169 170 171
# Switch between a self-contained run on this machine (True) and a real
# deployment to the remote board (False).
local_demo = True

if local_demo:
    # Local demonstration: both the device code and the host code target "llvm".
    target_host = "llvm"
    target = "llvm"
else:
    # Here is the setting for my rk3399 board.
    # If you don't use rk3399, you can query your target triple by
    # executing `gcc -v` on your board.
    target_host = "llvm -target=aarch64-linux-gnu"

    # Set target to `tvm.target.mali` instead of 'opencl' to enable
    # optimizations for Mali.
    target = tvm.target.mali()
183

184
# Compile the network at optimization level 3 for the selected device target;
# `target_host` selects the code generator for the host-side code.
with nnvm.compiler.build_config(opt_level=3):
    graph, lib, params = nnvm.compiler.build(
        net, target=target, shape={"data": data_shape},
        params=params, target_host=target_host)
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202

# After `nnvm.compiler.build`, you will get three return values: the graph,
# the library and the new parameters. New parameters are returned because some
# optimizations change the parameters while keeping the model's result the same.

# Save the library at local temporary directory.
# `lib_fname` is kept in a variable because the upload step later passes the
# same path to `remote.upload`.
tmp = util.tempdir()
lib_fname = tmp.relpath('net.tar')
lib.export_library(lib_fname)

######################################################################
# Deploy the Model Remotely by RPC
# --------------------------------
# With RPC, you can deploy the model remotely from your host machine
# to the remote device.

203 204 205 206 207 208 209 210
# Obtain the RPC session used for every later call to the device:
# a connection to the board for a real deployment, otherwise a local session.
if not local_demo:
    # The following is my environment, change this to the IP address of your target device
    host = '10.77.1.145'
    port = 9090
    remote = rpc.connect(host, port)
else:
    remote = rpc.LocalSession()
211 212 213 214 215 216

# Upload the library to the remote device and load it there.
# `load_module` is given the file name that `lib_fname` ends with.
remote.upload(lib_fname)
rlib = remote.load_module('net.tar')

# Create the remote runtime module: OpenCL device 0 on the real board,
# CPU 0 for the local demo.
ctx = remote.cl(0) if not local_demo else remote.cpu(0)
module = runtime.create(graph, rlib, ctx)
# Set parameters (uploads params to the remote device; this may take a while).
module.set_input(**params)
# Set the input data, converted to float32.
module.set_input('data', tvm.nd.array(x.astype('float32')))
# Run inference.
module.run()
# Fetch the first output.
out = module.get_output(0)
# Report the top-1 result: index of the highest score, mapped through synset.
top1 = np.argmax(out.asnumpy())
print('TVM prediction top-1: {}'.format(synset[top1]))