cross_compilation_and_rpc.py 8.75 KB
Newer Older
1
"""
2 3
.. _tutorial-cross-compilation-and-rpc:

4 5
Cross Compilation and RPC
=========================
6
**Author**: `Ziheng Jiang <https://github.com/ZihengJiang/>`_, `Lianmin Zheng <https://github.com/merrymercy/>`_
7 8 9 10

This tutorial introduces cross compilation and remote device
execution with RPC in TVM.

11
With cross compilation and RPC, you can **compile a program on your
12 13 14 15
local machine then run it on the remote device**. It is useful when
the resources of the remote device are limited, like Raspberry Pi and mobile
platforms. In this tutorial, we will take Raspberry Pi as the CPU example
and Firefly-RK3399 as the OpenCL example.
16 17 18
"""

######################################################################
19
# Build TVM Runtime on Device
20 21
# ---------------------------
#
22
# The first step is to build tvm runtime on the remote device.
23
#
24
# .. note::
Tianqi Chen committed
25 26 27 28
#
#   All instructions in both this section and next section should be
#   executed on the target device, e.g. Raspberry Pi. And we assume it
#   has Linux running.
29 30 31 32
# 
# Since we do compilation on local machine, the remote device is only used
# for running the generated code. We only need to build tvm runtime on
# the remote device.
Tianqi Chen committed
33
#
34
# .. code-block:: bash
35
#
36 37 38
#   git clone --recursive https://github.com/dmlc/tvm
#   cd tvm
#   make runtime -j2
39
#
40 41 42 43
# After building runtime successfully, we need to set environment variables
# in :code:`~/.bashrc` file. We can edit :code:`~/.bashrc`
# using :code:`vi ~/.bashrc` and add the line below (Assuming your TVM
# directory is in :code:`~/tvm`):
44
#
45
# .. code-block:: bash
46
#
47
#   export PYTHONPATH=$PYTHONPATH:~/tvm/python
48
#
49
# To update the environment variables, execute :code:`source ~/.bashrc`.
50 51 52 53

######################################################################
# Set Up RPC Server on Device
# ---------------------------
54 55
# To start an RPC server, run the following command on your remote device
# (Which is Raspberry Pi in this example).
56 57 58
#
#   .. code-block:: bash
#
59
#     python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
60
#
61 62
# If you see the line below, it means the RPC server started
# successfully on your device.
63 64
#
#    .. code-block:: bash
65
#
66
#      INFO:root:RPCServer: bind to 0.0.0.0:9090
67
#
Tianqi Chen committed
68

69 70 71 72
######################################################################
# Declare and Cross Compile Kernel on Local Machine
# -------------------------------------------------
#
73 74
# .. note::
#
75 76
#   Now we go back to the local machine, which has a full TVM installed
#   (with LLVM).
77
#
78 79 80 81 82 83 84
# Here we will declare a simple kernel on the local machine:

import numpy as np

import tvm
from tvm import rpc
from tvm.contrib import util
85

86 87
# Length of the 1-D input tensor; the arrays created later in this
# tutorial use the same size (1024).
n = tvm.convert(1024)
A = tvm.placeholder((n,), name='A')
# B is A with 1.0 added to every element.
B = tvm.compute((n,), lambda i: A[i] + 1.0, name='B')
s = tvm.create_schedule(B.op)

######################################################################
92 93 94 95 96 97
# Then we cross compile the kernel.
# The target should be 'llvm -target=armv7l-linux-gnueabihf' for
# Raspberry Pi 3B, but we use 'llvm' here to make this tutorial runnable
# on our webpage building server. See the detailed note in the following block.

# Set to False to build for (and later connect to) a real remote device.
local_demo = True

# Plain 'llvm' keeps the tutorial runnable on the build server; the
# armv7l triple is the one for Raspberry Pi 3B.
target = 'llvm' if local_demo else 'llvm -target=armv7l-linux-gnueabihf'

func = tvm.build(s, [A, B], target=target, name='add_one')
# save the lib at a local temp folder
temp = util.tempdir()
path = temp.relpath('lib.tar')
func.export_library(path)
109 110 111 112

######################################################################
# .. note::
#
113
#   To run this tutorial with a real remote device, change :code:`local_demo`
114 115 116 117 118
#   to False and replace :code:`target` in :code:`build` with the true
#   target triple of your device. The target triple might be
#   different for different devices. For example, it is
#   :code:`'llvm -target=armv7l-linux-gnueabihf'` for Raspberry Pi 3B and
#   :code:`'llvm -target=aarch64-linux-gnu'` for RK3399.
119 120
#
#   Usually, you can query the target by executing :code:`gcc -v` on your
Tianqi Chen committed
121
#   device, and look for the line starting with :code:`Target:`
122
#   (Though it may be still a loose configuration.)
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
#
#   Besides :code:`-target`, you can also set other compilation options
#   like:
#
#   * -mcpu=<cpuname>
#       Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture.
#   * -mattr=a1,+a2,-a3,...
#       Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU.
#       To get the list of available attributes, you can do:
#
#       .. code-block:: bash
#
#         llc -mtriple=<your device target triple> -mattr=help
#
#   These options are consistent with `llc <http://llvm.org/docs/CommandGuide/llc.html>`_.
#   It is recommended to set the target triple and feature set to include
#   the specific features available, so we can take full advantage of the
#   features of the board.
#   You can find more details about cross compilation attributes from
#   `LLVM guide of cross compilation <https://clang.llvm.org/docs/CrossCompilation.html>`_.

######################################################################
145 146
# Run CPU Kernel Remotely by RPC
# ------------------------------
147 148
# We show how to run the generated cpu kernel on the remote device.
# First we obtain an RPC session from remote device.
149

150 151 152 153 154 155 156
if not local_demo:
    # The following is my environment, change this to the IP address of your target device
    host, port = '10.77.1.162', 9090
    remote = rpc.connect(host, port)
else:
    # Use a local session so the tutorial can run without a remote device.
    remote = rpc.LocalSession()
157 158

######################################################################
159 160 161
# Upload the lib to the remote device, then invoke a device local
# compiler to relink them. Now `func` is a remote module object.

162
remote.upload(path)
# The uploaded file is loaded by its file name.
func = remote.load_module('lib.tar')

# create arrays on the remote device
ctx = remote.cpu()
a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
# the function will run on the remote device
func(a, b)
# The kernel adds one to every element, so b must equal a + 1.
np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)

######################################################################
# When you want to evaluate the performance of the kernel on the remote
175
# device, it is important to avoid the overhead of network.
176 177
# :code:`time_evaluator` returns a remote function that runs the
# function :code:`number` times, measures the cost per run on the remote
178 179 180
# device and returns the measured cost. Network overhead is excluded.

# number=10: the kernel's entry function is run 10 times on the remote
# device; `.mean` is the measured cost per run.
time_f = func.time_evaluator(func.entry_name, ctx, number=10)
cost = time_f(a, b).mean
print('%g secs/op' % cost)

184 185 186 187
#########################################################################
# Run OpenCL Kernel Remotely by RPC
# ---------------------------------
# As for remote OpenCL devices, the workflow is almost the same as above.
188
# You can define the kernel, upload files, and run by RPC. 
189 190
#
# .. note::
191
#
192
#    Raspberry Pi does not support OpenCL; the following code is tested on
193
#    Firefly-RK3399. You may follow this `tutorial <https://gist.github.com/mli/585aed2cec0b5178b1a510f9f236afa2>`_
194 195 196 197
#    to setup the OS and OpenCL driver for RK3399.
#
#    Also we need to build the runtime with OpenCL enabled on rk3399 board. In the tvm
#    root directory, execute
198
#
199
# .. code-block:: bash
200
#
201 202 203
#    cp cmake/config.cmake .
#    sed -i "s/USE_OPENCL OFF/USE_OPENCL ON/" config.cmake
#    make runtime -j4
204
#
205 206 207 208 209 210 211 212 213 214
# The following function shows how we run OpenCL kernel remotely

def run_opencl(opencl_device_host='10.77.1.145', opencl_device_port=9090,
               target_host="llvm -target=aarch64-linux-gnu"):
    """Build the "add one" kernel for OpenCL and run it on a remote device by RPC.

    NOTE: The default values are the settings of the original author's
    rk3399 board. You need to pass values that match your environment.

    Parameters
    ----------
    opencl_device_host : str
        Address of the RPC server running on the OpenCL device.
    opencl_device_port : int
        Port of the RPC server running on the OpenCL device.
    target_host : str
        Target string passed to :code:`tvm.build` as :code:`target_host`.
    """
    # create schedule for the above "add one" compute declaration
    s = tvm.create_schedule(B.op)
    xo, xi = s[B].split(B.op.axis[0], factor=32)
    s[B].bind(xo, tvm.thread_axis("blockIdx.x"))
    s[B].bind(xi, tvm.thread_axis("threadIdx.x"))
    func = tvm.build(s, [A, B], "opencl", target_host=target_host)

    remote = rpc.connect(opencl_device_host, opencl_device_port)

    # export and upload
    path = temp.relpath('lib_cl.tar')
    func.export_library(path)
    remote.upload(path)
    func = remote.load_module('lib_cl.tar')

    # run
    ctx = remote.cl()
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx)
    func(a, b)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
    print("OpenCL test passed!")
236 237 238 239 240 241 242 243 244 245 246

######################################################################
# Summary
# -------
# This tutorial provides a walk through of cross compilation and RPC
# features in TVM.
#
# - Set up RPC server on the remote device.
# - Set up target device configuration to cross compile kernel on the
#   local machine.
# - Upload and run the kernel remotely by RPC API.