"""
.. _tutorial-nnvm-quick-start:

Quick Start Tutorial for Compiling Deep Learning Models
=======================================================
**Author**: `Yao Wang <https://github.com/kevinthesun>`_

This example shows how to build a neural network with NNVM python frontend and
generate runtime library for Nvidia GPU with TVM.
Notice that you need to build TVM with cuda and llvm enabled.
"""

######################################################################
# Overview for Supported Hardware Backend of TVM
# ----------------------------------------------
# The image below shows hardware backend currently supported by TVM:
#
# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tvm_support_list.png
#      :align: center
#      :scale: 100%
#
# In this tutorial, we'll choose cuda and llvm as target backends.
# To begin with, let's import NNVM and TVM.

import numpy as np

import nnvm.compiler
import nnvm.testing
import tvm
from tvm.contrib import graph_runtime

######################################################################
# Define Neural Network in NNVM
# -----------------------------
# First, let's define a neural network with nnvm python frontend.
# For simplicity, we'll use pre-defined resnet-18 network in NNVM.
# Parameters are initialized with Xavier initializer.
# NNVM also supports other model formats such as MXNet, CoreML, ONNX and
# Tensorflow.
#
# In this tutorial, we assume we will do inference on our device
# and the batch size is set to be 1. Input images are RGB color
# images of size 224 * 224. We can call the :any:`nnvm.symbol.debug_str`
# to show the network structure.

# Inference configuration: one RGB 224x224 image per batch, 1000-way
# classification output (ImageNet classes).
batch_size = 1
num_class = 1000
image_shape = (3, 224, 224)
# Network input is NCHW: (batch, channels, height, width).
data_shape = (batch_size, *image_shape)
# Output is one score vector per image in the batch.
out_shape = (batch_size, num_class)

# Build the pre-defined resnet-18 workload; returns the symbolic network
# and its randomly-initialized (Xavier) parameter dict.
net, params = nnvm.testing.resnet.get_workload(
    layers=18, batch_size=batch_size, image_shape=image_shape)
# Print a text description of the network structure.
print(net.debug_str())

######################################################################
# Compilation
# -----------
# Next step is to compile the model using the NNVM/TVM pipeline.
# Users can specify the optimization level of the compilation.
# Currently this value can be 0 to 3. The optimization passes include
# operator fusion, pre-computation, layout transformation and so on.
#
# :any:`nnvm.compiler.build` returns three components: the execution graph in
# json format, the TVM module library of compiled functions specifically
# for this graph on the target hardware, and the parameter blobs of
# the model. During the compilation, NNVM does the graph-level
# optimization while TVM does the tensor-level optimization, resulting
# in an optimized runtime module for model serving.
#
# We'll first compile for Nvidia GPU. Behind the scene, `nnvm.compiler.build`
# first does a number of graph-level optimizations, e.g. pruning, fusing, etc.,
# then registers the operators (i.e. the nodes of the optimized graphs) to
# TVM implementations to generate a `tvm.module`.
# To generate the module library, TVM will first transfer the High level IR
# into the lower intrinsic IR of the specified target backend, which is CUDA
# in this example. Then the machine code will be generated as the module library.

opt_level = 3
target = tvm.target.cuda()
with nnvm.compiler.build_config(opt_level=opt_level):
    graph, lib, params = nnvm.compiler.build(
        net, target, shape={"data": data_shape}, params=params)

#####################################################################
# Run the generated library
# -------------------------
# Now we can create graph runtime and run the module on Nvidia GPU.

# create random input
ctx = tvm.gpu()
data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
# create module
module = graph_runtime.create(graph, lib, ctx)
# set input and parameters
module.set_input("data", data)
module.set_input(**params)
# run
module.run()
# get output
out = module.get_output(0, tvm.nd.empty(out_shape))

# Convert to numpy and print first 10 elements of output.
print(out.asnumpy().flatten()[0:10])

######################################################################
# Save and Load Compiled Module
# -----------------------------
# We can also save the graph, lib and parameters into files and load them
# back in deploy environment.

####################################################

# save the graph, lib and params into separate files
from tvm.contrib import util

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib.tar")
lib.export_library(path_lib)
with open(temp.relpath("deploy_graph.json"), "w") as fo:
    fo.write(graph.json())
with open(temp.relpath("deploy_param.params"), "wb") as fo:
    fo.write(nnvm.compiler.save_param_dict(params))
print(temp.listdir())

####################################################

# load the module back.
with open(temp.relpath("deploy_graph.json")) as fi:
    loaded_json = fi.read()
loaded_lib = tvm.module.load(path_lib)
with open(temp.relpath("deploy_param.params"), "rb") as fi:
    loaded_params = bytearray(fi.read())
input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))

# Recreate the runtime from the deserialized artifacts and run inference.
module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0))
module.load_params(loaded_params)
module.run(data=input_data)
out = module.get_output(0).asnumpy()