"""
Compile GPU Inference
=====================
**Author**: `Yuwei Hu <https://huyuwei.github.io/>`_

This is an example of using NNVM to compile a MobileNet/ResNet model and deploy
its inference on the GPU.

To begin with, we import nnvm (for compilation) and TVM (for deployment).
"""
import tvm
import numpy as np
from tvm.contrib import nvcc, graph_runtime
import nnvm.compiler
import nnvm.testing

######################################################################
# Register the NVCC Compiler Option
# ---------------------------------
# NNVM optimizes the graph and relies on TVM to generate fast GPU code.
# To get the maximum performance, we need to enable nvcc's compiler hook.
# This usually gives better performance than nvrtc mode.

@tvm.register_func
def tvm_callback_cuda_compile(code):
    ptx = nvcc.compile_cuda(code, target="ptx")
    return ptx

######################################################################
# Prepare the Benchmark
# ---------------------
# We construct a standard ImageNet inference benchmark.
# NNVM needs two things to compile a deep learning model:
#
# - net: the graph representation of the computation
# - params: a dictionary mapping parameter names (str) to parameter values
#
# We use nnvm's testing utility to produce the model description and random parameters
# so that the example does not depend on a specific front-end framework.
#
# .. note::
#
#   In a typical workflow, we can get this pair from :any:`nnvm.frontend`,
#   as sketched below.
#
target = "cuda"
ctx = tvm.gpu(0)
batch_size = 1
num_classes = 1000
image_shape = (3, 224, 224)
data_shape = (batch_size,) + image_shape
out_shape = (batch_size, num_classes)
# To use ResNet for inference, run the following instead:
#net, params = nnvm.testing.resnet.get_workload(
#    batch_size=1, image_shape=image_shape)
net, params = nnvm.testing.mobilenet.get_workload(
    batch_size=1, image_shape=image_shape)
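# In a typical workflow the (net, params) pair comes from a front-end importer
# in nnvm.frontend instead of the testing workloads. A minimal sketch, assuming
# a pretrained MXNet checkpoint saved under the prefix "resnet-18" (the file
# names here are illustrative):
#
# import mxnet as mx
# import nnvm.frontend
# sym, arg_params, aux_params = mx.model.load_checkpoint("resnet-18", 0)
# net, params = nnvm.frontend.from_mxnet(sym, arg_params, aux_params)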

######################################################################
# Compile the Graph
# -----------------
# To compile the graph, we call the build function with the graph
# configuration and parameters.
# When parameters are provided, NNVM will pre-compute certain parts of the graph
# if possible (e.g. simplify batch normalization to a scale-shift), and return
# the updated parameters.

graph, lib, params = nnvm.compiler.build(
    net, target, shape={"data": data_shape}, params=params)
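# The returned graph is an nnvm.graph.Graph. To inspect what the compiler
# produced (a sketch, purely for illustration), its JSON form can be dumped:
#
# print(graph.json())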


######################################################################
# Run the Compiled Module
# -----------------------
#
# To deploy the module, we call :any:`tvm.contrib.graph_runtime.create`,
# passing in the graph, the lib, and the context.
# Thanks to TVM, we can deploy the compiled module to many platforms and languages.
# The deployment module is designed to have minimal dependencies.
# This example runs on the same machine.
#
# Note that the code below no longer depends on NNVM; it only relies on TVM's
# runtime to run (deploy) the model.
data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
module = graph_runtime.create(graph, lib, ctx)
# set input
module.set_input(**params)
module.set_input("data", data)
# run
module.run()
# get output
out = module.get_output(0, tvm.nd.empty(out_shape))
# convert to numpy
out.asnumpy()
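
# Since the compiled module only depends on the TVM runtime, its pieces can be
# saved to disk and shipped to another machine. A minimal sketch (the file
# names are illustrative):
#
# lib.export_library("deploy.so")
# with open("deploy.json", "w") as f:
#     f.write(graph.json())
# with open("deploy.params", "wb") as f:
#     f.write(nnvm.compiler.save_param_dict(params))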