# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
.. _tutorial-relay-quick-start:

Quick Start Tutorial for Compiling Deep Learning Models
=======================================================
**Author**: `Yao Wang <https://github.com/kevinthesun>`_, `Truman Tian <https://github.com/SiNZeRo>`_

This example shows how to build a neural network with the Relay
Python frontend and generate a runtime library for an Nvidia GPU
with TVM.
Note that you need to build TVM with both CUDA and LLVM enabled.
"""

######################################################################
# Overview for Supported Hardware Backend of TVM
# ----------------------------------------------
# The image below shows the hardware backends currently supported by TVM:
#
# .. image:: https://github.com/dmlc/web-data/raw/master/tvm/tutorial/tvm_support_list.png
#    :align: center
#    :scale: 100%
#
# In this tutorial, we'll choose cuda and llvm as target backends.
# To begin with, let's import Relay and TVM.

import numpy as np

from tvm import relay
from tvm.relay import testing
import tvm
from tvm.contrib import graph_runtime

######################################################################
# Define Neural Network in Relay
# ------------------------------
# First, let's define a neural network with the Relay Python frontend.
# For simplicity, we'll use the pre-defined resnet-18 network in Relay.
# Parameters are initialized with the Xavier initializer.
# Relay also supports other model formats such as MXNet, CoreML, ONNX and
# TensorFlow.
#
# In this tutorial, we assume we will do inference on our device and
# the batch size is set to be 1. Input images are RGB color images of
# size 224 * 224. We can call :any:`tvm.relay.expr.astext()` to show
# the network structure.

batch_size = 1
num_class = 1000
image_shape = (3, 224, 224)
data_shape = (batch_size,) + image_shape
out_shape = (batch_size, num_class)

mod, params = relay.testing.resnet.get_workload(
    num_layers=18, batch_size=batch_size, image_shape=image_shape)

# set show_meta_data=True if you want to show meta data
print(mod.astext(show_meta_data=False))
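######################################################################
# As an aside, the ``mod``/``params`` pair does not have to come from
# ``relay.testing``; it can be imported from one of the frameworks listed
# above. The snippet below is a minimal sketch of the ONNX path, assuming
# you already have a model file on disk; the name ``resnet18.onnx`` is a
# placeholder rather than a file shipped with this tutorial, so the code
# is shown but not executed:
#
# .. code-block:: python
#
#     import onnx
#
#     onnx_model = onnx.load("resnet18.onnx")  # placeholder model file
#     # map the graph input name to its shape so Relay can infer the rest
#     mod, params = relay.frontend.from_onnx(
#         onnx_model, shape={"data": data_shape})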
######################################################################
# Compilation
# -----------
# The next step is to compile the model using the Relay/TVM pipeline.
# Users can specify the optimization level of the compilation, currently
# a value from 0 to 3. The optimization passes include operator fusion,
# pre-computation, layout transformation and so on.
#
# :any:`relay.build_module.build` returns three components: the execution
# graph in json format, the TVM module library of compiled functions
# specifically for this graph on the target hardware, and the parameter
# blobs of the model. During the compilation, Relay does the graph-level
# optimization while TVM does the tensor-level optimization, resulting
# in an optimized runtime module for model serving.
#
# We'll first compile for the Nvidia GPU. Behind the scenes,
# :any:`relay.build_module.build` first does a number of graph-level
# optimizations, e.g. pruning, fusing, etc., then registers the operators
# (i.e. the nodes of the optimized graphs) to TVM implementations to
# generate a ``tvm.module``. To generate the module library, TVM first
# lowers the high-level IR into the intrinsic IR of the specified target
# backend, which is CUDA in this example, and then generates the machine
# code for the module library.

opt_level = 3
target = tvm.target.cuda()
with relay.build_config(opt_level=opt_level):
    graph, lib, params = relay.build_module.build(
        mod, target, params=params)

#####################################################################
# Run the generated library
# -------------------------
# Now we can create a graph runtime and run the module on the Nvidia GPU.

# create random input
ctx = tvm.gpu()
data = np.random.uniform(-1, 1, size=data_shape).astype("float32")
# create module
module = graph_runtime.create(graph, lib, ctx)
# set input and parameters
module.set_input("data", data)
module.set_input(**params)
# run
module.run()
# get output
out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()

# Print first 10 elements of output
print(out.flatten()[0:10])

######################################################################
# Save and Load Compiled Module
# -----------------------------
# We can also save the graph, lib and parameters into files and load them
# back in a deployment environment.

####################################################
# save the graph, lib and params into separate files
from tvm.contrib import util

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib.tar")
lib.export_library(path_lib)
with open(temp.relpath("deploy_graph.json"), "w") as fo:
    fo.write(graph)
with open(temp.relpath("deploy_param.params"), "wb") as fo:
    fo.write(relay.save_param_dict(params))
print(temp.listdir())

####################################################
# load the module back.
loaded_json = open(temp.relpath("deploy_graph.json")).read()
loaded_lib = tvm.runtime.load_module(path_lib)
loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
# reuse the same input as above so the two outputs can be compared
input_data = tvm.nd.array(data)

module = graph_runtime.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.run(data=input_data)
out_deploy = module.get_output(0).asnumpy()

# Print first 10 elements of output
print(out_deploy.flatten()[0:10])

# check whether the output from deployed module is consistent with original one
tvm.testing.assert_allclose(out_deploy, out, atol=1e-3)
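######################################################################
# The overview listed llvm as the second target backend of this tutorial.
# The snippet below is a minimal sketch of the same pipeline on CPU,
# assuming your TVM build has llvm enabled; only the target and the
# context change, the rest of the flow is identical.

# compile the same model for the llvm CPU backend
target_cpu = "llvm"
with relay.build_config(opt_level=opt_level):
    graph_cpu, lib_cpu, params_cpu = relay.build_module.build(
        mod, target_cpu, params=params)

# run the CPU module on the same input as before
module_cpu = graph_runtime.create(graph_cpu, lib_cpu, tvm.cpu())
module_cpu.set_input("data", data)
module_cpu.set_input(**params_cpu)
module_cpu.run()
out_cpu = module_cpu.get_output(0, tvm.nd.empty(out_shape)).asnumpy()

# GPU and CPU results should agree within floating point tolerance
tvm.testing.assert_allclose(out_cpu, out, atol=1e-3)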