""" Get Started with NNVM ===================== **Author**: `Tianqi Chen <https://tqchen.github.io/>`_ This article is an introductory tutorial to workflow in NNVM. """ import nnvm.compiler import nnvm.symbol as sym ###################################################################### # Declare Computation # ------------------- # We start by describing our need using computational graph. # Most deep learning frameworks use computation graph to describe # their computation. In this example, we directly use # NNVM's API to construct the computational graph. # # .. note:: # # In a typical deep learning compilation workflow, # we can get the models from :any:`nnvm.frontend` # # The following code snippet describes :math:`z = x + \sqrt{y}` # and creates a nnvm graph from the description. # We can print out the graph ir to check the graph content. x = sym.Variable("x") y = sym.Variable("y") z = sym.elemwise_add(x, sym.sqrt(y)) compute_graph = nnvm.graph.create(z) print("-------compute graph-------") print(compute_graph.ir()) ###################################################################### # Compile # ------- # We can call :any:`nnvm.compiler.build` to compile the graph. # The build function takes a shape parameter which specifies the # input shape requirement. Here we only need to pass in shape of ``x`` # and the other one will be inferred automatically by NNVM. # # The function returns three values. ``deploy_graph`` contains # the final compiled graph structure. ``lib`` is a :any:`tvm.module.Module` # that contains compiled CUDA functions. We do not need the ``params`` # in this case. shape = (4,) deploy_graph, lib, params = nnvm.compiler.build( compute_graph, target="cuda", shape={"x": shape}, dtype="float32") ###################################################################### # We can print out the IR of ``deploy_graph`` to understand what just # happened under the hood. We can find that ``deploy_graph`` only # contains a single operator ``tvm_op``. This is because NNVM # automatically fused the operator together into one operator. # print("-------deploy graph-------") print(deploy_graph.ir()) ###################################################################### # Let us also peek into content of ``lib``. # Typically a compiled TVM CUDA module contains a host module(lib) # and a device module(``lib.imported_modules[0]``) that contains the CUDA code. # We print out the the generated device code here. # This is exactly a fused CUDA version of kernel that the graph points to. # print("-------deploy library-------") print(lib.imported_modules[0].get_source()) ###################################################################### # Deploy and Run # -------------- # Now that we have have compiled module, let us run it. # We can use :any:`graph_runtime <tvm.contrib.graph_runtime.create>` # in tvm to create a deployable :any:`GraphModule <tvm.contrib.graph_runtime.GraphModule>`. # We can use the :any:`set_input <tvm.contrib.graph_runtime.GraphModule.set_input>`, # :any:`run <tvm.contrib.graph_runtime.GraphModule.run>` and # :any:`get_output <tvm.contrib.graph_runtime.GraphModule.get_output>` function # to set the input, execute the graph and get the output we need. 
######################################################################
# Deploy and Run
# --------------
# Now that we have a compiled module, let us run it.
# We can use :any:`graph_runtime <tvm.contrib.graph_runtime.create>`
# in TVM to create a deployable :any:`GraphModule <tvm.contrib.graph_runtime.GraphModule>`.
# We can use the :any:`set_input <tvm.contrib.graph_runtime.GraphModule.set_input>`,
# :any:`run <tvm.contrib.graph_runtime.GraphModule.run>` and
# :any:`get_output <tvm.contrib.graph_runtime.GraphModule.get_output>` functions
# to set the input, execute the graph, and get the output we need.
#
import tvm
import numpy as np
from tvm.contrib import graph_runtime, util

module = graph_runtime.create(deploy_graph, lib, tvm.gpu(0))
x_np = np.array([1, 2, 3, 4]).astype("float32")
y_np = np.array([4, 4, 4, 4]).astype("float32")
# set input to the graph module
module.set_input(x=x_np, y=y_np)
# run forward computation
module.run()
# get the first output
out = module.get_output(0, out=tvm.nd.empty(shape))
print(out.asnumpy())

######################################################################
# Provide Model Parameters
# ------------------------
# Most deep learning models contain two types of inputs: parameters
# that remain fixed during inference, and data inputs that change
# with each inference task. It is helpful to provide this
# information to NNVM. Let us assume that ``y`` is the parameter
# in our example. We can provide the model parameter information
# via the ``params`` argument to :any:`nnvm.compiler.build`.
#
deploy_graph, lib, params = nnvm.compiler.build(
    compute_graph, target="cuda", shape={"x": shape}, params={"y": y_np})

######################################################################
# This time we will need the ``params`` value returned by :any:`nnvm.compiler.build`.
# NNVM applies optimizations to pre-compute the intermediate values in
# the graph that can be determined from the parameters. In this case,
# :math:`\sqrt{y}` can be pre-computed. The pre-computed values
# are returned as the new ``params``. We can print out the new compiled library
# to confirm that the fused kernel now only contains the add operation.
#
print("-----optimized params-----")
print(params)
print("-------deploy library-------")
print(lib.imported_modules[0].get_source())

######################################################################
# Save the Deployed Module
# ------------------------
# We can save the ``deploy_graph``, ``lib`` and ``params`` separately
# and load them back later. We can use the ``export_library`` method of
# :any:`tvm.module.Module` to export the compiled library.
# ``deploy_graph`` is saved in JSON format and ``params``
# is serialized into a bytearray.
#
temp = util.tempdir()
path_lib = temp.relpath("deploy.so")
lib.export_library(path_lib)
with open(temp.relpath("deploy.json"), "w") as fo:
    fo.write(deploy_graph.json())
with open(temp.relpath("deploy.params"), "wb") as fo:
    fo.write(nnvm.compiler.save_param_dict(params))
print(temp.listdir())

######################################################################
# We can load the module back.

loaded_lib = tvm.module.load(path_lib)
loaded_json = open(temp.relpath("deploy.json")).read()
loaded_params = bytearray(open(temp.relpath("deploy.params"), "rb").read())
module = graph_runtime.create(loaded_json, loaded_lib, tvm.gpu(0))
params = nnvm.compiler.load_param_dict(loaded_params)
# directly load parameters from the serialized byte array
module.load_params(loaded_params)
module.run(x=x_np)
# get the first output
out = module.get_output(0, out=tvm.nd.empty(shape))
print(out.asnumpy())
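######################################################################
# As a quick sanity check (a sketch added for illustration, not part
# of the original workflow), we can compare the output of the loaded
# module against a NumPy reference of :math:`z = x + \sqrt{y}`.

# the pre-computed sqrt(y) is baked into params, so only x is fed at runtime
np.testing.assert_allclose(out.asnumpy(), x_np + np.sqrt(y_np), rtol=1e-5)
print("result matches the numpy reference")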
######################################################################
# Deploy using Another Language
# -----------------------------
# We use Python in this example for demonstration.
# We can also deploy the compiled modules with other languages
# supported by TVM, such as C++, Java, and JavaScript.
# The graph module itself is fully embedded in the TVM runtime.
#
# The following block demonstrates how we can directly use TVM's
# runtime API to execute the compiled module.
# You can find a similar runtime API in the TVM runtime of other languages.
#
fcreate = tvm.get_global_func("tvm.graph_runtime.create")
ctx = tvm.gpu(0)
# create the graph runtime module from the loaded graph, library and context
gmodule = fcreate(loaded_json, loaded_lib, ctx.device_type, ctx.device_id)
# fetch the packed functions exposed by the graph runtime module
set_input, get_output, run = gmodule["set_input"], gmodule["get_output"], gmodule["run"]
set_input("x", tvm.nd.array(x_np))
gmodule["load_params"](loaded_params)
run()
out = tvm.nd.empty(shape)
get_output(0, out)
print(out.asnumpy())
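######################################################################
# As a final sanity check (again, a sketch for illustration), the raw
# runtime API path should produce the same values as the
# ``GraphModule`` path above.

np.testing.assert_allclose(out.asnumpy(), x_np + np.sqrt(y_np), rtol=1e-5)
print("runtime API result matches the numpy reference")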