Commit 001ab525 by Andrew Tulloch, committed by Yizhi Liu

Bundled interpreter demo (#2297)

parent 766008ca
Makefile

# Makefile example to bundle TVM modules.
TVM_ROOT=$(shell cd ../..; pwd)
NNVM_PATH=nnvm
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core

PKG_CFLAGS = -std=c++14 -Oz -fPIC \
	-I${TVM_ROOT}/include \
	-I${DMLC_CORE}/include \
	-I${TVM_ROOT}/3rdparty/dlpack/include

PKG_LDFLAGS = -L${TVM_ROOT}/build

build_dir := build

test: $(build_dir)/demo $(build_dir)/bundle.so
	$(build_dir)/demo $(build_dir)/bundle.so

# Convenience alias used by the README.
demo: test

.PHONY: test demo clean

# The demo binary is dependency-free apart from libdl (for dlopen/dlsym).
$(build_dir)/demo: demo.cc
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -o $@ $^ -ldl

# Serialize our graph.json file into a C array.
$(build_dir)/graph.json.cc: $(build_dir)/graph.json
	xxd -i $^ > $@

# Serialize our params.bin file into a C array.
$(build_dir)/params.bin.cc: $(build_dir)/params.bin
	xxd -i $^ > $@

$(build_dir)/model.o $(build_dir)/graph.json $(build_dir)/params.bin: build_model.py
	python $< -o $(build_dir)

# Build our bundle against the serialized bundle.cc API, the runtime.cc API, and
# the serialized graph.json and params.bin.
$(build_dir)/bundle.so: bundle.cc runtime.cc $(build_dir)/model.o $(build_dir)/graph.json.cc $(build_dir)/params.bin.cc
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -fvisibility=hidden -o $@ $^ $(PKG_LDFLAGS) -shared

clean:
	rm -rf $(build_dir)
README.md

How to Bundle TVM Modules
=========================

This folder contains an example of how to bundle a TVM module (together with
the required interpreter runtime modules such as `runtime::GraphRuntime`, the
graph JSON, and the params) into a single, self-contained shared object
(`bundle.so`) that exposes a C API wrapping the appropriate
`runtime::GraphRuntime` instance. This is useful when we'd like to avoid
deploying the TVM runtime components to the target host in advance: instead,
we simply deploy the bundled shared object to the host, which embeds both the
model and the runtime components. The bundle should only depend on libc/libc++.
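
Concretely, the C API surface consists of the five entry points defined in
`bundle.cc` below; a client needs only these declarations, plus
`dlopen`/`dlsym`, to drive the model (the `tensor` arguments are `DLTensor*`
pointers from dlpack):

```cpp
/* The C API exported by bundle.so (see bundle.cc). */
extern "C" {
void *tvm_runtime_create();              // instantiate the embedded GraphRuntime
void tvm_runtime_destroy(void *handle);  // tear it down again
void tvm_runtime_set_input(void *handle, const char *name, void *tensor);
void tvm_runtime_run(void *handle);
void tvm_runtime_get_output(void *handle, int index, void *tensor);
}
```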
It also contains example code (`demo.cc`) that loads this shared object and
invokes the packaged TVM model instance. `demo.cc` is a dependency-free binary
that uses the functionality packaged in `bundle.so` (which means that
`bundle.so` can be deployed lazily at runtime, instead of being linked in at
compile time) to invoke TVM functionality.
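
As a minimal sketch of that pattern (error handling elided; the input/output
plumbing omitted here is shown in full in `demo.cc`):

```cpp
#include <dlfcn.h>

int main() {
  // No TVM headers or libraries are needed on the client side.
  void *lib = dlopen("./bundle.so", RTLD_LAZY | RTLD_LOCAL);
  auto create = reinterpret_cast<void *(*)()>(dlsym(lib, "tvm_runtime_create"));
  auto run = reinterpret_cast<void (*)(void *)>(dlsym(lib, "tvm_runtime_run"));
  auto destroy = reinterpret_cast<void (*)(void *)>(dlsym(lib, "tvm_runtime_destroy"));
  void *rt = create();  // build the embedded GraphRuntime
  run(rt);              // set_input/get_output omitted; see demo.cc
  destroy(rt);
  dlclose(lib);
  return 0;
}
```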
After building TVM itself, run the following command in this folder to build
and run the sample:
```bash
make demo
```
This will:
- Download the mobilenet0.25 model from the MXNet Gluon Model Zoo
- Compile the model with NNVM
- Build a `bundle.so` shared object containing the model specification and
parameters
- Build a `demo` executable that `dlopen`'s `bundle.so`, instantiates the
  contained graph runtime, invokes `GraphRuntime::Run` on a random input, and
  prints the output tensor to `stderr`
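
The model specification and parameters are embedded via the `xxd -i` rules in
the Makefile, which turn a file into a C array whose symbol name is derived
from the input path; this is where the `build_graph_json`/`build_params_bin`
externs in `bundle.cc` come from. For `build/graph.json`, the generated
`build/graph.json.cc` looks roughly like this (the bytes and length shown are
illustrative):

```cpp
unsigned char build_graph_json[] = {
  0x7b, 0x0a, 0x20, 0x20, 0x22, 0x6e, 0x6f, 0x64, 0x65, 0x73, 0x22, 0x3a,
  /* ... the remaining raw bytes of build/graph.json ... */
};
unsigned int build_graph_json_len = 45123; /* illustrative length */
```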
"""Creates a simple TVM modules."""
import argparse
import os
import nnvm.compiler
import nnvm.testing
import tvm
import logging
def main():
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser()
parser.add_argument('-o', '--out-dir', default='.')
opts = parser.parse_args()
dshape = (1, 3, 224, 224)
from mxnet.gluon.model_zoo.vision import get_model
block = get_model('mobilenet0.25', pretrained=True)
net, params = nnvm.frontend.from_mxnet(block)
net = nnvm.sym.softmax(net)
with nnvm.compiler.build_config(opt_level=3):
graph, lib, params = nnvm.compiler.build(
net, 'llvm --system-lib', shape={'data': dshape}, params=params)
print(graph.symbol().debug_str())
build_dir = os.path.abspath(opts.out_dir)
if not os.path.isdir(build_dir):
os.makedirs(build_dir)
lib.save(os.path.join(build_dir, 'model.o'))
with open(os.path.join(build_dir, 'graph.json'), 'w') as f_graph_json:
f_graph_json.write(graph.json())
with open(os.path.join(build_dir, 'params.bin'), 'wb') as f_params:
f_params.write(nnvm.compiler.save_param_dict(params))
if __name__ == '__main__':
main()
bundle.cc

#include <memory>
#include <string>

#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/registry.h>

// Symbols generated by `xxd -i` from build/graph.json and build/params.bin
// (see the Makefile).
extern unsigned char build_graph_json[];
extern unsigned int build_graph_json_len;
extern unsigned char build_params_bin[];
extern unsigned int build_params_bin_len;

// Everything except these entry points is hidden (-fvisibility=hidden).
#define TVM_BUNDLE_FUNCTION __attribute__((visibility("default"))) extern "C"

TVM_BUNDLE_FUNCTION void *tvm_runtime_create() {
  const std::string json_data(&build_graph_json[0],
                              &build_graph_json[0] + build_graph_json_len);
  // Retrieve the operators compiled with --system-lib; they registered
  // themselves at static-initialization time.
  tvm::runtime::Module mod_syslib =
      (*tvm::runtime::Registry::Get("module._GetSystemLib"))();
  int device_type = kDLCPU;
  int device_id = 0;
  // Instantiate a GraphRuntime over the embedded graph and system library.
  tvm::runtime::Module mod =
      (*tvm::runtime::Registry::Get("tvm.graph_runtime.create"))(
          json_data, mod_syslib, device_type, device_id);
  TVMByteArray params;
  params.data = reinterpret_cast<const char *>(&build_params_bin[0]);
  params.size = build_params_bin_len;
  mod.GetFunction("load_params")(params);
  return new tvm::runtime::Module(mod);
}

TVM_BUNDLE_FUNCTION void tvm_runtime_destroy(void *handle) {
  delete reinterpret_cast<tvm::runtime::Module *>(handle);
}

TVM_BUNDLE_FUNCTION void tvm_runtime_set_input(void *handle, const char *name,
                                               void *tensor) {
  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("set_input")(
      name, reinterpret_cast<DLTensor *>(tensor));
}

TVM_BUNDLE_FUNCTION void tvm_runtime_run(void *handle) {
  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("run")();
}

TVM_BUNDLE_FUNCTION void tvm_runtime_get_output(void *handle, int index,
                                                void *tensor) {
  reinterpret_cast<tvm::runtime::Module *>(handle)->GetFunction("get_output")(
      index, reinterpret_cast<DLTensor *>(tensor));
}
#include "tvm/runtime/c_runtime_api.h"
#include <assert.h>
#include <dlfcn.h> //dlopen
#include <dlpack/dlpack.h>
#include <iostream>
#include <random>
#include <vector>
template <typename F> auto getFunc(void *bundle, const char *name) {
dlerror();
auto *f =
reinterpret_cast<typename std::add_pointer<F>::type>(dlsym(bundle, name));
assert(!dlerror());
return f;
}
int main(int argc, char **argv) {
assert(argc == 2 && "Usage: demo <bundle.so>");
auto *bundle = dlopen(argv[1], RTLD_LAZY | RTLD_LOCAL);
assert(bundle);
auto *handle = getFunc<void *()>(bundle, "tvm_runtime_create")();
std::vector<float> input_storage(1 * 3 * 224 * 224);
std::mt19937 gen(0);
for (auto &e : input_storage) {
e = std::uniform_real_distribution<float>(0.0, 1.0)(gen);
}
std::vector<int64_t> input_shape = {1, 3, 224, 224};
DLTensor input;
input.data = input_storage.data();
input.ctx = DLContext{kDLCPU, 0};
input.ndim = 4;
input.dtype = DLDataType{kDLFloat, 32, 1};
input.shape = input_shape.data();
input.strides = nullptr;
input.byte_offset = 0;
getFunc<void(void *, const char *, void *)>(bundle, "tvm_runtime_set_input")(
handle, "data", &input);
auto *ftvm_runtime_run =
(auto (*)(void *)->void)dlsym(bundle, "tvm_runtime_run");
assert(!dlerror());
ftvm_runtime_run(handle);
std::vector<float> output_storage(1000);
std::vector<int64_t> output_shape = {1, 1000};
DLTensor output;
output.data = output_storage.data();
output.ctx = DLContext{kDLCPU, 0};
output.ndim = 2;
output.dtype = DLDataType{kDLFloat, 32, 1};
output.shape = output_shape.data();
output.strides = nullptr;
output.byte_offset = 0;
getFunc<void(void *, int, void *)>(bundle, "tvm_runtime_get_output")(
handle, 0, &output);
for (auto i = 0; i < output_storage.size(); ++i) {
std::cerr << "output[" << i << "]: " << output_storage[i] << std::endl;
}
getFunc<void(void *)>(bundle, "tvm_runtime_destroy")(handle);
dlclose(bundle);
return 0;
}
runtime.cc

// An all-in-one TVM runtime: this translation unit pulls in the minimal set
// of runtime sources the bundle needs, so that bundle.so is fully
// self-contained and the target host needs no pre-installed TVM runtime.
#include <dlpack/dlpack.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/packed_func.h>

#include "../../src/runtime/c_runtime_api.cc"
#include "../../src/runtime/cpu_device_api.cc"
#include "../../src/runtime/workspace_pool.cc"
#include "../../src/runtime/module_util.cc"
#include "../../src/runtime/module.cc"
#include "../../src/runtime/registry.cc"
#include "../../src/runtime/file_util.cc"
#include "../../src/runtime/threading_backend.cc"
#include "../../src/runtime/thread_pool.cc"
#include "../../src/runtime/ndarray.cc"
#include "../../src/runtime/system_lib_module.cc"
#include "../../src/runtime/graph/graph_runtime.cc"