Commit 338bfd45 by Tianqi Chen Committed by GitHub

[CODEGEN] More robust llvm intrin handling, remove graph executor (#519)

parent 4468c576
......@@ -3,9 +3,9 @@ This folder contains various extension projects using TVM,
they also serve as examples on how to use TVM in your own project.
If you are interested in writing optimized kernels with TVM, check out [TOPI: TVM Operator Inventory](../topi).
If you are interested in end to end deep learning model compilation, check out [NNVM Compiler](https://github.com/dmlc/nnvm).
- [extension](extension) How to extend TVM C++ api along with python API.
- [graph_executor](graph_executor) Build nnvm graph executor with TVM.
- [ios_rpc](ios_rpc) iOS RPC server.
- [android_rpc](android_rpc) Android RPC server.
- [howto_deploy](howto_deploy) Tutorial on how to deploy TVM with minimum code dependency.
\ No newline at end of file
# Minimum Makefile for the extension package

# Simple (:=) assignment so the shell command runs once at parse time
# instead of on every expansion of TVM_ROOT.
TVM_ROOT := $(shell cd ../.. && pwd)
NNVM_PATH = nnvm
DMLC_CORE = ${TVM_ROOT}/dmlc-core

# Compiler flags shared by every object in this package.
PKG_CFLAGS = -std=c++11 -O2 -fPIC\
  -I${TVM_ROOT}/include\
  -I${DMLC_CORE}/include\
  -I${TVM_ROOT}/dlpack/include\
  -I${TVM_ROOT}/HalideIR/src

PKG_LDFLAGS =

# The linker spelling for "link every archive member" depends on the platform.
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S), Darwin)
  PKG_LDFLAGS += -undefined dynamic_lookup
  WHOLE_ARCH = -all_load
  NO_WHOLE_ARCH = -noall_load
else
  WHOLE_ARCH = --whole-archive
  NO_WHOLE_ARCH = --no-whole-archive
endif

# Simple (:=) assignment so the glob is expanded once.
NNVM_CONTRIB_SRC := $(wildcard src/*.cc)
NNVM_CONTRIB_OBJ := $(patsubst src/%.cc, build/%.o, $(NNVM_CONTRIB_SRC))

include $(DMLC_CORE)/make/dmlc.mk

# Everything the shared library is linked from: local objects plus the
# dmlc-core and nnvm static archives.
ALL_DEP = $(NNVM_CONTRIB_OBJ)
PKG_CFLAGS += -I${NNVM_PATH}/include
ALL_DEP += ${DMLC_CORE}/libdmlc.a ${NNVM_PATH}/lib/libnnvm.a
.PHONY: clean all

all: lib/libtvm_graph_exec.so

# Fetch the NNVM sources on demand; used as an order-only prerequisite below
# so that the directory's timestamp never triggers rebuilds.
nnvm:
	git clone https://github.com/dmlc/nnvm --recursive

# Sub-builds go through $(MAKE) so -j / -n and the jobserver are propagated,
# and a failing `cd` aborts the recipe instead of building in the wrong place.
nnvm/lib/libnnvm.a: | nnvm
	+ cd nnvm && $(MAKE)

$(DMLC_CORE)/libdmlc.a:
	+ cd $(DMLC_CORE) && $(MAKE) libdmlc.a

# Compile one source file; the first command records its header dependencies
# in build/<name>.d, which is picked up by the -include lines at the bottom.
build/%.o: src/%.cc | nnvm
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -MM -MT build/$*.o $< >build/$*.d
	$(CXX) $(PKG_CFLAGS) -c $< -o $@

# Link the objects and pull in every member of the static archives.
lib/libtvm_graph_exec.so: $(ALL_DEP)
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -shared -o $@ $(filter %.o, $^) \
	-Wl,${WHOLE_ARCH} $(filter %.a, $^) -Wl,${NO_WHOLE_ARCH} $(PKG_LDFLAGS)

clean:
	$(RM) -r build lib bin *~ */*~ */*/*~ */*/*/*~ */*.o */*/*.o */*/*/*.o */*.d */*/*.d */*/*/*.d

-include build/*.d
-include build/*/*.d
Example Graph Executor
======================
This folder contains a minimum example of graph executor library based on TVM and NNVM.
It demonstrates how to build a computation graph compilation and execution framework.
- To build the library, NNVM needs to be cloned and built first (the Makefile clones it into the `nnvm` subfolder of this folder).
"""The graph build library"""
from __future__ import absolute_import as _abs
import tvm
from . import _base
from nnvm.symbol import *
from . import op_tvm_def
from .build import build, bind, save_params, compile_graph, remote_load_exec
from __future__ import absolute_import as _abs
import os
import sys
# Pick the module that holds interpreter-wide builtins on this Python version.
if sys.version_info[0] == 3:
    import builtins as __builtin__
else:
    import __builtin__

# Directory that contains this file.
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))

# Publish the base path through the builtins module; if it is already set,
# it must agree with this package's location.
if hasattr(__builtin__, "NNVM_BASE_PATH"):
    assert __builtin__.NNVM_BASE_PATH == curr_path
else:
    __builtin__.NNVM_BASE_PATH = curr_path

# Publish the library name the same way. The consistency check compares
# against the library name itself (not the path), so executing this module a
# second time does not fail.
if hasattr(__builtin__, "NNVM_LIBRARY_NAME"):
    assert __builtin__.NNVM_LIBRARY_NAME == "libtvm_graph_exec"
else:
    __builtin__.NNVM_LIBRARY_NAME = "libtvm_graph_exec"
"""Logics related to build."""
import nnvm.graph as graph
import tvm
import json
# Maps a dtype name to the integer flag that build() stores in the graph's
# "dtype" attribute. Only "float32" is listed.
DTYPE_DICT = {
"float32": 0
}
# Global function registered under "tvm_graph._create_executor"; called by bind().
_create_exec = tvm.get_global_func("tvm_graph._create_executor")
def build(sym, target, shape, dtype="float32"):
    """Create a graph from `sym`, annotate it and run the compilation passes.

    Parameters
    ----------
    sym : symbol
        Passed to ``graph.create``.
    target : str
        Stored on the graph as the "target" attribute.
    shape : dict
        Maps a node name to the shape of that node's first entry.
    dtype : str
        Key into DTYPE_DICT; the resulting flag is used for every entry.

    Returns
    -------
    g : graph
        The graph after InferShape, InferType, GraphPartition and GraphFuse.
    """
    graph_obj = graph.create(sym)
    # Entry layout is read back from the graph's JSON form.
    serialized = json.loads(graph_obj.apply('SaveJSON').json_attr('json'))
    nodes = serialized['nodes']
    row_ptr = serialized['node_row_ptr']
    name_to_nid = {}
    for nid, node in enumerate(nodes):
        name_to_nid[node['name']] = nid
    num_entries = row_ptr[-1]
    entry_shapes = [[] for _ in range(num_entries)]
    entry_dtypes = [DTYPE_DICT[dtype] for _ in range(num_entries)]
    # Fill in the shapes the caller supplied; the rest stay empty.
    for name, known_shape in shape.items():
        entry_shapes[row_ptr[name_to_nid[name]]] = known_shape
    graph_obj._set_json_attr("shape", entry_shapes, 'list_shape')
    graph_obj._set_json_attr("dtype", entry_dtypes, 'list_int')
    graph_obj._set_json_attr("target", target, 'str')
    for pass_name in ("InferShape", "InferType", "GraphPartition", "GraphFuse"):
        graph_obj = graph_obj.apply(pass_name)
    return graph_obj
def bind(g, ctx):
    """Create an executor for graph `g` on the device described by `ctx`.

    Forwards the graph handle and the context's device type and device id to
    the "tvm_graph._create_executor" global function and returns its result.
    """
    return _create_exec(g.handle, ctx.device_type, ctx.device_id)
# Global function registered under "tvm_graph._get_module_from_graph"; used by compile_graph().
_get_module = tvm.get_global_func("tvm_graph._get_module_from_graph")
def compile_graph(lib_fname, sym, target, shape, dtype="float32"):
    """Build the graph, save its compiled module and return the graph JSON.

    Parameters
    ----------
    lib_fname : str
        Path the compiled module is saved to.
    sym, target, shape, dtype
        Forwarded unchanged to build().

    Returns
    -------
    json_str : str
        The 'json' attribute of the built graph after the SaveJSON pass.
    """
    built = build(sym, target, shape, dtype)
    _get_module(built.handle).save(lib_fname)
    return built.apply('SaveJSON').json_attr('json')
@tvm.register_func("tvm_graph.lower")
def _lower(sch, inputs, func_name):
    """Lower schedule `sch` and always hand back a sequence of functions.

    A single lowered function is wrapped in a one-element list; an Array,
    tuple or list result is returned as is.
    """
    lowered = tvm.lower(sch, inputs, name=func_name)
    if isinstance(lowered, (tvm.container.Array, tuple, list)):
        return lowered
    return [lowered]
@tvm.register_func("tvm_graph.build_target")
def _build(funcs, target):
    """Build `funcs` for `target` with tvm.build and return the result."""
    built_module = tvm.build(funcs, target=target)
    return built_module
# Global function registered under "tvm_graph._save_param_dict"; used by save_params().
_save_param_dict = tvm.get_global_func("tvm_graph._save_param_dict")
def save_params(fname, params):
    """Save a name -> array dictionary through "tvm_graph._save_param_dict".

    The call receives a flat argument list: the file name, the number of
    parameters, then each name followed by its array.
    """
    flat_args = [fname, len(params)]
    for name, array in params.items():
        flat_args.extend((name, array))
    _save_param_dict(*flat_args)
def remote_load_exec(sess, sym_json, remote_module_name, param_blob, ctx):
    """Load a remote graph executor, with the local files.

    Parameters
    ----------
    sess : RPC session
        Session used to look up (and cache) the remote
        "tvm_graph._load_executor" function.

    sym_json : str
        The symbol json file.

    remote_module_name : str
        The relative library location to remote temp folder. The
        library need to be uploaded first.

    param_blob : bytes or bytearray
        The binary file to the local parameters.

    ctx : context
        Context whose device_type encodes the session table index; it must
        belong to `sess`.

    Returns
    -------
    exec : GraphExecutor
        The remote graph executor containing remote function.
    """
    if "load_executor" not in sess._remote_funcs:
        sess._remote_funcs["load_executor"] = sess.get_function("tvm_graph._load_executor")
    sess_mask = tvm.contrib.rpc.RPC_SESS_MASK
    # Floor division: with true division (Python 3) the quotient is a float
    # that only equals the table index when the device type is an exact
    # multiple of the mask.
    assert ctx.device_type // sess_mask == sess._tbl_index + 1
    # Strip the session part to recover the device type on the remote side.
    device_type = ctx.device_type % sess_mask
    return sess._remote_funcs["load_executor"](sym_json,
                                               remote_module_name,
                                               bytearray(param_blob),
                                               device_type,
                                               ctx.device_id)
"""NNVM operator definitions."""
import tvm
@tvm.register_func("tvm_graph.compute.add")
def compute_add(a, b):
    """Compute of shape `a.shape` whose element at each index is a + b."""
    def _sum_at(*index):
        return a(*index) + b(*index)
    return tvm.compute(a.shape, _sum_at)
@tvm.register_func("tvm_graph.compute.exp")
def compute_exp(a):
    """Compute of shape `a.shape` whose element at each index is exp(a)."""
    def _exp_at(*index):
        return tvm.exp(a(*index))
    return tvm.compute(a.shape, _exp_at)
@tvm.register_func("tvm_graph.schedule.ewise")
def schedule_ewise(outs, target):
    """Create a schedule for the ops of `outs` and apply AutoInlineElemWise.

    `target` is accepted but not used by this function.
    """
    output_ops = [tensor.op for tensor in outs]
    sched = tvm.create_schedule(output_ops)
    tvm.schedule.AutoInlineElemWise(sched)
    return sched
/*!
* Copyright (c) 2017 by Contributors
* \file graph_executor.cc
*/
#include "./graph_executor.h"
namespace tvm {
namespace contrib {
PackedFunc GraphExecutor::GetFunction(
    const std::string& name,
    const std::shared_ptr<ModuleNode>& sptr_to_self) {
  // Look up a member function by name. Every returned closure captures
  // sptr_to_self in addition to `this`.
  if (name == "set_input") {
    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
        // The first argument is either an input name (string) or an index.
        if (args[0].type_code() != kStr) {
          this->SetInput(args[0], args[1]);
        } else {
          this->SetInput(this->GetIndex(args[0]), args[1]);
        }
      });
  }
  if (name == "get_output") {
    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
        this->GetOutput(args[0], args[1]);
      });
  }
  if (name == "run") {
    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
        this->Run();
      });
  }
  if (name == "load_params") {
    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
        this->LoadParamsFromBlob(args[0]);
      });
  }
  // Unknown name: hand back a default-constructed PackedFunc.
  return PackedFunc();
}
GraphExecutor::~GraphExecutor() {
  // Free every tensor held in the storage pool.
  for (size_t i = 0; i < storage_pool_.size(); ++i) {
    TVM_CCALL(TVMArrayFree(storage_pool_[i]));
  }
}
void GraphExecutor::Run() {
// setup the array and requirements.
for (size_t i = 0; i < op_execs_.size(); ++i) {
if (op_execs_[i]) op_execs_[i]();
}
}
// Bind the executor to graph `g` and context `ctx`, then build its runtime state.
void GraphExecutor::Init(const nnvm::Graph& g, TVMContext ctx) {
graph_ = g;
ctx_ = ctx;
// The compiled code module is read from the graph's "module" attribute.
module_ = g.GetAttr<tvm::runtime::Module>("module");
// Order matters: SetupOpExecs copies from data_entry_, which SetupStorage fills.
this->SetupNameIndex();
this->SetupStorage();
this->SetupOpExecs();
}
int GraphExecutor::GetIndex(std::string name) {
  // Fails the check when `name` was not recorded by SetupNameIndex.
  CHECK(name_idx_.count(name))
      << name << " is not in the graph.";
  const size_t position = name_idx_.at(name);
  return static_cast<int>(position);
}
// Copy `data_in` into the storage of the index-th input node.
void GraphExecutor::SetInput(int index, DLTensor* data_in) {
const auto& idx = graph_.indexed_graph();
// A negative index becomes a very large size_t and fails this check too.
CHECK_LT(static_cast<size_t>(index), idx.input_nodes().size());
// Entry id of output 0 of the selected input node.
uint32_t eid = idx.entry_id(idx.input_nodes()[index], 0);
TVM_CCALL(TVMArrayCopyFromTo(data_in, &data_entry_[eid], nullptr));
}
// Copy the index-th graph output into `data_out`.
void GraphExecutor::GetOutput(int index, DLTensor* data_out) {
const auto& idx = graph_.indexed_graph();
// A negative index becomes a very large size_t and fails this check too.
CHECK_LT(static_cast<size_t>(index), idx.outputs().size());
uint32_t eid = idx.entry_id(idx.outputs()[index]);
TVM_CCALL(TVMArrayCopyFromTo(&data_entry_[eid], data_out, nullptr));
}
// Read one tensor record from `strm` into `tensor`.
// Record layout, in read order: magic header, reserved word, ctx, ndim,
// dtype, ndim int64 shape values, int64 payload byte size, payload bytes.
// Every failed read or mismatch fails a CHECK; on success returns true.
bool LoadDLTensor(dmlc::Stream* strm, DLTensor* tensor) {
uint64_t header, reserved;
CHECK(strm->Read(&header, sizeof(header)))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&reserved, sizeof(reserved)))
<< "Invalid DLTensor file format";
CHECK(header == kTVMNDArrayMagic)
<< "Invalid DLTensor file format";
// The stored ctx, ndim and dtype overwrite the fields already in `tensor`.
CHECK(strm->Read(&tensor->ctx, sizeof(tensor->ctx)))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&tensor->ndim, sizeof(tensor->ndim)))
<< "Invalid DLTensor file format";
CHECK(strm->Read(&tensor->dtype, sizeof(tensor->dtype)))
<< "Invalid DLTensor file format";
int ndim = tensor->ndim;
// NOTE(review): shape values are written into the buffer tensor->shape
// already points to, sized by the ndim just read from the stream —
// presumably callers guarantee the buffer is large enough; confirm.
CHECK(strm->Read(tensor->shape, sizeof(int64_t) * ndim))
<< "Invalid DLTensor file format";
int64_t size = 1;
// Element size in bytes is derived from dtype.bits only.
int type_size = tensor->dtype.bits / 8;
for (int i = 0; i < ndim; ++i) {
size *= tensor->shape[i];
}
int64_t data_byte_size;
CHECK(strm->Read(&data_byte_size, sizeof(data_byte_size)))
<< "Invalid DLTensor file format";
// The stored payload size must match the size implied by shape and dtype.
CHECK(data_byte_size == type_size * size)
<< "Invalid DLTensor file format";
// The payload is read directly into tensor->data.
CHECK(strm->Read(tensor->data, type_size * size))
<< "Invalid DLTensor file format";
return true;
}
// Load named parameters from `strm` into the data entries of the matching
// graph inputs.
// Stream layout, in read order: list magic header, reserved word, the list of
// parameter names, the tensor count, then one tensor record per name.
// Any malformed field fails a CHECK.
void GraphExecutor::LoadParams(dmlc::Stream *strm) {
  uint64_t header, reserved;
  CHECK(strm->Read(&header))
      << "Invalid parameters file format";
  CHECK(header == kTVMNDArrayListMagic)
      << "Invalid parameters file format";
  CHECK(strm->Read(&reserved))
      << "Invalid parameters file format";

  std::vector<std::string> names;
  CHECK(strm->Read(&names))
      << "Invalid parameters file format";

  // Map every graph input name to the entry id of its first output.
  std::unordered_map<std::string, size_t> name_eid;
  const auto& idx = graph_.indexed_graph();
  for (int nid : idx.input_nodes()) {
    name_eid.emplace(idx[nid].source->attrs.name, idx.entry_id(nid, 0));
  }

  // The count must actually be read; otherwise it would be compared while
  // still uninitialized on a truncated stream.
  uint64_t sz = 0;
  CHECK(strm->Read(&sz, sizeof(sz)))
      << "Invalid parameters file format";
  size_t size = static_cast<size_t>(sz);
  CHECK(size == names.size())
      << "Invalid parameters file format";

  for (size_t i = 0; i < size; ++i) {
    auto iter = name_eid.find(names[i]);
    CHECK(iter != name_eid.end())
        << "Invalid parameters file format: parameter " << names[i]
        << " is not an input of the graph";
    CHECK(LoadDLTensor(strm, &data_entry_[iter->second]))
        << "Invalid parameters file format";
  }
}
// Load parameters from an in-memory blob by wrapping it in a memory stream
// and delegating to LoadParams.
void GraphExecutor::LoadParamsFromBlob(std::string param_blob) {
dmlc::MemoryStringStream strm(&param_blob);
this->LoadParams(&strm);
}
void GraphExecutor::SetupNameIndex() {
nnvm::Symbol s;
s.outputs = graph_.outputs;
std::vector<std::string> input_names = s.ListInputNames(nnvm::Symbol::kAll);
for (size_t i = 0; i < input_names.size(); ++i) {
name_idx_[input_names[i]] = i;
}
}
// Allocate the storage pool and point every data entry at its pooled tensor.
void GraphExecutor::SetupStorage() {
const auto& idx = graph_.indexed_graph();
// Grab saved optimization plan from graph.
auto vstorage = graph_.MoveCopyAttr<StorageVector>("storage_id");
const auto& vtype = graph_.GetAttr<DLTypeVector>("dltype");
data_shape_ = graph_.GetAttr<ShapeVector>("shape");
data_entry_.resize(idx.num_node_entries());
// Find the first storage id that the plan does not use yet.
int max_id = 0;
for (size_t i = 0; i < data_shape_.size(); ++i) {
max_id = std::max(vstorage[i] + 1, max_id);
}
// Give every input node a fresh storage id of its own.
for (const auto& e : idx.input_nodes()) {
vstorage[idx.entry_id(e, 0)] = max_id++;
}
// size of each storage pool entry
std::vector<size_t> pool_entry_bytes;
// Each pool entry must be as large as the biggest data entry mapped to it.
for (size_t i = 0; i < data_shape_.size(); ++i) {
int storage_id = vstorage[i];
size_t size = data_shape_[i].Size();
CHECK_GE(storage_id, 0) << "Do not support runtime shape op";
DLDataType t = vtype[i];
size_t bits = t.bits * t.lanes;
CHECK_EQ(bits % 8U, 0U);
size_t bytes = (bits / 8U) * size;
size_t sid = static_cast<size_t>(storage_id);
if (sid >= pool_entry_bytes.size()) {
pool_entry_bytes.resize(sid + 1, 0);
}
pool_entry_bytes[sid] = std::max(pool_entry_bytes[sid], bytes);
}
// Allocate the space.
for (size_t i = 0; i < pool_entry_bytes.size(); ++i) {
// Each pool entry is a 1-D float32 array; the byte size is rounded up to
// a whole number of 4-byte elements.
TShape shape{static_cast<int64_t>(pool_entry_bytes[i] + 3) / 4};
DLTensor* tensor;
TVM_CCALL(TVMArrayAlloc(
shape.data(), 1, kFloat, 32, 1, ctx_.device_type, ctx_.device_id, &tensor));
storage_pool_.push_back(tensor);
}
// Assign the pooled entries.
for (size_t i = 0; i < data_entry_.size(); ++i) {
int storage_id = vstorage[i];
// Copy the pooled tensor's fields, then override shape, ndim and dtype
// with this entry's own; shape points into data_shape_.
data_entry_[i] = *storage_pool_[storage_id];
data_entry_[i].shape = const_cast<int64_t*>(data_shape_[i].data());
data_entry_[i].ndim = data_shape_[i].ndim();
data_entry_[i].dtype = vtype[i];
}
}
// Create one executor closure per non-variable node of the graph.
void GraphExecutor::SetupOpExecs() {
static const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op");
const auto& idx = graph_.indexed_graph();
op_execs_.resize(idx.num_nodes());
// Variable nodes keep an empty executor slot.
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
const auto& inode = idx[nid];
if (inode.source->is_variable()) continue;
// Argument list: copies of the input entries followed by the output entries.
std::vector<DLTensor> args;
for (const auto& e : inode.inputs) {
args.push_back(data_entry_[idx.entry_id(e)]);
}
for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
uint32_t eid = idx.entry_id(nid, index);
args.push_back(data_entry_[eid]);
}
// Only tvm_op nodes can be executed.
CHECK_EQ(inode.source->op(), tvm_op)
<< "transform the graph to tvm op";
op_execs_[nid] = CreateTVMOp(
inode.source->attrs, args, inode.inputs.size());
}
}
// Create the closure that runs one tvm_op node.
//
// \param attrs Node attributes; "func_name" selects the compiled function in
//        module_, and "flatten_data" == "1" requests that every argument be
//        presented as a 1-D tensor.
// \param args Tensors passed to the function: inputs followed by outputs.
// \param num_inputs Number of leading entries of args that are inputs
//        (not used by this implementation).
// \return A callable that invokes the compiled function on the arguments.
FOpExec GraphExecutor::CreateTVMOp(const nnvm::NodeAttrs& attrs,
                                   std::vector<DLTensor> args,
                                   size_t num_inputs) {
  // Owns everything the packed call points to, so the pointers stay valid
  // for as long as the returned closure is alive.
  struct OpArgs {
    std::vector<DLTensor> args;
    std::vector<TVMValue> arg_values;
    std::vector<int> arg_tcodes;
    std::vector<int64_t> shape_data;
  };
  auto it = attrs.dict.find("func_name");
  CHECK(it != attrs.dict.end())
      << "tvm_op must need func_name attr";
  // A missing flatten_data entry means "do not flatten" rather than an
  // out_of_range exception.
  auto flatten_it = attrs.dict.find("flatten_data");
  bool flatten = (flatten_it != attrs.dict.end() && flatten_it->second == "1");
  std::shared_ptr<OpArgs> arg_ptr = std::make_shared<OpArgs>();
  // setup address.
  arg_ptr->args = std::move(args);
  if (flatten) {
    arg_ptr->shape_data.resize(arg_ptr->args.size());
  }
  for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
    TVMValue v;
    DLTensor* t = &(arg_ptr->args[i]);
    v.v_handle = t;
    arg_ptr->arg_values.push_back(v);
    arg_ptr->arg_tcodes.push_back(kArrayHandle);
    if (flatten) {
      // The initial value must be int64_t: std::accumulate accumulates in
      // the type of its init argument, so an int init would compute the
      // element count in int.
      arg_ptr->shape_data[i] = std::accumulate(
          t->shape, t->shape + t->ndim,
          static_cast<int64_t>(1), std::multiplies<int64_t>());
      t->ndim = 1;
      t->shape = &(arg_ptr->shape_data[i]);
    }
  }
  // get compiled function from module.
  runtime::PackedFunc pf = module_.GetFunction(it->second, false);
  CHECK(pf != nullptr) << "no such function in module: " << it->second;
  // The closure shares ownership of the argument storage.
  auto fexec = [arg_ptr, pf] () {
    runtime::TVMRetValue rv;
    runtime::TVMArgs targs(arg_ptr->arg_values.data(),
                           arg_ptr->arg_tcodes.data(),
                           static_cast<int>(arg_ptr->arg_values.size()));
    pf.CallPacked(targs, &rv);
  };
  return fexec;
}
/*!
 * \brief Parse keyword arguments as PType arguments and save to parsed.
 *  A dmlc::ParamError raised while parsing is rethrown with the operator
 *  name, node name and all attribute key/value pairs appended.
 */
template<typename PType>
inline void ParamParser(nnvm::NodeAttrs* attrs) {
  PType parsed;
  try {
    parsed.Init(attrs->dict);
  } catch (const dmlc::ParamError& err) {
    std::ostringstream msg;
    msg << err.what()
        << ", in operator " << attrs->op->name << "("
        << "name=\"" << attrs->name << "\"";
    for (const auto& kv : attrs->dict) {
      msg << ", " << kv.first << "=\"" << kv.second << "\"";
    }
    msg << ")";
    throw dmlc::ParamError(msg.str());
  }
  attrs->parsed = std::move(parsed);
}
DMLC_REGISTER_PARAMETER(TVMOpParam);
// Register the "tvm_op" operator. Its attributes are parsed into TVMOpParam,
// and its input/output counts are taken from the parsed parameter.
NNVM_REGISTER_OP(tvm_op)
.set_attr_parser(ParamParser<TVMOpParam>)
.set_num_inputs([](const NodeAttrs& attrs) {
const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
return param.num_inputs;
})
.set_num_outputs([](const NodeAttrs& attrs) {
const TVMOpParam& param = nnvm::get<TVMOpParam>(attrs.parsed);
return param.num_outputs;
});
// Global function "tvm_graph._load_executor".
// Arguments: graph JSON string, library file name, parameter blob,
// device type, device id. Returns a Module wrapping a GraphExecutor.
TVM_REGISTER_GLOBAL("tvm_graph._load_executor")
.set_body([](TVMArgs args, TVMRetValue *rv) {
std::string sym_json = args[0];
std::string lib_fname = args[1];
std::string param_blob = args[2];
TVMContext ctx;
ctx.device_type = static_cast<DLDeviceType>(args[3].operator int());
ctx.device_id = args[4];
// load graph from json string
nnvm::Graph g;
g.attrs["json"] = std::make_shared<nnvm::any>(sym_json);
g = nnvm::ApplyPass(std::move(g), "LoadJSON");
// load module from file
// The loader is looked up in the registry once and cached in a static.
static const PackedFunc* fsys_load_ = nullptr;
if (fsys_load_ == nullptr) {
fsys_load_ = runtime::Registry::Get("tvm.contrib.rpc.server.load_module");
CHECK(fsys_load_ != nullptr);
}
runtime::Module m = (*fsys_load_)(lib_fname);
// Init() reads the module back from the graph's "module" attribute.
g.attrs["module"] = std::make_shared<nnvm::any>(m);
std::shared_ptr<GraphExecutor> exec =
std::make_shared<GraphExecutor>();
exec->Init(g, ctx);
// load params from stream of string
exec->LoadParamsFromBlob(std::move(param_blob));
*rv = tvm::runtime::Module(exec);
});
} // namespace contrib
} // namespace tvm
namespace dmlc {
namespace json {
/*!
 * \brief JSON handler that stores a DLDataType as the integer list
 *  [code, bits, lanes].
 */
template<>
struct Handler<DLDataType> {
  /*! \brief Write data as a three-element integer list. */
  static void Write(JSONWriter *writer, const DLDataType& data) {
    std::vector<int> tmp({data.code, data.bits, data.lanes});
    writer->Write(tmp);
  }
  /*! \brief Read a three-element integer list into data. */
  static void Read(JSONReader *reader, DLDataType* data) {
    std::vector<int> tmp;
    reader->Read(&tmp);
    // Reject short or long lists instead of indexing past the end.
    CHECK_EQ(tmp.size(), 3U)
        << "DLDataType must be stored as [code, bits, lanes]";
    data->code = tmp[0];
    data->bits = tmp[1];
    data->lanes = tmp[2];
  }
};
DMLC_JSON_ENABLE_ANY(std::vector<DLDataType>, list_dltype);
}  // namespace json
}  // namespace dmlc
/*!
* Copyright (c) 2017 by Contributors
* \file graph_executor.h
*/
#ifndef TVM_GRAPH_EXECUTOR_H_
#define TVM_GRAPH_EXECUTOR_H_
#include <dmlc/io.h>
#include <dmlc/memory_io.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/module.h>
#include <nnvm/graph.h>
#include <nnvm/graph_attr_types.h>
#include <nnvm/tuple.h>
#include <nnvm/pass.h>
#include <numeric>
#include <string>
namespace tvm {
namespace contrib {
using tvm::runtime::TVMArgs;
using tvm::runtime::TVMRetValue;
using tvm::runtime::PackedFunc;
using nnvm::StorageVector;
using nnvm::ShapeVector;
using nnvm::TShape;
using nnvm::NodeAttrs;
/*! \brief DLPack compatible data types */
using DLTypeVector = std::vector<DLDataType>;
/*! \brief The executor function */
using FOpExec = std::function<void()>;
/*!
 * \brief macro to do C API call
 *  Evaluates func once and fails a CHECK carrying TVMGetLastError() when the
 *  result is non-zero. The do/while(0) wrapper makes the expansion a single
 *  statement, so `TVM_CCALL(x);` is safe inside an unbraced if/else.
 */
#define TVM_CCALL(func)                                            \
  do {                                                             \
    int ret = (func);                                              \
    CHECK_EQ(ret, 0)                                               \
        << TVMGetLastError();                                      \
  } while (0)
// Magic header written before each saved tensor record and checked on load.
constexpr uint64_t kTVMNDArrayMagic = 0xDD5E40F096B4A13F;
// Magic header written at the start of a saved parameter list and checked on load.
constexpr uint64_t kTVMNDArrayListMagic = 0xF7E58D4F05049CB7;
/*! \brief Graph Executor with TVM runtime */
class GraphExecutor : public runtime::ModuleNode {
public:
// Type key reported for this module.
const char* type_key() const {
return "GraphExecutor";
}
// Look up a member function by name: "set_input", "get_output", "run" or
// "load_params"; any other name yields an empty PackedFunc.
PackedFunc GetFunction(
const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self);
// Destructor
~GraphExecutor();
// Setup with a given graph
void Init(const nnvm::Graph& g, TVMContext ctx);
// Get index of variable
int GetIndex(std::string name);
// Copy data to index-th input
void SetInput(int index, DLTensor* data_in);
// Copy index-th output to data_out
void GetOutput(int index, DLTensor* data_out);
// Load parameters from stream
void LoadParams(dmlc::Stream* strm);
// Load parameters from binary file blob
void LoadParamsFromBlob(std::string param_blob);
// Execute the graph.
void Run();
private:
// functions
// Record the position of every graph input name in name_idx_.
void SetupNameIndex();
// Allocate storage_pool_ and fill data_entry_.
void SetupStorage();
// Fill op_execs_ with one closure per non-variable node.
void SetupOpExecs();
// Constructor to create TVM op
FOpExec CreateTVMOp(const nnvm::NodeAttrs& attrs,
std::vector<DLTensor> inputs,
size_t num_inputs);
// The graph to be executed.
nnvm::Graph graph_;
// The execution context
TVMContext ctx_;
// Common storage pool
std::vector<DLTensor*> storage_pool_;
// The data shape
std::vector<TShape> data_shape_;
// The data entry
std::vector<DLTensor> data_entry_;
// The operation lambda on each node
std::vector<FOpExec> op_execs_;
// The code module.
tvm::runtime::Module module_;
// Maps an input name to its position in the graph's input name list.
std::unordered_map<std::string, size_t> name_idx_;
};
/*! \brief Parameters of the tvm_op operator. */
struct TVMOpParam : public dmlc::Parameter<TVMOpParam> {
// Name of the compiled function to call; has no default.
std::string func_name;
// Number of inputs of the op; defaults to 1.
uint32_t num_inputs;
// Number of outputs of the op; defaults to 1.
uint32_t num_outputs;
// Whether arguments are passed as 1-D tensors; defaults to false.
bool flatten_data;
DMLC_DECLARE_PARAMETER(TVMOpParam) {
DMLC_DECLARE_FIELD(func_name);
DMLC_DECLARE_FIELD(num_inputs)
.set_default(1);
DMLC_DECLARE_FIELD(num_outputs)
.set_default(1);
DMLC_DECLARE_FIELD(flatten_data)
.set_default(false);
}
};
} // namespace contrib
} // namespace tvm
#endif // TVM_GRAPH_EXECUTOR_H_
/*!
* Copyright (c) 2017 by Contributors
* \file graph_executor_ext.cc
*/
#include "./graph_executor.h"
namespace tvm {
namespace contrib {
// Write one tensor record to `strm`: magic header, reserved word, ctx, ndim,
// dtype, the ndim shape values, the payload byte size and the payload.
// Always returns true.
bool SaveDLTensor(dmlc::Stream* strm, DLTensor* tensor) {
  const int ndim = tensor->ndim;
  // Element size in bytes is derived from dtype.bits only.
  const int type_size = tensor->dtype.bits / 8;
  int64_t num_elems = 1;
  for (int axis = 0; axis != ndim; ++axis) {
    num_elems *= tensor->shape[axis];
  }
  int64_t data_byte_size = type_size * num_elems;

  uint64_t header = kTVMNDArrayMagic;
  uint64_t reserved = 0;
  strm->Write(&header, sizeof(header));
  strm->Write(&reserved, sizeof(reserved));
  strm->Write(&tensor->ctx, sizeof(tensor->ctx));
  strm->Write(&tensor->ndim, sizeof(tensor->ndim));
  strm->Write(&tensor->dtype, sizeof(tensor->dtype));
  strm->Write(tensor->shape, sizeof(int64_t) * ndim);
  strm->Write(&data_byte_size, sizeof(data_byte_size));
  strm->Write(tensor->data, data_byte_size);
  return true;
}
// Global function "tvm_graph._save_param_dict".
// Arguments: file name, number of parameters, then for every parameter its
// name followed by its tensor. Writes the list magic header, a reserved
// word, the names, the tensor count and one tensor record per parameter.
TVM_REGISTER_GLOBAL("tvm_graph._save_param_dict")
.set_body([](TVMArgs args, TVMRetValue *rv) {
    CHECK_GE(args.num_args, 2)
        << "expected (fname, num_params, name0, array0, ...)";
    std::string fname = args[0];
    int num_params = args[1];
    // Validate the count before indexing, so a wrong num_params cannot read
    // past the end of the argument list.
    CHECK_GE(num_params, 0)
        << "num_params must be non-negative";
    CHECK_EQ(args.num_args, 2 + 2 * num_params)
        << "expected one (name, array) pair per parameter";
    std::vector<std::string> names;
    names.reserve(num_params);
    std::vector<DLTensor*> arrays;
    arrays.reserve(num_params);
    for (int i = 2; i < (2 + 2*num_params); i += 2) {
      names.emplace_back(args[i].operator std::string());
      arrays.emplace_back(args[i+1].operator DLTensor*());
    }
    std::unique_ptr<dmlc::Stream> fo(dmlc::Stream::Create(fname.c_str(), "w"));
    uint64_t header = kTVMNDArrayListMagic, reserved = 0;
    fo->Write(&header, sizeof(header));
    fo->Write(&reserved, sizeof(reserved));
    fo->Write(names);
    {
      uint64_t sz = static_cast<uint64_t>(arrays.size());
      fo->Write(&sz, sizeof(sz));
      for (size_t i = 0; i < sz; ++i) {
        SaveDLTensor(fo.get(), arrays[i]);
      }
    }
  });
// Create a GraphExecutor initialized with graph `g` on context `ctx` and
// return it wrapped in a runtime Module.
tvm::runtime::Module CreateExecutor(nnvm::Graph g, TVMContext ctx) {
  auto executor = std::make_shared<GraphExecutor>();
  executor->Init(g, ctx);
  return tvm::runtime::Module(executor);
}
// Global function "tvm_graph._create_executor".
// Arguments: graph handle, device type, device id.
// Returns the Module produced by CreateExecutor.
TVM_REGISTER_GLOBAL("tvm_graph._create_executor")
.set_body([](TVMArgs args, TVMRetValue *rv) {
void* graph_handle = args[0];
int device_type = args[1];
int device_id = args[2];
TVMContext ctx{static_cast<DLDeviceType>(device_type), device_id};
// The handle is treated as a pointer to nnvm::Graph; the graph is copied.
nnvm::Graph g = static_cast<nnvm::Graph*>(graph_handle)[0];
*rv = CreateExecutor(g, ctx);
});
// Global function "tvm_graph._get_module_from_graph".
// Argument: graph handle. Returns the graph's "module" attribute.
TVM_REGISTER_GLOBAL("tvm_graph._get_module_from_graph")
.set_body([](TVMArgs args, TVMRetValue *rv) {
void* graph_handle = args[0];
// The handle is treated as a pointer to nnvm::Graph.
nnvm::Graph* g = static_cast<nnvm::Graph*>(graph_handle);
*rv = g->MoveCopyAttr<tvm::runtime::Module>("module");
});
} // namespace contrib
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file graph_handle.cc
*/
#include <tvm/packed_func_ext.h>
#include "./graph_handle.h"
namespace tvm {
// Printer for GraphHandleNode: emits "graph-handle(handle=0x<hex address>)".
TVM_STATIC_IR_FUNCTOR(IRPrinter, vtable)
.set_dispatch<GraphHandleNode>([](const GraphHandleNode *op, IRPrinter *p) {
    // std::hex stays set on a stream until changed, so the format flags are
    // saved and restored; otherwise later integers printed to the same
    // stream would also come out in hexadecimal.
    const std::ios_base::fmtflags saved_flags = p->stream.flags();
    p->stream << "graph-handle("
              << "handle=0x" << std::hex
              << reinterpret_cast<uint64_t>(op->graph_handle) << ")";
    p->stream.flags(saved_flags);
  });
TVM_REGISTER_NODE_TYPE(GraphHandleNode);
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file graph_handle.h
* \brief Data structure about computational graph.
*/
#ifndef TVM_GRAPH_HANDLE_H_
#define TVM_GRAPH_HANDLE_H_
#include <string>
#include <tvm/base.h>
namespace tvm {
/*!
* \brief Computational graph handle.
* Use GraphHandle as its container type
*/
struct GraphHandleNode : public Node {
// Opaque pointer to the graph; this node only stores and exposes it.
void *graph_handle;
// Expose graph_handle to attribute visitors under the name "graph_handle".
void VisitAttrs(AttrVisitor* v) final {
v->Visit("graph_handle", &graph_handle);
}
static constexpr const char* _type_key = "GraphHandle";
TVM_DECLARE_NODE_TYPE_INFO(GraphHandleNode, Node);
};
/*! \brief Defines graph handle */
TVM_DEFINE_NODE_REF(GraphHandle, GraphHandleNode);
} // namespace tvm
#endif // TVM_GRAPH_HANDLE_H_
/*!
* Copyright (c) 2017 by Contributors
* \file Additional optimization pass of NNVM.
*/
#include <dmlc/json.h>
#include <nnvm/graph.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/graph_attr_types.h>
#include <nnvm/tuple.h>
#include <nnvm/pass.h>
#include <tvm/operation.h>
#include <tvm/lowered_func.h>
#include "./op_attr_types.h"
namespace tvm {
namespace contrib {
using nnvm::any;
using nnvm::IndexedGraph;
// The single fuse rule: the decision GraphPartition records for each node.
enum class FuseRule {
// No decision has been made for the node yet.
kUknown,
// The node is fused into the group of a node that consumes it.
kFuseToMaster,
// The node is not fused into a consumer's group.
kRealize
};
// Convert an integer type flag to a DLDataType. Only flag 0 (float32) is
// known; any other flag is reported through LOG(FATAL).
DLDataType GetDLType(int type_flag) {
  if (type_flag != 0) {
    LOG(FATAL) << "unknown type_flag=" << type_flag;
  }
  return Type2TVMType(Float(32));
}
// Partition the graph into segments
// Each segment will be compiled into one operator.
// Need also mark the property of the segment.
// Reads the "shape" and "dtype" graph attributes and stores the attributes
// "group_root", "group_master", "pattern" and "dltype" on the returned graph.
nnvm::Graph GraphPartition(nnvm::Graph g) {
// setup ref counter
const IndexedGraph& idx = g.indexed_graph();
// Get attributes from the graph
const ShapeVector& shape_vec = g.GetAttr<ShapeVector>("shape");
const DTypeVector& dtype_vec = g.GetAttr<DTypeVector>("dtype");
// Transform to dltype
// In future, directly do type inference in dltype.
DLTypeVector dltype_vec = DLTypeVector(dtype_vec.size());
for (size_t i = 0; i < dtype_vec.size(); ++i) {
dltype_vec[i] = GetDLType(dtype_vec[i]);
}
// Reference counter of each op node
// For now, always store result when an op is referred more than once.
std::vector<uint32_t> ref_count(idx.num_nodes(), 0);
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
const auto& inode = idx[nid];
if (inode.source->is_variable()) continue;
for (const auto& e : inode.inputs) {
++ref_count[e.node_id];
}
}
for (const auto& e : idx.outputs()) {
// this line will realize all the outputs
ref_count[e.node_id] += 2;
}
// Pattern of the subgraph
std::vector<TOpPattern> pattern_vec(idx.num_nodes(), kExtern);
// Whether node can be fused to parent.
std::vector<FuseRule> fuse_vec(idx.num_nodes(), FuseRule::kUknown);
// Master node id of fusion segment.
std::vector<int> master_vec(idx.num_nodes(), -1);
// Operator pattern
static auto& op_pattern = nnvm::Op::GetAttr<TOpPattern>("TOpPattern");
// Forward pass in node order: decide the fuse rule of each node's inputs
// and the pattern and master of the node itself.
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
const auto& inode = idx[nid];
if (inode.source->is_variable()) {
fuse_vec[nid] = FuseRule::kRealize; continue;
}
// Ops without a registered pattern are treated as kExtern.
TOpPattern pt = op_pattern.get(inode.source->op(), kExtern);
if (pt <= kBroadcast) {
int chosen_master = -1;
// Start from "elementwise" only for single-output nodes; the flag is
// cleared below when an input contradicts it.
bool ewise = inode.source->num_outputs() == 1;
for (const auto& e : inode.inputs) {
// Only inputs without a decision yet are considered here.
if (fuse_vec[e.node_id] == FuseRule::kUknown) {
TOpPattern ipt = pattern_vec[e.node_id];
if (ipt != kElemWise) ewise = false;
if (ipt <= kBroadcast) {
fuse_vec[e.node_id] = FuseRule::kFuseToMaster;
} else if (ipt == kComplex && chosen_master == -1 &&
shape_vec[idx.entry_id(nid, 0)] == shape_vec[idx.entry_id(e)]) {
// Adopt the master of the first kComplex input whose shape matches
// this node's first output.
chosen_master = master_vec[e.node_id];
fuse_vec[e.node_id] = FuseRule::kFuseToMaster;
} else {
fuse_vec[e.node_id] = FuseRule::kRealize;
}
}
// An input whose shape differs from the output clears the flag.
if (ewise) {
if (shape_vec[idx.entry_id(nid, 0)] != shape_vec[idx.entry_id(e)]) {
ewise = false;
}
}
}
master_vec[nid] = chosen_master;
if (chosen_master != -1) {
pt = kComplex;
} else {
pt = ewise ? kElemWise : kBroadcast;
}
} else {
// Any other pattern: the node is its own master and every undecided
// input is realized.
master_vec[nid] = nid;
for (const auto& e : inode.inputs) {
if (fuse_vec[e.node_id] == FuseRule::kUknown) {
fuse_vec[e.node_id] = FuseRule::kRealize;
if (master_vec[e.node_id] == -1) {
master_vec[e.node_id] = e.node_id;
}
}
}
}
pattern_vec[nid] = pt;
// A node referenced more than once (graph outputs count twice) is always
// realized.
if (ref_count[nid] > 1) {
fuse_vec[nid] = FuseRule::kRealize;
if (master_vec[nid] == -1) {
master_vec[nid] = nid;
}
}
}
// point to the group root id of each node
std::vector<int> group_vec(idx.num_nodes(), -1);
// Backward pass from the last node to the first: a node without a group
// becomes the root of its own group, and its group id is pushed to every
// input marked kFuseToMaster.
for (uint32_t i = idx.num_nodes(); i != 0; --i) {
uint32_t nid = i - 1;
const auto& inode = idx[nid];
if (group_vec[nid] == -1) {
group_vec[nid] = nid;
}
// propagate the group id.
for (const auto& e : inode.inputs) {
if (fuse_vec[e.node_id] == FuseRule::kFuseToMaster) {
// A fused input may belong to one group only.
CHECK(group_vec[e.node_id] == -1||
group_vec[e.node_id] == group_vec[nid]);
group_vec[e.node_id] = group_vec[nid];
}
}
}
g.attrs["group_root"] = std::make_shared<any>(std::move(group_vec));
g.attrs["group_master"] = std::make_shared<any>(std::move(master_vec));
g.attrs["pattern"] = std::make_shared<any>(std::move(pattern_vec));
g.attrs["dltype"] = std::make_shared<any>(std::move(dltype_vec));
return g;
}
// Register GraphPartition as an NNVM pass that depends on the "shape" and
// "dtype" graph attributes and provides "dltype".
NNVM_REGISTER_PASS(GraphPartition)
.set_body(GraphPartition)
.depend_graph_attr("shape")
.depend_graph_attr("dtype")
.provide_graph_attr("dltype");
// Hash functor for IndexedGraph::NodeEntry; the hash is the node id alone,
// so entries of the same node share a hash value.
struct NodeEntryHash {
  size_t operator()(const IndexedGraph::NodeEntry& e) const {
    return static_cast<size_t>(e.node_id);
  }
};
struct NodeEntryEqual {
size_t operator()(const IndexedGraph::NodeEntry& a,
const IndexedGraph::NodeEntry& b) const {
return a.node_id == b.node_id && a.index == b.index;
}
};
// Auxiliary data structure for representing fused op.
struct FuseEntry {
// The inputs
std::vector<IndexedGraph::NodeEntry> inputs;
// The input map: graph entry -> tensor that stands for it
std::unordered_map<IndexedGraph::NodeEntry, Tensor,
NodeEntryHash, NodeEntryEqual> imap;
// Output tensors
Array<Tensor> outputs;
// Placeholder for inputs
Array<Tensor> placeholder;
// Computing schedule
Schedule schedule;
// Function name
std::string func_name;
};
// Fuse the partitioned graph into segments.
// Create a new graph with fused noded.
// Also inheritate attribute shape, dltype from previous graph.
nnvm::Graph GraphFuse(nnvm::Graph g) {
// setup ref counter
const IndexedGraph& idx = g.indexed_graph();
// Get attributes from the graph
const ShapeVector& shape_vec = g.GetAttr<ShapeVector>("shape");
const DLTypeVector& dltype_vec = g.GetAttr<DLTypeVector>("dltype");
const DTypeVector& dtype_vec = g.GetAttr<DTypeVector>("dtype");
const std::vector<int>& group_vec = g.GetAttr<std::vector<int> >("group_root");
const std::vector<int>& master_vec = g.GetAttr<std::vector<int> >("group_master");
const std::vector<TOpPattern>& pattern_vec =
g.GetAttr<std::vector<TOpPattern> >("pattern");
std::string target = g.GetAttr<std::string>("target");
std::vector<FuseEntry> fuse_vec(idx.num_nodes());
// setup inputs and placeholder.
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
const auto& inode = idx[nid];
if (inode.source->is_variable()) continue;
CHECK_GE(group_vec[nid], 0);
int root_id = group_vec[nid];
FuseEntry& fe = fuse_vec[root_id];
TOpPattern pt = pattern_vec[root_id];
for (const auto& e : inode.inputs) {
if (group_vec[e.node_id] != root_id && fe.imap.count(e) == 0) {
Array<Expr> shape;
if (pt == kElemWise) {
// elementwise support flatten
int64_t prod = 1;
for (int64_t x : shape_vec[idx.entry_id(e)]) {
prod *= x;
}
CHECK_LE(prod, static_cast<int64_t>(std::numeric_limits<int>::max()));
shape.push_back(make_const(Int(32), prod));
} else {
for (int64_t x : shape_vec[idx.entry_id(e)]) {
CHECK_LE(x, static_cast<int64_t>(std::numeric_limits<int>::max()));
shape.push_back(make_const(Int(32), x));
}
}
std::ostringstream os_name;
os_name << "input" << fe.inputs.size();
Tensor data = placeholder(
shape, TVMType2Type(dltype_vec[idx.entry_id(e)]),
os_name.str());
fe.imap[e] = data;
fe.inputs.push_back(e);
fe.placeholder.push_back(data);
}
}
}
// Setup the Tensor
std::vector<Tensor> tensor_vec(idx.num_node_entries());
static auto& fcompute =
nnvm::Op::GetAttr<FTVMCompute>("FTVMCompute");
static auto& fschedule =
nnvm::Op::GetAttr<FTVMSchedule>("FTVMSchedule");
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
const auto& inode = idx[nid];
if (inode.source->is_variable()) continue;
int root_id = group_vec[nid];
FuseEntry& fe = fuse_vec[root_id];
Array<Tensor> inputs;
// input loading
for (const auto& e : inode.inputs) {
if (group_vec[e.node_id] != root_id) {
auto it = fe.imap.find(e);
CHECK(it != fe.imap.end());
inputs.push_back(it->second);
} else {
Tensor t = tensor_vec[idx.entry_id(e)];
CHECK(t.defined());
inputs.push_back(t);
}
}
Array<Tensor> out = fcompute[inode.source->op()](
inode.source->attrs, inputs);
CHECK_EQ(out.size(), inode.source->num_outputs());
// schedule on root node, and use master's schedule
if (nid != root_id) {
for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
uint32_t eid = idx.entry_id(nid, index);
tensor_vec[eid] = out[index];
}
} else {
fe.outputs = out;
int master = master_vec[root_id];
CHECK_GE(master, 0);
fe.schedule = fschedule[idx[master].source->op()](
idx[master].source->attrs, fe.outputs, target);
std::ostringstream os;
os << idx[master].source->attrs.name + "_id" << nid;
fe.func_name = os.str();
}
}
static const PackedFunc& flower = GetPackedFunc("tvm_graph.lower");
static const PackedFunc& fbuild = GetPackedFunc("tvm_graph.build_target");
Array<tvm::LoweredFunc> funcs;
for (const FuseEntry& fe : fuse_vec) {
if (fe.schedule.defined()) {
Array<tvm::Tensor> args = fe.placeholder;
for (tvm::Tensor x : fe.outputs) {
args.push_back(x);
}
Array<tvm::LoweredFunc> ret = flower(fe.schedule, args, fe.func_name);
for (LoweredFunc x : ret) {
funcs.push_back(x);
}
}
}
tvm::runtime::Module module = fbuild(funcs, target);
// Final step: Remap the node, with given attribute
const nnvm::Op* tvm_op = nnvm::Op::Get("tvm_op");
std::unordered_map<uint32_t, nnvm::NodePtr> old_new;
for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
const auto& inode = idx[nid];
if (inode.source->is_variable()) {
nnvm::NodePtr np = nnvm::Node::Create();
np->attrs = inode.source->attrs;
old_new[nid] = np;
} else {
int root_id = group_vec[nid];
if (nid != root_id) continue;
FuseEntry& fe = fuse_vec[root_id];
nnvm::NodePtr np = nnvm::Node::Create();
np->attrs.op = tvm_op;
np->attrs.name = inode.source->attrs.name;
np->attrs.dict["num_inputs"] = std::to_string(fe.inputs.size());
np->attrs.dict["num_outputs"] = std::to_string(fe.outputs.size());
np->attrs.dict["func_name"] = fuse_vec[nid].func_name;
np->attrs.dict["flatten_data"] = std::to_string(pattern_vec[nid] == kElemWise);
np->op()->attr_parser(&(np->attrs));
for (const auto& e : fe.inputs) {
auto it = old_new.find(e.node_id);
CHECK(it != old_new.end())
<< "cannot find node_id=" << e.node_id;
np->inputs.emplace_back(
nnvm::NodeEntry{it->second, e.index, e.version});
}
for (const uint32_t node_id : inode.control_deps) {
auto it = old_new.find(node_id);
CHECK(it != old_new.end());
np->control_deps.emplace_back(it->second);
}
old_new[nid] = np;
}
}
nnvm::Graph ret;
for (const auto& e : idx.outputs()) {
auto it = old_new.find(group_vec[e.node_id]);
CHECK(it != old_new.end())
<< "cannot find node_id=" << e.node_id;
ret.outputs.emplace_back(
nnvm::NodeEntry{it->second, e.index, e.version});
}
const IndexedGraph& new_idx = ret.indexed_graph();
ShapeVector new_shape_vec = ShapeVector(new_idx.num_node_entries(), TShape());
DTypeVector new_dtype_vec = DTypeVector(new_idx.num_node_entries());
DLTypeVector new_dltype_vec = DLTypeVector(new_idx.num_node_entries());
for (const auto& kv : old_new) {
uint32_t nid = kv.first;
const auto& inode = idx[nid];
for (uint32_t i = 0; i < inode.source->num_outputs(); ++i) {
uint32_t new_eid = new_idx.entry_id(new_idx.node_id(kv.second.get()), i);
uint32_t old_eid = idx.entry_id(nid, i);
new_shape_vec[new_eid] = shape_vec[old_eid];
new_dtype_vec[new_eid] = dtype_vec[old_eid];
new_dltype_vec[new_eid] = dltype_vec[old_eid];
}
}
ret.attrs["shape"] = std::make_shared<any>(std::move(new_shape_vec));
ret.attrs["dtype"] = std::make_shared<any>(std::move(new_dtype_vec));
ret.attrs["dltype"] = std::make_shared<any>(std::move(new_dltype_vec));
ret.attrs["module"] = std::make_shared<any>(std::move(module));
ret = nnvm::ApplyPass(ret, "PlanMemory");
return ret;
}
// Register GraphFuse as an NNVM pass under the name "GraphFuse".
NNVM_REGISTER_PASS(GraphFuse)
.set_body(GraphFuse);
/*!
 * \brief Access the layout tag used when nothing more specific is known.
 * \return Reference to a function-local static holding the string "default".
 */
const TLayoutInfo& GetDefaultLayout() {
  static TLayoutInfo layout_tag("default");
  return layout_tag;
}
/*!
 * \brief Build a fresh "layout_transform" node converting src layout to dst.
 *
 * The node gets a unique default name of the form "<src>_to_<dst><counter>",
 * where the counter is a function-local static that increases on every call.
 * The node has no inputs attached; the caller is expected to add them.
 *
 * \param src The layout the incoming data is in.
 * \param dst The layout the node should produce.
 * \return The newly created node with parsed attributes.
 */
nnvm::NodePtr CreateLayoutTransformNode(const std::string& src,
                                        const std::string& dst) {
  static const nnvm::Op* trans_op = nnvm::Op::Get("layout_transform");
  static int count = 0;
  const std::string serial = std::to_string(count++);

  nnvm::NodePtr node = nnvm::Node::Create();
  nnvm::NodeAttrs& node_attrs = node->attrs;
  node_attrs.op = trans_op;
  node_attrs.name = src + "_to_" + dst + serial;
  node_attrs.dict["src_layout"] = src;
  node_attrs.dict["dst_layout"] = dst;
  // run the registered parser so that attrs.parsed is populated
  node->op()->attr_parser(&node_attrs);
  return node;
}
/*!
 * \brief A simple layout transform pass that will
 *  insert layout transform nodes automatically.
 *
 * The pass mirrors every node of src, tracks the layout produced by every
 * node entry, and inserts a "layout_transform" node wherever the layout
 * produced for an input differs from the layout the consumer requests.
 * Graph outputs that are not in the default layout are converted back to it.
 *
 * Graph attributes read from src:
 *  - "shape"  (ShapeVector): used to decide whether a broadcast op acts as a
 *    plain map (all inputs have the same shape as output 0).
 *  - "layout" (std::vector<TLayoutInfo>): layout of each graph input, indexed
 *    by position in idx.input_nodes().
 *
 * \param src The graph to transform.
 * \return A new graph that only has its outputs set; no attribute of src is
 *  copied to the result.
 */
nnvm::Graph LayoutTransform(nnvm::Graph src) {
  static auto& op_layout_request =
      nnvm::Op::GetAttr<FTVMLayoutRequest>("FTVMLayoutRequest");
  static auto& op_vecop =
      nnvm::Op::GetAttr<FTVMVectorizedOp>("FTVMVectorizedOp");
  static auto& op_pattern = nnvm::Op::GetAttr<TOpPattern>("TOpPattern");

  const ShapeVector& shape_vec = src.GetAttr<ShapeVector>("shape");
  const std::vector<TLayoutInfo>& input_layouts =
      src.GetAttr<std::vector<TLayoutInfo> >("layout");

  const IndexedGraph& idx = src.indexed_graph();
  // layout produced by every node entry of the original graph
  std::vector<TLayoutInfo> produce_vec(idx.num_node_entries(), GetDefaultLayout());
  // original node id -> mirrored node in the new graph
  std::vector<nnvm::NodePtr> mirror_vec(idx.num_nodes(), nullptr);

  // use op pattern to decide whether an op is map
  auto is_map_op = [&](size_t nid) {
    TOpPattern pt = op_pattern.get(idx[nid].source->op(), kExtern);
    bool is_map = (pt <= kBroadcast);
    if (pt == kBroadcast) {
      // a broadcast op is only a map when no input actually gets broadcast
      for (const auto& e : idx[nid].inputs) {
        if (shape_vec[idx.entry_id(nid, 0)] != shape_vec[idx.entry_id(e)]) {
          is_map = false;
          break;
        }
      }
    }
    return is_map;
  };

  for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) {
    const auto& inode = idx[nid];
    nnvm::NodePtr new_node = nnvm::Node::Create();
    *new_node = *(inode.source);
    if (new_node->is_variable()) {
      auto input_iter = std::find(
          idx.input_nodes().cbegin(), idx.input_nodes().cend(), nid);
      CHECK(input_iter != idx.input_nodes().cend());
      size_t input_id = std::distance(idx.input_nodes().cbegin(), input_iter);
      // guard against a "layout" attribute shorter than the input list
      CHECK_LT(input_id, input_layouts.size())
          << "\"layout\" attribute has no entry for input " << input_id;
      produce_vec[idx.entry_id(nid, 0)] = input_layouts[input_id];
      mirror_vec[nid] = new_node;
      continue;
    }

    if (op_vecop.count(inode.source->op())) {
      new_node = op_vecop[inode.source->op()](inode.source);
      new_node->inputs.resize(new_node->num_inputs());
    }

    // set up output and input layouts
    std::vector<TLayoutInfo> request_ilayouts(new_node->num_inputs(), GetDefaultLayout());
    if (op_layout_request.count(new_node->op())) {
      std::vector<TLayoutInfo> produce_olayouts(new_node->num_outputs(), GetDefaultLayout());
      CHECK(op_layout_request[new_node->op()](new_node->attrs, &request_ilayouts, &produce_olayouts))
          << "Layout request fail";

      CHECK_EQ(request_ilayouts.size(), new_node->num_inputs());
      CHECK_EQ(produce_olayouts.size(), new_node->num_outputs());
      for (size_t i = 0; i < new_node->num_outputs(); ++i) {
        produce_vec[idx.entry_id(nid, i)] = produce_olayouts[i];
      }
    }

    // A map op without inputs has no input layout to forward; treating it as
    // a non-map op avoids reading inode.inputs[0] out of bounds.
    bool map_layout = is_map_op(nid) && !inode.inputs.empty();
    if (map_layout) {
      const TLayoutInfo& layout = produce_vec[idx.entry_id(inode.inputs[0])];
      for (const auto& e : inode.inputs) {
        if (produce_vec[idx.entry_id(e)] != layout) {
          map_layout = false;
          break;
        }
      }
      if (map_layout) {
        // all inputs agree: the op simply propagates that layout
        for (size_t i = 0; i < inode.source->num_outputs(); ++i) {
          produce_vec[idx.entry_id(nid, i)] = layout;
        }
      }
    }

    // The loop below indexes both containers by the original input position;
    // a vectorized replacement node must therefore expose at least as many.
    CHECK_GE(new_node->inputs.size(), inode.inputs.size())
        << "node " << inode.source->attrs.name
        << " has fewer inputs after vectorization";
    CHECK_GE(request_ilayouts.size(), inode.inputs.size())
        << "missing requested layout for inputs of "
        << inode.source->attrs.name;
    for (size_t i = 0; i < inode.inputs.size(); ++i) {
      const auto& e = inode.inputs[i];
      const nnvm::NodePtr& in = mirror_vec[e.node_id];
      new_node->inputs[i] =
          nnvm::NodeEntry{in, e.index, e.version};

      TLayoutInfo produce = produce_vec[idx.entry_id(e)];
      TLayoutInfo request = request_ilayouts[i];
      if (!map_layout && (produce != request)) {
        nnvm::NodePtr tnode = CreateLayoutTransformNode(produce, request);
        // replace the generated name with one derived from the producer
        tnode->attrs.name =
            idx[e.node_id].source->attrs.name + "_" + request;
        tnode->inputs.emplace_back(new_node->inputs[i]);
        new_node->inputs[i] = nnvm::NodeEntry{tnode, 0, 0};
      }
    }
    mirror_vec[nid] = new_node;
  }

  // convert every graph output back to the default layout if necessary
  std::vector<nnvm::NodeEntry> outputs;
  for (const auto& e : idx.outputs()) {
    TLayoutInfo produce = produce_vec[idx.entry_id(e)];
    if (produce != GetDefaultLayout()) {
      nnvm::NodePtr tnode = CreateLayoutTransformNode(produce, GetDefaultLayout());
      tnode->attrs.name =
          idx[e.node_id].source->attrs.name + "_default";
      tnode->inputs.emplace_back(
          nnvm::NodeEntry{mirror_vec[e.node_id], e.index, e.version});
      outputs.emplace_back(nnvm::NodeEntry{tnode, 0, 0});
    } else {
      outputs.emplace_back(
          nnvm::NodeEntry{mirror_vec[e.node_id], e.index, e.version});
    }
  }

  nnvm::Graph ret;
  ret.outputs = std::move(outputs);
  return ret;
}
// Register LayoutTransform as an NNVM pass under the name "LayoutTransform".
NNVM_REGISTER_PASS(LayoutTransform)
.set_body(LayoutTransform);
// Register the parameter structure used by the layout_transform operator.
DMLC_REGISTER_PARAMETER(LayoutTransformParam);
/*!
 * \brief Parse keyword arguments as PType arguments and save to parsed
 *
 * On failure the dmlc::ParamError is re-thrown with the operator name, the
 * node name and every key/value pair of attrs->dict appended to the message.
 *
 * \param attrs The node attributes; attrs->dict is read, attrs->parsed is set.
 * \tparam PType The parameter structure to initialise from attrs->dict.
 */
template<typename PType>
inline void ParamParser(nnvm::NodeAttrs* attrs) {
  PType parsed_param;
  try {
    parsed_param.Init(attrs->dict);
  } catch (const dmlc::ParamError& err) {
    // enrich the error with enough context to locate the offending node
    std::ostringstream msg;
    msg << err.what()
        << ", in operator " << attrs->op->name << "("
        << "name=\"" << attrs->name << "\"";
    for (const auto& item : attrs->dict) {
      msg << ", " << item.first << "=\"" << item.second << "\"";
    }
    msg << ")";
    throw dmlc::ParamError(msg.str());
  }
  attrs->parsed = std::move(parsed_param);
}
// The layout_transform operator: one input, one output, with its attributes
// parsed into a LayoutTransformParam by ParamParser.
NNVM_REGISTER_OP(layout_transform)
.set_attr_parser(ParamParser<LayoutTransformParam>)
.set_num_inputs(1)
.set_num_outputs(1)
.add_argument("data", "NDArray-or-Symbol", "Input data")
.add_arguments(LayoutTransformParam::__FIELDS__());
/*!
 * \brief Split off the part of the graph that only depends on parameters.
 *
 * A variable is marked as pruned when its name is in the "params" attribute
 * (std::unordered_set<std::string>) of src. A non-variable node is marked as
 * pruned when all of its inputs are pruned. Whenever a node that is not
 * pruned consumes the output of a pruned non-variable node, that input is
 * rewritten in place to a new variable named "<producer>_output<index>".
 *
 * \param src The graph to prune; its nodes are modified in place.
 * \return src with an added "pre_graph" attribute. The pre-graph outputs are
 *  the replaced entries, and its "pruned_params" attribute lists the names of
 *  the variables that replaced them, in the same order.
 */
nnvm::Graph PruneGraph(nnvm::Graph src) {
  const auto& params = src.GetAttr<std::unordered_set<std::string> >("params");

  std::unordered_set<nnvm::Node*> pruned;
  nnvm::NodeEntryMap<nnvm::NodePtr> entry_var;

  DFSVisit(src.outputs, [&](const nnvm::NodePtr& node) {
    const bool is_var = node->is_variable();
    if (is_var && params.count(node->attrs.name)) {
      pruned.emplace(node.get());
    }

    // variables never take the "all inputs pruned" route
    bool all_inputs_pruned = !is_var;
    for (const auto& in : node->inputs) {
      if (!pruned.count(in.node.get())) {
        all_inputs_pruned = false;
      }
    }

    if (all_inputs_pruned) {
      pruned.emplace(node.get());
      return;
    }

    // scan again to find edge nodes, skip variables
    for (auto& in : node->inputs) {
      if (in.node->is_variable()) continue;
      if (!pruned.count(in.node.get())) continue;
      if (!entry_var.count(in)) {
        nnvm::NodePtr replacement = nnvm::Node::Create();
        replacement->attrs.name =
            in.node->attrs.name + "_output" + std::to_string(in.index);
        entry_var.emplace(in, replacement);
      }
      in = nnvm::NodeEntry{entry_var.at(in), 0, 0};
    }
  });

  nnvm::Graph pre_graph;
  std::vector<std::string> output_names;
  pre_graph.outputs.reserve(entry_var.size());
  output_names.reserve(entry_var.size());
  for (auto kv : entry_var) {
    if (!kv.first.node->is_variable()) {
      pre_graph.outputs.emplace_back(kv.first);
      output_names.emplace_back(kv.second->attrs.name);
    }
  }

  pre_graph.attrs["pruned_params"] =
      std::make_shared<dmlc::any>(std::move(output_names));
  src.attrs["pre_graph"] =
      std::make_shared<dmlc::any>(std::move(pre_graph));
  return src;
}
// Register PruneGraph as an NNVM pass under the name "PruneGraph".
NNVM_REGISTER_PASS(PruneGraph)
.set_body(PruneGraph);
} // namespace contrib
} // namespace tvm
/*!
* Copyright (c) 2016 by Contributors
* \file op_attr_types.h
* \brief Operator attribute types and helper declarations used by the graph compiler.
*/
#ifndef TVM_OP_ATTR_TYPES_H_
#define TVM_OP_ATTR_TYPES_H_
#include <tvm/expr.h>
#include <tvm/tensor.h>
#include <tvm/schedule.h>
#include <tvm/packed_func_ext.h>
#include <tvm/runtime/registry.h>
#include <nnvm/op_attr_types.h>
#include <nnvm/graph_attr_types.h>
#include <nnvm/graph.h>
#include <vector>
#include <string>
namespace tvm {
namespace contrib {
using runtime::PackedFunc;
using nnvm::StorageVector;
using nnvm::ShapeVector;
using nnvm::DTypeVector;
using nnvm::TShape;
using nnvm::NodeAttrs;
/*! \brief DLPack compatible data types */
using DLTypeVector = std::vector<DLDataType>;
/*!
* \brief Computation description interface
* \param attrs The attribute of the node.
* \param inputs The input tensors(placeholders)
* \return The output description of the tensor.
*/
using FTVMCompute = std::function<
Array<Tensor>
(const NodeAttrs& attrs, const Array<Tensor>& inputs)>;
/*!
* \brief Build the computation schedule for
* op whose root is at current op.
* \param attrs The attribute of the node.
* \param outs The output tensors.
* \param target The build target.
* \return schedule The computation schedule.
*/
using FTVMSchedule = std::function<
Schedule(const NodeAttrs& attrs,
const Array<Tensor>& outs,
const std::string& target)>;
/*! \brief Layout Information. */
using TLayoutInfo = std::string;
/*!
* \brief The producer consumer function of node layout
* \param attrs The attribute of the node.
* \param ilayouts The input layouts that the node request.
* \param olayouts The output layouts that the node produce.
* \return bool The success flag.
*/
using FTVMLayoutRequest = std::function<bool (const NodeAttrs& attrs,
std::vector<TLayoutInfo> *ilayouts,
std::vector<TLayoutInfo> *olayouts)>;
/*! \brief The default layout. */
const TLayoutInfo& GetDefaultLayout();
/*! \brief Parameters of layout transform operator */
struct LayoutTransformParam : public dmlc::Parameter<LayoutTransformParam> {
// layout of the incoming data, read from the "src_layout" attribute
std::string src_layout;
// layout the operator produces, read from the "dst_layout" attribute
std::string dst_layout;
// both fields are declared without a default value
DMLC_DECLARE_PARAMETER(LayoutTransformParam) {
DMLC_DECLARE_FIELD(src_layout);
DMLC_DECLARE_FIELD(dst_layout);
}
};
/*! \brief Transform from normal operator to vectorized operator */
using FTVMVectorizedOp = std::function<nnvm::NodePtr (const nnvm::Node*)>;
// Pattern kind of an operator, registered per op under the "TOpPattern"
// attribute. The enumerators are ordered: code elsewhere compares them
// with <= (e.g. pt <= kBroadcast), so do not reorder them.
enum OpPatternKind : int {
// Elementwise operation
kElemWise,
// Broadcast operation
kBroadcast,
// Complex operation, can fuse bcast in input/outputs
// but cannot chain another complex op
kComplex,
// Extern operation, cannot fuse anything.
kExtern
};
// Attribute value type for "TOpPattern"; holds an OpPatternKind as plain int.
using TOpPattern = int;
/*!
 * \brief Look up a PackedFunc in the global registry, failing loudly
 *  when no function is registered under the given name.
 * \param name The name of the function.
 * \return Reference to the registered PackedFunc.
 */
inline const PackedFunc& GetPackedFunc(const std::string& name) {
  const PackedFunc* registered = tvm::runtime::Registry::Get(name);
  CHECK(registered != nullptr)
      << "Cannot find function " << name << " in registry";
  return *registered;
}
/*!
 * \brief Create a Graph execution module from a given graph.
 * \param g The graph to be executed.
 *  NOTE(review): there is no separate module argument; presumably the compiled
 *  tvm module is carried inside the graph's attributes - confirm against the
 *  implementation.
 * \return The created executor module.
 */
tvm::runtime::Module CreateExecutor(nnvm::Graph g);
} // namespace contrib
} // namespace tvm
#endif // TVM_OP_ATTR_TYPES_H_
/*!
* Copyright (c) 2017 by Contributors
* \file Operator Declarations.
*/
#include <nnvm/op.h>
#include <nnvm/op_attr_types.h>
#include "./op_attr_types.h"
namespace tvm {
namespace contrib {
using namespace nnvm;
inline bool SameShape(const NodeAttrs& attrs,
std::vector<TShape> *ishape,
std::vector<TShape> *oshape) {
if (ishape->size() == 0 || (*ishape)[0].ndim() == 0) return false;
for (TShape& pshape : *oshape) {
pshape = (*ishape)[0];
}
for (TShape& pshape : *ishape) {
pshape = (*ishape)[0];
}
return true;
}
// Attribute group shared by the ops below: pattern kBroadcast and SameShape
// as the shape inference function.
// NOTE(review): the group is named "Elementwise" but registers kBroadcast,
// not kElemWise - confirm this is intended.
NNVM_REGISTER_OP_GROUP(ElementwiseOpAttr)
.set_attr<TOpPattern>("TOpPattern", kBroadcast)
.set_attr<FInferShape>("FInferShape", SameShape);
// Two-input addition operator.
NNVM_REGISTER_OP(__add_symbol__)
.describe("add two data together")
.set_num_inputs(2)
.include("ElementwiseOpAttr");
// One-input exponential operator.
NNVM_REGISTER_OP(exp)
.describe("Take exp")
.set_num_inputs(1)
.include("ElementwiseOpAttr");
} // namespace contrib
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file Operator definitions in TVM.
*/
#include <nnvm/op.h>
#include <nnvm/op_attr_types.h>
#include "./op_attr_types.h"
namespace tvm {
namespace contrib {
using namespace nnvm;
/*!
 * \brief Compute description of the add operator.
 *
 * Delegates to the packed function registered as "tvm_graph.compute.add".
 *
 * \param attrs The attribute of the node (not used).
 * \param inputs The two input tensors.
 * \return An array holding the single output tensor.
 */
Array<Tensor> ComputeAdd(const NodeAttrs& attrs,
                         const Array<Tensor>& inputs) {
  static const PackedFunc& add_impl = GetPackedFunc("tvm_graph.compute.add");
  CHECK_EQ(inputs.size(), 2U);
  Tensor lhs = inputs[0];
  Tensor rhs = inputs[1];
  Tensor sum = add_impl(lhs, rhs);
  return {sum};
}
/*!
 * \brief Compute description of the exp operator.
 *
 * Delegates to the packed function registered as "tvm_graph.compute.exp".
 *
 * \param attrs The attribute of the node (not used).
 * \param inputs The single input tensor.
 * \return An array holding the single output tensor.
 */
Array<Tensor> ComputeExp(const NodeAttrs& attrs,
                         const Array<Tensor>& inputs) {
  static const PackedFunc& exp_impl = GetPackedFunc("tvm_graph.compute.exp");
  CHECK_EQ(inputs.size(), 1U);
  Tensor operand = inputs[0];
  Tensor result = exp_impl(operand);
  return {result};
}
/*!
 * \brief Build the schedule for the ops registered with it in this file.
 *
 * Delegates to the packed function registered as "tvm_graph.schedule.ewise".
 *
 * \param attrs The attribute of the node (not used).
 * \param outs The output tensors to schedule.
 * \param target The build target.
 * \return The computation schedule.
 */
Schedule ScheduleEWise(const NodeAttrs& attrs,
                       const Array<Tensor>& outs,
                       const std::string& target) {
  static const PackedFunc& ewise_impl =
      GetPackedFunc("tvm_graph.schedule.ewise");
  Schedule sch = ewise_impl(outs, target);
  return sch;
}
// Attach the TVM compute and schedule functions to the add operator.
NNVM_REGISTER_OP(__add_symbol__)
.set_attr<FTVMCompute>("FTVMCompute", ComputeAdd)
.set_attr<FTVMSchedule>("FTVMSchedule", ScheduleEWise);
// Attach the TVM compute and schedule functions to the exp operator.
NNVM_REGISTER_OP(exp)
.set_attr<FTVMCompute>("FTVMCompute", ComputeExp)
.set_attr<FTVMSchedule>("FTVMSchedule", ScheduleEWise);
} // namespace contrib
} // namespace tvm
import tvm_graph as tg
import numpy as np
import tvm
def test_compile():
    """Build exp(y + x) for llvm, run it on CPU and compare against numpy."""
    x = tg.Variable('x')
    y = tg.Variable('y')
    z = tg.exp(y + x)
    shape = (10, 128)
    dtype = tvm.float32
    g = tg.build(z, "llvm",
                 shape={'x': shape,
                        'y': shape})
    m = tg.bind(g, tvm.cpu(0))
    # get member functions
    set_input, run, get_output = m['set_input'], m['run'], m['get_output']
    na = tvm.nd.array(np.ones(shape).astype(dtype))
    nb = tvm.nd.array(np.ones(shape).astype(dtype))
    # set inputs
    set_input('x', na)
    set_input('y', nb)
    # execute
    run()
    # get outputs
    out = tvm.nd.array(np.zeros(shape).astype(dtype))
    get_output(0, out)
    # The result is float32: the default rtol of 1e-7 is below float32
    # epsilon, so a one-ulp difference between the compiled exp and numpy's
    # exp would fail the test. Use a tolerance float32 can actually satisfy.
    np.testing.assert_allclose(
        out.asnumpy(), np.exp(na.asnumpy() + nb.asnumpy()), rtol=1e-5)


if __name__ == "__main__":
    test_compile()
import tvm
from tvm.contrib import util, rpc
import tvm_graph as tg
import numpy as np
import os
def test_rpc_executor():
    """Compile a graph, upload it to a local RPC server and run it remotely."""
    host = 'localhost'
    port = 9091
    server = rpc.Server(host, port)
    # Always shut the server down, even when a step below fails; otherwise
    # the server would be left running and keep the port busy.
    try:
        tmp = util.tempdir()
        sym_fname = tmp.relpath('net.json')
        lib_fname = tmp.relpath('net.o')
        param_fname = tmp.relpath('net.param')

        x = tg.Variable('x')
        y = tg.Variable('y')
        sym = tg.exp(y + x) + tg.exp(x + y)

        shape = (10, 128)
        dtype = tvm.float32
        na = tvm.nd.array(np.ones(shape).astype(dtype))
        nb = tvm.nd.array(np.ones(shape).astype(dtype))
        tg.save_params(param_fname, {'x': na, 'y': nb})

        remote = rpc.connect(host, port)
        ctx = remote.cpu(0)

        target = "llvm"
        shapes = {'x': shape, 'y': shape}

        sym_json = tg.compile_graph(lib_fname, sym, target, shapes)
        remote.upload(lib_fname)
        # close the parameter file deterministically instead of leaking it
        with open(param_fname, "rb") as param_file:
            param_blob = bytearray(param_file.read())

        rm = tg.remote_load_exec(remote,
                                 sym_json,
                                 os.path.basename(lib_fname),
                                 param_blob,
                                 ctx)
        run, get_output = rm['run'], rm['get_output']
        nc = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        run()
        get_output(0, nc)

        npa = na.asnumpy()
        npb = nb.asnumpy()
        # float32 result: the default rtol of 1e-7 is below float32 epsilon
        np.testing.assert_allclose(nc.asnumpy(),
                                   np.exp(npa + npb) + np.exp(npb + npa),
                                   rtol=1e-5)
    finally:
        server.terminate()


if __name__ == "__main__":
    test_rpc_executor()
import tvm_graph as tg
import numpy as np
import tvm
def test_save_load():
    """Check that parameters saved to disk give the same result when loaded
    into a second executor of the same graph."""
    # local imports: this test file does not import these at module level
    import os
    import shutil
    import tempfile

    shape = (10, 128)
    dtype = tvm.float32
    na = tvm.nd.array(np.ones(shape).astype(dtype))
    nb = tvm.nd.array(np.ones(shape).astype(dtype))

    x = tg.Variable('x')
    y = tg.Variable('y')
    z = tg.exp(y + x)

    g = tg.build(z, "llvm", shape={'x': shape, 'y': shape})
    m0 = tg.bind(g, tvm.cpu(0))
    set_input0, run0, get_output0 = m0['set_input'], m0['run'], m0['get_output']
    set_input0(0, na)
    set_input0(1, nb)
    run0()
    out0 = tvm.nd.array(np.zeros(shape).astype(dtype))
    get_output0(0, out0)

    # Keep the parameter file in a private temporary directory so the test
    # neither pollutes the working directory nor clashes with parallel runs.
    tmp_dir = tempfile.mkdtemp()
    try:
        param_path = os.path.join(tmp_dir, 'test.params')
        tg.save_params(param_path, {'x': na, 'y': nb})
        with open(param_path, 'rb') as param_file:
            param_blob = bytearray(param_file.read())
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)

    # create another executor
    m1 = tg.bind(g, tvm.cpu(0))
    load_params1 = m1['load_params']
    load_params1(param_blob)

    run1, get_output1 = m1['run'], m1['get_output']
    run1()
    out1 = tvm.nd.array(np.zeros(shape).astype(dtype))
    get_output1(0, out1)
    np.testing.assert_allclose(out0.asnumpy(), out1.asnumpy())


if __name__ == "__main__":
    test_save_load()
......@@ -525,27 +525,20 @@ llvm::Value* CodeGenLLVM::CreateCallExtern(const Call* op) {
llvm::Value* CodeGenLLVM::CreateIntrinsic(const Call* op) {
if (op->is_intrinsic("llvm_intrin")) {
CHECK_GE(op->args.size(), 1U);
CHECK_GE(op->args.size(), 2U);
llvm::Intrinsic::ID id = static_cast<llvm::Intrinsic::ID>(
op->args[0].as<UIntImm>()->value);
uint64_t num_signature = op->args[1].as<UIntImm>()->value;
std::vector<llvm::Value*> arg_value;
std::vector<llvm::Type*> arg_type;
for (size_t i = 1; i < op->args.size(); ++i) {
std::vector<llvm::Type*> sig_type;
for (size_t i = 2; i < op->args.size(); ++i) {
arg_value.push_back(MakeValue(op->args[i]));
arg_type.push_back(arg_value.back()->getType());
if (i - 2 < num_signature) {
sig_type.push_back(arg_value.back()->getType());
}
llvm::Function* f = llvm::Intrinsic::getDeclaration(
module_.get(), id, arg_type);
return builder_->CreateCall(f, arg_value);
} else if (op->is_intrinsic("llvm_builtin")) {
CHECK_GE(op->args.size(), 1U);
llvm::Intrinsic::ID id = static_cast<llvm::Intrinsic::ID>(
op->args[0].as<UIntImm>()->value);
std::vector<llvm::Value*> arg_value;
for (size_t i = 1; i < op->args.size(); ++i) {
arg_value.push_back(MakeValue(op->args[i]));
}
llvm::Function* f = llvm::Intrinsic::getDeclaration(module_.get(), id, {});
llvm::Function* f = llvm::Intrinsic::getDeclaration(
module_.get(), id, sig_type);
return builder_->CreateCall(f, arg_value);
} else if (op->is_intrinsic(Call::bitwise_and)) {
return builder_->CreateAnd(MakeValue(op->args[0]), MakeValue(op->args[1]));
......
......@@ -16,25 +16,8 @@ namespace llvm {
using namespace ir;
/*!
 * \brief Rewrite a call expression into an "llvm_builtin" intrinsic call.
 *
 * The resulting call keeps the type of the original call; its arguments are
 * the intrinsic id followed by the original call's arguments.
 *
 * \param targs Packed arguments; targs[0] must be a Call expression.
 * \param rv Receives the rewritten call expression.
 * \tparam id The intrinsic id stored as the first argument of the new call.
 */
template<unsigned id>
inline void DispatchLLVMBuildin(const TVMArgs& targs, TVMRetValue* rv) {
Expr e = targs[0];
const Call* call = e.as<Call>();
// fail when the argument is not a Call expression
CHECK(call != nullptr);
Array<Expr> cargs;
// intrin id.
cargs.push_back(UIntImm::make(UInt(32), id));
for (Expr arg : call->args) {
cargs.push_back(arg);
}
*rv = Call::make(
call->type, "llvm_builtin", cargs, Call::Intrinsic);
}
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.prefetch")
.set_body(DispatchLLVMBuildin<::llvm::Intrinsic::prefetch>);
template<unsigned id>
// num_signature means number of arguments used to query signature
template<unsigned id, int num_signature>
inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) {
Expr e = targs[0];
const Call* call = e.as<Call>();
......@@ -42,6 +25,8 @@ inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) {
Array<Expr> cargs;
// intrin id.
cargs.push_back(UIntImm::make(UInt(32), id));
cargs.push_back(UIntImm::make(UInt(32), num_signature));
for (Expr arg : call->args) {
cargs.push_back(arg);
}
......@@ -49,7 +34,7 @@ inline void DispatchLLVMPureIntrin(const TVMArgs& targs, TVMRetValue* rv) {
call->type, "llvm_intrin", cargs, Call::PureIntrinsic);
}
template<unsigned id>
template<unsigned id, int num_signature>
inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) {
Expr e = targs[0];
const Call* call = e.as<Call>();
......@@ -57,6 +42,7 @@ inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) {
Array<Expr> cargs;
// intrin id.
cargs.push_back(UIntImm::make(UInt(32), id));
cargs.push_back(UIntImm::make(UInt(32), num_signature));
for (Expr arg : call->args) {
cargs.push_back(arg);
}
......@@ -64,20 +50,23 @@ inline void DispatchLLVMIntrin(const TVMArgs& targs, TVMRetValue* rv) {
call->type, "llvm_intrin", cargs, Call::Intrinsic);
}
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.prefetch")
.set_body(DispatchLLVMIntrin<::llvm::Intrinsic::prefetch, 0>);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.exp")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp>);
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::exp, 1>);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.fma")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd>);
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 1>);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.log")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log>);
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::log, 1>);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.sqrt")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt>);
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::sqrt, 1>);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.llvm.pow")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow>);
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::pow, 1>);
} // namespace llvm
} // namespace codegen
......
#!/bin/bash
export PYTHONPATH=python:apps/extension/python
export PYTHONPATH=${PYTHONPATH}:apps/graph_executor/python:apps/graph_executor/nnvm/python
export LD_LIBRARY_PATH=lib:${LD_LIBRARY_PATH}
rm -rf python/tvm/*.pyc python/tvm/*/*.pyc
......@@ -14,12 +13,6 @@ make || exit -1
cd ../..
python -m nose -v apps/extension/tests || exit -1
# Test NNVM integration
cd apps/graph_executor
make || exit -1
cd ../..
python -m nose -v apps/graph_executor/tests || exit -1
TVM_FFI=cython python -m nose -v tests/python/integration || exit -1
TVM_FFI=ctypes python3 -m nose -v tests/python/integration || exit -1
TVM_FFI=cython python -m nose -v tests/python/contrib || exit -1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment