/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ /*! * \file tvm/runtime/vm/executable.cc * \brief The implementation of a virtual machine executable APIs. */ #include <dmlc/memory_io.h> #include <tvm/runtime/c_runtime_api.h> #include <tvm/runtime/registry.h> #include <tvm/runtime/vm.h> #include <algorithm> #include <memory> #include <iostream> #include <iomanip> #include <sstream> #include <utility> #include <vector> #include "serialize_util.h" namespace tvm { namespace runtime { namespace vm { #define STREAM_CHECK(val, section) \ CHECK(val) << "Invalid VM file format in the " << section << " section." \ << "\n"; // Helper to serialize a vm instruction. VMInstructionSerializer SerializeInstruction(const Instruction& instr); // Helper to deserialize a serialized vm instruction. Instruction DeserializeInstruction(const VMInstructionSerializer& instr); PackedFunc Executable::GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) { if (name == "get_lib") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->GetLib(); }); } else if (name == "get_bytecode") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->GetBytecode(); }); } else if (name == "get_stats") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->Stats(); }); } else if (name == "save") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->Save(); }); } else if (name == "get_function_arity") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { std::string func_name = args[0]; *rv = this->GetFunctionArity(func_name); }); } else if (name == "get_function_param_name") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { std::string func_name = args[0]; int index = args[1]; *rv = this->GetFunctionParameterName(func_name, index); }); } else { LOG(FATAL) << "Unknown packed function: " << name; return PackedFunc(nullptr); } } int Executable::GetFunctionArity(std::string func_name) const { auto it = global_map.find(func_name); if (it == global_map.end()) { LOG(ERROR) << "Cannot find function " << func_name << " in executable"; return -1; } const auto& func = functions[it->second]; return func.params.size(); } std::string Executable::GetFunctionParameterName(std::string func_name, uint32_t index) const { auto it = global_map.find(func_name); if (it == global_map.end()) { LOG(ERROR) << "Cannot find function " << func_name << " in executable"; return ""; } const auto& func = functions[it->second]; if (index > func.params.size()) { LOG(ERROR) << "Invalid parameter index"; return ""; } return func.params[index]; } std::string Executable::GetBytecode() const { std::ostringstream oss; for (size_t i = 0; i < functions.size(); ++i) { const auto& func = functions[i]; // Print the header of the function format. oss << "VM Function[" << i << "]: " << func.name << "("; for (const auto& param : func.params) { oss << param << ", "; } oss.seekp(-2, std::ios_base::end); oss << ")" << std::endl; oss << "# reg file size = " << func.register_file_size << std::endl; oss << "# instruction count = " << func.instructions.size() << std::endl; // Print the instructions of a `VMFunction`. // The part after ";" is the instruction in text format. oss << "opcode, fields # inst(text):" << std::endl; for (size_t idx = 0; idx < func.instructions.size(); ++idx) { const auto& instr = func.instructions[idx]; const auto& serialized_instr = SerializeInstruction(instr); oss << std::setw(2) << idx << ": " << serialized_instr.opcode << " "; for (auto it : serialized_instr.fields) { oss << it << " "; } oss << " # " << instr; if (oss.str().back() != '\n') oss << std::endl; } oss << std::endl; } return oss.str(); } std::string Executable::Stats() const { std::ostringstream oss; oss << "Relay VM executable statistics:" << std::endl; // Get the number of constants and the shape of each of them. oss << " Constant shapes (# " << constants.size() << "): ["; for (const auto& it : constants) { const auto constant = Downcast<NDArray>(it); const auto& shape = constant.Shape(); // Scalar if (shape.empty()) { oss << "scalar, "; continue; } oss << "["; for (auto s : shape) { oss << s << ", "; } oss.seekp(-2, oss.cur); oss << "], " << std::endl; } if (!constants.empty()) oss.seekp(-2, oss.cur); oss << "]" << std::endl; // Get the number of globals and the name of each of them. oss << " Globals (#" << global_map.size() << "): ["; for (const auto& it : global_map) { oss << "(\"" << it.first << "\", " << it.second << ")" << ", "; } if (!global_map.empty()) oss.seekp(-2, oss.cur); oss << "]" << std::endl; // Get the number of primitive ops and the name of each of them. oss << " Primitive ops (#" << primitive_map.size() << "): ["; std::vector<std::string> prim_ops; for (const auto& it : primitive_map) { auto packed_index = static_cast<size_t>(it.second); if (prim_ops.size() <= packed_index) { prim_ops.resize(packed_index + 1); } prim_ops[packed_index] = it.first; } for (const auto& it : prim_ops) { oss << it << ", "; } if (!prim_ops.empty()) oss.seekp(-2, oss.cur); oss << "]" << std::endl; return oss.str(); } void SaveHeader(dmlc::Stream* strm) { uint64_t header = kTVMVMBytecodeMagic; strm->Write(header); std::string version = TVM_VERSION; strm->Write(version); } TVMByteArray Executable::Save() { // Initialize the stream object. code_.clear(); dmlc::MemoryStringStream strm(&code_); // Save header SaveHeader(&strm); // Global section. SaveGlobalSection(&strm); // Constant section. SaveConstantSection(&strm); // Primitive names. SavePrimitiveOpNames(&strm); // Code section. SaveCodeSection(&strm); TVMByteArray arr; arr.data = code_.c_str(); arr.size = code_.length(); return arr; } void Executable::SaveGlobalSection(dmlc::Stream* strm) { std::vector<std::pair<std::string, Index> > globals(this->global_map.begin(), this->global_map.end()); auto comp = [](const std::pair<std::string, Index>& a, const std::pair<std::string, Index>& b) { return a.second < b.second; }; std::sort(globals.begin(), globals.end(), comp); std::vector<std::string> glbs; for (const auto& it : globals) { glbs.push_back(it.first); } strm->Write(glbs); } void Executable::SaveConstantSection(dmlc::Stream* strm) { std::vector<DLTensor*> arrays; for (const auto& obj : this->constants) { const auto cell = Downcast<runtime::NDArray>(obj); arrays.push_back(const_cast<DLTensor*>(cell.operator->())); } strm->Write(static_cast<uint64_t>(this->constants.size())); for (const auto& it : arrays) { runtime::SaveDLTensor(strm, it); } } void Executable::SavePrimitiveOpNames(dmlc::Stream* strm) { std::vector<std::string> primitive_names; for (const auto& it : this->primitive_map) { auto packed_index = static_cast<size_t>(it.second); if (primitive_names.size() <= packed_index) { primitive_names.resize(packed_index + 1); } primitive_names[packed_index] = it.first; } strm->Write(primitive_names); } // Serialize a virtual machine instruction. It creates a list that contains the // hash, opcode, and all fields of an instruction. // // For example, the function signature used to create an `AllocTensor` // instruction is: // Instruction AllocTensor(std::vector<Index> shape, DLDataType dtype, RegName dst) // // The serialized form will be: // `hash 5 dtype.code dtype.bits dtype.lanes ndim dst_register val1 val2 ... valn` // // where hash is the hash of serialized instruction that is computed internally // by the `VMInstructionExecutable`. It is used for sanity check before decoding. // 5 shows opcode of `AllocTensor`, `(dtype.code dtype.bits dtype.lanes)` // represents a `DLDataType`, `ndim` is the number of dimensions, `dst_register` // is the destination register, and the rest of it together indicates the shape // of the tensor to be allocated. VMInstructionSerializer SerializeInstruction(const Instruction& instr) { std::vector<Index> fields; // Save the opcode. DLOG(INFO) << "Serializing: " << instr << std::endl; switch (instr.op) { case Opcode::Move: { // Number of fields = 2 fields.assign({instr.from, instr.dst}); break; } case Opcode::Ret: { // Number of fields = 1 fields.push_back(instr.result); break; } case Opcode::Fatal: { // Number of fields = 0 break; } case Opcode::InvokePacked: { // Number of fields = 3 + instr.arity // Note that arity includes both input arguments and outputs. We will // put all the `arity` number of fields in the end for serialization. fields.assign({instr.packed_index, instr.arity, instr.output_size}); // Save the args. fields.insert(fields.end(), instr.packed_args, instr.packed_args + instr.arity); break; } case Opcode::AllocTensor: { // Number of fields = 5 + instr.alloc_tensor.ndim fields.push_back(instr.alloc_tensor.storage); // Save `DLDataType` and the dst register. const auto& dtype = instr.alloc_tensor.dtype; fields.push_back(dtype.code); fields.push_back(dtype.bits); fields.push_back(dtype.lanes); // The number of dimensions is not needed for constructing an // `AllocTensor` instruction as it equals to the length of the `shape` // vector. However, we save it to conveniently deserialize the instruction // because we will know how many fields are needed by the `shape` argument. fields.push_back(instr.alloc_tensor.ndim); fields.push_back(instr.dst); // Save the shape of the tensor. // Note that this field is rotated to the end of the list. fields.insert(fields.end(), instr.alloc_tensor.shape, instr.alloc_tensor.shape + instr.alloc_tensor.ndim); break; } case Opcode::AllocTensorReg: { // Number of fields = 6 fields.push_back(instr.alloc_tensor_reg.storage); fields.push_back(instr.alloc_tensor_reg.shape_register); // Save `DLDataType` and the dst register. const auto& dtype = instr.alloc_tensor_reg.dtype; fields.push_back(dtype.code); fields.push_back(dtype.bits); fields.push_back(dtype.lanes); fields.push_back(instr.dst); break; } case Opcode::AllocStorage: { fields.push_back(instr.alloc_storage.allocation_size); fields.push_back(instr.alloc_storage.alignment); // Save `DLDataType` and the dst register. const auto& dtype = instr.alloc_storage.dtype_hint; fields.push_back(dtype.code); fields.push_back(dtype.bits); fields.push_back(dtype.lanes); fields.push_back(instr.dst); break; } case Opcode::AllocADT: { // Number of fields = 3 + instr.num_fields fields.assign({instr.constructor_tag, instr.num_fields, instr.dst}); // Save the fields. fields.insert(fields.end(), instr.datatype_fields, instr.datatype_fields + instr.num_fields); break; } case Opcode::AllocClosure: { // Number of fields = 3 + instr.num_freevar fields.assign({instr.clo_index, instr.num_freevar, instr.dst}); // Save the free vars. fields.insert(fields.end(), instr.free_vars, instr.free_vars + instr.num_freevar); break; } case Opcode::If: { // Number of fields = 4 fields.assign({instr.if_op.test, instr.if_op.target, instr.if_op.true_offset, instr.if_op.false_offset}); break; } case Opcode::Invoke: { // Number of fields = 3 + instr.num_args fields.assign({instr.func_index, instr.num_args, instr.dst}); // Save the args. fields.insert(fields.end(), instr.invoke_args_registers, instr.invoke_args_registers + instr.num_args); break; } case Opcode::InvokeClosure: { // Number of fields = 3 + instr.num_closure_args fields.assign({instr.closure, instr.num_closure_args, instr.dst}); // Save the args. fields.insert(fields.end(), instr.closure_args, instr.closure_args + instr.num_closure_args); break; } case Opcode::LoadConst: { // Number of fields = 2 fields.assign({instr.const_index, instr.dst}); break; } case Opcode::LoadConsti: { // Number of fields = 2 fields.assign({instr.load_consti.val, instr.dst}); break; } case Opcode::GetField: { // Number of fields = 3 fields.assign({instr.object, instr.field_index, instr.dst}); break; } case Opcode::GetTag: { // Number of fields = 2 fields.assign({instr.get_tag.object, instr.dst}); break; } case Opcode::Goto: { // Number of fields = 1 fields.push_back(instr.pc_offset); break; } default: LOG(FATAL) << "Invalid opcode" << static_cast<int>(instr.op); break; } return VMInstructionSerializer(static_cast<Index>(instr.op), fields); } void Executable::SaveCodeSection(dmlc::Stream* strm) { // Save the number of functions. strm->Write(static_cast<uint64_t>(this->functions.size())); for (const auto& func : this->functions) { // Save the function info. VMFunctionSerializer func_format(func.name, func.register_file_size, func.instructions.size(), func.params); func_format.Save(strm); // Serialize each instruction. for (const auto& instr : func.instructions) { const auto& serialized_instr = SerializeInstruction(instr); serialized_instr.Save(strm); } } } void LoadHeader(dmlc::Stream* strm) { // Check header. uint64_t header; STREAM_CHECK(strm->Read(&header), "header"); STREAM_CHECK(header == kTVMVMBytecodeMagic, "header"); // Check version. std::string version; STREAM_CHECK(strm->Read(&version), "version"); STREAM_CHECK(version == TVM_VERSION, "version"); } runtime::Module Executable::Load(const std::string& code, const runtime::Module lib) { auto exec = make_object<Executable>(); exec->lib = lib; exec->code_ = code; dmlc::MemoryStringStream strm(&exec->code_); // Load header. LoadHeader(&strm); // Global section. exec->LoadGlobalSection(&strm); // Constant section. exec->LoadConstantSection(&strm); // Primitive names that will be invoked by `InvokePacked` instructions. exec->LoadPrimitiveOpNames(&strm); // Code section. exec->LoadCodeSection(&strm); return runtime::Module(exec); } void Executable::LoadGlobalSection(dmlc::Stream* strm) { std::vector<std::string> globals; STREAM_CHECK(strm->Read(&globals), "global"); for (size_t i = 0; i < globals.size(); i++) { this->global_map.insert({globals[i], i}); } } void Executable::LoadConstantSection(dmlc::Stream* strm) { uint64_t sz; // Load the number of constants. STREAM_CHECK(strm->Read(&sz, sizeof(sz)), "constant"); size_t size = static_cast<size_t>(sz); // Load each of the constants. for (size_t i = 0; i < size; i++) { runtime::NDArray constant; STREAM_CHECK(constant.Load(strm), "constant"); this->constants.push_back(constant); } } void Executable::LoadPrimitiveOpNames(dmlc::Stream* strm) { std::vector<std::string> primitive_names; STREAM_CHECK(strm->Read(&primitive_names), "primitive name"); for (size_t i = 0; i < primitive_names.size(); i++) { this->primitive_map.insert({primitive_names[i], i}); } } // Extract the `cnt` number of fields started at `start` from the list // `instr_fields`. inline std::vector<Index> ExtractFields(const std::vector<Index>& instr_fields, Index start, Index cnt) { CHECK_LE(static_cast<size_t>(start + cnt), instr_fields.size()); std::vector<Index> ret; for (auto i = start; i < start + cnt; i++) { ret.push_back(instr_fields[i]); } return ret; } Instruction DeserializeInstruction(const VMInstructionSerializer& instr) { Opcode opcode = static_cast<Opcode>(instr.opcode); switch (opcode) { case Opcode::Move: { // Number of fields = 2 DCHECK_EQ(instr.fields.size(), 2U); return Instruction::Move(instr.fields[0], instr.fields[1]); } case Opcode::Ret: { // Number of fields = 1 DCHECK_EQ(instr.fields.size(), 1U); return Instruction::Ret(instr.fields[0]); } case Opcode::Fatal: { // Number of fields = 0 DCHECK(instr.fields.empty()); return Instruction::Fatal(); } case Opcode::InvokePacked: { // Number of fields = 3 + instr.arity DCHECK_GE(instr.fields.size(), 3U); DCHECK_EQ(instr.fields.size(), 3U + static_cast<size_t>(instr.fields[1])); Index packed_index = instr.fields[0]; Index arity = instr.fields[1]; Index output_size = instr.fields[2]; std::vector<RegName> args = ExtractFields(instr.fields, 3, arity); return Instruction::InvokePacked(packed_index, arity, output_size, args); } case Opcode::AllocTensor: { // Number of fields = 6 + instr.alloc_tensor.ndim DCHECK_GE(instr.fields.size(), 6U); DCHECK_EQ(instr.fields.size(), 6U + static_cast<size_t>(instr.fields[4])); RegName storage_reg = instr.fields[0]; DLDataType dtype; dtype.code = instr.fields[1]; dtype.bits = instr.fields[2]; dtype.lanes = instr.fields[3]; Index ndim = instr.fields[4]; RegName dst = instr.fields[5]; std::vector<Index> shape = ExtractFields(instr.fields, 6, ndim); return Instruction::AllocTensor(storage_reg, shape, dtype, dst); } case Opcode::AllocTensorReg: { // Number of fields = 5 DCHECK_EQ(instr.fields.size(), 6U); RegName storage_reg = instr.fields[0]; Index shape_register = instr.fields[1]; DLDataType dtype; dtype.code = instr.fields[2]; dtype.bits = instr.fields[3]; dtype.lanes = instr.fields[4]; RegName dst = instr.fields[5]; return Instruction::AllocTensorReg(storage_reg, shape_register, dtype, dst); } case Opcode::AllocADT: { // Number of fields = 3 + instr.num_fields DCHECK_GE(instr.fields.size(), 3U); DCHECK_EQ(instr.fields.size(), 3U + static_cast<size_t>(instr.fields[1])); Index constructor_tag = instr.fields[0]; Index num_fields = instr.fields[1]; RegName dst = instr.fields[2]; std::vector<Index> fields = ExtractFields(instr.fields, 3, num_fields); return Instruction::AllocADT(constructor_tag, num_fields, fields, dst); } case Opcode::AllocClosure: { // Number of fields = 3 + instr.num_freevar DCHECK_GE(instr.fields.size(), 3U); DCHECK_EQ(instr.fields.size(), 3U + static_cast<size_t>(instr.fields[1])); Index clo_index = instr.fields[0]; Index num_freevar = instr.fields[1]; RegName dst = instr.fields[2]; std::vector<Index> free_vars = ExtractFields(instr.fields, 3, num_freevar); return Instruction::AllocClosure(clo_index, num_freevar, free_vars, dst); } case Opcode::AllocStorage: { DCHECK_GE(instr.fields.size(), 6U); Index allocation_size = instr.fields[0]; Index alignment = instr.fields[1]; DLDataType dtype; dtype.code = instr.fields[2]; dtype.bits = instr.fields[3]; dtype.lanes = instr.fields[4]; RegName dst = instr.fields[5]; return Instruction::AllocStorage( allocation_size, alignment, dtype, dst); } case Opcode::If: { // Number of fields = 4 DCHECK_EQ(instr.fields.size(), 4U); Index test = instr.fields[0]; Index target = instr.fields[1]; Index true_offset = instr.fields[2]; Index false_offset = instr.fields[3]; return Instruction::If(test, target, true_offset, false_offset); } case Opcode::Invoke: { // Number of fields = 3 + instr.num_args DCHECK_GE(instr.fields.size(), 3U); DCHECK_EQ(instr.fields.size(), 3U + static_cast<size_t>(instr.fields[1])); Index func_index = instr.fields[0]; Index num_args = instr.fields[1]; RegName dst = instr.fields[2]; std::vector<Index> args = ExtractFields(instr.fields, 3, num_args); return Instruction::Invoke(func_index, args, dst); } case Opcode::InvokeClosure: { // Number of fields = 3 + instr.num_closure_args DCHECK_GE(instr.fields.size(), 3U); DCHECK_EQ(instr.fields.size(), 3U + static_cast<size_t>(instr.fields[1])); Index closure = instr.fields[0]; Index num_closure_args = instr.fields[1]; RegName dst = instr.fields[2]; std::vector<Index> args = ExtractFields(instr.fields, 3, num_closure_args); return Instruction::InvokeClosure(closure, args, dst); } case Opcode::LoadConst: { // Number of fields = 2 DCHECK_EQ(instr.fields.size(), 2U); return Instruction::LoadConst(instr.fields[0], instr.fields[1]); } case Opcode::LoadConsti: { // Number of fields = 2 DCHECK_EQ(instr.fields.size(), 2U); return Instruction::LoadConsti(instr.fields[0], instr.fields[1]); } case Opcode::GetField: { // Number of fields = 3 DCHECK_EQ(instr.fields.size(), 3U); return Instruction::GetField(instr.fields[0], instr.fields[1], instr.fields[2]); } case Opcode::GetTag: { // Number of fields = 2 DCHECK_EQ(instr.fields.size(), 2U); return Instruction::GetTag(instr.fields[0], instr.fields[1]); } case Opcode::Goto: { // Number of fields = 1 DCHECK_EQ(instr.fields.size(), 1U); return Instruction::Goto(instr.fields[0]); } default: LOG(FATAL) << "Invalid opcode" << instr.opcode; return Instruction(); } } void Executable::LoadCodeSection(dmlc::Stream* strm) { // Load the number of functions. uint64_t sz; STREAM_CHECK(strm->Read(&sz, sizeof(sz)), "code"); size_t num_funcs = static_cast<size_t>(sz); this->functions.resize(num_funcs); for (size_t i = 0; i < num_funcs; i++) { // Load the function info. VMFunctionSerializer loaded_func; STREAM_CHECK(loaded_func.Load(strm), "code/function"); // Load the instructions. std::vector<Instruction> instructions; for (size_t j = 0; j < loaded_func.num_instructions; j++) { VMInstructionSerializer instr; std::vector<Index> instr_fields; STREAM_CHECK(instr.Load(strm), "code/instruction"); instructions.push_back(DeserializeInstruction(instr)); } // Create the VM function. VMFunction vm_func = VMFunction(loaded_func.name, loaded_func.params, instructions, loaded_func.register_file_size); auto it = this->global_map.find(loaded_func.name); CHECK(it != this->global_map.end()); CHECK_LE(it->second, this->global_map.size()); this->functions[it->second] = vm_func; } } TVM_REGISTER_GLOBAL("runtime.GetNumOfGlobals") .set_body([](TVMArgs args, TVMRetValue* rv) { runtime::Module mod = args[0]; const auto* exec = dynamic_cast<Executable*>(mod.operator->()); CHECK(exec); *rv = static_cast<int>(exec->global_map.size()); }); TVM_REGISTER_GLOBAL("runtime.GetGlobalFields") .set_body([](TVMArgs args, TVMRetValue* rv) { runtime::Module mod = args[0]; const auto* exec = dynamic_cast<Executable*>(mod.operator->()); CHECK(exec); int idx = args[1]; std::vector<std::pair<std::string, Index> > globals(exec->global_map.begin(), exec->global_map.end()); auto comp = [](const std::pair<std::string, Index>& a, const std::pair<std::string, Index>& b) { return a.second < b.second; }; std::sort(globals.begin(), globals.end(), comp); CHECK_LT(idx, globals.size()); *rv = globals[idx].first; }); TVM_REGISTER_GLOBAL("runtime.GetNumOfPrimitives") .set_body([](TVMArgs args, TVMRetValue* rv) { runtime::Module mod = args[0]; const auto* exec = dynamic_cast<Executable*>(mod.operator->()); CHECK(exec); *rv = static_cast<int>(exec->primitive_map.size()); }); TVM_REGISTER_GLOBAL("runtime.GetPrimitiveFields") .set_body([](TVMArgs args, TVMRetValue* rv) { runtime::Module mod = args[0]; const auto* exec = dynamic_cast<Executable*>(mod.operator->()); CHECK(exec); int idx = args[1]; CHECK_GE(idx, 0); CHECK_LT(idx, exec->primitive_map.size()); for (const auto& it : exec->primitive_map) { if (idx == static_cast<int>(it.second)) { *rv = it.first; break; } } }); TVM_REGISTER_GLOBAL("runtime.Load_Executable") .set_body_typed([]( std::string code, runtime::Module lib) { return Executable::Load(code, lib); }); } // namespace vm } // namespace runtime } // namespace tvm