Commit 5a177070 by Zhi Committed by Jared Roesch

[relay][vm] Reuse allocated device memory (#4170)

parent 6f9d028b
...@@ -747,6 +747,12 @@ class VirtualMachine : public runtime::ModuleNode { ...@@ -747,6 +747,12 @@ class VirtualMachine : public runtime::ModuleNode {
/*! \brief The parameter name to data mapping. */ /*! \brief The parameter name to data mapping. */
std::unordered_map<std::string, ObjectRef> params_; std::unordered_map<std::string, ObjectRef> params_;
/*!
* \brief The constant pool for runtime. It caches the device-dependent
* objects to avoid reallocation of constants during inference.
*/
std::vector<ObjectRef> const_pool_;
}; };
} // namespace vm } // namespace vm
......
...@@ -795,9 +795,18 @@ void VirtualMachine::RunLoop() { ...@@ -795,9 +795,18 @@ void VirtualMachine::RunLoop() {
} }
case Opcode::LoadConst: { case Opcode::LoadConst: {
auto constant_obj = exec->constants[instr.const_index]; auto constant_obj = exec->constants[instr.const_index];
// We cache the allocated object in the constant pool. When measuring,
// the first iteration sets the pool up; subsequent iterations directly
// reuse the allocated objects.
if (const_pool_.size() <= static_cast<size_t>(instr.const_index)) {
const_pool_.resize(instr.const_index + 1);
}
if (!const_pool_[instr.const_index].defined()) {
// TODO(wweic) ctx could be obtained from the ctxs list. // TODO(wweic) ctx could be obtained from the ctxs list.
auto device_obj = CopyTo(constant_obj, ctxs[0]); const_pool_[instr.const_index] = CopyTo(constant_obj, ctxs[0]);
WriteRegister(instr.dst, device_obj); }
WriteRegister(instr.dst, const_pool_[instr.const_index]);
pc++; pc++;
goto main_loop; goto main_loop;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment