/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file relay/backend/graph_plan_memory.cc
 * \brief Memory index assignment pass for executing
 *   the program in the graph runtime.
 */
#include <tvm/relay/expr.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/analysis.h>
#include "../../common/arena.h"

namespace tvm {
namespace relay {

using IntegerArray = Array<Integer>;

struct StorageToken {
  /*! \brief Reference counter */
  int ref_counter{0};
  /*! \brief number of bytes */
  size_t max_bytes{0};
  /*! \brief The corresponding tensor type node. */
  const TensorTypeNode* ttype{nullptr};
  /*! \brief virtual device index that corresponds to the device_type in
   * DLContext. */
  int device_type{0};
  /*! \brief The storage id */
  int64_t storage_id{-1};
};

class StorageAllocaBaseVisitor : public ExprVisitor {
 public:
  // run the visitor on a function.
  void Run(const Function& func) {
    for (Var param : func->params) {
      CreateToken(param.operator->(), false);
    }
    // must always keep the output alive.
    for (StorageToken* tok : GetToken(func->body)) {
      tok->ref_counter += 1;
    }
  }

  void VisitExpr_(const ConstantNode* op) final {
    this->CreateToken(op, false);
  }

  void VisitExpr_(const VarNode* op) final {
    // Do nothing.
  }

  void VisitExpr_(const FunctionNode* op) final {
    // Do not recurse into sub functions.
  }

  void VisitExpr_(const GlobalVarNode* op) final {
    // Do nothing.
  }

  void VisitExpr_(const OpNode* op) final {
    // Do nothing.
  }

  void VisitExpr_(const TupleNode* op) final {
    std::vector<StorageToken*> fields;
    for (Expr field : op->fields) {
      auto tok = GetToken(field);
      CHECK_EQ(tok.size(), 1U);
      fields.push_back(tok[0]);
    }
    token_map_[op] = fields;
  }

  void VisitExpr_(const TupleGetItemNode* op) final {
    const auto& tok = GetToken(op->tuple);
    CHECK_LT(static_cast<size_t>(op->index), tok.size());
    token_map_[op] = {tok[op->index]};
  }

  void VisitExpr_(const IfNode* op) final {
    LOG(FATAL) << "if is not supported.";
  }

  void VisitExpr_(const LetNode* op) final {
    auto token = GetToken(op->value);
    token_map_[op->var.operator->()] = token;
    token_map_[op] = GetToken(op->body);
  }

 protected:
  /*! \brief internal token map */
  std::unordered_map<const ExprNode*, std::vector<StorageToken*> > token_map_;

  /*!
   * \brief Get the necessary token.
   * \param expr The expression.
   * \return The corresponding token.
   */
  const std::vector<StorageToken*>& GetToken(const Expr& expr) {
    this->VisitExpr(expr);
    auto it = token_map_.find(expr.operator->());
    CHECK(it != token_map_.end());
    return it->second;
  }
  /*!
   * \brief Populate the token map to set op's tokens.
   * \param op The node to be processed.
   * \param can_realloc Whether we can re-allocate the memory.
   */
  virtual void CreateToken(const ExprNode* op, bool can_realloc) = 0;
};
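// The two subclasses below split planning into two passes over the same
// traversal. A sketch of the flow, assuming a type-checked input function:
// StorageAllocaInit builds one prototype StorageToken per output tensor and
// counts consumers; StorageAllocator then replays the traversal, turning
// prototypes into concrete storage ids and reusing freed tokens when it can.
// For an illustrative call such as add(%x, %x) (hypothetical Relay snippet,
// not from this file), both argument visits bump the token of %x, so its
// ref_counter reaches 2 and its buffer only becomes reusable after the
// allocator has released both reads.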
class StorageAllocaInit : protected StorageAllocaBaseVisitor {
 public:
  explicit StorageAllocaInit(common::Arena* arena)
      : arena_(arena) {}

  /*! \return The internal token map */
  std::unordered_map<const ExprNode*, std::vector<StorageToken*> >
  GetInitTokenMap(const Function& func) {
    node_device_map_ = CollectDeviceInfo(func);
    this->Run(func);
    return std::move(token_map_);
  }

 protected:
  using StorageAllocaBaseVisitor::VisitExpr_;

  void CreateToken(const ExprNode* op, bool can_realloc) final {
    CHECK(!token_map_.count(op));
    std::vector<StorageToken*> tokens;
    int device_type = node_device_map_.count(GetRef<Expr>(op))
                          ? node_device_map_[GetRef<Expr>(op)]->value
                          : 0;
    if (const auto* tuple_type = op->checked_type().as<TupleTypeNode>()) {
      for (Type t : tuple_type->fields) {
        const auto* ttype = t.as<TensorTypeNode>();
        CHECK(ttype);
        StorageToken* token = arena_->make<StorageToken>();
        token->ttype = ttype;
        token->device_type = device_type;
        tokens.push_back(token);
      }
    } else {
      const auto* ttype = op->checked_type().as<TensorTypeNode>();
      CHECK(ttype);
      StorageToken* token = arena_->make<StorageToken>();
      token->ttype = ttype;
      token->device_type = device_type;
      tokens.push_back(token);
    }
    token_map_[op] = tokens;
  }

  void VisitExpr_(const CallNode* op) final {
    // create a token for the call node.
    CreateToken(op, true);
    // for each input, visit the argument token.
    for (Expr arg : op->args) {
      for (StorageToken* tok : GetToken(arg)) {
        tok->ref_counter += 1;
      }
    }
  }

 private:
  // allocator
  common::Arena* arena_;
  Map<Expr, Integer> node_device_map_;
};

class StorageAllocator : public StorageAllocaBaseVisitor {
 public:
  /*!
   * \return total number of bytes allocated
   */
  size_t TotalAllocBytes() const {
    size_t total = 0;
    for (const auto* p : data_) {
      total += p->max_bytes;
    }
    return total;
  }

  // Run storage allocation for a function.
  Map<Expr, Array<IntegerArray> > Plan(const Function& func) {
    prototype_ = StorageAllocaInit(&arena_).GetInitTokenMap(func);
    this->Run(func);

    // The value of smap contains two integer arrays where the first array
    // contains the planned storage ids and the second holds the device types.
    Map<Expr, Array<IntegerArray> > smap;
    int num_annotated_nodes = 0;
    int num_nodes = 0;

    for (const auto& kv : token_map_) {
      std::vector<Integer> storage_ids;
      std::vector<Integer> device_types;
      for (StorageToken* tok : kv.second) {
        if (tok->device_type) {
          num_annotated_nodes++;
        }
        num_nodes++;
        storage_ids.push_back(tok->storage_id);
        device_types.push_back(tok->device_type);
      }
      smap.Set(GetRef<Expr>(kv.first),
               Array<IntegerArray>({storage_ids, device_types}));
    }
    // Either all or none of the nodes should be annotated.
    if (num_annotated_nodes != 0 && num_annotated_nodes != num_nodes) {
      LOG(FATAL) << num_annotated_nodes << " out of " << num_nodes
                 << " expressions are assigned with virtual device types. "
                    "Either all or none of the expressions are expected "
                    "to be annotated.";
    }
    return smap;
  }

 protected:
  using StorageAllocaBaseVisitor::VisitExpr_;
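  // A sketch of what Plan() produces for one expression (hypothetical
  // values for illustration): a call whose single output was placed in
  // slot 0 on an annotated device of type 1 maps to
  // Array<IntegerArray>({storage_ids = [0], device_types = [1]});
  // unannotated expressions carry device type 0.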
  // override CreateToken to request tokens that satisfy the prototype
  // requirements.
  void CreateToken(const ExprNode* op, bool can_realloc) final {
    CHECK(!token_map_.count(op));
    auto it = prototype_.find(op);
    CHECK(it != prototype_.end());
    std::vector<StorageToken*> tokens;
    for (StorageToken* tok : it->second) {
      if (can_realloc) {
        tokens.push_back(Request(tok));
      } else {
        // Allocate a new token.
        StorageToken* allocated_tok = Alloc(tok, GetMemorySize(tok));
        allocated_tok->device_type = tok->device_type;
        // ensure it never gets de-allocated.
        allocated_tok->ref_counter += 1;
        tokens.push_back(allocated_tok);
      }
    }
    token_map_[op] = tokens;
  }

  // Visit a call node: allocate its outputs, then release argument tokens.
  void VisitExpr_(const CallNode* op) final {
    std::vector<StorageToken*> args;
    // for each input, visit the argument token.
    for (Expr arg : op->args) {
      for (StorageToken* tok : GetToken(arg)) {
        args.push_back(tok);
      }
    }
    // create a token for the call node.
    CreateToken(op, true);
    // check if there is an orphaned output that can be released immediately.
    for (StorageToken* tok : token_map_.at(op)) {
      CheckForRelease(tok);
    }
    for (StorageToken* tok : args) {
      tok->ref_counter -= 1;
      CheckForRelease(tok);
    }
  }

  /*!
   * \brief ceil(size / word_size) to get the number of words.
   * \param size The original size.
   * \param word_size The element size.
   */
  static size_t DivRoundUp(size_t size, size_t word_size) {
    return (size + word_size - 1) / word_size;
  }

  /*!
   * \brief Get the memory requirement.
   * \param prototype The prototype token.
   * \return The required memory size.
   */
  size_t GetMemorySize(StorageToken* prototype) {
    const TensorTypeNode* ttype = prototype->ttype;
    CHECK(ttype != nullptr);
    size_t size = 1;
    for (IndexExpr dim : ttype->shape) {
      const int64_t* pval = as_const_int(dim);
      CHECK(pval != nullptr)
          << "Cannot allocate memory for a symbolic tensor shape "
          << ttype->shape;
      CHECK_GE(*pval, 0)
          << "Cannot allocate memory for a tensor with negative shape "
          << *pval;
      size *= static_cast<size_t>(pval[0]);
    }
    size *= DivRoundUp(ttype->dtype.bits() * ttype->dtype.lanes(), 8);
    return size;
  }

  /*!
   * \brief Request a storage token for a given prototype.
   * \param prototype The prototype storage token.
   * \return The result token.
   */
  StorageToken* Request(StorageToken* prototype) {
    // calculate the size.
    size_t size = GetMemorySize(prototype);
    // search memory blocks in [size / match_range_, size * match_range_)
    if (match_range_ == 0) {
      return this->Alloc(prototype, size);
    }
    auto begin = free_.lower_bound(size / match_range_);
    auto mid = free_.lower_bound(size);
    auto end = free_.upper_bound(size * match_range_);
    // first search for memory blocks larger than requested
    for (auto it = mid; it != end; ++it) {
      StorageToken* tok = it->second;
      if (tok->device_type != prototype->device_type) continue;
      CHECK_EQ(tok->ref_counter, 0);
      // Use the exact matching strategy: keep the larger of the two sizes.
      tok->max_bytes = std::max(size, tok->max_bytes);
      tok->ref_counter = prototype->ref_counter;
      // found a match, erase it from the free list and return
      free_.erase(it);
      return tok;
    }
    // then search for memory blocks smaller than the requested space
    for (auto it = mid; it != begin;) {
      --it;
      StorageToken* tok = it->second;
      if (tok->device_type != prototype->device_type) continue;
      CHECK_EQ(tok->ref_counter, 0);
      // grow the block to the requested size.
      tok->max_bytes = std::max(size, tok->max_bytes);
      tok->ref_counter = prototype->ref_counter;
      // erase it from the free list and return
      free_.erase(it);
      return tok;
    }
    // cannot find anything, return a new one.
    return this->Alloc(prototype, size);
  }
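  // A worked example of the sizing math above (illustrative shape, not
  // taken from any particular model): a float32 tensor of shape (10, 20)
  // needs 10 * 20 = 200 elements times DivRoundUp(32 * 1, 8) = 4 bytes
  // each, so GetMemorySize returns 800. With the default match_range_ of
  // 16, Request then scans free blocks whose sizes fall in
  // [800 / 16, 800 * 16) = [50, 12800) before allocating fresh storage.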
  /*!
   * \brief Allocate a storage token by consuming the prototype.
   * \param prototype The prototype token.
   * \param size The size of memory being requested.
   */
  StorageToken* Alloc(StorageToken* prototype, size_t size) {
    prototype->max_bytes = size;
    prototype->storage_id = static_cast<int64_t>(data_.size());
    data_.push_back(prototype);
    return prototype;
  }

  /*!
   * \brief Check if we can release the token.
   * \param tok The token to be released.
   */
  void CheckForRelease(StorageToken* tok) {
    CHECK_GE(tok->storage_id, 0);
    CHECK_GE(tok->ref_counter, 0);
    if (tok->ref_counter == 0) {
      free_.insert({tok->max_bytes, tok});
    }
  }

 private:
  // allocator
  common::Arena arena_;
  // scale used for rough match
  size_t match_range_{16};
  // free list of storage entries
  std::multimap<size_t, StorageToken*> free_;
  // all the storage resources available
  std::vector<StorageToken*> data_;
  /*! \brief internal prototype token map */
  std::unordered_map<const ExprNode*, std::vector<StorageToken*> > prototype_;
};

Map<Expr, Array<IntegerArray> > GraphPlanMemory(const Function& func) {
  return StorageAllocator().Plan(func);
}

TVM_REGISTER_GLOBAL("relay.backend.GraphPlanMemory")
.set_body_typed<Map<Expr, Array<IntegerArray> >(const Function&)>(GraphPlanMemory);

}  // namespace relay
}  // namespace tvm
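// A minimal usage sketch (assuming `func` is a relay::Function that has
// already been through type inference, since the pass reads checked_type()
// on every node):
//
//   Map<Expr, Array<IntegerArray> > smap = GraphPlanMemory(func);
//   // each entry: expression -> {storage ids, device types}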