/*!
 *  Copyright (c) 2018 by Contributors
 * \file relay/backend/compile_engine.cc
 * \brief Internal compilation engine.
 */
#include <tvm/schedule.h>
#include <tvm/packed_func_ext.h>
#include <tvm/operation.h>
#include <tvm/runtime/registry.h>
#include <tvm/relay/pass.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/op_attr_types.h>
#include <utility>
#include <limits>
#include <mutex>
#include <functional>
#include "compile_engine.h"

namespace tvm {
namespace relay {

CCacheKey CCacheKeyNode::make(Function source_func, Target target) {
  auto n = make_node<CCacheKeyNode>();
  n->source_func = std::move(source_func);
  n->target = std::move(target);
  return CCacheKey(n);
}

// Helper functor that builds the schedule and cached function
// for a primitive function on behalf of the compile engine.
class ScheduleGetter :
      public ExprFunctor<Array<Tensor>(const Expr&)> {
 public:
  explicit ScheduleGetter(Target target)
      : target_(target) {}

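  // Convert a shape, narrowing constant dimensions to int32 when they fit.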
  Array<IndexExpr> GetShape(const Array<IndexExpr>& shape) {
    // for now, we always use int32 shape when possible
    // even if the result of shape inference becomes int64.
    Array<IndexExpr> res;
    for (IndexExpr val : shape) {
      const int64_t* pval = as_const_int(val);
      if (pval != nullptr) {
        CHECK_LE(pval[0], std::numeric_limits<int32_t>::max());
        CHECK_GE(pval[0], std::numeric_limits<int32_t>::min());
        res.push_back(ir::IntImm::make(Int(32), *pval));
      } else {
        res.push_back(val);
      }
    }
    return res;
  }

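  // Create placeholder inputs for the parameters, lower the body into
  // compute ops, and build the schedule via the master op's FTVMSchedule.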
  std::pair<Schedule, CachedFunc> Create(const Function& prim_func) {
    static auto fschedule =
        Op::GetAttr<FTVMSchedule>("FTVMSchedule");
    auto cache_node = make_node<CachedFuncNode>();
    cache_node->target = target_;
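    // Create placeholder tensors for each parameter, flattening tuple parameters.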
    for (Var param : prim_func->params) {
      Array<tvm::Tensor> inputs;
      if (const auto* ttype = param->checked_type().as<TensorTypeNode>()) {
        tvm::Tensor tensor = tvm::placeholder(
            GetShape(ttype->shape), ttype->dtype);
        cache_node->inputs.push_back(tensor);
        inputs.push_back(tensor);
      } else {
        // flatten tuple of tensor type.
        const auto* tuple_type = param->type_as<TupleTypeNode>();
        for (Type field : tuple_type->fields) {
          const auto* ttype = field.as<TensorTypeNode>();
          CHECK(ttype != nullptr);
          tvm::Tensor tensor = tvm::placeholder(
              GetShape(ttype->shape), ttype->dtype);
          cache_node->inputs.push_back(tensor);
          inputs.push_back(tensor);
        }
      }
      memo_[param] = inputs;
    }
    readable_name_stream_ << "fused";
    cache_node->outputs = this->VisitExpr(prim_func->body);
    cache_node->func_name = readable_name_stream_.str();
    CachedFunc cfunc(cache_node);
    CHECK(master_op_.defined());
    Schedule schedule = fschedule[master_op_](
        master_attrs_, cache_node->outputs, target_);
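    // Inline scalar constants so they do not show up as separate stages.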
    for (const auto& scalar : scalars_) {
      schedule[scalar].compute_inline();
    }
    return std::make_pair(schedule, cfunc);
  }

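  // Memoized dispatch: each sub-expression is lowered only once.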
  Array<Tensor> VisitExpr(const Expr& expr) {
    auto it = memo_.find(expr);
    if (it != memo_.end()) {
      return it->second;
    } else {
      Array<Tensor> res = ExprFunctor::VisitExpr(expr);
      memo_[expr] = res;
      return res;
    }
  }

  Array<Tensor> VisitExpr_(const VarNode* op) final {
    LOG(FATAL) << "Free variable " << op->name_hint();
    return {};
  }

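  // Lower a scalar constant into a zero-dimensional compute op.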
  Array<Tensor> VisitExpr_(const ConstantNode* op) final {
    CHECK(op->is_scalar());
    void* data = op->data->data;
    DataType dtype = TVMType2Type(op->data->dtype);
    Tensor value = tvm::compute({}, [&](const Array<tvm::Var>&) {
        if (dtype == Int(32)) {
          return make_const(dtype, static_cast<const int32_t*>(data)[0]);
        } else if (dtype == Int(64)) {
          return make_const(dtype, static_cast<const int64_t*>(data)[0]);
        } else if (dtype == Float(32)) {
          return make_const(dtype, static_cast<const float*>(data)[0]);
        } else if (dtype == Float(64)) {
          return make_const(dtype, static_cast<const double*>(data)[0]);
        } else if (dtype == Bool()) {
          return make_const(dtype, static_cast<const uint8_t*>(data)[0]);
        } else {
          LOG(FATAL) << "not handled";
          return tvm::Expr();
        }
      });
    scalars_.push_back(value->op);
    return {value};
  }

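  // Lower a call to a primitive op through its registered FTVMCompute and
  // track the op pattern to decide the master op used for scheduling.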
  Array<Tensor> VisitExpr_(const CallNode* call_node) final {
    static auto fcompute =
        Op::GetAttr<FTVMCompute>("FTVMCompute");
    static auto fpattern =
        Op::GetAttr<TOpPattern>("TOpPattern");

    Array<Tensor> inputs;
    int count_tuple = 0;
    for (Expr arg : call_node->args) {
      if (arg->checked_type().as<TupleTypeNode>()) {
        ++count_tuple;
      }
      for (Tensor tensor : VisitExpr(arg)) {
        inputs.push_back(tensor);
      }
    }
    if (count_tuple) {
      CHECK_EQ(call_node->args.size(), 1U)
          << "Only allow function with a single tuple input";
    }
    CHECK(call_node->op.as<OpNode>())
        << "Primitive function only allows call into primitive ops";
    Op op = Downcast<Op>(call_node->op);
    Array<Tensor> outputs = fcompute[op](
        call_node->attrs,
        inputs,
        call_node->checked_type(),
        target_);

    int op_pattern = fpattern[op];
    if (op_pattern >= kCommReduce) {
      CHECK(!master_op_.defined() || master_op_pattern_ < kCommReduce)
          << "Two complicated ops in one primitive function:"
          << " master=" << master_op_ << " current=" << op;
    }
    // The op with the highest pattern becomes the master op;
    // its registered schedule is used for the whole fused function.
    if (op_pattern >= master_op_pattern_) {
      master_op_ = op;
      master_attrs_ = call_node->attrs;
      master_op_pattern_ = op_pattern;
    }
    if (outputs.size() != 1) {
      const auto* tuple_type =
          call_node->checked_type().as<TupleTypeNode>();
      CHECK(tuple_type) << "Expect output to be a tuple type";
      CHECK_EQ(tuple_type->fields.size(), outputs.size());
    }
    readable_name_stream_ << '_' << op->name;
    return outputs;
  }

  Array<Tensor> VisitExpr_(const FunctionNode* op) final {
    LOG(FATAL) << "Do not support sub function";
    return Array<Tensor>();
  }

  Array<Tensor> VisitExpr_(const LetNode* op) final {
    Array<Tensor> val = VisitExpr(op->value);
    CHECK(!memo_.count(op->var));
    memo_[op->var] = val;
    return VisitExpr(op->body);
  }

  Array<Tensor> VisitExpr_(const TupleNode* op) final {
    Array<Tensor> fields;
    for (Expr field : op->fields) {
      CHECK(field->checked_type().as<TensorTypeNode>())
          << "Only allow Tuple of Tensor";
      Array<Tensor> res = VisitExpr(field);
      CHECK_EQ(res.size(), 1);
      fields.push_back(res[0]);
    }
    return fields;
  }

  Array<Tensor> VisitExpr_(const TupleGetItemNode* op) final {
    const auto* tuple_type = op->tuple->type_as<TupleTypeNode>();
    Array<Tensor> tuple = VisitExpr(op->tuple);
    CHECK_EQ(tuple_type->fields.size(), tuple.size());
    CHECK_GE(op->index, 0);
    CHECK_LT(static_cast<size_t>(op->index), tuple.size());
    return {tuple[op->index]};
  }

 private:
  /*! \brief The target to generate the schedule for. */
  tvm::Target target_;
  /*! \brief The master op whose schedule drives the fused function. */
  Op master_op_;
  /*! \brief Attributes of the master op. */
  Attrs master_attrs_;
  /*! \brief TOpPattern of the master op. */
  int master_op_pattern_{0};
  /*! \brief Readable name assembled from the fused op names. */
  std::ostringstream readable_name_stream_;
  /*! \brief Memoized lowering results per expression. */
  std::unordered_map<Expr, Array<Tensor>, NodeHash, NodeEqual> memo_;
  /*! \brief Scalar constant ops to be inlined by the schedule. */
  Array<Operation> scalars_;
};


class CompileEngineImpl : public CompileEngineNode {
 public:
  // Lower the function.
  CachedFunc Lower(const CCacheKey& key)  {
    return LowerInternal(key)->cached_func;
  }

  // For now, build one module per function.
  PackedFunc JIT(const CCacheKey& key) final {
    CCacheValue value = LowerInternal(key);
    if (value->packed_func != nullptr) return value->packed_func;
    // build the function.
    if (const auto* f = runtime::Registry::Get("relay.backend.build")) {
      tvm::runtime::Module m = (*f)(value->cached_func->funcs, key->target);
      value->packed_func = m.GetFunction(value->cached_func->func_name);
    } else {
      LOG(FATAL) << "relay.backend.build is not registered";
    }
    return value->packed_func;
  }
  void Clear() final {
    cache_.clear();
  }
  // List all items in the cache.
  Array<NodeRef> ListItems() {
    std::lock_guard<std::mutex> lock(mutex_);
    Array<NodeRef> items;
    for (auto& kv : cache_) {
      items.push_back(kv.first);
      items.push_back(kv.second);
    }
    return items;
  }
  /*!
   * \brief Create schedule for target.
   * \param source_func The primitive function to be lowered.
   * \param target The target we want to create schedule for.
   * \return Pair of schedule and cache.
   *  The funcs field in cache is not yet populated.
   */
  std::pair<Schedule, CachedFunc> CreateSchedule(
      const Function& source_func, const Target& target) {
    return ScheduleGetter(target).Create(source_func);
  }

 private:
  // Lower the function and cache the result.
  CCacheValue LowerInternal(const CCacheKey& key)  {
    std::lock_guard<std::mutex> lock(mutex_);
    CCacheValue value;
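    // Reuse an existing cache entry if present; otherwise create a new one.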
    auto it = cache_.find(key);
    if (it != cache_.end()) {
      it->second->use_count += 1;
      if (it->second->cached_func.defined()) return it->second;
      value = it->second;
    } else {
      value = CCacheValue(make_node<CCacheValueNode>());
      value->use_count = 0;
      cache_[key] = value;
    }
    // Enforce lowering under the given target.
    TargetContext target_ctx(key->target);

    CHECK(!value->cached_func.defined());
    auto spair = CreateSchedule(key->source_func, key->target);
    auto cache_node = make_node<CachedFuncNode>(
        *(spair.second.operator->()));
    cache_node->func_name = GetUniqueName(cache_node->func_name);
    // NOTE: array will copy on write.
    Array<Tensor> all_args = cache_node->inputs;
    for (Tensor arg : cache_node->outputs) {
      all_args.push_back(arg);
    }
    // lower the function
    if (const auto* f = runtime::Registry::Get("relay.backend.lower")) {
      cache_node->funcs = (*f)(
          spair.first, all_args, cache_node->func_name, key->source_func);
    } else {
      LOG(FATAL) << "relay.backend.lower is not registered";
    }
    value->cached_func = CachedFunc(cache_node);
    return value;
  }
  /*!
   * \brief Get unique name from name.
   * \param name The original name.
   * \return Updated name which is unique.
   */
  std::string GetUniqueName(std::string name) {
    for (size_t i = 0; i < name.length(); ++i) {
      if (name[i] == '.') name[i] = '_';
    }
    while (true) {
      auto it = name_map_.find(name);
      if (it == name_map_.end()) {
        name_map_[name] = 1;
        return name;
      } else {
        std::ostringstream os;
        os << name << "_" << it->second;
        ++(it->second);
        name = os.str();
      }
    }
    return name;
  }
  /*! \brief compiler cache lock*/
  std::mutex mutex_;
  /*! \brief internal name map to get a unique name */
  std::unordered_map<std::string, int> name_map_;
  /*! \brief internal compiler cache */
  std::unordered_map<CCacheKey, CCacheValue> cache_;
};

/*! \brief The global compile engine */
const CompileEngine& CompileEngine::Global() {
  // Intentionally allocate a raw pointer to avoid
  // freeing it during destruction.
  static CompileEngine* inst = new CompileEngine(
      make_node<CompileEngineImpl>());
  return *inst;
}


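// Expose the compile engine through the global registry.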
TVM_REGISTER_GLOBAL("relay.backend._make_CCacheKey")
.set_body_typed<CCacheKey(Function, Target)>(CCacheKeyNode::make);

TVM_REGISTER_GLOBAL("relay.backend._CompileEngineGlobal")
.set_body_typed<CompileEngine()>([]() {
    return CompileEngine::Global();
  });

TVM_REGISTER_GLOBAL("relay.backend._CompileEngineClear")
.set_body_typed<void(const CompileEngine&)>([](CompileEngine self) {
    self->Clear();
  });

TVM_REGISTER_GLOBAL("relay.backend._CompileEngineLower")
.set_body_typed<CachedFunc(CompileEngine, CCacheKey)>(
    [](CompileEngine self, CCacheKey key) {
      return self->Lower(key);
    });

TVM_REGISTER_GLOBAL("relay.backend._CompileEngineJIT")
.set_body_typed<PackedFunc(CompileEngine, CCacheKey)>(
    [](CompileEngine self, CCacheKey key) {
      return self->JIT(key);
    });

TVM_REGISTER_GLOBAL("relay.backend._CompileEngineListItems")
.set_body_typed<Array<NodeRef>(CompileEngine)>(
    [](CompileEngine self){
      return static_cast<CompileEngineImpl*>(self.operator->())->ListItems();
    });
}  // namespace relay
}  // namespace tvm