/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file src/relay/backend/contrib/dnnl/codegen.cc
 * \brief Implementation of DNNL codegen APIs.
 */

#include <tvm/relay/attrs/nn.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/transform.h>
#include <tvm/relay/type.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>

#include <fstream>
#include <sstream>

#include "../codegen_c/codegen_c.h"

namespace tvm {
namespace relay {
namespace contrib {

// TODO(@zhiics, @comaniac): This is a basic implementation. We should implement
// all utilities and make a base class for users to implement.
class CodegenDNNL : public ExprVisitor, public CodegenCBase {
 public:
  explicit CodegenDNNL(const std::string& id) { this->ext_func_id_ = id; }

  void VisitExpr_(const VarNode* node) final {
    ext_func_args_.push_back(GetRef<Var>(node));
    out_.clear();
    Output output;
    output.name = node->name_hint();
    out_.push_back(output);
  }

  void VisitExpr_(const TupleGetItemNode* op) final {
    // Do nothing
  }
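
  // For illustration only (the buffer name and all values below are
  // hypothetical): when the call visitor below processes a nn.conv2d call on a
  // 1x3x224x224 input with a 64x3x7x7 weight (stride 2, padding 3), it emits
  // roughly
  //
  //   float* buf_0 = (float*)std::malloc(4 * 802816);
  //   dnnl_conv2d(data, weight, buf_0, 1, 3, 224, 224, 64, 1, 3, 3, 7, 7, 2, 2);
  //
  // i.e. the input buffers, a freshly allocated output buffer, and then the
  // attribute arguments collected by the op-specific helpers (e.g. Conv2d()).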

  void VisitExpr_(const CallNode* call) final {
    std::ostringstream decl_stream;
    std::ostringstream buf_stream;
    // Attribute arguments for the DNNL kernel call.
    std::vector<std::string> args;

    // Get the arguments for various DNNL kernels.
    if (IsOp(call, "nn.conv2d")) {
      decl_stream << "dnnl_conv2d";
      args = Conv2d(call);
    } else if (IsOp(call, "nn.dense")) {
      decl_stream << "dnnl_dense";
      args = Dense(call);
    } else if (IsOp(call, "nn.relu")) {
      decl_stream << "dnnl_relu";
      args = Relu(call);
    } else if (IsOp(call, "nn.batch_norm")) {
      decl_stream << "dnnl_bn";
      args = BatchNorm(call);
    } else if (IsOp(call, "add")) {
      decl_stream << "dnnl_add";
      args = Add(call);
    } else {
      LOG(FATAL) << "Unsupported op: " << AsText(call->op, false);
    }

    // Make function call with input buffers when visiting arguments
    bool first = true;
    decl_stream << "(";
    for (size_t i = 0; i < call->args.size(); ++i) {
      VisitExpr(call->args[i]);
      for (auto out : out_) {
        if (!first) {
          decl_stream << ", ";
        }
        first = false;
        decl_stream << out.name;
      }
    }

    // Analyze the output buffer
    auto type_node = call->checked_type().as<TensorTypeNode>();
    CHECK(type_node);
    const auto& dtype = GetDtypeString(type_node);
    std::string out = "buf_" + std::to_string(buf_idx_++);
    auto out_shape = GetShape(call->checked_type());
    int out_size = 1;
    for (size_t i = 0; i < out_shape.size(); ++i) {
      out_size *= out_shape[i];
    }
    this->PrintIndents();
    // Allocate 4 bytes (sizeof(float)) per output element.
    buf_stream << "float* " << out << " = (float*)std::malloc(4 * " << out_size << ");";
    buf_decl_.push_back(buf_stream.str());
    decl_stream << ", " << out;

    // Attach attribute arguments
    for (size_t i = 0; i < args.size(); ++i) {
      decl_stream << ", " << args[i];
    }
    decl_stream << ");";
    ext_func_body.push_back(decl_stream.str());

    // Update output buffer
    out_.clear();
    Output output;
    output.name = out;
    output.dtype = dtype;
    output.need_copy = true;
    output.size = out_size;
    out_.push_back(output);
  }

  std::string JIT() {
    return JitImpl(ext_func_id_, ext_func_args_, buf_decl_, ext_func_body, out_);
  }

 private:
  std::vector<std::string> Conv2d(const CallNode* call) {
    std::vector<std::string> args;
    const auto* conv2d_attr = call->attrs.as<Conv2DAttrs>();
    CHECK(conv2d_attr);

    auto ishape = GetShape(call->args[0]->checked_type());
    auto wshape = GetShape(call->args[1]->checked_type());

    // Args: N, C, H, W
    for (auto s : ishape) {
      args.push_back(std::to_string(s));
    }

    // Args: O, G, Ph, Pw, Kh, Kw, Sh, Sw
    args.push_back(std::to_string(wshape[0]));
    args.push_back(std::to_string(conv2d_attr->groups));
    args.push_back(std::to_string(conv2d_attr->padding[0].as<IntImmNode>()->value));
    args.push_back(std::to_string(conv2d_attr->padding[1].as<IntImmNode>()->value));
    args.push_back(std::to_string(wshape[2]));
    args.push_back(std::to_string(wshape[3]));
    args.push_back(std::to_string(conv2d_attr->strides[0].as<IntImmNode>()->value));
    args.push_back(std::to_string(conv2d_attr->strides[1].as<IntImmNode>()->value));

    return args;
  }

  std::vector<std::string> Dense(const CallNode* call) {
    std::vector<std::string> args;
    auto ishape = GetShape(call->args[0]->checked_type());
    auto wshape = GetShape(call->args[1]->checked_type());

    // Args: N, C, O
    args.push_back(std::to_string(ishape[0]));
    args.push_back(std::to_string(ishape[1]));
    args.push_back(std::to_string(wshape[0]));

    return args;
  }

  std::vector<std::string> Relu(const CallNode* call) {
    std::vector<std::string> args;
    auto ishape = GetShape(call->args[0]->checked_type());

    // Args: N, C, H, W
    for (auto s : ishape) {
      args.push_back(std::to_string(s));
    }

    return args;
  }

  std::vector<std::string> BatchNorm(const CallNode* call) {
    std::vector<std::string> args;
    const auto* bn_attr = call->attrs.as<BatchNormAttrs>();
    auto ishape = GetShape(call->args[0]->checked_type());

    // Args: N, C, H, W
    for (auto s : ishape) {
      args.push_back(std::to_string(s));
    }

    // Args: epsilon
    args.push_back(std::to_string(bn_attr->epsilon));

    return args;
  }

  std::vector<std::string> Add(const CallNode* call) {
    std::vector<std::string> args;
    auto ishape = GetShape(call->args[0]->checked_type());

    // Args: H, W
    for (auto s : ishape) {
      args.push_back(std::to_string(s));
    }

    return args;
  }

  /*! \brief The id of the externally offloaded DNNL function. */
  std::string ext_func_id_{""};
  /*!
   * \brief The index to track the output buffer. Each kernel will redirect the
   * output to a buffer that may be consumed by other kernels.
   */
  int buf_idx_{0};
  /*! \brief The arguments used by a wrapped function that calls DNNL kernels. */
  Array<Var> ext_func_args_;
  /*! \brief The statements of the function that will be compiled using DNNL kernels. */
  std::vector<std::string> ext_func_body;
  /*! \brief The declaration of intermediate buffers. */
  std::vector<std::string> buf_decl_;
  /*! \brief The names of the outputs. */
  std::vector<Output> out_;
};

/*!
 * \brief The DNNL codegen helper to generate wrapper function calls to DNNL
 * libraries. The code is a CSourceModule that can be compiled separately and
 * linked together with a DSOModule.
 */
class DNNLModuleCodegen : public CSourceModuleCodegenBase {
 public:
  // Create a corresponding DNNL function for the given relay Function.
  void GenDNNLFunc(const Function& func) {
    CHECK(func.defined()) << "Input error: expect a Relay function.";
    const auto* call = func->body.as<CallNode>();
    CHECK(call) << "DNNL expects a single op call as the function body";

    // Record the external symbol for runtime lookup.
    auto sid = GetExtSymbol(func);

    CodegenDNNL builder(sid);
    builder.VisitExpr(func->body);
    code_stream_ << builder.JIT();
  }

  /*!
   * \brief The overridden function that will create a CSourceModule. In order
   * to compile the generated C source code, users need to specify the paths to
   * some libraries, including some TVM-required and DNNL-specific ones. To make
   * linking simpler, the DNNL kernels are wrapped in a TVM-compatible manner
   * and live under the tvm/src/runtime/contrib/dnnl folder.
   *
   * \param ref An object ref that could be either a Relay function or module.
   *
   * \return The runtime module that contains C source code.
   */
  runtime::Module CreateCSourceModule(const ObjectRef& ref) override {
    // Create headers
    code_stream_ << "#include <cstdint>\n";
    code_stream_ << "#include <cstdlib>\n";
    code_stream_ << "#include <cstring>\n";
    code_stream_ << "#include <tvm/runtime/c_runtime_api.h>\n";
    code_stream_ << "#include <tvm/runtime/packed_func.h>\n";
    code_stream_ << "#include <dlpack/dlpack.h>\n";
    // The dnnl_kernel header is kept under src/runtime/contrib/dnnl so that it
    // is not exposed to ordinary users. For export_library to find it, users
    // need to pass -I${PATH_TO_TVM}/src/runtime/contrib.
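    // For example (a hedged sketch; the file names and flags below are
    // illustrative and platform dependent), the emitted source could also be
    // compiled by hand with something like:
    //   g++ -O2 -std=c++14 -shared -fPIC dnnl_generated.cc -o dnnl_generated.so \
    //       -I${PATH_TO_TVM}/include -I${PATH_TO_TVM}/src/runtime/contrib \
    //       -I${PATH_TO_TVM}/3rdparty/dlpack/include -ldnnl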
    code_stream_ << "#include <dnnl/dnnl_kernel.h>\n";
    code_stream_ << "using namespace tvm::runtime::contrib;\n";
    code_stream_ << "\n";

    if (ref->IsInstance<FunctionNode>()) {
      GenDNNLFunc(Downcast<Function>(ref));
    } else if (ref->IsInstance<IRModuleNode>()) {
      IRModule mod = Downcast<IRModule>(ref);
      for (const auto& it : mod->functions) {
        GenDNNLFunc(Downcast<Function>(it.second));
      }
    } else {
      LOG(FATAL) << "The input ref is expected to be a Relay function or module"
                 << "\n";
    }

    // Create a CSourceModule
    const auto* pf = runtime::Registry::Get("runtime.CSourceModuleCreate");
    CHECK(pf != nullptr) << "Cannot find csource module to create the external runtime module";
    return (*pf)(code_stream_.str(), "cc");
  }

 private:
  /*!
   * \brief The code stream that prints the code that will be compiled using
   * external codegen tools.
   */
  std::ostringstream code_stream_;
};

/*!
 * \brief The external compiler/codegen tool. It takes a Relay expression/module
 * and compiles it into a runtime module.
 */
runtime::Module DNNLCompiler(const ObjectRef& ref) {
  DNNLModuleCodegen dnnl;
  return dnnl.CreateCSourceModule(ref);
}

TVM_REGISTER_GLOBAL("relay.ext.dnnl").set_body_typed(DNNLCompiler);
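
// A minimal usage sketch from C++ (for illustration; `func` stands for any
// Relay function already annotated with its external symbol):
//
//   const auto* dnnl_compiler = runtime::Registry::Get("relay.ext.dnnl");
//   CHECK(dnnl_compiler != nullptr);
//   runtime::Module mod = (*dnnl_compiler)(func);
//
// This is the entry point the TVM build pipeline invokes for functions
// annotated with Compiler="dnnl".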

}  // namespace contrib
}  // namespace relay
}  // namespace tvm