/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 *  Copyright (c) 2017 by Contributors
 * \file codegen_cuda.h
 * \brief Utility to generate CUDA code
 */
#ifndef TVM_CODEGEN_CODEGEN_CUDA_H_
#define TVM_CODEGEN_CODEGEN_CUDA_H_

#include <tvm/codegen.h>
#include <tvm/packed_func_ext.h>
#include <string>
#include "codegen_c.h"

namespace tvm {
namespace codegen {

class CodeGenCUDA final : public CodeGenC {
37
 public:
38
  CodeGenCUDA();
39
  void Init(bool output_ssa);
40
  void AddFunction(LoweredFunc f);
41
  std::string Finish();
42 43 44
  bool need_include_path() {
    return (enable_fp16_ || enable_int8_ || need_math_constants_h_);
  }
45
  // override behavior
46
  void VisitStmt_(const ir::For* op) final;
47
  void PrintStorageSync(const Call* op) final;
48
  void PrintStorageScope(const std::string& scope, std::ostream& os) final;  // NOLINT(*)
49 50 51
  void PrintVecBinaryOp(
      const std::string&op, Type t,
      Expr lhs, Expr rhs, std::ostream& os) final;  // NOLINT(*)
52
  void PrintType(Type t, std::ostream& os) final; // NOLINT(*)
53 54 55 56
  void PrintVecElemLoad(
      const std::string& vec, Type t, int i, std::ostream& os) final;  // NOLINT(*)
  void PrintVecElemStore(
      const std::string& vec, Type t, int i, const std::string& value) final;
57
  void BindThreadIndex(const IterVar& iv) final;  // NOLINT(*)
58
  // overload visitor
59
  void VisitExpr_(const Ramp* op, std::ostream& os) final; // NOLINT(*)
60
  void VisitExpr_(const Broadcast* op, std::ostream& os) final; // NOLINT(*)
61
  void VisitExpr_(const FloatImm *op, std::ostream& os) final;
62 63
  void VisitStmt_(const Evaluate *op) final;

64
 private:
65 66 67 68 69 70
  // Whether global barrier is needed.
  bool need_global_barrier_{false};
  // Global barrier state
  std::string vid_global_barrier_state_;
  // Global barrier expected node.
  std::string vid_global_barrier_expect_;
71 72
  // whether enable fp16
  bool enable_fp16_{false};
73 74
  // whether enable int8
  bool enable_int8_{false};
75 76 77
  // whether need math_constants.h
  bool need_math_constants_h_{false};
  friend void PrintConst(const FloatImm* op, std::ostream& os, CodeGenCUDA* p);
78 79 80 81 82 83
};

}  // namespace codegen
}  // namespace tvm

#endif  // TVM_CODEGEN_CODEGEN_CUDA_H_