Commit 53c0363d by Peter Yeh; committed by masahi

update rocm intrin rule (#4499)

parent 599775f4
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
*/ */
#ifdef TVM_LLVM_VERSION #ifdef TVM_LLVM_VERSION
#include "intrin_rule_llvm.h"
#include <tvm/ir.h> #include <tvm/ir.h>
#include <tvm/expr.h> #include <tvm/expr.h>
#include <tvm/api_registry.h> #include <tvm/api_registry.h>
...@@ -45,27 +44,28 @@ inline void DispatchExternOCML(const TVMArgs& args, TVMRetValue* rv) { ...@@ -45,27 +44,28 @@ inline void DispatchExternOCML(const TVMArgs& args, TVMRetValue* rv) {
namespace llvm { namespace llvm {
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.floor") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.floor")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::floor, 1>); .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.ceil") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.ceil")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::ceil, 1>); .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.round") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.round")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::round, 1>); .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.trunc") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.trunc")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::trunc, 1>); .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fabs") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fabs")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fabs, 1>); .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.exp") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.exp")
.set_body(DispatchExternOCML); .set_body(DispatchExternOCML);
// On AMD GPU, fma is slower than mac TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.erf")
// removing fma dispatch allows backend to generate faster mac instruction .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fma") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fma")
.set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 1>); .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.log") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.log")
.set_body(DispatchExternOCML); .set_body(DispatchExternOCML);
...@@ -78,6 +78,16 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.pow") ...@@ -78,6 +78,16 @@ TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.pow")
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tanh") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.tanh")
.set_body(DispatchExternOCML); .set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.cos")
.set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.sin")
.set_body(DispatchExternOCML);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.atan")
.set_body(DispatchExternOCML);
} // namespace llvm } // namespace llvm
} // namespace codegen } // namespace codegen
} // namespace tvm } // namespace tvm
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment