Commit 20144de2 by masahi Committed by Tianqi Chen

[ROCM] remove fma dispatch (#591)

* removed fma dispatch

* added comments to explain why fma is removed

* fix lint

* use fmuladd intrin for fma dispatch
parent 6a5d6165
...@@ -29,8 +29,10 @@ namespace llvm { ...@@ -29,8 +29,10 @@ namespace llvm {
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.exp") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.exp")
.set_body(DispatchExternOCML); .set_body(DispatchExternOCML);
// On AMD GPU, fma is slower than mac.
// Dispatching fma to the llvm.fmuladd intrinsic instead of OCML allows the backend to generate the faster mac instruction.
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fma") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.fma")
.set_body(DispatchExternOCML); .set_body(DispatchLLVMPureIntrin<::llvm::Intrinsic::fmuladd, 1>);
TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.log") TVM_REGISTER_GLOBAL("tvm.intrin.rule.rocm.log")
.set_body(DispatchExternOCML); .set_body(DispatchExternOCML);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment