Commit 622be047 by Tianqi Chen Committed by GitHub

[BUILD][LLVM] Support LLVM mainline 5.0 6.0 (#356)

* [BUILD][LLVM] Support LLVM mainline 5.0 6.0

* Reduce parallelism
parent c6ebb5a1
......@@ -4,7 +4,11 @@
// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/
// tvm libraries
tvm_lib = 'lib/libtvm.so, lib/libtvm_runtime.so, config.mk'
tvm_runtime = "lib/libtvm_runtime.so, config.mk"
tvm_lib = "lib/libtvm.so, " + tvm_runtime
// LLVM upstream lib
tvm_multilib = "lib/libtvm_llvm40.so, lib/libtvm_llvm50.so, lib/libtvm_llvm60.so, " + tvm_runtime
// command to start a docker container
docker_run = 'tests/ci_build/ci_build.sh'
// timeout in minutes
......@@ -56,7 +60,7 @@ def make(docker_type, make_flag) {
}
// pack libraries for later use
def pack_lib(name, libs=tvm_lib) {
def pack_lib(name, libs) {
sh """
echo "Packing ${libs} into ${name}"
echo ${libs} | sed -e 's/,/ /g' | xargs md5sum
......@@ -66,7 +70,7 @@ def pack_lib(name, libs=tvm_lib) {
// unpack libraries saved before
def unpack_lib(name, libs=tvm_lib) {
def unpack_lib(name, libs) {
unstash name
sh """
echo "Unpacked ${libs} from ${name}"
......@@ -88,8 +92,15 @@ stage('Build') {
echo USE_RPC=1 >> config.mk
echo USE_BLAS=openblas >> config.mk
"""
make('gpu', '-j4')
pack_lib('gpu')
make('gpu', '-j2')
sh "mv lib/libtvm.so lib/libtvm_llvm40.so"
sh "echo LLVM_CONFIG=llvm-config-5.0 >> config.mk"
make('gpu', '-j2')
sh "mv lib/libtvm.so lib/libtvm_llvm50.so"
sh "echo LLVM_CONFIG=llvm-config-6.0 >> config.mk"
make('gpu', '-j2')
sh "mv lib/libtvm.so lib/libtvm_llvm60.so"
pack_lib('gpu', tvm_multilib)
}
}
},
......@@ -103,8 +114,8 @@ stage('Build') {
echo USE_OPENCL=0 >> config.mk
echo USE_RPC=0 >> config.mk
"""
make('cpu', '-j4')
pack_lib('cpu')
make('cpu', '-j2')
pack_lib('cpu', tvm_lib)
}
}
},
......@@ -119,8 +130,15 @@ stage('Build') {
echo LLVM_CONFIG=llvm-config-4.0 >> config.mk
echo USE_RPC=1 >> config.mk
"""
make('i386', '-j4')
pack_lib('i386')
make('i386', '-j2')
sh "mv lib/libtvm.so lib/libtvm_llvm40.so"
sh "echo LLVM_CONFIG=llvm-config-5.0 >> config.mk"
make('i386', '-j2')
sh "mv lib/libtvm.so lib/libtvm_llvm50.so"
sh "echo LLVM_CONFIG=llvm-config-6.0 >> config.mk"
make('i386', '-j2')
sh "mv lib/libtvm.so lib/libtvm_llvm60.so"
pack_lib('i386', tvm_multilib)
}
}
},
......@@ -145,7 +163,7 @@ stage('Build') {
sh "${docker_run} emscripten ./tests/scripts/task_web_build.sh"
}
}
pack_lib('weblib')
pack_lib('weblib', tvm_lib)
}
}
}
......@@ -156,7 +174,13 @@ stage('Unit Test') {
node('GPU' && 'linux') {
ws('workspace/tvm/ut-python-gpu') {
init_git()
unpack_lib('gpu', tvm_lib)
unpack_lib('gpu', tvm_multilib)
sh "cp lib/libtvm_llvm40.so lib/libtvm.so"
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu ./tests/scripts/task_python_unittest.sh"
}
// Test on the latest mainline.
sh "cp lib/libtvm_llvm60.so lib/libtvm.so"
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu ./tests/scripts/task_python_unittest.sh"
}
......@@ -167,11 +191,17 @@ stage('Unit Test') {
node('CPU' && 'linux') {
ws('workspace/tvm/ut-python-i386') {
init_git()
unpack_lib('i386', tvm_lib)
unpack_lib('i386', tvm_multilib)
sh "cp lib/libtvm_llvm40.so lib/libtvm.so"
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} i386 ./tests/scripts/task_python_unittest.sh"
sh "${docker_run} i386 ./tests/scripts/task_python_integration.sh"
}
// Test on llvm 5.0
sh "cp lib/libtvm_llvm50.so lib/libtvm.so"
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} i386 ./tests/scripts/task_python_integration.sh"
}
}
}
},
......@@ -190,7 +220,8 @@ stage('Unit Test') {
node('GPU' && 'linux') {
ws('workspace/tvm/ut-java') {
init_git()
unpack_lib('gpu', tvm_lib)
unpack_lib('gpu', tvm_multilib)
sh "cp lib/libtvm_llvm40.so lib/libtvm.so"
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu ./tests/scripts/task_java_unittest.sh"
}
......@@ -204,7 +235,8 @@ stage('Integration Test') {
node('GPU' && 'linux') {
ws('workspace/tvm/it-python-gpu') {
init_git()
unpack_lib('gpu')
unpack_lib('gpu', tvm_multilib)
sh "cp lib/libtvm_llvm40.so lib/libtvm.so"
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu ./tests/scripts/task_python_integration.sh"
sh "${docker_run} gpu ./tests/scripts/task_python_topi.sh"
......@@ -216,7 +248,7 @@ stage('Integration Test') {
node('emcc') {
ws('workspace/tvm/it-weblib') {
init_git()
unpack_lib('weblib')
unpack_lib('weblib', tvm_lib)
sh "${docker_run} emscripten echo testing javascript..."
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} emscripten ./tests/scripts/task_web_test.sh"
......@@ -228,7 +260,8 @@ stage('Integration Test') {
node('GPU' && 'linux') {
ws('workspace/tvm/docs-python-gpu') {
init_git()
unpack_lib('gpu')
unpack_lib('gpu', tvm_multilib)
sh "cp lib/libtvm_llvm40.so lib/libtvm.so"
timeout(time: max_time, unit: 'MINUTES') {
sh "${docker_run} gpu ./tests/scripts/task_python_docs.sh"
}
......
......@@ -20,11 +20,34 @@ ifndef DLPACK_PATH
DLPACK_PATH = $(ROOTDIR)/dlpack
endif
UNAME_S := $(shell uname -s)
# The flags
LLVM_CFLAGS= -fno-rtti -DDMLC_ENABLE_RTTI=0
LDFLAGS = -pthread -lm -ldl
INCLUDE_FLAGS = -Iinclude -I$(DLPACK_PATH)/include -I$(DMLC_CORE_PATH)/include -IHalideIR/src -Itopi/include
CFLAGS = -std=c++11 -Wall -O2 $(INCLUDE_FLAGS) -fPIC
FRAMEWORKS =
OBJCFLAGS = -fno-objc-arc
EMCC_FLAGS= -s RESERVED_FUNCTION_POINTERS=2 -s NO_EXIT_RUNTIME=1 -s MAIN_MODULE=1 -DDMLC_LOG_STACK_TRACE=0\
-std=c++11 -Oz $(INCLUDE_FLAGS)
# llvm configuration
ifdef LLVM_CONFIG
LLVM_VERSION=$(shell $(LLVM_CONFIG) --version| cut -b 1,3)
LLVM_INCLUDE=$(filter -I%, $(shell $(LLVM_CONFIG) --cxxflags))
LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags --libs --system-libs)
LLVM_CFLAGS += $(LLVM_INCLUDE) -DTVM_LLVM_VERSION=$(LLVM_VERSION)
else
LLVM_VERSION=00
endif
# The source code dependencies
LIB_HALIDEIR = HalideIR/lib/libHalideIR.a
CC_SRC = $(filter-out src/contrib/%.cc src/runtime/%.cc,\
# LLVM codegen sources are listed separately in LLVM_SRC and built into
# $(LLVM_BUILD), so they are excluded from the generic source list here.
CC_SRC = $(filter-out src/contrib/%.cc src/runtime/%.cc src/codegen/llvm/%.cc,\
$(wildcard src/*/*.cc src/*/*/*.cc))
LLVM_SRC = $(wildcard src/codegen/llvm/*.cc src/codegen/llvm/*/*.cc)
METAL_SRC = $(wildcard src/runtime/metal/*.mm)
CUDA_SRC = $(wildcard src/runtime/cuda/*.cc)
ROCM_SRC = $(wildcard src/runtime/rocm/*.cc)
......@@ -33,31 +56,21 @@ RPC_SRC = $(wildcard src/runtime/rpc/*.cc)
RUNTIME_SRC = $(wildcard src/runtime/*.cc)
# Objectives
LLVM_BUILD = build/llvm${LLVM_VERSION}
LLVM_OBJ = $(patsubst src/%.cc, ${LLVM_BUILD}/%.o, $(LLVM_SRC))
METAL_OBJ = $(patsubst src/%.mm, build/%.o, $(METAL_SRC))
CUDA_OBJ = $(patsubst src/%.cc, build/%.o, $(CUDA_SRC))
ROCM_OBJ = $(patsubst src/%.cc, build/%.o, $(ROCM_SRC))
OPENCL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENCL_SRC))
RPC_OBJ = $(patsubst src/%.cc, build/%.o, $(RPC_SRC))
CC_OBJ = $(patsubst src/%.cc, build/%.o, $(CC_SRC))
CC_OBJ = $(patsubst src/%.cc, build/%.o, $(CC_SRC)) $(LLVM_OBJ)
RUNTIME_OBJ = $(patsubst src/%.cc, build/%.o, $(RUNTIME_SRC))
CONTRIB_OBJ =
UNAME_S := $(shell uname -s)
# Deps
ALL_DEP = $(CC_OBJ) $(CONTRIB_OBJ) $(LIB_HALIDEIR)
RUNTIME_DEP = $(RUNTIME_OBJ)
# The flags
LDFLAGS = -pthread -lm -ldl
INCLUDE_FLAGS = -Iinclude -I$(DLPACK_PATH)/include -I$(DMLC_CORE_PATH)/include -IHalideIR/src -Itopi/include
CFLAGS = -std=c++11 -Wall -O2 $(INCLUDE_FLAGS) -fPIC
LLVM_CFLAGS= -fno-rtti -DDMLC_ENABLE_RTTI=0
FRAMEWORKS =
OBJCFLAGS = -fno-objc-arc
EMCC_FLAGS= -s RESERVED_FUNCTION_POINTERS=2 -s NO_EXIT_RUNTIME=1 -s MAIN_MODULE=1 -DDMLC_LOG_STACK_TRACE=0\
-std=c++11 -Oz $(INCLUDE_FLAGS)
# Dependency specific rules
ifdef CUDA_PATH
NVCC=$(CUDA_PATH)/bin/nvcc
......@@ -111,14 +124,6 @@ ifeq ($(USE_RPC), 1)
RUNTIME_DEP += $(RPC_OBJ)
endif
# llvm configuration
ifdef LLVM_CONFIG
LLVM_VERSION=$(shell $(LLVM_CONFIG) --version| cut -b 1,3)
LLVM_INCLUDE=$(filter -I%, $(shell $(LLVM_CONFIG) --cxxflags))
LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags --libs --system-libs)
LLVM_CFLAGS += $(LLVM_INCLUDE) -DTVM_LLVM_VERSION=$(LLVM_VERSION)
endif
include make/contrib/cblas.mk
include make/contrib/nnpack.mk
include make/contrib/cudnn.mk
......@@ -169,11 +174,10 @@ test: $(TEST)
include verilog/verilog.mk
verilog: $(VER_LIBS)
# Special rules for LLVM related modules.
build/codegen/llvm/%.o: src/codegen/llvm/%.cc
${LLVM_BUILD}/codegen/llvm/%.o: src/codegen/llvm/%.cc
@mkdir -p $(@D)
$(CXX) $(CFLAGS) $(LLVM_CFLAGS) -MM -MT build/codegen/llvm/$*.o $< >build/codegen/llvm/$*.d
$(CXX) $(CFLAGS) $(LLVM_CFLAGS) -MM -MT ${LLVM_BUILD}/codegen/llvm/$*.o $< >${LLVM_BUILD}/codegen/llvm/$*.d
$(CXX) -c $(CFLAGS) $(LLVM_CFLAGS) -c $< -o $@
build/runtime/metal/%.o: src/runtime/metal/%.mm
......
......@@ -50,7 +50,7 @@ The configuration of tvm can be modified by ```config.mk```
- First copy ```make/config.mk``` to the project root, on which
any local modification will be ignored by git, then modify the corresponding flags.
- TVM optionally depends on LLVM. LLVM is required for CPU codegen that needs LLVM.
- LLVM 4.0 is needed for build with LLVM
- LLVM 4.0 or higher is needed to build with LLVM
- By default CUDA and OpenCL code generator do not require llvm.
## Python Package Installation
......
......@@ -244,7 +244,11 @@ void CodeGenLLVM::AddFunction(const LoweredFunc& f) {
if (is_restricted_) {
for (size_t i = 0; i < f->args.size(); ++i) {
if (f->args[i].type().is_handle()) {
// Mark handle arguments as noalias; the index convention differs by LLVM version.
#if TVM_LLVM_VERSION >= 50
// LLVM >= 5.0: addParamAttr takes the zero-based argument number.
function_->addParamAttr(i, llvm::Attribute::NoAlias);
#else
// LLVM < 5.0: setDoesNotAlias takes an attribute index in which 0 denotes
// the return value, so argument i is addressed as i + 1.
function_->setDoesNotAlias(i + 1);
#endif
}
}
}
......@@ -301,7 +305,12 @@ void CodeGenLLVM::Optimize() {
// place optimization pass
llvm::PassManagerBuilder builder;
builder.OptLevel = 3;
#if TVM_LLVM_VERSION >= 50
builder.Inliner = llvm::createFunctionInliningPass(builder.OptLevel, 0, false);
#else
builder.Inliner = llvm::createFunctionInliningPass(builder.OptLevel, 0);
#endif
builder.LoopVectorize = true;
builder.SLPVectorize = true;
// pass manager
......@@ -795,7 +804,11 @@ void CodeGenLLVM::CreateComputeScope(const AttrStmt* op) {
new_vmap[var.get()] = v;
if (var.type().is_handle() && !alias_var_set_.count(var.get())) {
  // Mark the handle argument as noalias.
#if TVM_LLVM_VERSION >= 50
  // LLVM >= 5.0: addParamAttr takes the zero-based argument number, so the
  // argument position is passed as-is (matching the call in AddFunction).
  fcompute->addParamAttr(idx, llvm::Attribute::NoAlias);
#else
  // LLVM < 5.0: setDoesNotAlias takes an attribute index in which 0 denotes
  // the return value, so the argument at position idx is addressed as idx + 1.
  fcompute->setDoesNotAlias(idx + 1);
#endif
}
}
std::swap(function_, fcompute);
......
......@@ -97,7 +97,9 @@ GetLLVMTargetMachine(const std::string& target_str, bool allow_null) {
}
// set target option
llvm::TargetOptions opt;
#if TVM_LLVM_VERSION < 50
opt.LessPreciseFPMADOption = true;
#endif
opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
opt.UnsafeFPMath = true;
opt.NoInfsFPMath = true;
......
......@@ -32,6 +32,7 @@
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/Casting.h>
#include <llvm/Support/TargetRegistry.h>
......
......@@ -47,8 +47,9 @@ class LLVMModuleNode final : public runtime::ModuleNode {
std::lock_guard<std::mutex> lock(mutex_);
const std::string& fname = (name == runtime::symbol::tvm_module_main ?
entry_func_ : name);
BackendPackedCFunc faddr =
reinterpret_cast<BackendPackedCFunc>(ee_->getFunctionAddress(fname));
reinterpret_cast<BackendPackedCFunc>(GetFunctionAddr(fname));
if (faddr == nullptr) return PackedFunc();
return PackedFunc([faddr, sptr_to_self](TVMArgs args, TVMRetValue* rv) {
int ret = (*faddr)(
......@@ -163,17 +164,33 @@ class LLVMModuleNode final : public runtime::ModuleNode {
ee_->runStaticConstructorsDestructors(false);
// setup context address.
entry_func_ =
reinterpret_cast<const char*>(
ee_->getGlobalValueAddress(runtime::symbol::tvm_module_main));
reinterpret_cast<const char*>(GetGlobalAddr(runtime::symbol::tvm_module_main));
if (void** ctx_addr = reinterpret_cast<void**>(
ee_->getGlobalValueAddress(runtime::symbol::tvm_module_ctx))) {
GetGlobalAddr(runtime::symbol::tvm_module_ctx))) {
*ctx_addr = this;
}
runtime::InitContextFunctions([this](const char *name) {
auto value = ee_->getGlobalValueAddress(name);
return value;
return GetGlobalAddr(name);
});
}
// Resolve the address of a global variable through the execution engine.
// Yields 0 when the module holds no global variable called `name`.
uint64_t GetGlobalAddr(const std::string& name) {
  // Guard: the engine is only queried for globals present in the module.
  if (mptr_->getGlobalVariable(name) == nullptr) return 0;
  return ee_->getGlobalValueAddress(name);
}
// Resolve the address of a function through the execution engine.
// Yields 0 when the module holds no function called `name`.
uint64_t GetFunctionAddr(const std::string& name) {
  // Guard: the engine is only queried for functions present in the module.
  const bool present = mptr_->getFunction(name) != nullptr;
  return present ? ee_->getFunctionAddress(name) : 0;
}
// The target configuration string
std::string target_;
// Name of entry function.
......
......@@ -2,5 +2,16 @@ echo deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-4.0 main\
>> /etc/apt/sources.list.d/llvm.list
echo deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial-4.0 main\
>> /etc/apt/sources.list.d/llvm.list
echo deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main\
>> /etc/apt/sources.list.d/llvm.list
echo deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial-5.0 main\
>> /etc/apt/sources.list.d/llvm.list
echo deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial main\
>> /etc/apt/sources.list.d/llvm.list
echo deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial main\
>> /etc/apt/sources.list.d/llvm.list
wget -O - http://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
apt-get update && apt-get install -y --force-yes llvm-4.0
apt-get update && apt-get install -y --force-yes llvm-4.0 llvm-5.0 llvm-6.0
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment