Commit e4387940 by Tianqi Chen Committed by GitHub

[BUILD] Windows build pass on LLVM/CUDA/OPENCL (#57)

parent 33310206
...@@ -92,6 +92,6 @@ ENV/ ...@@ -92,6 +92,6 @@ ENV/
*~ *~
build build
config.mk config.mk
build_win build_*
Win32 Win32
*.dir *.dir
cmake_minimum_required(VERSION 3.5) cmake_minimum_required(VERSION 3.5)
project(tvm) project(tvm C CXX)
include(cmake/Util.cmake) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
include(${CMAKE_CURRENT_SOURCE_DIR}/build/private/local_config.cmake)
endif()
option(USE_OPENCL "Build with OpenCL" OFF) include(cmake/Util.cmake)
option(USE_CUDA "Build with CUDA" OFF) tvm_option(USE_CUDA "Build with CUDA" ON)
option(USE_LLVM "Build with LLVM" OFF) tvm_option(USE_OPENCL "Build with OpenCL" ON)
option(USE_RTTI "Build with RTTI" OFF) tvm_option(USE_LLVM "Build with LLVM" OFF)
tvm_option(USE_RTTI "Build with RTTI" OFF)
tvm_option(USE_MSVC_MT "Build with MT" OFF)
# include path
include_directories("include") include_directories("include")
include_directories("HalideIR/src") include_directories("HalideIR/src")
set(TVM_LINKER_LIBS "") set(TVM_LINKER_LIBS "")
...@@ -20,24 +23,22 @@ if(MSVC) ...@@ -20,24 +23,22 @@ if(MSVC)
add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_CRT_SECURE_NO_WARNINGS)
add_definitions(-D_SCL_SECURE_NO_WARNINGS) add_definitions(-D_SCL_SECURE_NO_WARNINGS)
add_definitions(-DTVM_EXPORTS) add_definitions(-DTVM_EXPORTS)
# MSVC: force the static C runtime by rewriting /MD (DLL CRT) to /MT in
# every per-configuration C++ flag variable, so the produced binaries do
# not depend on the MSVC runtime redistributable.
foreach(flag_var
    CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
    CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
  if(${flag_var} MATCHES "/MD")
    string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
  endif()
endforeach()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj")
# Optionally link the static MSVC runtime: when USE_MSVC_MT is ON, rewrite
# /MD (DLL CRT) to /MT in every per-configuration C++ flag variable so the
# resulting binaries carry no dependency on the MSVC runtime DLLs.
if(USE_MSVC_MT)
  foreach(flag_var
      CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
    if(${flag_var} MATCHES "/MD")
      string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
    endif()
  endforeach()
endif()
else(MSVC) else(MSVC)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
check_cxx_compiler_flag("-msse2" SUPPORT_MSSE2)
set(CMAKE_C_FLAGS "-O3 -fno-rtti -Wall -std=c++11 -fPIC") set(CMAKE_C_FLAGS "-O3 -fno-rtti -Wall -std=c++11 -fPIC")
if(SUPPORT_OPENMP)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp")
endif()
set(CMAKE_CXX_FLAGS ${CMAKE_C_FLAGS}) set(CMAKE_CXX_FLAGS ${CMAKE_C_FLAGS})
endif(MSVC) endif(MSVC)
...@@ -49,6 +50,7 @@ tvm_source_group("Source\\arithmetic" GLOB "src/arithmetic/*.cc") ...@@ -49,6 +50,7 @@ tvm_source_group("Source\\arithmetic" GLOB "src/arithmetic/*.cc")
tvm_source_group("Source\\schedule" GLOB "src/schedule/*.cc") tvm_source_group("Source\\schedule" GLOB "src/schedule/*.cc")
tvm_source_group("Source\\codegen" GLOB "src/codegen/*.cc") tvm_source_group("Source\\codegen" GLOB "src/codegen/*.cc")
tvm_source_group("Source\\codegen\\llvm" GLOB "src/codegen/llvm/*.cc") tvm_source_group("Source\\codegen\\llvm" GLOB "src/codegen/llvm/*.cc")
tvm_source_group("Source\\codegen\\stack_vm" GLOB "src/codegen/stack_vm/*.cc")
tvm_source_group("Source\\pass" GLOB "src/pass/*.cc") tvm_source_group("Source\\pass" GLOB "src/pass/*.cc")
tvm_source_group("Source\\runtime" GLOB "src/runtime/*.cc") tvm_source_group("Source\\runtime" GLOB "src/runtime/*.cc")
tvm_source_group("Source\\runtime\\cuda" GLOB "src/runtime/cuda/*.cc") tvm_source_group("Source\\runtime\\cuda" GLOB "src/runtime/cuda/*.cc")
...@@ -58,7 +60,7 @@ file(GLOB COMPILER_SRCS ...@@ -58,7 +60,7 @@ file(GLOB COMPILER_SRCS
src/api/*.cc src/api/*.cc
src/arithmetic/*.cc src/arithmetic/*.cc
src/codegen/*.cc src/codegen/*.cc
src/stack_vm/*.cc src/codegen/stack_vm/*.cc
src/lang/*.cc src/lang/*.cc
src/pass/*.cc src/pass/*.cc
src/schedule/*.cc src/schedule/*.cc
...@@ -71,19 +73,44 @@ file(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc) ...@@ -71,19 +73,44 @@ file(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc)
file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc) file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
if(USE_CUDA) if(USE_CUDA)
find_package(CUDA)
find_package(CUDA QUIET REQUIRED)
message(STATUS "Build with CUDA support...")
include_directories(${CUDA_INCLUDE_DIRS})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDART_LIBRARY})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${CUDA_CUDA_LIBRARY})
list(APPEND RUNTIME_SRCS ${RUNTIME_CUDA_SRCS}) list(APPEND RUNTIME_SRCS ${RUNTIME_CUDA_SRCS})
if(MSVC)
find_library(CUDA_NVRTC_LIB nvrtc
${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
${CUDA_TOOLKIT_ROOT_DIR}/lib/win32)
list(APPEND TVM_LINKER_LIBS ${CUDA_NVRTC_LIB})
endif()
add_definitions(-DTVM_CUDA_RUNTIME=1)
else(USE_CUDA) else(USE_CUDA)
add_definitions(-DTVM_CUDA_RUNTIME=0) add_definitions(-DTVM_CUDA_RUNTIME=0)
endif(USE_CUDA) endif(USE_CUDA)
if(USE_OPENCL) if(USE_OPENCL)
find_package(OPENCL QUIET REQUIRED)
message(STATUS "Build with OpenCL support...")
include_directories(${OPENCL_INCLUDE_DIRS})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenCL_LIBRARIES})
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS}) list(APPEND RUNTIME_SRCS ${RUNTIME_OPENCL_SRCS})
add_definitions(-DTVM_OPENCL_RUNTIME=1)
else(USE_OPENCL) else(USE_OPENCL)
add_definitions(-DTVM_OPENCL_RUNTIME=0) add_definitions(-DTVM_OPENCL_RUNTIME=0)
endif(USE_OPENCL) endif(USE_OPENCL)
if(USE_LLVM) if(USE_LLVM)
add_definitions(-DTVM_LLVM_VERSION=40) find_package(LLVM REQUIRED CONFIG)
message(STATUS "Build with LLVM support...")
include_directories(${LLVM_INCLUDE_DIRS})
add_definitions(${LLVM_DEFINITIONS})
llvm_map_components_to_libnames(LLVM_LIBS all)
list(REMOVE_ITEM LLVM_LIBS LTO)
list(APPEND TVM_LINKER_LIBS ${LLVM_LIBS})
add_definitions(-DTVM_LLVM_VERSION=${LLVM_PACKAGE_VERSION})
list(APPEND COMPILER_SRCS ${COMPILER_LLVM_SRCS}) list(APPEND COMPILER_SRCS ${COMPILER_LLVM_SRCS})
endif(USE_LLVM) endif(USE_LLVM)
...@@ -109,9 +136,7 @@ else() ...@@ -109,9 +136,7 @@ else()
set(CMAKE_SHARED_LIBRARY_PREFIX "") set(CMAKE_SHARED_LIBRARY_PREFIX "")
endif() endif()
add_library(libtvm SHARED ${COMPILER_SRCS} ${RUNTIME_SRCS}) add_library(libtvm SHARED ${COMPILER_SRCS} ${RUNTIME_SRCS})
add_library(libtvm_runtime SHARED ${RUNTIME_SRCS}) add_library(libtvm_runtime SHARED ${RUNTIME_SRCS})
target_link_libraries(libtvm ${TVM_LINKER_LIBS} ${TVM_RUNTIME_LINKER_LIBS})
target_link_libraries(libtvm ${TVM_LINKER_LIBS})
target_link_libraries(libtvm_runtime ${TVM_RUNTIME_LINKER_LIBS}) target_link_libraries(libtvm_runtime ${TVM_RUNTIME_LINKER_LIBS})
...@@ -12,3 +12,44 @@ function(tvm_source_group group) ...@@ -12,3 +12,44 @@ function(tvm_source_group group)
source_group(${group} FILES ${srcs2}) source_group(${group} FILES ${srcs2})
endif() endif()
endfunction() endfunction()
#######################################################
# An option that the user can select. Can accept a condition that controls
# when the option is available to the user.
# Usage:
#   tvm_option(<option_variable> "doc string" <initial value or boolean expression> [IF <condition>])
# Arguments:
#   variable    - name of the boolean cache variable to declare
#   description - docstring recorded with the cache entry
#   value       - default: a literal ON/OFF, the name of another variable,
#                 or (via extra arguments) a multi-token boolean expression
#   IF <cond>   - optional gating condition; when it evaluates false, the
#                 option is removed from the cache instead of declared
function(tvm_option variable description value)
# __value accumulates the default-value tokens, __condition the tokens
# that follow an IF keyword.
set(__value ${value})
set(__condition "")
# __varname holds the NAME of whichever list is currently being filled;
# it is dereferenced inside the loop below.
set(__varname "__value")
foreach(arg ${ARGN})
if(arg STREQUAL "IF" OR arg STREQUAL "if")
# Switch append target: everything after IF belongs to the condition.
set(__varname "__condition")
else()
list(APPEND ${__varname} ${arg})
endif()
endforeach()
unset(__varname)
# No IF clause supplied: substitute an always-true condition (2 GREATER 1)
# so the single evaluation path below can be used unconditionally.
if("${__condition}" STREQUAL "")
set(__condition 2 GREATER 1)
endif()
# __condition is a ;-list; the unquoted expansion splats it back into
# if() as a full condition expression.
if(${__condition})
if("${__value}" MATCHES ";")
# Default contains ';' => it is a multi-token boolean expression;
# evaluate it and declare the option ON or OFF accordingly.
if(${__value})
option(${variable} "${description}" ON)
else()
option(${variable} "${description}" OFF)
endif()
elseif(DEFINED ${__value})
# Default names an already-defined variable: use its truth value.
if(${__value})
option(${variable} "${description}" ON)
else()
option(${variable} "${description}" OFF)
endif()
else()
# Plain literal default (e.g. ON / OFF).
option(${variable} "${description}" ${__value})
endif()
else()
# Gating condition is false: hide the option by dropping any cached value.
unset(${variable} CACHE)
endif()
endfunction()
\ No newline at end of file
...@@ -46,7 +46,8 @@ def compile_source(code, target="ptx", arch=None, ...@@ -46,7 +46,8 @@ def compile_source(code, target="ptx", arch=None,
file_target = path_target if path_target else temp_target file_target = path_target if path_target else temp_target
cmd = ["nvcc"] cmd = ["nvcc"]
cmd += ["--%s" % target, "-O3"] cmd += ["--%s" % target, "-O3"]
cmd += ["-arch", arch] if arch:
cmd += ["-arch", arch]
cmd += ["-o", file_target] cmd += ["-o", file_target]
if options: if options:
......
...@@ -44,7 +44,7 @@ def find_lib_path(): ...@@ -44,7 +44,7 @@ def find_lib_path():
raise RuntimeError('Cannot find the files.\n' + raise RuntimeError('Cannot find the files.\n' +
'List of candidates:\n' + str('\n'.join(dll_path))) 'List of candidates:\n' + str('\n'.join(dll_path)))
if use_runtime: if use_runtime:
sys.stderr.write("Loading runtime library... this is execution only\n") sys.stderr.write("Loading runtime library %s... exec only\n" % lib_found[0])
sys.stderr.flush() sys.stderr.flush()
return lib_found return lib_found
......
...@@ -32,7 +32,7 @@ class Module(ModuleBase): ...@@ -32,7 +32,7 @@ class Module(ModuleBase):
modules : list of Modules modules : list of Modules
The module The module
""" """
nmod = ImportsSize(self) nmod = _ImportsSize(self)
return [_GetImport(self, i) for i in range(nmod)] return [_GetImport(self, i) for i in range(nmod)]
def save(self, file_name, fmt=""): def save(self, file_name, fmt=""):
......
...@@ -39,7 +39,6 @@ ...@@ -39,7 +39,6 @@
#include <utility> #include <utility>
#include <string> #include <string>
namespace tvm { namespace tvm {
namespace codegen { namespace codegen {
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#ifdef TVM_LLVM_VERSION #ifdef TVM_LLVM_VERSION
#include <tvm/runtime/packed_func.h> #include <tvm/runtime/packed_func.h>
#include <tvm/codegen.h> #include <tvm/codegen.h>
#include <mutex>
#include "./llvm_common.h" #include "./llvm_common.h"
#include "./codegen_llvm.h" #include "./codegen_llvm.h"
#include "../../runtime/file_util.h" #include "../../runtime/file_util.h"
......
...@@ -417,7 +417,7 @@ TVM_STATIC_IR_FUNCTOR(CodeGenStackVM, vtable) ...@@ -417,7 +417,7 @@ TVM_STATIC_IR_FUNCTOR(CodeGenStackVM, vtable)
.set_dispatch<LetStmt>([](const LetStmt *op, CodeGenStackVM* p) { .set_dispatch<LetStmt>([](const LetStmt *op, CodeGenStackVM* p) {
p->Push(op->value); p->Push(op->value);
int64_t vid = p->AllocVarID(op->var.get()); int64_t vid = p->AllocVarID(op->var.get());
p->PushOp(StackVM::STORE_HEAP, vid); p->PushOp(StackVM::STORE_HEAP, static_cast<int>(vid));
p->Push(op->body); p->Push(op->body);
}) })
.set_dispatch<Ramp>([](const Ramp *op, CodeGenStackVM* p) { .set_dispatch<Ramp>([](const Ramp *op, CodeGenStackVM* p) {
...@@ -445,7 +445,7 @@ TVM_STATIC_IR_FUNCTOR(CodeGenStackVM, vtable) ...@@ -445,7 +445,7 @@ TVM_STATIC_IR_FUNCTOR(CodeGenStackVM, vtable)
.set_dispatch<Let>([](const Let *op, CodeGenStackVM* p) { .set_dispatch<Let>([](const Let *op, CodeGenStackVM* p) {
p->Push(op->value); p->Push(op->value);
int64_t vid = p->AllocVarID(op->var.get()); int64_t vid = p->AllocVarID(op->var.get());
p->PushOp(StackVM::STORE_HEAP, vid); p->PushOp(StackVM::STORE_HEAP, static_cast<int>(vid));
p->Push(op->body); p->Push(op->body);
}) })
.set_dispatch<Load>([](const Load *op, CodeGenStackVM* p) { .set_dispatch<Load>([](const Load *op, CodeGenStackVM* p) {
......
...@@ -125,7 +125,7 @@ inline bool prove_equal(Expr lhs, Expr rhs) { ...@@ -125,7 +125,7 @@ inline bool prove_equal(Expr lhs, Expr rhs) {
} }
int ScanOpNode::num_outputs() const { int ScanOpNode::num_outputs() const {
return update.size(); return static_cast<int>(update.size());
} }
Array<IterVar> ScanOpNode::root_iter_vars() const { Array<IterVar> ScanOpNode::root_iter_vars() const {
return Array<IterVar>{scan_axis}; return Array<IterVar>{scan_axis};
......
...@@ -103,7 +103,7 @@ LoweredFunc MakeAPI(Stmt body, ...@@ -103,7 +103,7 @@ LoweredFunc MakeAPI(Stmt body,
MakeAssertEQ(v_num_packed_args, num_packed_args, os.str())); MakeAssertEQ(v_num_packed_args, num_packed_args, os.str()));
} }
for (size_t i = 0; i < api_args.size(); ++i) { for (int i = 0; i < static_cast<int>(api_args.size()); ++i) {
Var v_arg = f_arg_decl(i); Var v_arg = f_arg_decl(i);
if (i < static_cast<size_t>(num_packed_args)) { if (i < static_cast<size_t>(num_packed_args)) {
seq_init.emplace_back(LetStmt::make( seq_init.emplace_back(LetStmt::make(
......
...@@ -89,7 +89,7 @@ struct TVMRuntimeEntry { ...@@ -89,7 +89,7 @@ struct TVMRuntimeEntry {
if (val != nullptr) { if (val != nullptr) {
num_par_threads = atoi(val); num_par_threads = atoi(val);
} else { } else {
num_par_threads = std::thread::hardware_concurrency(); num_par_threads = std::thread::hardware_concurrency() / 2;
} }
} }
}; };
...@@ -127,7 +127,7 @@ int TVMModGetFunction(TVMModuleHandle mod, ...@@ -127,7 +127,7 @@ int TVMModGetFunction(TVMModuleHandle mod,
TVMFunctionHandle *func) { TVMFunctionHandle *func) {
API_BEGIN(); API_BEGIN();
PackedFunc pf = static_cast<Module*>(mod)->GetFunction( PackedFunc pf = static_cast<Module*>(mod)->GetFunction(
func_name, query_imports); func_name, query_imports != 0);
if (pf != nullptr) { if (pf != nullptr) {
*func = new PackedFunc(pf); *func = new PackedFunc(pf);
} else { } else {
......
...@@ -39,7 +39,7 @@ class CUDAModuleNode : public runtime::ModuleNode { ...@@ -39,7 +39,7 @@ class CUDAModuleNode : public runtime::ModuleNode {
~CUDAModuleNode() { ~CUDAModuleNode() {
for (size_t i = 0; i < module_.size(); ++i) { for (size_t i = 0; i < module_.size(); ++i) {
if (module_[i] != nullptr) { if (module_[i] != nullptr) {
CUDA_CALL(cudaSetDevice(i)); CUDA_CALL(cudaSetDevice(static_cast<int>(i)));
CUDA_DRIVER_CALL(cuModuleUnload(module_[i])); CUDA_DRIVER_CALL(cuModuleUnload(module_[i]));
} }
} }
......
...@@ -75,11 +75,13 @@ class DSOModuleNode : public ModuleNode { ...@@ -75,11 +75,13 @@ class DSOModuleNode : public ModuleNode {
HMODULE lib_handle_{nullptr}; HMODULE lib_handle_{nullptr};
// Load the library // Load the library
void Load(const std::string& name) { void Load(const std::string& name) {
lib_handle_ = LoadLibrary(name.c_str()); // use wstring version that is needed by LLVM.
std::wstring wname(name.begin(), name.end());
lib_handle_ = LoadLibraryW(wname.c_str());
} }
BackendPackedCFunc GetFuncPtr(const std::string& name) { BackendPackedCFunc GetFuncPtr(const std::string& name) {
return reinterpret_cast<BackendPackedCFunc>( return reinterpret_cast<BackendPackedCFunc>(
GetProcAddress(lib_handle_, name.c_str())); // NOLINT(*) GetProcAddress(lib_handle_, (LPCSTR)name.c_str())); // NOLINT(*)
} }
void* GetGlobalVPtr(const std::string& name) { void* GetGlobalVPtr(const std::string& name) {
return reinterpret_cast<void*>( return reinterpret_cast<void*>(
......
...@@ -119,9 +119,9 @@ TVM_REGISTER_GLOBAL(_module__GetImport) ...@@ -119,9 +119,9 @@ TVM_REGISTER_GLOBAL(_module__GetImport)
imports().at(args[1].operator int()); imports().at(args[1].operator int());
}); });
TVM_REGISTER_GLOBAL(_module__GetTyeKey) TVM_REGISTER_GLOBAL(_module__GetTypeKey)
.set_body([](TVMArgs args, TVMRetValue *ret) { .set_body([](TVMArgs args, TVMRetValue *ret) {
*ret = args[0].operator Module()->type_key(); *ret = std::string(args[0].operator Module()->type_key());
}); });
TVM_REGISTER_GLOBAL(_module__LoadFromFile) TVM_REGISTER_GLOBAL(_module__LoadFromFile)
......
...@@ -389,7 +389,7 @@ void InferRootBound(const Stage& stage, ...@@ -389,7 +389,7 @@ void InferRootBound(const Stage& stage,
bool direct_consume_by_parent = false; bool direct_consume_by_parent = false;
for (int i = 0; i < stage->op->num_outputs(); ++i) { for (int i = 0; i < stage->op->num_outputs(); ++i) {
Tensor t = stage->op.output(i); Tensor t = stage->op.output(i);
tmap.emplace(t, TensorDom(t.ndim())); tmap.emplace(t, TensorDom(static_cast<int>(t.ndim())));
auto it = feed_graph.find(t); auto it = feed_graph.find(t);
if (it != feed_graph.end()) { if (it != feed_graph.end()) {
for (const Operation& op : it->second) { for (const Operation& op : it->second) {
......
...@@ -22,6 +22,9 @@ struct TensorDimKey { ...@@ -22,6 +22,9 @@ struct TensorDimKey {
TensorDimKey(const Tensor& t, int dim) TensorDimKey(const Tensor& t, int dim)
: f(t->op), value_index(t->value_index), dim(dim) { : f(t->op), value_index(t->value_index), dim(dim) {
} }
TensorDimKey(const Tensor& t, size_t dim)
: f(t->op), value_index(t->value_index), dim(static_cast<int>(dim)) {
}
inline bool operator==(const TensorDimKey& other) const { inline bool operator==(const TensorDimKey& other) const {
return f == other.f && return f == other.f &&
value_index == other.value_index && value_index == other.value_index &&
...@@ -183,7 +186,7 @@ ReachGraph GetReachGraph(const Array<Operation>& ops) { ...@@ -183,7 +186,7 @@ ReachGraph GetReachGraph(const Array<Operation>& ops) {
const auto& init = op.as<ScanOpNode>()->init; const auto& init = op.as<ScanOpNode>()->init;
for (size_t i = 0; i < update.size(); ++i) { for (size_t i = 0; i < update.size(); ++i) {
Tensor t = op.output(i); Tensor t = op.output(i);
for (size_t k = 1; k < update[i]->shape.size(); ++k) { for (int k = 1; k < static_cast<int>(update[i]->shape.size()); ++k) {
reach[TensorDimKey(t, k)].emplace_back( reach[TensorDimKey(t, k)].emplace_back(
TensorDimKey(update[i], k)); TensorDimKey(update[i], k));
reach[TensorDimKey(t, k)].emplace_back( reach[TensorDimKey(t, k)].emplace_back(
...@@ -203,7 +206,7 @@ ReachGraph GetReachGraph(const Array<Operation>& ops) { ...@@ -203,7 +206,7 @@ ReachGraph GetReachGraph(const Array<Operation>& ops) {
if (call != nullptr && call->func.defined()) { if (call != nullptr && call->func.defined()) {
if (!bset.count(call->func.get())) return; if (!bset.count(call->func.get())) return;
for (size_t i = 0; i < call->args.size(); ++i) { for (size_t i = 0; i < call->args.size(); ++i) {
TensorDimKey dkey(call, i); TensorDimKey dkey(call, static_cast<int>(i));
auto fpush = [&dkey, &vmap, &reach](const NodeRef& node) { auto fpush = [&dkey, &vmap, &reach](const NodeRef& node) {
const Variable *v = node.as<Variable>(); const Variable *v = node.as<Variable>();
auto it = vmap.find(v); auto it = vmap.find(v);
...@@ -319,7 +322,7 @@ Map<IterVar, Expr> ScanFixPointAnalysis( ...@@ -319,7 +322,7 @@ Map<IterVar, Expr> ScanFixPointAnalysis(
if (call != nullptr && call->func.defined()) { if (call != nullptr && call->func.defined()) {
for (size_t i = 0; i < call->args.size(); ++i) { for (size_t i = 0; i < call->args.size(); ++i) {
auto it = vmap.find(call->args[i].get()); auto it = vmap.find(call->args[i].get());
TensorDimKey src(call, i); TensorDimKey src(call, static_cast<int>(i));
if (it != vmap.end()) { if (it != vmap.end()) {
f_merge_key(it->second, src); f_merge_key(it->second, src);
} else { } else {
......
...@@ -264,7 +264,7 @@ Schedule::Schedule(Array<Operation> ops) { ...@@ -264,7 +264,7 @@ Schedule::Schedule(Array<Operation> ops) {
} }
for (Operation op : post_order) { for (Operation op : post_order) {
Stage stage(op); Stage stage(op);
stage->is_output = output_set.count(op); stage->is_output = output_set.count(op) != 0;
n->stages.push_back(stage); n->stages.push_back(stage);
n->stage_map.Set(op, stage); n->stage_map.Set(op, stage);
// mark scan updates. // mark scan updates.
......
...@@ -21,8 +21,10 @@ def test_add(): ...@@ -21,8 +21,10 @@ def test_add():
# one line to build the function. # one line to build the function.
def check_device(device, host="stackvm"): def check_device(device, host="stackvm"):
if not tvm.codegen.enabled(host): if not tvm.codegen.enabled(host):
print("skip because %s is not enabled.." % host)
return return
if not tvm.codegen.enabled(device): if not tvm.codegen.enabled(device):
print("skip because %s is not enabled.." % device)
return return
fadd = tvm.build(s, [A, B, C], fadd = tvm.build(s, [A, B, C],
device, host, device, host,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment