Commit cb68c82c by MORITA Kazutaka Committed by Tianqi Chen

Add support for multiple OpenCL platforms (#1345)

parent f927e1f3
......@@ -39,6 +39,9 @@ set(USE_CUDA OFF)
# - /path/to/rocm: use specific path to rocm
set(USE_ROCM OFF)
# Whether enable SDAccel runtime
set(USE_SDACCEL OFF)
# Whether enable OpenCL runtime
set(USE_OPENCL OFF)
......
......@@ -7,6 +7,18 @@ if(OpenCL_FOUND)
include_directories(${OpenCL_INCLUDE_DIRS})
endif(OpenCL_FOUND)
if(USE_SDACCEL)
message(STATUS "Build with SDAccel support")
file(GLOB RUNTIME_SDACCEL_SRCS src/runtime/opencl/sdaccel/*.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_SDACCEL_SRCS})
if(NOT USE_OPENCL)
message(STATUS "Enable OpenCL support required for SDAccel")
set(USE_OPENCL ON)
endif()
else()
list(APPEND COMPILER_SRCS src/codegen/opt/build_sdaccel_off.cc)
endif(USE_SDACCEL)
if(USE_OPENCL)
find_package(OpenCL REQUIRED)
message(STATUS "Build with OpenCL support")
......
......@@ -60,6 +60,7 @@ typedef int64_t tvm_index_t;
/*! \brief Extension device types in TVM */
typedef enum {
kDLSDAccel = 6,
kDLVulkan = 7,
kOpenGL = 11,
// Extension DRAM type, used for quickly test extension device
......
......@@ -95,6 +95,7 @@ class TVMContext(ctypes.Structure):
1 : 'cpu',
2 : 'gpu',
4 : 'opencl',
6 : 'sdaccel',
7 : 'vulkan',
8 : 'metal',
9 : 'vpi',
......@@ -111,7 +112,7 @@ class TVMContext(ctypes.Structure):
'nvptx': 2,
'cl': 4,
'opencl': 4,
'sdaccel': 4,
'sdaccel': 6,
'vulkan': 7,
'metal': 8,
'vpi': 9,
......
"""Utility for Interacting with SDAccel Tools"""
import subprocess
import os
import re
from . import util
from ..api import register_func
def _vhls_to_opencl(code):
"""Convert source code from Vivado HLS to OpenCL."""
out = ''
for line in code.split('\n'):
if re.match(r'#include', line):
# OpenCL doesn't support include.
continue
if re.match(r'#pragma', line):
# Remove Vivado HLS specific pragmas.
continue
if re.match(r'extern "C"', line):
line = re.sub(r'^extern "C"', "__kernel", line)
# Add __global to pointer parameters.
line = re.sub(r'(\w+)\s*\*', r"__global \1*", line)
out += line + '\n'
return out
def _fake_compile_vhls(code):
    """Fake compile Vivado HLS code for SDAccel.

    Compile the Vivado HLS code as an OpenCL code, and generate a program
    binary for GPU which can be used instead of xclbin.

    Parameters
    ----------
    code : str
        The Vivado HLS code.

    Return
    ------
    binary : bytearray
        The program binary which can be passed to clCreateProgramWithBinary
    """
    try:
        import pyopencl as cl
    except ImportError:
        raise ImportError('PyOpenCL is required for testing SDAccel backend.')
    # Build the converted OpenCL kernel on a GPU device and extract the
    # resulting program binary for the first device in the context.
    gpu_ctx = cl.Context(dev_type=cl.device_type.GPU)
    built = cl.Program(gpu_ctx, _vhls_to_opencl(code)).build()
    return bytearray(built.binaries[0])
@register_func("tvm_callback_sdaccel_compile")
def compile_vhls(code, kernel):
"""Compile Vivado HLS code for SDAccel.
......@@ -87,9 +39,7 @@ def compile_vhls(code, kernel):
platform = os.environ.get("XCL_PLATFORM", os.environ.get("AWS_PLATFORM"))
if platform is None:
# If we don't have the Xilinx toolchain, create a program binary for
# GPU and use it for testing.
return _fake_compile_vhls(code)
raise RuntimeError("No Xlinx device specified.")
# build xo
args = [xocc, "-c", "-t", target, "--platform", platform, "-o", tmp_xo, "-k", kernel] + \
......
......@@ -6,7 +6,7 @@
#include <string>
#include "./codegen_vhls.h"
#include "./build_common.h"
#include "../runtime/opencl/opencl_module.h"
#include "../runtime/opencl/sdaccel/sdaccel_module.h"
namespace tvm {
namespace codegen {
......@@ -91,7 +91,7 @@ runtime::Module BuildSDAccel(Array<LoweredFunc> funcs) {
} else {
LOG(FATAL) << "Cannot compile Vivado HLS code.";
}
return OpenCLModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code);
return SDAccelModuleCreate(xclbin, "xclbin", ExtractFuncInfo(funcs), code);
}
TVM_REGISTER_API("codegen.build_sdaccel")
......
/*!
* Copyright (c) 2018 by Contributors
* Optional module when build opencl is switched to off
*/
#include "../codegen_source_base.h"
#include "../../runtime/opencl/opencl_module.h"
namespace tvm {
namespace runtime {
/*!
 * \brief Fallback SDAccel module factory used when the SDAccel runtime
 *  is disabled at build time (USE_SDACCEL=OFF).
 * \param data The compiled kernel binary.
 * \param fmt The format of the binary data.
 * \param fmap The function information map of each function.
 * \param source The generated device source code.
 * \return A source-only module that can be inspected but not executed.
 */
Module SDAccelModuleCreate(
    std::string data,
    std::string fmt,
    std::unordered_map<std::string, FunctionInfo> fmap,
    std::string source) {
  // This stub is compiled in place of the real runtime when USE_SDACCEL is
  // off, so report SDAccel (not OpenCL) as the disabled component.
  LOG(WARNING) << "SDAccel runtime not enabled, return a source module...";
  return codegen::DeviceSourceModuleCreate(data, fmt, fmap, "sdaccel");
}
} // namespace runtime
} // namespace tvm
......@@ -36,7 +36,7 @@ class MemoryAccessVerifier final : protected IRVisitor {
/// Interface to perform memory access verification
void Run() {
if (!IsGPUDevice(dev_type_)) return;
if (!IsGPUDevice(dev_type_) && !IsFPGADevice(dev_type_)) return;
IRVisitor::Visit(func_->body);
}
......@@ -143,6 +143,10 @@ class MemoryAccessVerifier final : protected IRVisitor {
kDLVulkan == dev_type || kDLMetal == dev_type ||
kDLROCM == dev_type || kOpenGL == dev_type;
}
/// Check if a given DLDeviceType/TVMDeviceExtType value denotes FPGA device.
static bool IsFPGADevice(int dev_type) {
return kDLSDAccel == dev_type;
}
private:
/// Status of visitor
......
......@@ -31,6 +31,7 @@ inline std::string DeviceName(int type) {
case kDLCPU: return "cpu";
case kDLGPU: return "gpu";
case kDLOpenCL: return "opencl";
case kDLSDAccel: return "sdaccel";
case kDLVulkan: return "vulkan";
case kDLMetal: return "metal";
case kDLVPI: return "vpi";
......
......@@ -21,6 +21,10 @@
#include <string>
#include <vector>
#include "../workspace_pool.h"
#include "../pack_args.h"
#include "../thread_storage_scope.h"
#include "../meta_data.h"
#include "../file_util.h"
namespace tvm {
namespace runtime {
......@@ -97,17 +101,23 @@ inline const char* CLGetErrorString(cl_int error) {
OPENCL_CHECK_ERROR(e); \
}
class OpenCLThreadEntry;
/*!
* \brief Process global OpenCL workspace.
*/
class OpenCLWorkspace final : public DeviceAPI {
class OpenCLWorkspace : public DeviceAPI {
public:
// global platform id
cl_platform_id platform_id;
// global platform name
std::string platform_name;
// global context of this process
cl_context context{nullptr};
// whether the workspace it initialized.
bool initialized_{false};
// the device type
std::string device_type;
// the devices
std::vector<cl_device_id> devices;
// the queues
......@@ -128,10 +138,17 @@ class OpenCLWorkspace final : public DeviceAPI {
}
}
// Initialzie the device.
void Init();
void Init(const std::string& device_type, const std::string& platform_name = "");
virtual void Init() {
Init("gpu");
}
// Check whether the context is OpenCL or not.
virtual bool IsOpenCLDevice(TVMContext ctx) {
return ctx.device_type == kDLOpenCL;
}
// get the queue of the context
cl_command_queue GetQueue(TVMContext ctx) {
CHECK_EQ(ctx.device_type, kDLOpenCL);
CHECK(IsOpenCLDevice(ctx));
this->Init();
CHECK(ctx.device_id >= 0 && static_cast<size_t>(ctx.device_id) < queues.size())
<< "Invalid OpenCL device_id=" << ctx.device_id;
......@@ -157,6 +174,12 @@ class OpenCLWorkspace final : public DeviceAPI {
void StreamSync(TVMContext ctx, TVMStreamHandle stream) final;
void* AllocWorkspace(TVMContext ctx, size_t size, TVMType type_hint) final;
void FreeWorkspace(TVMContext ctx, void* data) final;
/*!
* \brief Get the thread local ThreadEntry
*/
virtual OpenCLThreadEntry* GetThreadEntry();
// get the global workspace
static const std::shared_ptr<OpenCLWorkspace>& Global();
};
......@@ -179,15 +202,87 @@ class OpenCLThreadEntry {
/*! \brief workspace pool */
WorkspacePool pool;
// constructor
OpenCLThreadEntry()
: pool(kDLOpenCL, OpenCLWorkspace::Global()) {
OpenCLThreadEntry(DLDeviceType device_type, std::shared_ptr<DeviceAPI> device)
: pool(device_type, device) {
context.device_id = 0;
context.device_type = kDLOpenCL;
context.device_type = device_type;
}
OpenCLThreadEntry()
: OpenCLThreadEntry(kDLOpenCL, OpenCLWorkspace::Global()) {}
// get the global workspace
static OpenCLThreadEntry* ThreadLocal();
};
} // namespace cl
// Module to support thread-safe multi-device execution.
// OpenCL runtime is a bit tricky because clSetKernelArg is not thread-safe
// To make the call thread-safe, we create a thread-local kernel table
// and lazily install new kernels into the kernel table when the kernel is called.
// The kernels are recycled when the module get destructed.
class OpenCLModuleNode : public ModuleNode {
public:
// Kernel table reference entry.
struct KTRefEntry {
size_t kernel_id;
size_t version;
};
explicit OpenCLModuleNode(std::string data,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap,
std::string source)
: data_(data), fmt_(fmt), fmap_(fmap), source_(source) {}
// destructor
~OpenCLModuleNode();
/*!
* \brief Get the global workspace
*/
virtual const std::shared_ptr<cl::OpenCLWorkspace>& GetGlobalWorkspace();
virtual const char* type_key() const;
PackedFunc GetFunction(
const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) final;
void SaveToFile(const std::string& file_name,
const std::string& format) final;
void SaveToBinary(dmlc::Stream* stream) final;
std::string GetSource(const std::string& format) final;
// Initialize the programs
void Init();
// install a new kernel to thread local entry
cl_kernel InstallKernel(cl::OpenCLWorkspace* w,
cl::OpenCLThreadEntry* t,
const std::string& func_name,
const KTRefEntry& e);
protected:
// The workspace, need to keep reference to use it in destructor.
// In case of static destruction order problem.
std::shared_ptr<cl::OpenCLWorkspace> workspace_;
// the binary data
std::string data_;
private:
// The format
std::string fmt_;
// function information table.
std::unordered_map<std::string, FunctionInfo> fmap_;
// Module local mutex
std::mutex build_lock_;
// The OpenCL source.
std::string source_;
// the binary data
cl_program program_{nullptr};
// build info
std::vector<bool> device_built_flag_;
// kernel id cache
std::unordered_map<std::string, KTRefEntry> kid_map_;
// kernels build so far.
std::vector<cl_kernel> kernels_;
};
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENCL_OPENCL_COMMON_H_
......@@ -10,13 +10,17 @@ namespace tvm {
namespace runtime {
namespace cl {
OpenCLThreadEntry* OpenCLWorkspace::GetThreadEntry() {
return OpenCLThreadEntry::ThreadLocal();
}
const std::shared_ptr<OpenCLWorkspace>& OpenCLWorkspace::Global() {
static std::shared_ptr<OpenCLWorkspace> inst = std::make_shared<OpenCLWorkspace>();
return inst;
}
void OpenCLWorkspace::SetDevice(TVMContext ctx) {
OpenCLThreadEntry::ThreadLocal()->context.device_id = ctx.device_id;
GetThreadEntry()->context.device_id = ctx.device_id;
}
void OpenCLWorkspace::GetAttr(
......@@ -121,13 +125,13 @@ void OpenCLWorkspace::CopyDataFromTo(const void* from,
TVMStreamHandle stream) {
this->Init();
CHECK(stream == nullptr);
if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLOpenCL) {
if (IsOpenCLDevice(ctx_from) && IsOpenCLDevice(ctx_to)) {
OPENCL_CALL(clEnqueueCopyBuffer(
this->GetQueue(ctx_to),
static_cast<cl_mem>((void*)from), // NOLINT(*)
static_cast<cl_mem>(to),
from_offset, to_offset, size, 0, nullptr, nullptr));
} else if (ctx_from.device_type == kDLOpenCL && ctx_to.device_type == kDLCPU) {
} else if (IsOpenCLDevice(ctx_from) && ctx_to.device_type == kDLCPU) {
OPENCL_CALL(clEnqueueReadBuffer(
this->GetQueue(ctx_from),
static_cast<cl_mem>((void*)from), // NOLINT(*)
......@@ -135,7 +139,7 @@ void OpenCLWorkspace::CopyDataFromTo(const void* from,
static_cast<char*>(to) + to_offset,
0, nullptr, nullptr));
OPENCL_CALL(clFinish(this->GetQueue(ctx_from)));
} else if (ctx_from.device_type == kDLCPU && ctx_to.device_type == kDLOpenCL) {
} else if (ctx_from.device_type == kDLCPU && IsOpenCLDevice(ctx_to)) {
OPENCL_CALL(clEnqueueWriteBuffer(
this->GetQueue(ctx_to),
static_cast<cl_mem>(to),
......@@ -156,11 +160,11 @@ void OpenCLWorkspace::StreamSync(TVMContext ctx, TVMStreamHandle stream) {
void* OpenCLWorkspace::AllocWorkspace(TVMContext ctx,
size_t size,
TVMType type_hint) {
return OpenCLThreadEntry::ThreadLocal()->pool.AllocWorkspace(ctx, size);
return GetThreadEntry()->pool.AllocWorkspace(ctx, size);
}
void OpenCLWorkspace::FreeWorkspace(TVMContext ctx, void* data) {
OpenCLThreadEntry::ThreadLocal()->pool.FreeWorkspace(ctx, data);
GetThreadEntry()->pool.FreeWorkspace(ctx, data);
}
typedef dmlc::ThreadLocalStore<OpenCLThreadEntry> OpenCLThreadStore;
......@@ -223,38 +227,39 @@ bool MatchPlatformInfo(
return param_value.find(value) != std::string::npos;
}
void OpenCLWorkspace::Init() {
void OpenCLWorkspace::Init(const std::string& device_type, const std::string& platform_name) {
if (initialized_) return;
std::lock_guard<std::mutex> lock(this->mu);
if (initialized_) return;
initialized_ = true;
if (context != nullptr) return;
// matched platforms
std::vector<cl_platform_id> platform_matched = cl::GetPlatformIDs();
if (platform_matched.size() == 0) {
std::vector<cl_platform_id> platform_ids = cl::GetPlatformIDs();
if (platform_ids.size() == 0) {
LOG(WARNING) << "No OpenCL platform matched given existing options ...";
return;
}
if (platform_matched.size() > 1) {
LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... ";
}
this->platform_id = platform_matched[0];
LOG(INFO) << "Initialize OpenCL platform \'"
<< cl::GetPlatformInfo(this->platform_id, CL_PLATFORM_NAME) << '\'';
std::string device_types[] = {"accelerator", "gpu", "cpu"};
std::vector<cl_device_id> devices_matched;
for (auto type : device_types) {
devices_matched = cl::GetDeviceIDs(this->platform_id, type);
this->platform_id = nullptr;
for (auto platform_id : platform_ids) {
if (!MatchPlatformInfo(platform_id, CL_PLATFORM_NAME, platform_name)) {
continue;
}
std::vector<cl_device_id> devices_matched = cl::GetDeviceIDs(platform_id, device_type);
if (devices_matched.size() > 0) {
this->platform_id = platform_id;
this->platform_name = cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME);
this->device_type = device_type;
this->devices = devices_matched;
LOG(INFO) << "Initialize OpenCL platform \'" << this->platform_name << '\'';
break;
}
LOG(INFO) << "No OpenCL device any device matched given the options: " << type << " mode";
LOG(INFO) << "\'" << cl::GetPlatformInfo(platform_id, CL_PLATFORM_NAME)
<< "\' platform has no OpenCL device: " << device_type << " mode";
}
if (devices_matched.size() == 0) {
if (this->platform_id == nullptr) {
LOG(WARNING) << "No OpenCL device";
return;
}
this->devices = devices_matched;
cl_int err_code;
this->context = clCreateContext(
nullptr, this->devices.size(), &(this->devices[0]),
......@@ -272,11 +277,6 @@ void OpenCLWorkspace::Init() {
}
}
bool InitOpenCL(TVMArgs args, TVMRetValue* rv) {
cl::OpenCLWorkspace::Global()->Init();
return true;
}
TVM_REGISTER_GLOBAL("device_api.opencl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
DeviceAPI* ptr = OpenCLWorkspace::Global().get();
......
......@@ -15,7 +15,7 @@
namespace tvm {
namespace runtime {
/*!
* \brief create a opencl module from data.
* \brief create a opencl module for GPU devices from data.
*
* \param data The module data.
* \param fmt The format of the data, can be "clbin", "cl"
......
/*!
* Copyright (c) 2018 by Contributors
* \file sdaccel_common.h
* \brief SDAccel common header
*/
#ifndef TVM_RUNTIME_OPENCL_SDACCEL_SDACCEL_COMMON_H_
#define TVM_RUNTIME_OPENCL_SDACCEL_SDACCEL_COMMON_H_
#include "../opencl_common.h"
namespace tvm {
namespace runtime {
namespace cl {
/*!
 * \brief Process global SDAccel workspace.
 *
 *  Specializes OpenCLWorkspace so that initialization targets
 *  accelerator-class devices on the Xilinx platform and device checks
 *  accept kDLSDAccel contexts instead of kDLOpenCL ones.
 */
class SDAccelWorkspace final : public OpenCLWorkspace {
 public:
  // override OpenCL device API
  void Init() final;
  bool IsOpenCLDevice(TVMContext ctx) final;
  OpenCLThreadEntry* GetThreadEntry() final;
  // get the global workspace
  static const std::shared_ptr<OpenCLWorkspace>& Global();
};
/*! \brief Thread local workspace for SDAccel. */
class SDAccelThreadEntry : public OpenCLThreadEntry {
 public:
  // constructor: bind the per-thread pool and default context to the
  // SDAccel device type and the SDAccel-specific global workspace.
  SDAccelThreadEntry()
      : OpenCLThreadEntry(static_cast<DLDeviceType>(kDLSDAccel), SDAccelWorkspace::Global()) {}

  // Get the calling thread's SDAccel entry.
  static SDAccelThreadEntry* ThreadLocal();
};
} // namespace cl
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENCL_SDACCEL_SDACCEL_COMMON_H_
/*!
* Copyright (c) 2018 by Contributors
* \file sdaccel_device_api.cc
*/
#include <tvm/runtime/registry.h>
#include <dmlc/thread_local.h>
#include <tvm/container.h>
#include <tvm/ir.h>
#include <tvm/packed_func_ext.h>
#include "./sdaccel_common.h"
namespace tvm {
namespace runtime {
namespace cl {
// Route thread-entry lookups to the SDAccel-specific thread-local store so
// SDAccel contexts do not share per-thread state with plain OpenCL ones.
OpenCLThreadEntry* SDAccelWorkspace::GetThreadEntry() {
  return SDAccelThreadEntry::ThreadLocal();
}
// Process-wide singleton workspace. Exposed through the base-class pointer
// type so generic OpenCL runtime code can use it interchangeably.
const std::shared_ptr<OpenCLWorkspace>& SDAccelWorkspace::Global() {
  static std::shared_ptr<OpenCLWorkspace> inst = std::make_shared<SDAccelWorkspace>();
  return inst;
}
// Initialize against accelerator-class devices on the Xilinx platform.
void SDAccelWorkspace::Init() {
  OpenCLWorkspace::Init("accelerator", "Xilinx");
}
// An SDAccel workspace only accepts contexts tagged with kDLSDAccel
// (overrides the base class, which checks for kDLOpenCL).
bool SDAccelWorkspace::IsOpenCLDevice(TVMContext ctx) {
  return ctx.device_type == static_cast<DLDeviceType>(kDLSDAccel);
}
// Per-thread storage for SDAccel state.
typedef dmlc::ThreadLocalStore<SDAccelThreadEntry> SDAccelThreadStore;

// Return the calling thread's SDAccel entry, creating it on first use.
SDAccelThreadEntry* SDAccelThreadEntry::ThreadLocal() {
  return SDAccelThreadStore::Get();
}
// Expose the SDAccel DeviceAPI through the global registry so the runtime
// can resolve it by the "device_api.sdaccel" key.
TVM_REGISTER_GLOBAL("device_api.sdaccel")
.set_body([](TVMArgs args, TVMRetValue* rv) {
    DeviceAPI* ptr = SDAccelWorkspace::Global().get();
    *rv = static_cast<void*>(ptr);
  });
} // namespace cl
} // namespace runtime
} // namespace tvm
/*!
* Copyright (c) 2018 by Contributors
* \file sdaccel_module.cc
*/
#include <dmlc/memory_io.h>
#include <tvm/runtime/registry.h>
#include <vector>
#include <string>
#include <unordered_map>
#include "./sdaccel_common.h"
#include "./sdaccel_module.h"
namespace tvm {
namespace runtime {
/*!
 * \brief OpenCL module specialized for SDAccel devices.
 *
 *  Reuses OpenCLModuleNode's kernel management wholesale; only the global
 *  workspace lookup and the type key differ.
 */
class SDAccelModuleNode : public OpenCLModuleNode {
 public:
  explicit SDAccelModuleNode(std::string data,
                             std::string fmt,
                             std::unordered_map<std::string, FunctionInfo> fmap,
                             std::string source)
      : OpenCLModuleNode(data, fmt, fmap, source) {}

  const std::shared_ptr<cl::OpenCLWorkspace>& GetGlobalWorkspace() final;
  const char* type_key() const final;
};
// SDAccel modules run against the SDAccel workspace singleton rather than
// the generic OpenCL one.
const std::shared_ptr<cl::OpenCLWorkspace>& SDAccelModuleNode::GetGlobalWorkspace() {
  return cl::SDAccelWorkspace::Global();
}
// Type key identifying this module kind to the TVM module system.
const char* SDAccelModuleNode::type_key() const {
  return "sdaccel";
}
/*!
 * \brief Create an SDAccel module from compiled binary data.
 * \param data The module binary data.
 * \param fmt The format of the data.
 * \param fmap The function information map of each function.
 * \param source The generated device source code.
 * \return The initialized SDAccel module.
 */
Module SDAccelModuleCreate(
    std::string data,
    std::string fmt,
    std::unordered_map<std::string, FunctionInfo> fmap,
    std::string source) {
  auto node = std::make_shared<SDAccelModuleNode>(data, fmt, fmap, source);
  node->Init();
  return Module(node);
}
// Load an SDAccel module from an on-disk binary (xclbin/awsxclbin) together
// with its accompanying meta-data file.
Module SDAccelModuleLoadFile(const std::string& file_name,
                             const std::string& format) {
  const std::string fmt = GetFileFormat(file_name, format);
  std::string blob;
  LoadBinaryFromFile(file_name, &blob);
  std::unordered_map<std::string, FunctionInfo> finfo;
  LoadMetaDataFromFile(GetMetaFilePath(file_name), &finfo);
  return SDAccelModuleCreate(blob, fmt, finfo, std::string());
}
// Deserialize an SDAccel module from a dmlc::Stream. The stream layout is
// format string, function-info map, then the binary blob.
Module SDAccelModuleLoadBinary(void* strm) {
  auto* stream = static_cast<dmlc::Stream*>(strm);
  std::string fmt;
  std::unordered_map<std::string, FunctionInfo> fmap;
  std::string blob;
  stream->Read(&fmt);
  stream->Read(&fmap);
  stream->Read(&blob);
  return SDAccelModuleCreate(blob, fmt, fmap, std::string());
}
// Register the file loader so Module load-by-extension can handle .xclbin.
TVM_REGISTER_GLOBAL("module.loadfile_xclbin")
.set_body([](TVMArgs args, TVMRetValue* rv) {
    *rv = SDAccelModuleLoadFile(args[0], args[1]);
  });

// AWS-packaged binaries (.awsxclbin) load through the same path.
TVM_REGISTER_GLOBAL("module.loadfile_awsxclbin")
.set_body([](TVMArgs args, TVMRetValue* rv) {
    *rv = SDAccelModuleLoadFile(args[0], args[1]);
  });
} // namespace runtime
} // namespace tvm
/*!
* Copyright (c) 2018 by Contributors
* \file sdaccel_module.h
* \brief Execution handling of OPENCL kernels for SDAccel FPGAs
*/
#ifndef TVM_RUNTIME_OPENCL_SDACCEL_SDACCEL_MODULE_H_
#define TVM_RUNTIME_OPENCL_SDACCEL_SDACCEL_MODULE_H_
#include <tvm/runtime/packed_func.h>
#include <memory>
#include <vector>
#include <string>
#include "../../meta_data.h"
namespace tvm {
namespace runtime {
/*!
 * \brief create a opencl module for SDAccel from data.
 *
 * \param data The module data.
 * \param fmt The format of the data, can be "xclbin", "awsxclbin"
 * \param fmap The map function information map of each function.
 * \param source The generated device source code.
 * \return The created SDAccel module.
 */
Module SDAccelModuleCreate(
    std::string data,
    std::string fmt,
    std::unordered_map<std::string, FunctionInfo> fmap,
    std::string source);
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENCL_SDACCEL_SDACCEL_MODULE_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment