Commit 9e1a5ec4 by Tianqi Chen Committed by GitHub

[RUNTIME] Enable OpenCL (#17)

parent e9ff9a89
......@@ -26,6 +26,7 @@ endif
export LDFLAGS = -pthread -lm
export CFLAGS = -std=c++11 -Wall -O2\
-Iinclude -Idmlc-core/include -IHalideIR/src -fPIC
export FRAMEWORKS=
ifneq ($(ADD_CFLAGS), NONE)
CFLAGS += $(ADD_CFLAGS)
......@@ -43,6 +44,20 @@ else
CFLAGS += -DTVM_CUDA_RUNTIME=0
endif
ifeq ($(USE_OPENCL), 1)
CFLAGS += -DTVM_OPENCL_RUNTIME=1
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S), Darwin)
FRAMEWORKS += -framework OpenCL
else
LDFLAGS += -lOpenCL
endif
else
CFLAGS += -DTVM_OPENCL_RUNTIME=0
endif
include tests/cpp/unittest.mk
test: $(TEST)
......@@ -59,7 +74,7 @@ lib/libtvm.a: $(ALL_DEP)
lib/libtvm.so: $(ALL_DEP)
@mkdir -p $(@D)
$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
$(CXX) $(CFLAGS) $(FRAMEWORKS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
$(LIB_HALIDE_IR): LIBHALIDEIR
......
......@@ -151,6 +151,23 @@ typedef TVMArray* TVMArrayHandle;
TVM_DLL const char *TVMGetLastError(void);
/*!
* \brief Initialize certain type of devices, this may
* not be necessary for all device types. But is needed for OpenCL.
*
* \param dev_mask The device mask of device type to be initialized
* \param option_keys Additional option keys to pass.
* \param option_vals Additional option values to pass
* \param num_options Number of options to be passed into it.
* \param out_code 1: success, 0: already initialized
* \return Whether the function is successful.
*/
TVM_DLL int TVMDeviceInit(int dev_mask,
const char** option_keys,
const char** option_vals,
int num_options,
int *out_code);
/*!
* \brief Whether the specified context is enabled.
*
* \param ctx The context to be checked.
......
......@@ -37,6 +37,9 @@ ADD_CFLAGS =
# whether use CUDA during compile
USE_CUDA = 1
# whether use OpenCL during compile
USE_OPENCL = 0
# add the path to CUDA library to link and compile flag
# if you have already add them to environment variable, leave it as NONE
# USE_CUDA_PATH = /usr/local/cuda
......
......@@ -12,7 +12,7 @@ from . import collections
from . import schedule
from . import ndarray as nd
from .ndarray import cpu, gpu, opencl
from .ndarray import cpu, gpu, opencl, init_opencl
from ._base import TVMError
from .function import *
......@@ -7,7 +7,7 @@ import ctypes
import numpy as np
from .._base import _LIB
from .._base import c_array
from .._base import c_array, c_str
from .._base import check_call
......@@ -182,6 +182,30 @@ def sync(ctx):
check_call(_LIB.TVMSynchronize(ctx, None))
def init_opencl(**kwargs):
    """Initialize the opencl with the options.

    Parameters
    ----------
    kwargs : dict
        The options (e.g. platform_name, device_type) forwarded as
        C string key/value pairs to TVMDeviceInit.

    Returns
    -------
    bool
        True when this call performed the initialization, False when
        the runtime reported it was already initialized.
    """
    # Materialize the items once so keys and values stay aligned.
    options = list(kwargs.items())
    keys = [c_str(key) for key, _ in options]
    vals = [c_str(val) for _, val in options]
    # 4 is the device mask value for OpenCL in the C runtime API.
    dev_mask = ctypes.c_int(4)
    out_code = ctypes.c_int()
    check_call(_LIB.TVMDeviceInit(
        dev_mask,
        c_array(ctypes.c_char_p, keys),
        c_array(ctypes.c_char_p, vals),
        ctypes.c_int(len(keys)),
        ctypes.byref(out_code)))
    return out_code.value != 0
class NDArrayBase(object):
"""A simple Device/CPU Array object in runtime."""
__slots__ = ["handle"]
......
......@@ -9,6 +9,7 @@ import numpy as _np
from ._ctypes._runtime_api import TVMContext, TVMDataType, NDArrayBase
from ._ctypes._runtime_api import cpu, gpu, opencl, empty, sync
from ._ctypes._runtime_api import _init_runtime_module
from ._ctypes._runtime_api import init_opencl
class NDArray(NDArrayBase):
......
......@@ -24,7 +24,7 @@ class Schedule(NodeBase):
k = k.op
if not isinstance(k, _tensor.Operation):
raise ValueError("Expect schedule key to be Tensor or Operation")
if not k in self.stage_map:
if k not in self.stage_map:
raise ValueError("Cannot find the operation %s in schedule" % (str(k)))
return self.stage_map[k]
......
......@@ -64,6 +64,23 @@ inline size_t GetDataAlignment(TVMArray* arr) {
using namespace tvm::runtime;
// C API entry point: initialize a device type with option key/value pairs.
// *out_code starts at 1 (success); for kOpenCL it receives the result of
// DeviceInit<kOpenCL>, which is 0 when the device was already initialized.
// Device types without an initializer fall through with *out_code == 1.
// Return value/error handling is provided by the API_BEGIN/API_END macros.
int TVMDeviceInit(int dev_mask,
                  const char** option_keys,
                  const char** option_vals,
                  int num_options,
                  int* out_code) {
  API_BEGIN();
  *out_code = 1;
  switch (dev_mask) {
    case kOpenCL: {
      *out_code = DeviceInit<kOpenCL>(option_keys, option_vals, num_options);
      break;
    }
    default: break;
  }
  API_END();
}
int TVMContextEnabled(TVMContext ctx,
int* out_enabled) {
API_BEGIN();
......
/*!
* Copyright (c) 2016 by Contributors
* \file device_api.hx
* \file device_api.h
* \brief Device specific API
*/
#ifndef TVM_RUNTIME_DEVICE_API_H_
......@@ -12,6 +12,21 @@
namespace tvm {
namespace runtime {
/*!
 * \brief Initialize the device.
 * \param option_keys Additional option keys to pass.
 * \param option_vals Additional option values to pass
 * \param num_options Number of options to be passed into it.
 * \return true if the device was initialized by this call,
 *         false if it was already initialized.
 *         (The previous "0 if success, 1 if already initialized" wording
 *         contradicted the bool return of the OpenCL specialization.)
 * \tparam xpu The device mask.
 */
template<TVMDeviceMask xpu>
inline bool DeviceInit(const char** option_keys,
                       const char** option_vals,
                       int num_options) {
  // Default: devices that need no explicit initialization report success.
  return true;
}
/*!
* \brief Whether ctx is enabled.
* \param ctx The device context to perform operation.
* \tparam xpu The device mask.
......@@ -93,7 +108,8 @@ inline void StreamSync(TVMContext ctx, TVMStreamHandle stream);
} // namespace runtime
} // namespace tvm
#include "./device_api_gpu.h"
#include "./device_api_cpu.h"
#include "./device_api_gpu.h"
#include "./device_api_opencl.h"
#endif // TVM_RUNTIME_DEVICE_API_H_
/*!
* Copyright (c) 2016 by Contributors
* \file ctxice_api_gpu.h
* \file device_api_gpu.h
* \brief GPU specific API
*/
#ifndef TVM_RUNTIME_DEVICE_API_GPU_H_
......@@ -14,15 +14,6 @@
namespace tvm {
namespace runtime {
/*!
* \brief Check CUDA error.
* \param msg Message to print if an error occured.
*/
#define CHECK_CUDA_ERROR(msg) \
{ \
cudaError_t e = cudaGetLastError(); \
CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \
}
/*!
* \brief Protected CUDA call.
......
/*!
* Copyright (c) 2016 by Contributors
* \file device_api_opencl.h
* \brief OpenCL specific API
*/
#ifndef TVM_RUNTIME_DEVICE_API_OPENCL_H_
#define TVM_RUNTIME_DEVICE_API_OPENCL_H_
#if TVM_OPENCL_RUNTIME
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif
#include <mutex>
#include <string>
#include <vector>
namespace tvm {
namespace runtime {
namespace cl {
static_assert(sizeof(cl_mem) ==sizeof(void*),
"Required to store cl_mem inside void*");
/*!
 * \brief Translate an OpenCL status code into its symbolic name.
 * \param error The cl_int status returned by an OpenCL API call.
 * \return A static string such as "CL_INVALID_VALUE"; unrecognized codes
 *         yield "Unknown OpenCL error code".
 */
inline const char* CLGetErrorString(cl_int error) {
  switch (error) {
    case CL_SUCCESS: return "CL_SUCCESS";
    case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
    case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
    case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
    case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
    case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
    case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
    case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
    case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
    case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
    case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
    case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
    case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
    case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
    case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
    case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM";
    case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
    case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
    case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
    case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
    case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
    case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
    case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
    case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
    case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
    case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
    case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
    case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
    case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
    case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
    case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
    case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
    case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
    case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
    case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
    case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
    case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
    case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
    case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
    case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
    case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
    case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
    case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
    case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
    case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
    case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
    default: return "Unknown OpenCL error code";
  }
}
/*!
* \brief Protected OpenCL call
* \param func Expression to call.
*/
#define OPENCL_CHECK_ERROR(e) \
{ \
CHECK(e == CL_SUCCESS) \
<< "OpenCL Error, code=" << e << ": " << cl::CLGetErrorString(e); \
}
#define OPENCL_CALL(func) \
{ \
cl_int e = (func); \
OPENCL_CHECK_ERROR(e); \
}
// Process-local OpenCL workspace: a lazily-initialized singleton holding the
// platform, context, devices, and one command queue per device. Populated by
// DeviceInit<kOpenCL>; queue index corresponds to TVMContext::dev_id.
class OpenCLWorkspace {
 public:
  // global platform id (selected during DeviceInit)
  cl_platform_id platform_id;
  // global context of this process; nullptr until initialized
  cl_context context{nullptr};
  // the devices belonging to the selected platform
  std::vector<cl_device_id> devices;
  // the queues, one per entry of `devices` (same index)
  std::vector<cl_command_queue> queues;
  // the mutex for initialization (guards the one-time setup)
  std::mutex mu;
  // destructor: release the context if it was ever created
  ~OpenCLWorkspace() {
    if (context != nullptr) {
      OPENCL_CALL(clReleaseContext(context));
    }
  }
  // whether the workspace is initialized (context creation is the last
  // observable step of DeviceInit, so it doubles as the "done" flag).
  inline bool initialized() const {
    return context != nullptr;
  }
  // get the queue of the context; requires a kOpenCL context whose dev_id
  // indexes a previously created queue.
  cl_command_queue GetQueue(TVMContext ctx) const {
    CHECK_EQ(ctx.dev_mask, kOpenCL);
    CHECK(initialized())
        << "The OpenCL is not initialized";
    CHECK(ctx.dev_id >= 0 && static_cast<size_t>(ctx.dev_id) < queues.size())
        << "Invalid OpenCL dev_id=" << ctx.dev_id;
    return queues[ctx.dev_id];
  }
  // get the global workspace (Meyers singleton)
  static OpenCLWorkspace* Global() {
    static OpenCLWorkspace inst;
    return &inst;
  }
};
/*!
 * \brief Query a string-valued platform attribute.
 * \param pid The platform to query.
 * \param param_name The CL_PLATFORM_* attribute.
 * \return The attribute value as a std::string.
 */
inline std::string GetPlatformInfo(
    cl_platform_id pid, cl_platform_info param_name) {
  // Two-phase query: first call reports the size, second fills the buffer.
  size_t nbytes;
  OPENCL_CALL(clGetPlatformInfo(pid, param_name, 0, nullptr, &nbytes));
  std::string info;
  info.resize(nbytes);
  OPENCL_CALL(clGetPlatformInfo(pid, param_name, nbytes, &info[0], nullptr));
  return info;
}
/*!
 * \brief Query a string-valued device attribute.
 * \param pid The device to query.
 * \param param_name The CL_DEVICE_* attribute.
 * \return The attribute value as a std::string.
 */
inline std::string GetDeviceInfo(
    cl_device_id pid, cl_device_info param_name) {
  // Two-phase query: first call reports the size, second fills the buffer.
  size_t nbytes;
  OPENCL_CALL(clGetDeviceInfo(pid, param_name, 0, nullptr, &nbytes));
  std::string info;
  info.resize(nbytes);
  OPENCL_CALL(clGetDeviceInfo(pid, param_name, nbytes, &info[0], nullptr));
  return info;
}
inline std::vector<cl_platform_id> GetPlatformIDs() {
cl_uint ret_size;
OPENCL_CALL(clGetPlatformIDs(0, nullptr, &ret_size));
std::vector<cl_platform_id> ret;
ret.resize(ret_size);
OPENCL_CALL(clGetPlatformIDs(ret_size, &ret[0], nullptr));
return ret;
}
/*!
 * \brief Enumerate the devices of a platform, optionally filtered by type.
 * \param pid The platform to query.
 * \param device_type "cpu", "gpu", or "accelerator"; any other value
 *        (including empty) matches all device types.
 * \return The matching device ids.
 */
inline std::vector<cl_device_id> GetDeviceIDs(
    cl_platform_id pid, std::string device_type) {
  cl_device_type dtype = CL_DEVICE_TYPE_ALL;
  if (device_type == "cpu") dtype = CL_DEVICE_TYPE_CPU;
  // BUG FIX: "gpu" previously mapped to CL_DEVICE_TYPE_CPU, so requesting
  // GPU devices silently enumerated CPU devices instead.
  if (device_type == "gpu") dtype = CL_DEVICE_TYPE_GPU;
  if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_ACCELERATOR;
  cl_uint ret_size;
  OPENCL_CALL(clGetDeviceIDs(pid, dtype, 0, nullptr, &ret_size));
  std::vector<cl_device_id> ret;
  ret.resize(ret_size);
  OPENCL_CALL(clGetDeviceIDs(pid, dtype, ret_size, &ret[0], nullptr));
  return ret;
}
/*!
 * \brief Check whether a platform attribute contains the given substring.
 * \param pid The platform to inspect.
 * \param param_name The CL_PLATFORM_* attribute to read.
 * \param value The substring to look for; empty matches everything.
 * \return true when value is empty or occurs in the attribute.
 */
inline bool MatchPlatformInfo(
    cl_platform_id pid,
    cl_platform_info param_name,
    std::string value) {
  // An empty filter is treated as a wildcard.
  if (value.length() == 0) return true;
  return GetPlatformInfo(pid, param_name).find(value) != std::string::npos;
}
} // namespace cl
/*!
 * \brief OpenCL specialization of DeviceInit: select a platform, enumerate
 *        devices, create the shared context and one command queue per device.
 * \param option_keys Supported keys: "platform_name", "device_type".
 * \param option_vals Values corresponding to option_keys.
 * \param num_options Number of options passed in.
 * \return true on first successful initialization, false if already initialized.
 */
template<>
inline bool DeviceInit<kOpenCL>(const char** option_keys,
                                const char** option_vals,
                                int num_options) {
  cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global();
  // BUG FIX: the original wrote `std::lock_guard<std::mutex>(w->mu);`,
  // which constructs an unnamed temporary that unlocks immediately —
  // the initialization was never actually mutex-protected.
  std::lock_guard<std::mutex> lock(w->mu);
  if (w->initialized()) return false;
  // matching conditions parsed from the options
  std::string platform_name, device_type;
  for (int i = 0; i < num_options; ++i) {
    std::string key = option_keys[i];
    std::string val = option_vals[i];
    if (key == "platform_name") {
      platform_name = val;
    } else if (key == "device_type") {
      device_type = val;
    } else {
      LOG(FATAL) << "unknown DeviceInit option " << key;
    }
  }
  // platforms whose name contains platform_name (empty matches all)
  std::vector<cl_platform_id> platform_matched;
  for (cl_platform_id pid : cl::GetPlatformIDs()) {
    bool matched = true;
    if (!cl::MatchPlatformInfo(pid, CL_PLATFORM_NAME, platform_name)) matched = false;
    if (matched) platform_matched.push_back(pid);
  }
  if (platform_matched.size() == 0) {
    LOG(FATAL) << "No OpenCL platform matched given existing options ...";
  }
  if (platform_matched.size() > 1) {
    LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... ";
  }
  w->platform_id = platform_matched[0];
  LOG(INFO) << "Initialize OpenCL platform \'"
            << cl::GetPlatformInfo(w->platform_id, CL_PLATFORM_NAME) << '\'';
  std::vector<cl_device_id> devices_matched =
      cl::GetDeviceIDs(w->platform_id, device_type);
  // BUG FIX: message previously read "No OpenCL device any device matched".
  CHECK_GT(devices_matched.size(), 0U)
      << "No OpenCL device matched given the options";
  w->devices = devices_matched;
  cl_int err_code;
  w->context = clCreateContext(
      nullptr, w->devices.size(), &(w->devices[0]),
      nullptr, nullptr, &err_code);
  OPENCL_CHECK_ERROR(err_code);
  CHECK_EQ(w->queues.size(), 0U);
  // One in-order command queue per device; queue index == dev_id.
  for (size_t i = 0; i < w->devices.size(); ++i) {
    cl_device_id did = w->devices[i];
    w->queues.push_back(
        clCreateCommandQueue(w->context, did, 0, &err_code));
    OPENCL_CHECK_ERROR(err_code);
    LOG(INFO) << "opencl(" << i
              << ")=\'" << cl::GetDeviceInfo(did, CL_DEVICE_NAME)
              << "\' cl_device_id=" << did;
  }
  return true;
}
/*!
 * \brief Allocate an OpenCL buffer of the given size in the shared context.
 * \param ctx The device context (unused beyond selecting OpenCL).
 * \param size Number of bytes to allocate.
 * \param alignment Requested alignment; OpenCL buffers provide their own
 *        alignment guarantees, so this parameter is not used here.
 * \return The cl_mem handle stored inside a void* (see static_assert above).
 */
template<>
inline void* AllocDataSpace<kOpenCL>(TVMContext ctx, size_t size, size_t alignment) {
  cl::OpenCLWorkspace* workspace = cl::OpenCLWorkspace::Global();
  cl_int status;
  cl_mem buffer = clCreateBuffer(
      workspace->context, CL_MEM_READ_WRITE, size, nullptr, &status);
  OPENCL_CHECK_ERROR(status);
  return buffer;
}
/*!
 * \brief Release an OpenCL buffer previously returned by AllocDataSpace.
 * \param ctx The device context (unused).
 * \param ptr The void*-wrapped cl_mem handle to release.
 */
template<>
inline void FreeDataSpace<kOpenCL>(TVMContext ctx, void* ptr) {
  // The void* handle is really a cl_mem; hand it back to the runtime.
  OPENCL_CALL(clReleaseMemObject(static_cast<cl_mem>(ptr)));
}
/*!
 * \brief Copy data between OpenCL buffers and/or CPU memory.
 * \param from Source pointer (cl_mem handle when the source is OpenCL).
 * \param to Destination pointer (cl_mem handle when the destination is OpenCL).
 * \param size Number of bytes to copy.
 * \param ctx_from Source context (kOpenCL or kCPU).
 * \param ctx_to Destination context (kOpenCL or kCPU).
 * \param stream Must be nullptr; explicit streams are not supported here.
 */
template<>
inline void CopyDataFromTo<kOpenCL>(const void* from,
                                    void* to,
                                    size_t size,
                                    TVMContext ctx_from,
                                    TVMContext ctx_to,
                                    TVMStreamHandle stream) {
  CHECK(stream == nullptr);
  cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global();
  if (ctx_from.dev_mask == kOpenCL && ctx_to.dev_mask == kOpenCL) {
    // Device-to-device copy enqueued on the destination's queue.
    OPENCL_CALL(clEnqueueCopyBuffer(
        w->GetQueue(ctx_to),
        static_cast<cl_mem>((void*)from),  // NOLINT(*)
        static_cast<cl_mem>(to),
        0, 0, size, 0, nullptr, nullptr));
  } else if (ctx_from.dev_mask == kOpenCL && ctx_to.dev_mask == kCPU) {
    // Non-blocking read followed by clFinish so `to` is valid on return.
    OPENCL_CALL(clEnqueueReadBuffer(
        w->GetQueue(ctx_from),
        static_cast<cl_mem>((void*)from),  // NOLINT(*)
        CL_FALSE, 0, size, to,
        0, nullptr, nullptr));
    OPENCL_CALL(clFinish(w->GetQueue(ctx_from)));
  } else if (ctx_from.dev_mask == kCPU && ctx_to.dev_mask == kOpenCL) {
    // Non-blocking write followed by clFinish so `from` may be freed on return.
    OPENCL_CALL(clEnqueueWriteBuffer(
        w->GetQueue(ctx_to),
        static_cast<cl_mem>(to),
        CL_FALSE, 0, size, from,
        0, nullptr, nullptr));
    OPENCL_CALL(clFinish(w->GetQueue(ctx_to)));
  } else {
    // BUG FIX: the message previously said "GPU" although this path only
    // handles OpenCL/CPU combinations.
    LOG(FATAL) << "Expect copy from/to OpenCL or between OpenCL and CPU";
  }
}
/*!
 * \brief Block until all queued work on the context's command queue finishes.
 * \param ctx The OpenCL context whose queue is drained.
 * \param stream Must be nullptr; explicit streams are not supported here.
 */
template<>
inline void StreamSync<kOpenCL>(TVMContext ctx, TVMStreamHandle stream) {
  CHECK(stream == nullptr);
  OPENCL_CALL(clFinish(cl::OpenCLWorkspace::Global()->GetQueue(ctx)));
}
} // namespace runtime
} // namespace tvm
#endif // TVM_OPENCL_RUNTIME
#endif // TVM_RUNTIME_DEVICE_API_OPENCL_H_
......@@ -2,6 +2,7 @@ import tvm
import numpy as np
def enabled_ctx_list():
    """Return the list of TVM contexts enabled in this build.

    Returns
    -------
    list of TVMContext
        The subset of [cpu(0), gpu(0), opencl(0)] whose ``enabled``
        attribute is true.
    """
    # OpenCL requires explicit initialization before its context
    # reports itself as enabled.
    tvm.init_opencl()
    ctx_list = [tvm.cpu(0), tvm.gpu(0), tvm.opencl(0)]
    ctx_list = [ctx for ctx in ctx_list if ctx.enabled]
    return ctx_list
......
......@@ -16,13 +16,15 @@ fi
cp make/config.mk config.mk
echo "USE_CUDA=0" >> config.mk
echo "USE_OPENCL=0" >> config.mk
if [ ! ${TRAVIS_OS_NAME} == "osx" ]; then
if [ ${TRAVIS_OS_NAME} == "osx" ]; then
echo "USE_OPENCL=1" >> config.mk
else
# use g++-4.8 for linux
if [ ${CXX} == "g++" ]; then
export CXX=g++-4.8
fi
echo "USE_OPENCL=0" >> config.mk
fi
if [ ${TASK} == "cpp_test" ] || [ ${TASK} == "all_test" ]; then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment