Commit 9e1a5ec4 (wenyuanbo/tic)
Authored Jan 17, 2017 by Tianqi Chen, committed by GitHub on Jan 17, 2017
[RUNTIME] Enable OpenCL (#17)
parent e9ff9a89
Showing 13 changed files with 415 additions and 18 deletions:

  Makefile                               +16   -1
  include/tvm/c_runtime_api.h            +17   -0
  make/config.mk                          +3   -0
  python/tvm/__init__.py                  +1   -1
  python/tvm/_ctypes/_runtime_api.py     +25   -1
  python/tvm/ndarray.py                   +1   -0
  python/tvm/schedule.py                  +1   -1
  src/runtime/c_runtime_api.cc           +17   -0
  src/runtime/device_api.h               +18   -2
  src/runtime/device_api_gpu.h            +1  -10
  src/runtime/device_api_opencl.h       +310   -0
  tests/python/test_runtime_ndarray.py    +1   -0
  tests/travis/run_test.sh                +4   -2
Makefile
@@ -26,6 +26,7 @@ endif
 export LDFLAGS = -pthread -lm
 export CFLAGS = -std=c++11 -Wall -O2\
 	-Iinclude -Idmlc-core/include -IHalideIR/src -fPIC
+export FRAMEWORKS =

 ifneq ($(ADD_CFLAGS), NONE)
 	CFLAGS += $(ADD_CFLAGS)
@@ -43,6 +44,20 @@ else
 	CFLAGS += -DTVM_CUDA_RUNTIME=0
 endif

+ifeq ($(USE_OPENCL), 1)
+	CFLAGS += -DTVM_OPENCL_RUNTIME=1
+	UNAME_S := $(shell uname -s)
+	ifeq ($(UNAME_S), Darwin)
+		FRAMEWORKS += -framework OpenCL
+	else
+		LDFLAGS += -lOpenCL
+	endif
+else
+	CFLAGS += -DTVM_OPENCL_RUNTIME=0
+endif
+
 include tests/cpp/unittest.mk

 test: $(TEST)
@@ -59,7 +74,7 @@ lib/libtvm.a: $(ALL_DEP)
 lib/libtvm.so: $(ALL_DEP)
 	@mkdir -p $(@D)
-	$(CXX) $(CFLAGS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)
+	$(CXX) $(CFLAGS) $(FRAMEWORKS) -shared -o $@ $(filter %.o %.a, $^) $(LDFLAGS)

 $(LIB_HALIDE_IR): LIBHALIDEIR
include/tvm/c_runtime_api.h
@@ -151,6 +151,23 @@ typedef TVMArray* TVMArrayHandle;
 TVM_DLL const char *TVMGetLastError(void);

+/*!
+ * \brief Initialize certain type of devices, this may
+ *  not be necessary for all device types. But is needed for OpenCL.
+ *
+ * \param dev_mask The device mask of device type to be initialized
+ * \param option_keys Additional option keys to pass.
+ * \param option_vals Additional option values to pass
+ * \param num_options Number of options to be passed into it.
+ * \param out_code 1: success, 0: already initialized
+ * \return Whether the function is successful.
+ */
+TVM_DLL int TVMDeviceInit(int dev_mask,
+                          const char** option_keys,
+                          const char** option_vals,
+                          int num_options,
+                          int* out_code);
+
 /*!
  * \brief Whether the specified context is enabled.
  *
  * \param ctx The context to be checked.
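For illustration only (not part of this diff): a minimal ctypes sketch of how the new C entry point can be declared and driven from Python. The library path, the byte-string option encoding, and the OpenCL dev_mask value of 4 are assumptions taken from the Python wrapper added later in this commit; the real binding goes through _LIB and check_call in python/tvm/_ctypes/_runtime_api.py.

import ctypes

lib = ctypes.CDLL("lib/libtvm.so")          # assumed library location
lib.TVMDeviceInit.restype = ctypes.c_int    # 0 on success, nonzero on error
lib.TVMDeviceInit.argtypes = [
    ctypes.c_int,                           # dev_mask (4 == OpenCL)
    ctypes.POINTER(ctypes.c_char_p),        # option_keys
    ctypes.POINTER(ctypes.c_char_p),        # option_vals
    ctypes.c_int,                           # num_options
    ctypes.POINTER(ctypes.c_int),           # out_code
]

keys = (ctypes.c_char_p * 1)(b"device_type")
vals = (ctypes.c_char_p * 1)(b"gpu")
out_code = ctypes.c_int()
ret = lib.TVMDeviceInit(4, keys, vals, 1, ctypes.byref(out_code))
assert ret == 0
print(out_code.value)  # 1: freshly initialized, 0: already initialized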
make/config.mk
@@ -37,6 +37,9 @@ ADD_CFLAGS =
 # whether use CUDA during compile
 USE_CUDA = 1

+# whether use OpenCL during compile
+USE_OPENCL = 0
+
 # add the path to CUDA library to link and compile flag
 # if you have already add them to environment variable, leave it as NONE
 # USE_CUDA_PATH = /usr/local/cuda
python/tvm/__init__.py
@@ -12,7 +12,7 @@ from . import collections
 from . import schedule
 from . import ndarray as nd
-from .ndarray import cpu, gpu, opencl
+from .ndarray import cpu, gpu, opencl, init_opencl
 from ._base import TVMError
 from .function import *
python/tvm/_ctypes/_runtime_api.py
@@ -7,7 +7,7 @@ import ctypes
 import numpy as np

 from .._base import _LIB
-from .._base import c_array
+from .._base import c_array, c_str
 from .._base import check_call
@@ -182,6 +182,30 @@ def sync(ctx):
     check_call(_LIB.TVMSynchronize(ctx, None))

+def init_opencl(**kwargs):
+    """Initialize the opencl with the options.
+
+    Parameters
+    ----------
+    kwargs : dict
+        The options
+    """
+    keys = []
+    vals = []
+    for k, v in kwargs.items():
+        keys.append(c_str(k))
+        vals.append(c_str(v))
+    dev_mask = ctypes.c_int(4)
+    out_code = ctypes.c_int()
+    check_call(_LIB.TVMDeviceInit(
+        dev_mask,
+        c_array(ctypes.c_char_p, keys),
+        c_array(ctypes.c_char_p, vals),
+        ctypes.c_int(len(keys)),
+        ctypes.byref(out_code)))
+    return out_code.value != 0
+
 class NDArrayBase(object):
     """A simple Device/CPU Array object in runtime."""
     __slots__ = ["handle"]
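A short usage sketch of the wrapper above (not part of this diff): the kwargs are converted to parallel C string arrays and forwarded to TVMDeviceInit with dev_mask 4, and the boolean result distinguishes a fresh initialization from an already-initialized runtime. It assumes a build with USE_OPENCL=1 and at least one OpenCL GPU device; unknown option keys make the runtime abort.

import tvm

first = tvm.init_opencl(device_type="gpu")   # performs platform/device discovery
second = tvm.init_opencl(device_type="gpu")  # workspace already set up
print(first, second)  # expected: True False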
python/tvm/ndarray.py
@@ -9,6 +9,7 @@ import numpy as _np
 from ._ctypes._runtime_api import TVMContext, TVMDataType, NDArrayBase
 from ._ctypes._runtime_api import cpu, gpu, opencl, empty, sync
 from ._ctypes._runtime_api import _init_runtime_module
+from ._ctypes._runtime_api import init_opencl

 class NDArray(NDArrayBase):
python/tvm/schedule.py
@@ -24,7 +24,7 @@ class Schedule(NodeBase):
             k = k.op
         if not isinstance(k, _tensor.Operation):
             raise ValueError("Expect schedule key to be Tensor or Operation")
-        if not k in self.stage_map:
+        if k not in self.stage_map:
             raise ValueError("Cannot find the operation %s in schedule" % (str(k)))
         return self.stage_map[k]
src/runtime/c_runtime_api.cc
@@ -64,6 +64,23 @@ inline size_t GetDataAlignment(TVMArray* arr) {
 using namespace tvm::runtime;

+int TVMDeviceInit(int dev_mask,
+                  const char** option_keys,
+                  const char** option_vals,
+                  int num_options,
+                  int* out_code) {
+  API_BEGIN();
+  *out_code = 1;
+  switch (dev_mask) {
+    case kOpenCL: {
+      *out_code = DeviceInit<kOpenCL>(option_keys, option_vals, num_options);
+      break;
+    }
+    default: break;
+  }
+  API_END();
+}
+
 int TVMContextEnabled(TVMContext ctx,
                       int* out_enabled) {
   API_BEGIN();
src/runtime/device_api.h
 /*!
  * Copyright (c) 2016 by Contributors
- * \file device_api.hx
+ * \file device_api.h
  * \brief Device specific API
  */
 #ifndef TVM_RUNTIME_DEVICE_API_H_
@@ -12,6 +12,21 @@
 namespace tvm {
 namespace runtime {
+/*!
+ * \brief Initialize the device.
+ * \param option_keys Additional option keys to pass.
+ * \param option_vals Additional option values to pass
+ * \param num_options Number of options to be passed into it.
+ * \return 0 if success, 1: if already initialized
+ * \tparam xpu The device mask.
+ */
+template<TVMDeviceMask xpu>
+inline bool DeviceInit(const char** option_keys,
+                       const char** option_vals,
+                       int num_options) {
+  return true;
+}
 /*!
  * \brief Whether ctx is enabled.
  * \param ctx The device context to perform operation.
  * \tparam xpu The device mask.
@@ -93,7 +108,8 @@ inline void StreamSync(TVMContext ctx, TVMStreamHandle stream);
 }  // namespace runtime
 }  // namespace tvm
-#include "./device_api_gpu.h"
+#include "./device_api_cpu.h"
+#include "./device_api_gpu.h"
+#include "./device_api_opencl.h"
 #endif  // TVM_RUNTIME_DEVICE_API_H_
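The DeviceInit template above is a deliberate no-op by default, so only OpenCL (specialized in device_api_opencl.h below) needs an explicit initialization call. In user code that looks roughly like this sketch (not part of this diff):

import tvm

ctx = tvm.cpu(0)     # CPU/GPU contexts need no explicit device init
tvm.init_opencl()    # OpenCL is the one backend that requires it
ocl = tvm.opencl(0)  # usable once the build has TVM_OPENCL_RUNTIME=1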
src/runtime/device_api_gpu.h
 /*!
  * Copyright (c) 2016 by Contributors
- * \file ctxice_api_gpu.h
+ * \file device_api_gpu.h
  * \brief GPU specific API
  */
 #ifndef TVM_RUNTIME_DEVICE_API_GPU_H_
@@ -14,15 +14,6 @@
 namespace tvm {
 namespace runtime {
-/*!
- * \brief Check CUDA error.
- * \param msg Message to print if an error occured.
- */
-#define CHECK_CUDA_ERROR(msg) \
-  { \
-    cudaError_t e = cudaGetLastError(); \
-    CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \
-  }
 /*!
  * \brief Protected CUDA call.
src/runtime/device_api_opencl.h (new file, mode 100644)
/*!
 * Copyright (c) 2016 by Contributors
 * \file device_api_opencl.h
 * \brief OpenCL specific API
 */
#ifndef TVM_RUNTIME_DEVICE_API_OPENCL_H_
#define TVM_RUNTIME_DEVICE_API_OPENCL_H_

#if TVM_OPENCL_RUNTIME

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/opencl.h>
#endif

#include <mutex>
#include <string>
#include <vector>

namespace tvm {
namespace runtime {
namespace cl {

static_assert(sizeof(cl_mem) == sizeof(void*),
              "Required to store cl_mem inside void*");

inline const char* CLGetErrorString(cl_int error) {
  switch (error) {
    case CL_SUCCESS: return "CL_SUCCESS";
    case CL_DEVICE_NOT_FOUND: return "CL_DEVICE_NOT_FOUND";
    case CL_DEVICE_NOT_AVAILABLE: return "CL_DEVICE_NOT_AVAILABLE";
    case CL_COMPILER_NOT_AVAILABLE: return "CL_COMPILER_NOT_AVAILABLE";
    case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
    case CL_OUT_OF_RESOURCES: return "CL_OUT_OF_RESOURCES";
    case CL_OUT_OF_HOST_MEMORY: return "CL_OUT_OF_HOST_MEMORY";
    case CL_PROFILING_INFO_NOT_AVAILABLE: return "CL_PROFILING_INFO_NOT_AVAILABLE";
    case CL_MEM_COPY_OVERLAP: return "CL_MEM_COPY_OVERLAP";
    case CL_IMAGE_FORMAT_MISMATCH: return "CL_IMAGE_FORMAT_MISMATCH";
    case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
    case CL_BUILD_PROGRAM_FAILURE: return "CL_BUILD_PROGRAM_FAILURE";
    case CL_MAP_FAILURE: return "CL_MAP_FAILURE";
    case CL_INVALID_VALUE: return "CL_INVALID_VALUE";
    case CL_INVALID_DEVICE_TYPE: return "CL_INVALID_DEVICE_TYPE";
    case CL_INVALID_PLATFORM: return "CL_INVALID_PLATFORM";
    case CL_INVALID_DEVICE: return "CL_INVALID_DEVICE";
    case CL_INVALID_CONTEXT: return "CL_INVALID_CONTEXT";
    case CL_INVALID_QUEUE_PROPERTIES: return "CL_INVALID_QUEUE_PROPERTIES";
    case CL_INVALID_COMMAND_QUEUE: return "CL_INVALID_COMMAND_QUEUE";
    case CL_INVALID_HOST_PTR: return "CL_INVALID_HOST_PTR";
    case CL_INVALID_MEM_OBJECT: return "CL_INVALID_MEM_OBJECT";
    case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
    case CL_INVALID_IMAGE_SIZE: return "CL_INVALID_IMAGE_SIZE";
    case CL_INVALID_SAMPLER: return "CL_INVALID_SAMPLER";
    case CL_INVALID_BINARY: return "CL_INVALID_BINARY";
    case CL_INVALID_BUILD_OPTIONS: return "CL_INVALID_BUILD_OPTIONS";
    case CL_INVALID_PROGRAM: return "CL_INVALID_PROGRAM";
    case CL_INVALID_PROGRAM_EXECUTABLE: return "CL_INVALID_PROGRAM_EXECUTABLE";
    case CL_INVALID_KERNEL_NAME: return "CL_INVALID_KERNEL_NAME";
    case CL_INVALID_KERNEL_DEFINITION: return "CL_INVALID_KERNEL_DEFINITION";
    case CL_INVALID_KERNEL: return "CL_INVALID_KERNEL";
    case CL_INVALID_ARG_INDEX: return "CL_INVALID_ARG_INDEX";
    case CL_INVALID_ARG_VALUE: return "CL_INVALID_ARG_VALUE";
    case CL_INVALID_ARG_SIZE: return "CL_INVALID_ARG_SIZE";
    case CL_INVALID_KERNEL_ARGS: return "CL_INVALID_KERNEL_ARGS";
    case CL_INVALID_WORK_DIMENSION: return "CL_INVALID_WORK_DIMENSION";
    case CL_INVALID_WORK_GROUP_SIZE: return "CL_INVALID_WORK_GROUP_SIZE";
    case CL_INVALID_WORK_ITEM_SIZE: return "CL_INVALID_WORK_ITEM_SIZE";
    case CL_INVALID_GLOBAL_OFFSET: return "CL_INVALID_GLOBAL_OFFSET";
    case CL_INVALID_EVENT_WAIT_LIST: return "CL_INVALID_EVENT_WAIT_LIST";
    case CL_INVALID_EVENT: return "CL_INVALID_EVENT";
    case CL_INVALID_OPERATION: return "CL_INVALID_OPERATION";
    case CL_INVALID_GL_OBJECT: return "CL_INVALID_GL_OBJECT";
    case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE";
    case CL_INVALID_MIP_LEVEL: return "CL_INVALID_MIP_LEVEL";
    default: return "Unknown OpenCL error code";
  }
}

/*!
 * \brief Protected OpenCL call
 * \param func Expression to call.
 */
#define OPENCL_CHECK_ERROR(e) \
  { \
    CHECK(e == CL_SUCCESS) \
        << "OpenCL Error, code=" << e << ": " << cl::CLGetErrorString(e); \
  }

#define OPENCL_CALL(func) \
  { \
    cl_int e = (func); \
    OPENCL_CHECK_ERROR(e); \
  }

// Process local opencl workspace
class OpenCLWorkspace {
 public:
  // global platform id
  cl_platform_id platform_id;
  // global context of this process
  cl_context context{nullptr};
  // the devices
  std::vector<cl_device_id> devices;
  // the queues
  std::vector<cl_command_queue> queues;
  // the mutex for initialization
  std::mutex mu;
  // destructor
  ~OpenCLWorkspace() {
    if (context != nullptr) {
      OPENCL_CALL(clReleaseContext(context));
    }
  }
  // whether the workspace is initialized.
  inline bool initialized() const {
    return context != nullptr;
  }
  // get the queue of the context
  cl_command_queue GetQueue(TVMContext ctx) const {
    CHECK_EQ(ctx.dev_mask, kOpenCL);
    CHECK(initialized()) << "The OpenCL is not initialized";
    CHECK(ctx.dev_id >= 0 && static_cast<size_t>(ctx.dev_id) < queues.size())
        << "Invalid OpenCL dev_id=" << ctx.dev_id;
    return queues[ctx.dev_id];
  }
  // get the global workspace
  static OpenCLWorkspace* Global() {
    static OpenCLWorkspace inst;
    return &inst;
  }
};

inline std::string GetPlatformInfo(
    cl_platform_id pid, cl_platform_info param_name) {
  size_t ret_size;
  OPENCL_CALL(clGetPlatformInfo(pid, param_name, 0, nullptr, &ret_size));
  std::string ret;
  ret.resize(ret_size);
  OPENCL_CALL(clGetPlatformInfo(pid, param_name, ret_size, &ret[0], nullptr));
  return ret;
}

inline std::string GetDeviceInfo(
    cl_device_id pid, cl_device_info param_name) {
  size_t ret_size;
  OPENCL_CALL(clGetDeviceInfo(pid, param_name, 0, nullptr, &ret_size));
  std::string ret;
  ret.resize(ret_size);
  OPENCL_CALL(clGetDeviceInfo(pid, param_name, ret_size, &ret[0], nullptr));
  return ret;
}

inline std::vector<cl_platform_id> GetPlatformIDs() {
  cl_uint ret_size;
  OPENCL_CALL(clGetPlatformIDs(0, nullptr, &ret_size));
  std::vector<cl_platform_id> ret;
  ret.resize(ret_size);
  OPENCL_CALL(clGetPlatformIDs(ret_size, &ret[0], nullptr));
  return ret;
}

inline std::vector<cl_device_id> GetDeviceIDs(
    cl_platform_id pid, std::string device_type) {
  cl_device_type dtype = CL_DEVICE_TYPE_ALL;
  if (device_type == "cpu") dtype = CL_DEVICE_TYPE_CPU;
  if (device_type == "gpu") dtype = CL_DEVICE_TYPE_CPU;
  if (device_type == "accelerator") dtype = CL_DEVICE_TYPE_ACCELERATOR;
  cl_uint ret_size;
  OPENCL_CALL(clGetDeviceIDs(pid, dtype, 0, nullptr, &ret_size));
  std::vector<cl_device_id> ret;
  ret.resize(ret_size);
  OPENCL_CALL(clGetDeviceIDs(pid, dtype, ret_size, &ret[0], nullptr));
  return ret;
}

inline bool MatchPlatformInfo(
    cl_platform_id pid,
    cl_platform_info param_name,
    std::string value) {
  if (value.length() == 0) return true;
  std::string param_value = GetPlatformInfo(pid, param_name);
  return param_value.find(value) != std::string::npos;
}

}  // namespace cl

template<>
inline bool DeviceInit<kOpenCL>(const char** option_keys,
                                const char** option_vals,
                                int num_options) {
  cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global();
  std::lock_guard<std::mutex>(w->mu);
  if (w->initialized()) return false;
  // matching conditions
  std::string platform_name, device_type;
  for (int i = 0; i < num_options; ++i) {
    std::string key = option_keys[i];
    std::string val = option_vals[i];
    if (key == "platform_name") {
      platform_name = val;
    } else if (key == "device_type") {
      device_type = val;
    } else {
      LOG(FATAL) << "unknown DeviceInit option " << key;
    }
  }
  // matched platforms
  std::vector<cl_platform_id> platform_matched;
  for (cl_platform_id pid : cl::GetPlatformIDs()) {
    bool matched = true;
    if (!cl::MatchPlatformInfo(pid, CL_PLATFORM_NAME, platform_name)) matched = false;
    if (matched) platform_matched.push_back(pid);
  }
  if (platform_matched.size() == 0) {
    LOG(FATAL) << "No OpenCL platform matched given existing options ...";
  }
  if (platform_matched.size() > 1) {
    LOG(WARNING) << "Multiple OpenCL platforms matched, use the first one ... ";
  }
  w->platform_id = platform_matched[0];
  LOG(INFO) << "Initialize OpenCL platform \'"
            << cl::GetPlatformInfo(w->platform_id, CL_PLATFORM_NAME) << '\'';
  std::vector<cl_device_id> devices_matched =
      cl::GetDeviceIDs(w->platform_id, device_type);
  CHECK_GT(devices_matched.size(), 0U)
      << "No OpenCL device any device matched given the options";
  w->devices = devices_matched;
  cl_int err_code;
  w->context = clCreateContext(
      nullptr, w->devices.size(), &(w->devices[0]),
      nullptr, nullptr, &err_code);
  OPENCL_CHECK_ERROR(err_code);
  CHECK_EQ(w->queues.size(), 0U);
  for (size_t i = 0; i < w->devices.size(); ++i) {
    cl_device_id did = w->devices[i];
    w->queues.push_back(
        clCreateCommandQueue(w->context, did, 0, &err_code));
    OPENCL_CHECK_ERROR(err_code);
    LOG(INFO) << "opencl(" << i << ")=\'"
              << cl::GetDeviceInfo(did, CL_DEVICE_NAME)
              << "\' cl_device_id=" << did;
  }
  return true;
}

template<>
inline void* AllocDataSpace<kOpenCL>(TVMContext ctx, size_t size, size_t alignment) {
  cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global();
  cl_int err_code;
  cl_mem mptr = clCreateBuffer(
      w->context, CL_MEM_READ_WRITE, size, nullptr, &err_code);
  OPENCL_CHECK_ERROR(err_code);
  return mptr;
}

template<>
inline void FreeDataSpace<kOpenCL>(TVMContext ctx, void* ptr) {
  cl_mem mptr = static_cast<cl_mem>(ptr);
  OPENCL_CALL(clReleaseMemObject(mptr));
}

template<>
inline void CopyDataFromTo<kOpenCL>(const void* from,
                                    void* to,
                                    size_t size,
                                    TVMContext ctx_from,
                                    TVMContext ctx_to,
                                    TVMStreamHandle stream) {
  CHECK(stream == nullptr);
  cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global();
  if (ctx_from.dev_mask == kOpenCL && ctx_to.dev_mask == kOpenCL) {
    OPENCL_CALL(clEnqueueCopyBuffer(
        w->GetQueue(ctx_to),
        static_cast<cl_mem>((void*)from),  // NOLINT(*)
        static_cast<cl_mem>(to),
        0, 0, size, 0, nullptr, nullptr));
  } else if (ctx_from.dev_mask == kOpenCL && ctx_to.dev_mask == kCPU) {
    OPENCL_CALL(clEnqueueReadBuffer(
        w->GetQueue(ctx_from),
        static_cast<cl_mem>((void*)from),  // NOLINT(*)
        CL_FALSE, 0, size, to,
        0, nullptr, nullptr));
    OPENCL_CALL(clFinish(w->GetQueue(ctx_from)));
  } else if (ctx_from.dev_mask == kCPU && ctx_to.dev_mask == kOpenCL) {
    OPENCL_CALL(clEnqueueWriteBuffer(
        w->GetQueue(ctx_to),
        static_cast<cl_mem>(to),
        CL_FALSE, 0, size, from,
        0, nullptr, nullptr));
    OPENCL_CALL(clFinish(w->GetQueue(ctx_to)));
  } else {
    LOG(FATAL) << "Expect copy from/to GPU or between GPU";
  }
}

template<>
inline void StreamSync<kOpenCL>(TVMContext ctx, TVMStreamHandle stream) {
  CHECK(stream == nullptr);
  cl::OpenCLWorkspace* w = cl::OpenCLWorkspace::Global();
  OPENCL_CALL(clFinish(w->GetQueue(ctx)));
}

}  // namespace runtime
}  // namespace tvm
#endif  // TVM_OPENCL_RUNTIME
#endif  // TVM_RUNTIME_DEVICE_API_OPENCL_H_
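DeviceInit<kOpenCL> recognizes exactly two option keys: platform_name, matched as a substring of CL_PLATFORM_NAME, and device_type, mapped to an OpenCL device class ("cpu", "gpu", or "accelerator"; anything else enumerates all device types). A hedged sketch of passing them through the Python wrapper (the option values are placeholders, not part of this diff):

import tvm

# Pick the first platform whose name contains "Intel" and create one command
# queue per matched device; a second call would return False (already set up).
tvm.init_opencl(platform_name="Intel", device_type="cpu")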
tests/python/test_runtime_ndarray.py
@@ -2,6 +2,7 @@ import tvm
 import numpy as np

 def enabled_ctx_list():
+    tvm.init_opencl()
     ctx_list = [tvm.cpu(0), tvm.gpu(0), tvm.opencl(0)]
     ctx_list = [ctx for ctx in ctx_list if ctx.enabled]
     return ctx_list
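The pattern in this test carries over to user code: initialize OpenCL up front, then only touch contexts that report enabled for the current build. The array round-trip below assumes the usual tvm.nd.empty / copyfrom / asnumpy helpers behave here as in later TVM releases, so treat it as a sketch rather than part of this commit.

import numpy as np
import tvm

tvm.init_opencl()
for ctx in [tvm.cpu(0), tvm.gpu(0), tvm.opencl(0)]:
    if not ctx.enabled:
        continue
    x = np.random.uniform(size=(32,)).astype("float32")
    y = tvm.nd.empty(x.shape, x.dtype, ctx)     # assumed signature
    y.copyfrom(x)                               # host -> device copy
    np.testing.assert_allclose(x, y.asnumpy())  # device -> host copy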
tests/travis/run_test.sh
@@ -16,13 +16,15 @@ fi
 cp make/config.mk config.mk
 echo "USE_CUDA=0" >> config.mk
-echo "USE_OPENCL=0" >> config.mk

-if [ ! ${TRAVIS_OS_NAME} == "osx" ]; then
+if [ ${TRAVIS_OS_NAME} == "osx" ]; then
+    echo "USE_OPENCL=1" >> config.mk
+else
     # use g++-4.8 for linux
     if [ ${CXX} == "g++" ]; then
         export CXX=g++-4.8
     fi
+    echo "USE_OPENCL=0" >> config.mk
 fi

 if [ ${TASK} == "cpp_test" ] || [ ${TASK} == "all_test" ]; then