Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
c468558e
Commit
c468558e
authored
Sep 25, 2017
by
Tianqi Chen
Committed by
GitHub
Sep 25, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[CUDA] auto detect compatibility when arch is not passed (#490)
parent
c6a20452
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
61 additions
and
31 deletions
+61
-31
include/tvm/runtime/device_api.h
+2
-1
python/tvm/_ffi/runtime_ctypes.py
+14
-0
python/tvm/contrib/cc.py
+1
-4
python/tvm/contrib/nvcc.py
+22
-18
src/runtime/cuda/cuda_device_api.cc
+11
-0
src/runtime/metal/metal_device_api.mm
+1
-0
src/runtime/opencl/opencl_device_api.cc
+1
-0
src/runtime/rocm/rocm_device_api.cc
+1
-0
topi/python/topi/transform.py
+1
-1
topi/recipe/broadcast/test_broadcast_map.py
+1
-1
topi/recipe/conv/depthwise_conv2d_test.py
+1
-1
topi/recipe/conv/test_conv2d_hwcn_map.py
+1
-1
topi/recipe/gemm/cuda_gemm_square.py
+1
-1
topi/recipe/reduce/test_reduce_map.py
+1
-1
topi/recipe/rnn/lstm.py
+1
-1
topi/recipe/rnn/matexp.py
+1
-1
No files found.
include/tvm/runtime/device_api.h
View file @
c468558e
...
...
@@ -18,7 +18,8 @@ namespace runtime {
enum
DeviceAttrKind
:
int
{
kExist
=
0
,
kMaxThreadsPerBlock
=
1
,
kWarpSize
=
2
kWarpSize
=
2
,
kComputeVersion
=
3
};
/*! \brief Number of bytes each allocation must align to */
...
...
python/tvm/_ffi/runtime_ctypes.py
View file @
c468558e
...
...
@@ -131,6 +131,20 @@ class TVMContext(ctypes.Structure):
return
_api_internal
.
_GetDeviceAttr
(
self
.
device_type
,
self
.
device_id
,
2
)
@property
def
compute_version
(
self
):
"""Get compute version number in string.
Currently used to get compute capability of CUDA device.
Returns
-------
version : str
The version string in `major.minor` format.
"""
return
_api_internal
.
_GetDeviceAttr
(
self
.
device_type
,
self
.
device_id
,
3
)
def
sync
(
self
):
"""Synchronize until jobs finished at the context."""
check_call
(
_LIB
.
TVMSynchronize
(
self
.
device_type
,
self
.
device_id
,
None
))
...
...
python/tvm/contrib/cc.py
View file @
c468558e
...
...
@@ -39,11 +39,8 @@ def create_shared(output,
if
options
:
cmd
+=
options
args
=
' '
.
join
(
cmd
)
proc
=
subprocess
.
Popen
(
args
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
STDOUT
)
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
STDOUT
)
(
out
,
_
)
=
proc
.
communicate
()
if
proc
.
returncode
!=
0
:
...
...
python/tvm/contrib/nvcc.py
View file @
c468558e
# pylint: disable=invalid-name
"""Utility to invoke nvcc compiler in the system"""
from
__future__
import
absolute_import
as
_abs
import
sys
import
subprocess
from
.
import
util
from
..
import
ndarray
as
nd
def
compile_cuda
(
code
,
target
=
"ptx"
,
arch
=
None
,
options
=
None
,
path_target
=
None
):
def
compile_cuda
(
code
,
target
=
"ptx"
,
arch
=
None
,
options
=
None
,
path_target
=
None
):
"""Compile cuda code with NVCC from env.
Parameters
...
...
@@ -39,32 +43,32 @@ def compile_cuda(code, target="ptx", arch=None,
with
open
(
temp_code
,
"w"
)
as
out_file
:
out_file
.
write
(
code
)
if
target
==
"cubin"
and
arch
is
None
:
raise
ValueError
(
"arch(sm_xy) must be passed for generating cubin"
)
if
arch
is
None
:
if
nd
.
gpu
(
0
)
.
exist
:
# auto detect the compute arch argument
arch
=
"sm_"
+
""
.
join
(
nd
.
gpu
(
0
)
.
compute_version
.
split
(
'.'
))
else
:
raise
ValueError
(
"arch(sm_xy) is not passed, and we cannot detect it from env"
)
file_target
=
path_target
if
path_target
else
temp_target
cmd
=
[
"nvcc"
]
cmd
+=
[
"--
%
s"
%
target
,
"-O3"
]
if
arch
:
cmd
+=
[
"-arch"
,
arch
]
cmd
+=
[
"-arch"
,
arch
]
cmd
+=
[
"-o"
,
file_target
]
if
options
:
cmd
+=
options
cmd
+=
[
temp_code
]
args
=
' '
.
join
(
cmd
)
proc
=
subprocess
.
Popen
(
args
,
shell
=
True
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
STDOUT
)
cmd
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
STDOUT
)
(
out
,
_
)
=
proc
.
communicate
()
if
proc
.
returncode
!=
0
:
sys
.
stderr
.
write
(
"Compilation error:
\n
"
)
sys
.
stderr
.
write
(
str
(
out
))
sys
.
stderr
.
flush
()
cubin
=
None
else
:
cubin
=
bytearray
(
open
(
file_target
,
"rb"
)
.
read
())
return
cubin
msg
=
"Compilation error:
\n
"
msg
+=
out
raise
RuntimeError
(
msg
)
return
bytearray
(
open
(
file_target
,
"rb"
)
.
read
())
src/runtime/cuda/cuda_device_api.cc
View file @
c468558e
...
...
@@ -40,6 +40,17 @@ class CUDADeviceAPI final : public DeviceAPI {
&
value
,
cudaDevAttrWarpSize
,
ctx
.
device_id
));
break
;
}
case
kComputeVersion
:
{
std
::
ostringstream
os
;
CUDA_CALL
(
cudaDeviceGetAttribute
(
&
value
,
cudaDevAttrComputeCapabilityMajor
,
ctx
.
device_id
));
os
<<
value
<<
"."
;
CUDA_CALL
(
cudaDeviceGetAttribute
(
&
value
,
cudaDevAttrComputeCapabilityMinor
,
ctx
.
device_id
));
os
<<
value
;
*
rv
=
os
.
str
();
return
;
}
}
*
rv
=
value
;
}
...
...
src/runtime/metal/metal_device_api.mm
View file @
c468558e
...
...
@@ -39,6 +39,7 @@ void MetalWorkspace::GetAttr(
*rv = 1;
break;
}
case kComputeVersion: return;
case kExist: break;
}
}
...
...
src/runtime/opencl/opencl_device_api.cc
View file @
c468558e
...
...
@@ -45,6 +45,7 @@ void OpenCLWorkspace::GetAttr(
*
rv
=
1
;
break
;
}
case
kComputeVersion
:
return
;
case
kExist
:
break
;
}
}
...
...
src/runtime/rocm/rocm_device_api.cc
View file @
c468558e
...
...
@@ -44,6 +44,7 @@ class ROCMDeviceAPI final : public DeviceAPI {
value
=
64
;
break
;
}
case
kComputeVersion
:
return
;
}
*
rv
=
value
;
}
...
...
topi/python/topi/transform.py
View file @
c468558e
...
...
@@ -143,7 +143,7 @@ def split(ary, indices_or_sections, axis=0):
begin_ids
=
[
seg_size
*
i
for
i
in
range
(
indices_or_sections
)]
elif
isinstance
(
indices_or_sections
,
(
tuple
,
list
)):
assert
tuple
(
indices_or_sections
)
==
tuple
(
sorted
(
indices_or_sections
)),
\
"Should be sorted, received
%
s"
%
str
(
indices_or_sections
)
"Should be sorted, received
%
s"
%
str
(
indices_or_sections
)
begin_ids
=
[
0
]
+
list
(
indices_or_sections
)
else
:
raise
NotImplementedError
...
...
topi/recipe/broadcast/test_broadcast_map.py
View file @
c468558e
...
...
@@ -12,7 +12,7 @@ USE_MANUAL_CODE = False
@tvm.register_func
def
tvm_callback_cuda_compile
(
code
):
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
,
options
=
[
"-arch=sm_52"
]
)
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
)
return
ptx
...
...
topi/recipe/conv/depthwise_conv2d_test.py
View file @
c468558e
...
...
@@ -13,7 +13,7 @@ USE_MANUAL_CODE = False
@tvm.register_func
def
tvm_callback_cuda_compile
(
code
):
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
,
options
=
[
"-arch=sm_37"
])
# 37 for k80(ec2 instance
)
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
)
return
ptx
def
write_code
(
code
,
fname
):
...
...
topi/recipe/conv/test_conv2d_hwcn_map.py
View file @
c468558e
...
...
@@ -12,7 +12,7 @@ USE_MANUAL_CODE = False
@tvm.register_func
def
tvm_callback_cuda_compile
(
code
):
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
,
options
=
[
"-arch=sm_37"
]
)
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
)
return
ptx
def
write_code
(
code
,
fname
):
...
...
topi/recipe/gemm/cuda_gemm_square.py
View file @
c468558e
...
...
@@ -9,7 +9,7 @@ USE_MANUAL_CODE = False
@tvm.register_func
def
tvm_callback_cuda_compile
(
code
):
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
,
options
=
[
"-arch=sm_52"
]
)
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
)
return
ptx
def
write_code
(
code
,
fname
):
...
...
topi/recipe/reduce/test_reduce_map.py
View file @
c468558e
...
...
@@ -12,7 +12,7 @@ USE_MANUAL_CODE = False
@tvm.register_func
def
tvm_callback_cuda_compile
(
code
):
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
,
options
=
[
"-arch=sm_52"
]
)
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
)
return
ptx
...
...
topi/recipe/rnn/lstm.py
View file @
c468558e
...
...
@@ -17,7 +17,7 @@ UNROLL_WLOAD = True
@tvm.register_func
def
tvm_callback_cuda_compile
(
code
):
"""Use nvcc compiler for better perf."""
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
,
options
=
[
"-arch=sm_52"
]
)
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
)
return
ptx
def
write_code
(
code
,
fname
):
...
...
topi/recipe/rnn/matexp.py
View file @
c468558e
...
...
@@ -24,7 +24,7 @@ SKIP_CHECK = False
@tvm.register_func
def
tvm_callback_cuda_compile
(
code
):
"""Use nvcc compiler for better perf."""
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
,
options
=
[
"-arch=sm_52"
]
)
ptx
=
nvcc
.
compile_cuda
(
code
,
target
=
"ptx"
)
return
ptx
def
write_code
(
code
,
fname
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment