Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
fab4f9cc
Commit
fab4f9cc
authored
Jul 04, 2018
by
masahi
Committed by
Tianqi Chen
Jul 03, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[NVPTX] libdevice support, enable NVPTX backend in topi tests (#1365)
parent
211ab978
Show whitespace changes
Inline
Side-by-side
Showing
22 changed files
with
121 additions
and
32 deletions
+121
-32
apps/benchmark/gpu_imagenet_bench.py
+1
-1
src/codegen/llvm/codegen_nvptx.cc
+15
-2
src/codegen/llvm/intrin_rule_nvptx.cc
+64
-0
src/codegen/llvm/intrin_rule_rocm.cc
+1
-1
topi/python/topi/cuda/depthwise_conv2d.py
+1
-1
topi/tests/python/test_topi_broadcast.py
+2
-0
topi/tests/python/test_topi_conv2d_hwcn.py
+1
-1
topi/tests/python/test_topi_conv2d_nchw.py
+3
-2
topi/tests/python/test_topi_conv2d_transpose_nchw.py
+1
-1
topi/tests/python/test_topi_dense.py
+1
-1
topi/tests/python/test_topi_depthwise_conv2d.py
+2
-0
topi/tests/python/test_topi_depthwise_conv2d_back_input.py
+1
-0
topi/tests/python/test_topi_depthwise_conv2d_back_weight.py
+1
-0
topi/tests/python/test_topi_l2norm.py
+1
-1
topi/tests/python/test_topi_lrn.py
+1
-1
topi/tests/python/test_topi_math.py
+1
-1
topi/tests/python/test_topi_pooling.py
+2
-2
topi/tests/python/test_topi_reduce.py
+11
-7
topi/tests/python/test_topi_relu.py
+1
-1
topi/tests/python/test_topi_resize.py
+1
-1
topi/tests/python/test_topi_softmax.py
+8
-7
topi/tests/python/test_topi_upsampling.py
+1
-1
No files found.
apps/benchmark/gpu_imagenet_bench.py
View file @
fab4f9cc
...
...
@@ -25,7 +25,7 @@ def main():
choices
=
[
'resnet'
,
'mobilenet'
],
help
=
"The model type."
)
parser
.
add_argument
(
'--target'
,
type
=
str
,
required
=
True
,
choices
=
[
'cuda'
,
'rocm'
,
'opencl'
,
'metal'
],
choices
=
[
'cuda'
,
'rocm'
,
'opencl'
,
'metal'
,
'nvptx'
],
help
=
"Compilation target."
)
parser
.
add_argument
(
'--opt-level'
,
type
=
int
,
default
=
1
,
help
=
"Level of optimization."
)
parser
.
add_argument
(
'--num-iter'
,
type
=
int
,
default
=
1000
,
help
=
"Number of iteration during benchmark."
)
...
...
src/codegen/llvm/codegen_nvptx.cc
View file @
fab4f9cc
...
...
@@ -121,6 +121,20 @@ class CodeGenNVPTX : public CodeGenLLVM {
// Additional optimization hook to tweak the builder.
}
/*!
 * \brief Adjust linkage of "__nv"-prefixed functions, then run the base optimizer.
 *
 * Walks every function in module_. A function whose name starts with "__nv",
 * that has a body, and that is not marked NoInline gets its linkage set to
 * AvailableExternallyLinkage. Afterwards CodeGenLLVM::Optimize() is invoked.
 */
void Optimize() final {
  for (auto& func : *module_) {
    const std::string name = static_cast<std::string>(func.getName());
    // Functions without the "__nv" prefix are left untouched.
    if (name.compare(0, 4, "__nv") != 0) {
      continue;
    }
    // This is to strip off unused __nv_* functions from the final module
    // The one that is actually used will be inlined at call site
    // Adapted from Halide's runtime linker
    if (func.isDeclaration() ||
        func.hasFnAttribute(llvm::Attribute::NoInline)) {
      // Bodiless declarations and NoInline functions keep their linkage.
      continue;
    }
    func.setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
  }
  // Hand over to the shared optimization pipeline of the base class.
  CodeGenLLVM::Optimize();
}
protected
:
void
InitTarget
(
llvm
::
TargetMachine
*
tm
)
final
{
// Maximum vector lane = float4
...
...
@@ -179,8 +193,7 @@ runtime::Module BuildNVPTX(Array<LoweredFunc> funcs, std::string target) {
}
mlib
->
setTargetTriple
(
tm
->
getTargetTriple
().
str
());
mlib
->
setDataLayout
(
tm
->
createDataLayout
());
// TODO(tqchen) libdevice linking not yet working.
// cg->AddLinkModule(std::move(mlib));
cg
->
AddLinkModule
(
std
::
move
(
mlib
));
}
}
std
::
unique_ptr
<
llvm
::
Module
>
module
=
cg
->
Finish
();
...
...
src/codegen/llvm/intrin_rule_nvptx.cc
0 → 100644
View file @
fab4f9cc
/*!
* Copyright (c) 2017 by Contributors
* \file intrin_rule_nvptx.cc
*/
#ifdef TVM_LLVM_VERSION
#include <tvm/ir.h>
#include <tvm/expr.h>
#include <tvm/api_registry.h>
#include <sstream>
namespace
tvm
{
namespace
codegen
{
/*!
 * \brief Rewrite an intrinsic call into a pure extern call to a "__nv_" function.
 *
 * The target symbol is "__nv_" followed by the name of the incoming call; an
 * "f" suffix is appended when the call's type is 32 bits wide. The rewritten
 * call keeps the original type and arguments.
 *
 * \param args Packed arguments; args[0] must be an Expr that holds a Call node.
 * \param rv Receives the rewritten Call::PureExtern expression.
 */
inline void DispatchExternLibDevice(const TVMArgs& args, TVMRetValue* rv) {
  Expr expr = args[0];
  using namespace ir;
  const Call* call = expr.as<Call>();
  CHECK(call != nullptr);
  // Only the bit width of the type is validated here.
  CHECK(call->type.bits() == 32 || call->type.bits() == 64)
      << "Only support float32 or float64.";

  // 32-bit variants carry an "f" suffix; 64-bit variants use the bare name.
  const char* suffix = (call->type.bits() == 32) ? "f" : "";
  std::ostringstream symbol;
  symbol << "__nv_" << call->name << suffix;

  *rv = Call::make(call->type, symbol.str(), call->args, Call::PureExtern);
}
namespace llvm {

// Lowering rules for the nvptx target. Every intrinsic listed here is
// registered under "tvm.intrin.rule.nvptx.<op>" and is handled by
// DispatchExternLibDevice.
TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.floor")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.ceil")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.round")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.trunc")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.exp")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.fma")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.log")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.sqrt")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.pow")
.set_body(DispatchExternLibDevice);

TVM_REGISTER_GLOBAL("tvm.intrin.rule.nvptx.tanh")
.set_body(DispatchExternLibDevice);

}  // namespace llvm
}
// namespace codegen
}
// namespace tvm
#endif  // TVM_LLVM_VERSION
src/codegen/llvm/intrin_rule_rocm.cc
View file @
fab4f9cc
/*!
* Copyright (c) 2017 by Contributors
* \file intrin_rule_
llv
m.cc
* \file intrin_rule_
roc
m.cc
*/
#ifdef TVM_LLVM_VERSION
...
...
topi/python/topi/cuda/depthwise_conv2d.py
View file @
fab4f9cc
...
...
@@ -158,7 +158,7 @@ def schedule_depthwise_conv2d_nhwc(outs):
# num_thread here could be 728, it is larger than cuda.max_num_threads
num_thread
=
tvm
.
ir_pass
.
Simplify
(
temp
.
shape
[
3
])
.
value
target
=
tvm
.
target
.
current_target
()
if
target
and
target
.
target_name
!=
"cuda"
:
if
target
and
(
target
.
target_name
not
in
[
"cuda"
,
"nvptx"
])
:
num_thread
=
target
.
max_num_threads
xoc
,
xic
=
s
[
Output
]
.
split
(
c
,
factor
=
num_thread
)
s
[
Output
]
.
reorder
(
xoc
,
b
,
h
,
w
,
xic
)
...
...
topi/tests/python/test_topi_broadcast.py
View file @
fab4f9cc
...
...
@@ -30,6 +30,7 @@ def verify_broadcast_to_ele(in_shape, out_shape, fbcast):
check_device
(
"cuda"
)
check_device
(
"metal"
)
check_device
(
"rocm"
)
check_device
(
"nvptx"
)
def
verify_broadcast_binary_ele
(
lhs_shape
,
rhs_shape
,
...
...
@@ -85,6 +86,7 @@ def verify_broadcast_binary_ele(lhs_shape, rhs_shape,
check_device
(
"cuda"
)
check_device
(
"metal"
)
check_device
(
"rocm"
)
check_device
(
"nvptx"
)
def
test_broadcast_to
():
verify_broadcast_to_ele
((
1
,),
(
10
,),
topi
.
broadcast_to
)
...
...
topi/tests/python/test_topi_conv2d_hwcn.py
View file @
fab4f9cc
...
...
@@ -52,7 +52,7 @@ def verify_conv2d_hwcn(batch, in_channel, in_size, num_filter, kernel, stride, p
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
np
.
testing
.
assert_allclose
(
c
.
asnumpy
(),
c_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
...
...
topi/tests/python/test_topi_conv2d_nchw.py
View file @
fab4f9cc
...
...
@@ -44,8 +44,9 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
w
=
tvm
.
nd
.
array
(
w_np
,
ctx
)
b
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
B
.
shape
),
dtype
=
B
.
dtype
),
ctx
)
c
=
tvm
.
nd
.
array
(
np
.
zeros
(
get_const_tuple
(
C
.
shape
),
dtype
=
C
.
dtype
),
ctx
)
no_unroll_explicit
=
device
in
[
"cuda"
,
"nvptx"
,
"rocm"
]
with
tvm
.
build_config
(
auto_unroll_max_step
=
1400
,
unroll_explicit
=
(
device
!=
"cuda"
)
):
unroll_explicit
=
not
no_unroll_explicit
):
func1
=
tvm
.
build
(
s1
,
[
A
,
W
,
B
],
device
,
name
=
"conv2d_
%
d_
%
d_
%
d_
%
d_
%
d_
%
d_
%
d_
%
d"
%
(
batch
,
in_channel
,
in_size
,
num_filter
,
kernel
,
stride
,
padding
,
dilation
))
func2
=
tvm
.
build
(
s2
,
[
A
,
W
,
C
],
device
,
name
=
"relu_
%
d_
%
d_
%
d_
%
d_
%
d_
%
d_
%
d_
%
d"
%
(
batch
,
in_channel
,
in_size
,
num_filter
,
kernel
,
stride
,
padding
,
dilation
))
func1
(
a
,
w
,
b
)
...
...
@@ -53,7 +54,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
np
.
testing
.
assert_allclose
(
c
.
asnumpy
(),
c_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
...
...
topi/tests/python/test_topi_conv2d_transpose_nchw.py
View file @
fab4f9cc
...
...
@@ -51,7 +51,7 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
np
.
testing
.
assert_allclose
(
c
.
asnumpy
(),
c_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
...
...
topi/tests/python/test_topi_dense.py
View file @
fab4f9cc
...
...
@@ -45,7 +45,7 @@ def verify_dense(batch, in_dim, out_dim, use_bias=True):
f
(
a
,
b
,
c
,
d
)
np
.
testing
.
assert_allclose
(
d
.
asnumpy
(),
d_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_dense
():
...
...
topi/tests/python/test_topi_depthwise_conv2d.py
View file @
fab4f9cc
...
...
@@ -93,6 +93,7 @@ def depthwise_conv2d_with_workload_nchw(batch, in_channel, in_height, channel_mu
check_device
(
"metal"
)
check_device
(
"rocm"
)
check_device
(
"vulkan"
)
check_device
(
"nvptx"
)
def
depthwise_conv2d_with_workload_nhwc
(
batch
,
in_channel
,
in_height
,
channel_multiplier
,
filter_height
,
stride_h
,
padding
,
dilation
=
1
):
...
...
@@ -184,6 +185,7 @@ def depthwise_conv2d_with_workload_nhwc(batch, in_channel, in_height, channel_mu
check_device
(
"metal"
)
check_device
(
"rocm"
)
check_device
(
"vulkan"
)
check_device
(
"nvptx"
)
def
test_depthwise_conv2d
():
print
(
"testing nchw"
)
...
...
topi/tests/python/test_topi_depthwise_conv2d_back_input.py
View file @
fab4f9cc
...
...
@@ -87,6 +87,7 @@ def verify_depthwise_conv2d_back_input(batch, in_channel, in_h, channel_multipli
check_device
(
"metal"
)
check_device
(
"rocm"
)
check_device
(
"vulkan"
)
check_device
(
"nvptx"
)
def
test_topi_depthwise_conv2d_backward_input_nhwc
():
verify_depthwise_conv2d_back_input
(
16
,
256
,
56
,
1
,
3
,
1
,
1
)
...
...
topi/tests/python/test_topi_depthwise_conv2d_back_weight.py
View file @
fab4f9cc
...
...
@@ -80,6 +80,7 @@ def verify_depthwise_conv2d_back_weight(batch, in_channel, in_h, channel_multipl
check_device
(
"metal"
)
check_device
(
"rocm"
)
check_device
(
"vulkan"
)
check_device
(
"nvptx"
)
def
test_topi_depthwise_conv2d_backward_weight_nhwc
():
verify_depthwise_conv2d_back_weight
(
16
,
256
,
56
,
1
,
3
,
1
,
1
)
...
...
topi/tests/python/test_topi_l2norm.py
View file @
fab4f9cc
...
...
@@ -31,7 +31,7 @@ def verify_l2_normalize(ishape, eps, axis=None):
f
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
'llvm'
,
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'llvm'
,
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_l2_normalize
():
...
...
topi/tests/python/test_topi_lrn.py
View file @
fab4f9cc
...
...
@@ -30,7 +30,7 @@ def verify_lrn(shape, size, axis, bias, alpha, beta):
f
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
'llvm'
,
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'llvm'
,
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_lrn
():
...
...
topi/tests/python/test_topi_math.py
View file @
fab4f9cc
...
...
@@ -39,7 +39,7 @@ def test_ewise():
foo
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
,
atol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'llvm'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'llvm'
,
'nvptx'
]:
check_device
(
device
)
...
...
topi/tests/python/test_topi_pooling.py
View file @
fab4f9cc
...
...
@@ -63,7 +63,7 @@ def verify_pool(n, ic, ih, kh, sh, padding, pool_type, ceil_mode, count_include_
f
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_pool
():
...
...
@@ -104,7 +104,7 @@ def verify_global_pool(n, c, h, w, pool_type):
f
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_global_pool
():
...
...
topi/tests/python/test_topi_reduce.py
View file @
fab4f9cc
...
...
@@ -25,12 +25,11 @@ def _my_npy_argmin(arr, axis, keepdims):
return
arr
.
argmin
(
axis
=
axis
)
.
reshape
(
out_shape
)
def
verify_reduce_map_ele
(
in_shape
,
axis
,
keepdims
,
type
=
"sum"
):
def
verify_reduce_map_ele
(
in_shape
,
axis
,
keepdims
,
type
=
"sum"
,
dtype
=
"float32"
):
# Build the logic and compile the function
dat_dtype
=
"float32"
A
=
tvm
.
placeholder
(
shape
=
in_shape
,
name
=
"A"
,
dtype
=
dat_dtype
)
A
=
tvm
.
placeholder
(
shape
=
in_shape
,
name
=
"A"
,
dtype
=
dtype
)
A1
=
topi
.
sqrt
(
topi
.
exp
(
A
))
out_dtype
=
"float32"
out_dtype
=
dtype
if
type
==
"sum"
:
B
=
topi
.
sum
(
A1
,
axis
=
axis
,
keepdims
=
keepdims
)
elif
type
==
"max"
:
...
...
@@ -57,8 +56,8 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
foo
=
tvm
.
build
(
s
,
[
A
,
B
],
device
,
name
=
type
)
# Test
in_npy
=
np
.
random
.
uniform
(
size
=
in_shape
)
.
astype
(
np
.
float32
)
in_npy_map
=
np
.
sqrt
(
np
.
exp
(
in_npy
))
.
astype
(
np
.
float32
)
in_npy
=
np
.
random
.
uniform
(
size
=
in_shape
)
.
astype
(
dtype
)
in_npy_map
=
np
.
sqrt
(
np
.
exp
(
in_npy
))
.
astype
(
dtype
)
if
type
==
"sum"
:
out_npy
=
in_npy_map
.
sum
(
axis
=
axis
,
keepdims
=
keepdims
)
elif
type
==
"max"
:
...
...
@@ -91,7 +90,7 @@ def verify_reduce_map_ele(in_shape, axis, keepdims, type="sum"):
np
.
testing
.
assert_allclose
(
out_tvm_val
,
in_npy_map
.
min
(
axis
=
axis
),
1E-3
,
1E-3
)
else
:
np
.
testing
.
assert_allclose
(
out_tvm
.
asnumpy
(),
out_npy
,
1E-3
,
1E-3
)
for
device
in
[
"cuda"
,
"opencl"
,
"metal"
,
"llvm"
,
"rocm"
,
"vulkan"
]:
for
device
in
[
"cuda"
,
"opencl"
,
"metal"
,
"llvm"
,
"rocm"
,
"vulkan"
,
"nvptx"
]:
check_device
(
device
)
...
...
@@ -128,6 +127,11 @@ def test_reduce_map():
axis
=
None
,
keepdims
=
False
,
type
=
"sum"
)
verify_reduce_map_ele
(
in_shape
=
(
128
,
24
,
128
,
24
),
axis
=
(
1
,
2
,
3
),
keepdims
=
True
,
type
=
"sum"
,
dtype
=
"float64"
)
if
__name__
==
"__main__"
:
test_reduce_map
()
topi/tests/python/test_topi_relu.py
View file @
fab4f9cc
...
...
@@ -27,7 +27,7 @@ def verify_relu(m, n):
foo
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
...
...
topi/tests/python/test_topi_resize.py
View file @
fab4f9cc
...
...
@@ -40,7 +40,7 @@ def verify_bilinear_scale(batch, in_channel, in_height, in_width, out_height, ou
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-3
,
atol
=
1e-3
)
for
device
in
[
'llvm'
,
'cuda'
,
'vulkan'
]:
for
device
in
[
'llvm'
,
'cuda'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_resize
():
...
...
topi/tests/python/test_topi_softmax.py
View file @
fab4f9cc
...
...
@@ -7,8 +7,8 @@ import topi.testing
import
logging
from
topi.util
import
get_const_tuple
def
verify_softmax
(
m
,
n
):
A
=
tvm
.
placeholder
((
m
,
n
),
name
=
'A'
)
def
verify_softmax
(
m
,
n
,
dtype
=
"float32"
):
A
=
tvm
.
placeholder
((
m
,
n
),
dtype
=
dtype
,
name
=
'A'
)
B
=
topi
.
nn
.
softmax
(
A
)
# confirm lower works
s
=
tvm
.
create_schedule
([
B
.
op
])
...
...
@@ -32,16 +32,16 @@ def verify_softmax(m, n):
foo
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
]:
for
device
in
[
'cuda'
,
'opencl'
,
'metal'
,
'rocm'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_softmax
():
verify_softmax
(
32
,
10
)
verify_softmax
(
3
,
4
)
verify_softmax
(
32
,
10
,
"float64"
)
def
verify_log_softmax
(
m
,
n
):
A
=
tvm
.
placeholder
((
m
,
n
),
name
=
'A'
)
def
verify_log_softmax
(
m
,
n
,
dtype
=
"float32"
):
A
=
tvm
.
placeholder
((
m
,
n
),
dtype
=
dtype
,
name
=
'A'
)
B
=
topi
.
nn
.
log_softmax
(
A
)
# confirm lower works
s
=
tvm
.
create_schedule
([
B
.
op
])
...
...
@@ -63,13 +63,14 @@ def verify_log_softmax(m, n):
foo
(
a
,
b
)
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
"cuda"
,
"opencl"
,
"metal"
,
"rocm"
,
"vulkan"
]:
for
device
in
[
"cuda"
,
"opencl"
,
"metal"
,
"rocm"
,
"vulkan"
,
"nvptx"
]:
check_device
(
device
)
def
test_log_softmax
():
verify_log_softmax
(
32
,
10
)
verify_log_softmax
(
3
,
4
)
verify_log_softmax
(
32
,
10
,
"float64"
)
if
__name__
==
"__main__"
:
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
...
...
topi/tests/python/test_topi_upsampling.py
View file @
fab4f9cc
...
...
@@ -41,7 +41,7 @@ def verify_upsampling(batch, in_channel, in_height, in_width, scale, layout='NCH
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
for
device
in
[
'llvm'
,
'cuda'
,
'vulkan'
]:
for
device
in
[
'llvm'
,
'cuda'
,
'vulkan'
,
'nvptx'
]:
check_device
(
device
)
def
test_upsampling
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment