Commit 9c12ec81 (Unverified)
Authored Apr 23, 2020 by Wei Pan; committed by GitHub, Apr 23, 2020
[cuDNN] Add cuDNN grouped convolutions support (#5319)

Signed-off-by: Wei Pan <weip@nvidia.com>

Parent: a3b13973
Showing 9 changed files with 170 additions and 85 deletions (+170 / -85):

  python/tvm/contrib/cudnn.py                       +25  -11
  python/tvm/relay/op/strategy/cuda.py              +18   -2
  src/runtime/contrib/cudnn/conv_forward.cc         +25  -12
  src/runtime/contrib/cudnn/cudnn_utils.h            +0   -1
  tests/python/contrib/test_cudnn.py                +59  -53
  topi/python/topi/cuda/conv2d.py                    +5   -4
  topi/python/topi/testing/conv2d_nhwc_python.py    +36   -1
  topi/python/topi/testing/conv3d_ncdhw_python.py    +1   -0
  topi/tests/python/test_topi_conv2d_nchw.py         +1   -1
python/tvm/contrib/cudnn.py

@@ -182,7 +182,8 @@ def conv_output_shape(tensor_format,
                       x_shape,
                       w_shape,
                       data_dtype,
-                      conv_dtype):
+                      conv_dtype,
+                      groups=1):
     """Get output shape of 2D or 3D convolution

     Paramters
@@ -205,6 +206,8 @@ def conv_output_shape(tensor_format,
         data type
     conv_dtype: str
         convolution type
+    groups: int
+        number of groups

     Returns
     -------
@@ -228,7 +231,8 @@ def conv_output_shape(tensor_format,
         _get_np_int32_array_handle(wshape),
         _get_np_int32_array_handle(oshape),
         data_dtype,
-        conv_dtype)
+        conv_dtype,
+        groups)
     return list(oshape)

@@ -240,7 +244,8 @@ def conv_find_algo(tensor_format,
                    w_shape,
                    y_shape,
                    data_dtype,
-                   conv_dtype):
+                   conv_dtype,
+                   groups=1):
     """Choose the best algo for the given input.

     Paramters
@@ -265,6 +270,8 @@ def conv_find_algo(tensor_format,
         data type
     conv_dtype: str
         convolution type
+    groups: int
+        number of groups

     Returns
     -------
@@ -287,7 +294,8 @@ def conv_find_algo(tensor_format,
         _get_np_int32_array_handle(wshape),
         _get_np_int32_array_handle(yshape),
         data_dtype,
-        conv_dtype)
+        conv_dtype,
+        groups)


 def conv_forward(x,
@@ -298,7 +306,8 @@ def conv_forward(x,
                  conv_mode,
                  tensor_format,
                  algo,
-                 conv_dtype):
+                 conv_dtype,
+                 groups=1):
     """Create an extern op that compute 2D or 3D convolution with CuDNN

     Parameters
@@ -325,6 +334,8 @@ def conv_forward(x,
         if algo == -1, the best algo will be chosen by CUDNN
     conv_dtype: str
         convolution type
+    groups: int
+        the number of groups

     Returns
     -------
@@ -335,8 +346,7 @@ def conv_forward(x,
     assert dims in (4, 5)

     conv_dtype = x.dtype if conv_dtype is None else conv_dtype
-    pad, stride, dilation, _, _ = \
-        _prepare_global_func_params(dims - 2, pad, stride, dilation)
+    pad, stride, dilation, _, _ = _prepare_global_func_params(dims - 2, pad, stride, dilation)

     oshape = conv_output_shape(tensor_format,
                                pad,
@@ -345,7 +355,8 @@ def conv_forward(x,
                                list(x.shape),
                                list(w.shape),
                                x.dtype,
-                               conv_dtype)
+                               conv_dtype,
+                               groups)
     if algo == -1:
         # For now if we try to call `cudnnFindConvolutionForwardAlgorithm` when
         # using INT8 data type, CuDNN will crash down.
@@ -361,7 +372,8 @@ def conv_forward(x,
                               list(w.shape),
                               oshape,
                               x.dtype,
-                              conv_dtype)
+                              conv_dtype,
+                              groups)

     if dims == 4:
         return te.extern(
@@ -380,7 +392,8 @@ def conv_forward(x,
                 ins[0],
                 ins[1],
                 outs[0],
-                conv_dtype), name="y")
+                conv_dtype,
+                groups), name="y")

     return te.extern(
         oshape, [x, w],
@@ -401,7 +414,8 @@ def conv_forward(x,
                 ins[0],
                 ins[1],
                 outs[0],
-                conv_dtype), name="y")
+                conv_dtype,
+                groups), name="y")


 def softmax(x, axis=-1):
     """Compute softmax using CuDNN
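With these changes, grouped (and depthwise) convolutions can be requested directly through the contrib wrapper. A minimal sketch of the new keyword, assuming the existing conv_forward(x, w, pad, stride, dilation, ...) argument order that the hunks above only show in part, and assuming a TVM build with CUDA and cuDNN enabled; the shapes are illustrative, not taken from the commit:

    import tvm
    from tvm import te
    from tvm.contrib import cudnn

    # NCHW input, OIHW filter; with groups=4 the filter's second dim is in_channel // groups.
    X = te.placeholder((1, 4, 32, 32), name="X", dtype="float32")
    W = te.placeholder((8, 1, 3, 3), name="W", dtype="float32")
    Y = cudnn.conv_forward(X, W,
                           [1, 1],            # pad
                           [1, 1],            # stride
                           [1, 1],            # dilation
                           conv_mode=1,
                           tensor_format=0,   # CUDNN_TENSOR_NCHW
                           algo=-1,           # let cuDNN pick the algorithm
                           conv_dtype="float32",
                           groups=4)          # new in this commit; defaults to 1
    s = te.create_schedule(Y.op)
    f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="grouped_conv2d")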
python/tvm/relay/op/strategy/cuda.py

@@ -161,7 +161,9 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
             if layout in ["NCHW", "NHWC"] and padding[0] == padding[2] and \
                     padding[1] == padding[3]:
                 strategy.add_implementation(
-                    wrap_compute_conv2d(topi.cuda.conv2d_cudnn, True),
+                    wrap_compute_conv2d(topi.cuda.conv2d_cudnn,
+                                        need_data_layout=True,
+                                        has_groups=True),
                     wrap_topi_schedule(topi.cuda.schedule_conv2d_cudnn),
                     name="conv2d_cudnn.cuda",
                     plevel=15)
@@ -181,6 +183,20 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
             else:
                 raise RuntimeError("Unsupported depthwise_conv2d layout {}".format(layout))
     else: # group_conv2d
+        # add cudnn implementation, if any
+        cudnn_impl = False
+        if target.target_name == "cuda" and "cudnn" in target.libs:
+            if layout in ["NCHW", "NHWC"] and padding[0] == padding[2] and \
+                    padding[1] == padding[3]:
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.cuda.conv2d_cudnn,
+                                        need_data_layout=True,
+                                        has_groups=True),
+                    wrap_topi_schedule(topi.cuda.schedule_conv2d_cudnn),
+                    name="conv2d_cudnn.cuda",
+                    plevel=15)
+                cudnn_impl = True
+
         if layout == 'NCHW':
             # TODO(@vinx13, @icemelon9): Use group_conv2d_NCHWc_int8 when dtype is int8/uint8.
             assert kernel_layout == "OIHW"
@@ -194,7 +210,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
                 wrap_compute_conv2d(topi.cuda.group_conv2d_NCHWc_int8, True),
                 wrap_topi_schedule(topi.cuda.schedule_group_conv2d_NCHWc_int8),
                 name="group_conv2d_NCHWc_int8.cuda")
-        else:
+        elif not cudnn_impl:
             raise RuntimeError("Unsupported group_conv2d layout {}".format(layout))
     return strategy
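This is the piece that routes Relay group convolutions to cuDNN instead of raising "Unsupported group_conv2d layout" when no native CUDA schedule applies. A small sketch of exercising that path from Relay, assuming a target built with cuDNN ("-libs=cudnn" is what places "cudnn" in target.libs); shapes and opt level are illustrative:

    import tvm
    from tvm import relay

    data = relay.var("data", shape=(1, 8, 32, 32), dtype="float32")
    weight = relay.var("weight", shape=(16, 4, 3, 3), dtype="float32")  # OIHW, 8 // groups = 4
    out = relay.nn.conv2d(data, weight, kernel_size=(3, 3), channels=16,
                          padding=(1, 1), groups=2)
    mod = tvm.IRModule.from_expr(relay.Function(relay.analysis.free_vars(out), out))

    # "cudnn" in target.libs lets conv2d_strategy_cuda take the new cudnn_impl branch.
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target="cuda -libs=cudnn")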
src/runtime/contrib/cudnn/conv_forward.cc

@@ -35,6 +35,7 @@ void ConvolutionForward(
     int format,
     int algo,
     int dims,
+    int groups,
     const int pad[],
     const int stride[],
     const int dilation[],
@@ -62,8 +63,10 @@ void ConvolutionForward(
   // Note: For 2D tenor, using ND setters causes CUDNN_STATUS_NOT_SUPPORTED error
   //       in following cudnnGetConvolutionForwardWorkspaceSize() when data type is fp16, int
+  CUDNN_CALL(cudnnSetConvolutionGroupCount(entry_ptr->conv_entry.conv_desc, groups));
   if (dims == 2) {
     // Set Desc
     CUDNN_CALL(cudnnSetConvolution2dDescriptor(entry_ptr->conv_entry.conv_desc,
                                                pad[0],
                                                pad[1],
@@ -183,6 +186,7 @@ void ConvolutionForward(
 void OutputShape(
     int format,
     int dims,
+    int groups,
     const int pad[],
     const int stride[],
     const int dilation[],
@@ -202,6 +206,7 @@ void OutputShape(
   int full_dims = dims + 2;

   // conv desc
+  CUDNN_CALL(cudnnSetConvolutionGroupCount(entry_ptr->conv_entry.conv_desc, groups));
   CUDNN_CALL(cudnnSetConvolutionNdDescriptor(entry_ptr->conv_entry.conv_desc,
                                              dims,
                                              pad,
@@ -240,6 +245,7 @@ void OutputShape(
   // Set Input
   std::vector<int> tensor_stride(full_dims);
   GetCudnnStride(full_dims, x_dim, tensor_stride.data());
+
   CUDNN_CALL(cudnnSetTensorNdDescriptor(entry_ptr->conv_entry.input_desc,
                                         data_type,
                                         full_dims,
@@ -264,6 +270,7 @@ void OutputShape(
 void FindAlgo(
     int format,
     int dims,
+    int groups,
     const int pad[],
     const int stride[],
     const int dilation[],
@@ -284,6 +291,7 @@ void FindAlgo(
   int full_dims = dims + 2;

   // conv desc
+  CUDNN_CALL(cudnnSetConvolutionGroupCount(entry_ptr->conv_entry.conv_desc, groups));
   CUDNN_CALL(cudnnSetConvolutionNdDescriptor(entry_ptr->conv_entry.conv_desc,
                                              dims,
                                              pad,
@@ -360,16 +368,18 @@ TVM_REGISTER_GLOBAL("tvm.contrib.cudnn.conv2d.forward")
   int algo = args[2];
   int pad_v[2], stride_v[2], dilation_v[2];
   for (int i = 0; i < 2; i++) {
     pad_v[i] = args[3 + i];
     stride_v[i] = args[5 + i];
     dilation_v[i] = args[7 + i];
   }
   DLTensor* x = args[9];
   DLTensor* w = args[10];
   DLTensor* y = args[11];
   std::string conv_dtype = args[12];
+  int groups = args[13];

-  ConvolutionForward(mode, format, algo, 2, pad_v, stride_v, dilation_v, x, w, y, conv_dtype);
+  ConvolutionForward(mode, format, algo, 2, groups, pad_v, stride_v, dilation_v, x, w, y, conv_dtype);
 });
@@ -380,17 +390,18 @@ TVM_REGISTER_GLOBAL("tvm.contrib.cudnn.conv3d.forward")
   int algo = args[2];
   int pad_v[3], stride_v[3], dilation_v[3];
   for (int i = 0; i < 3; i++) {
     pad_v[i] = args[3 + i];
     stride_v[i] = args[6 + i];
     dilation_v[i] = args[9 + i];
   }
   DLTensor* x = args[12];
   DLTensor* w = args[13];
   DLTensor* y = args[14];
   std::string conv_dtype = args[15];
+  int groups = args[16];

-  ConvolutionForward(mode, format, algo, 3, pad_v, stride_v, dilation_v, x, w, y, conv_dtype);
+  ConvolutionForward(mode, format, algo, 3, groups, pad_v, stride_v, dilation_v, x, w, y, conv_dtype);
 });
@@ -406,8 +417,9 @@ TVM_REGISTER_GLOBAL("tvm.contrib.cudnn.conv.output_shape")
   void* out_shape = args[7];
   std::string data_dtype = args[8];
   std::string conv_dtype = args[9];
+  int groups = args[10];

-  OutputShape(format, dims, pad, stride, dilation, x_dim,
+  OutputShape(format, dims, groups, pad, stride, dilation, x_dim,
               w_dim, out_shape, data_dtype, conv_dtype);
 });
@@ -424,8 +436,9 @@ TVM_REGISTER_GLOBAL("tvm.contrib.cudnn.conv.find_algo")
   int* y_dim = static_cast<int*>(static_cast<void*>(args[7]));
   std::string data_dtype = args[8];
   std::string conv_dtype = args[9];
+  int groups = args[10];

-  FindAlgo(format, dims, pad, stride, dilation, x_dim,
+  FindAlgo(format, dims, groups, pad, stride, dilation, x_dim,
            w_dim, y_dim, data_dtype, conv_dtype, ret);
 });
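The packed functions above now read the group count from the argument list (args[13] for conv2d.forward, args[16] for conv3d.forward, args[10] for conv.output_shape and conv.find_algo), and the Python wrappers in cudnn.py are the intended way to reach them. A small shape-inference check through the updated OutputShape path; this is a sketch that assumes the conv_output_shape argument order only partially visible in the cudnn.py hunks, and it requires a cuDNN-enabled build:

    from tvm.contrib import cudnn

    # NCHW (tensor_format=0): an (8, 1, 3, 3) OIHW filter with groups=4 is valid for
    # a (1, 4, 32, 32) input; cuDNN reports the grouped output shape.
    oshape = cudnn.conv_output_shape(0,               # tensor_format
                                     [1, 1],          # pad
                                     [1, 1],          # stride
                                     [1, 1],          # dilation
                                     [1, 4, 32, 32],  # x_shape
                                     [8, 1, 3, 3],    # w_shape
                                     "float32",       # data_dtype
                                     "float32",       # conv_dtype
                                     groups=4)
    print(oshape)  # expected [1, 8, 32, 32]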
src/runtime/contrib/cudnn/cudnn_utils.h

@@ -78,7 +78,6 @@ struct ConvEntry {
   runtime::DeviceAPI* cuda_api;
   void* workspace{nullptr};
   size_t workspace_size{0};
-  int group_count{0};
   ConvEntry();
   ~ConvEntry();
   void UpdateWorkspace(const size_t wsize);
tests/python/contrib/test_cudnn.py

@@ -17,11 +17,11 @@
 import tvm
 from tvm import te
 from tvm.contrib import cudnn
+from tvm.contrib.nvcc import have_fp16
 import numpy as np
 import topi.testing

-def verify_conv2d(data_dtype, conv_dtype, tensor_format=0):
+def verify_conv2d(data_dtype, conv_dtype, tensor_format=0, groups=1):
     in_channel = 4
     out_channel = 16
     filter_h = 3
@@ -34,7 +34,7 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0):
     dilation_w = 1
     batch = 3
     height = 32
-    weight = 32
+    width = 32

     if not tvm.runtime.enabled("cuda"):
         print("skip because cuda is not enabled...")
@@ -42,12 +42,17 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0):
     if not tvm.get_global_func("tvm.contrib.cudnn.conv.output_shape", True):
         print("skip because cudnn is not enabled...")
         return
+    if data_dtype == "float16" and not have_fp16(tvm.gpu(0).compute_version):
+        print("Skip because gpu does not have fp16 support")
+        return

+    # schedule
     if tensor_format == 0:
-        xshape = [batch, in_channel, height, weight]
-        wshape = [out_channel, in_channel, filter_h, filter_w]
+        xshape = [batch, in_channel, height, width]
+        wshape = [out_channel, in_channel // groups, filter_h, filter_w]
     else:
-        xshape = [batch, height, weight, in_channel]
-        wshape = [out_channel, filter_h, filter_w, in_channel]
+        xshape = [batch, height, width, in_channel]
+        wshape = [out_channel, filter_h, filter_w, in_channel // groups]

     X = te.placeholder(xshape, name='X', dtype=data_dtype)
     W = te.placeholder(wshape, name='W', dtype=data_dtype)
@@ -59,39 +64,41 @@ def verify_conv2d(data_dtype, conv_dtype, tensor_format=0):
                            conv_mode=1,
                            tensor_format=tensor_format,
                            conv_dtype=conv_dtype,
-                           algo=-1)
+                           algo=-1,
+                           groups=groups)
     yshape = [x.value for x in Y.shape]
     s = te.create_schedule(Y.op)

-    def verify():
-        ctx = tvm.gpu(0)
-        f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv2d")
-        x_np = np.random.uniform(-1, 1, xshape).astype(data_dtype)
-        w_np = np.random.uniform(-1, 1, wshape).astype(data_dtype)
-        y_np = np.zeros(yshape).astype(data_dtype)
-        x = tvm.nd.array(x_np, ctx)
-        w = tvm.nd.array(w_np, ctx)
-        y = tvm.nd.array(y_np, ctx)
-        if tensor_format == 0:
-            c_np = topi.testing.conv2d_nchw_python(x_np, w_np, 1, 1)
-        elif tensor_format == 1:
-            wt = w_np.transpose((1, 2, 3, 0))  #OHWI => HWIO
-            c_np = topi.testing.conv2d_nhwc_python(x_np, wt, 1, 1)
-
-        f(x, w, y)
-        tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=3e-5, rtol=1e-3)
-
-    verify()
+    # validation
+    ctx = tvm.gpu(0)
+    f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv2d")
+    x_np = np.random.uniform(-1, 1, xshape).astype(data_dtype)
+    w_np = np.random.uniform(-1, 1, wshape).astype(data_dtype)
+    y_np = np.zeros(yshape).astype(data_dtype)
+    x = tvm.nd.array(x_np, ctx)
+    w = tvm.nd.array(w_np, ctx)
+    y = tvm.nd.array(y_np, ctx)
+    if tensor_format == 0:
+        c_np = topi.testing.conv2d_nchw_python(x_np, w_np, 1, 1, groups=groups)
+    elif tensor_format == 1:
+        wt = w_np.transpose((1, 2, 3, 0))  #OHWI => HWIO
+        c_np = topi.testing.conv2d_nhwc_python(x_np, wt, 1, 1, groups=groups)
+
+    f(x, w, y)
+    tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=1e-2, rtol=1e-2)


 def test_conv2d():
     verify_conv2d("float32", "float32", tensor_format=0)
     verify_conv2d("float16", "float32", tensor_format=1)
-    #Not pass accuracy test, need check
-    #verify_conv2d("float16", "float16", tensor_format=0)
+    verify_conv2d("float16", "float16", tensor_format=0)
     verify_conv2d("int8", "int32", tensor_format=1)

+    verify_conv2d("float32", "float32", tensor_format=0, groups=2)
+    verify_conv2d("float16", "float32", tensor_format=1, groups=2)
+    verify_conv2d("float16", "float16", tensor_format=0, groups=2)
+    verify_conv2d("int8", "int32", tensor_format=1, groups=2)


-def verify_conv3d(data_dtype, conv_dtype, tensor_format=0):
+def verify_conv3d(data_dtype, conv_dtype, tensor_format=0, groups=1):
     in_channel = 4
     out_channel = 16
     filter_d = 3
@@ -109,7 +116,7 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0):
     batch = 3
     depth = 32
     height = 32
-    weight = 32
+    width = 32

     if not tvm.runtime.enabled("cuda"):
         print("skip because cuda is not enabled...")
@@ -118,8 +125,9 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0):
         print("skip because cudnn is not enabled...")
         return

-    xshape = [batch, in_channel, depth, height, weight]
-    wshape = [out_channel, in_channel, filter_d, filter_h, filter_w]
+    # schedule
+    xshape = [batch, in_channel, depth, height, width]
+    wshape = [out_channel, in_channel // groups, filter_d, filter_h, filter_w]

     X = te.placeholder(xshape, name='X', dtype=data_dtype)
     W = te.placeholder(wshape, name='W', dtype=data_dtype)
@@ -131,33 +139,31 @@ def verify_conv3d(data_dtype, conv_dtype, tensor_format=0):
                            conv_mode=1,
                            tensor_format=tensor_format,
                            algo=-1,
-                           conv_dtype=conv_dtype)
+                           conv_dtype=conv_dtype,
+                           groups=groups)
     yshape = [x.value for x in Y.shape]
     s = te.create_schedule(Y.op)

-    def verify():
-        ctx = tvm.gpu(0)
-        f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv3d")
-        x_np = np.random.uniform(-1, 1, xshape).astype(data_dtype)
-        w_np = np.random.uniform(-1, 1, wshape).astype(data_dtype)
-        y_np = np.zeros(yshape).astype(data_dtype)
-        x = tvm.nd.array(x_np, ctx)
-        w = tvm.nd.array(w_np, ctx)
-        y = tvm.nd.array(y_np, ctx)
-        if tensor_format == 0:
-            c_np = topi.testing.conv3d_ncdhw_python(x_np, w_np, 1, 1)
-        else:
-            raise AssertionError("For now, conv3d tensor format only support: 0(NCHW)")
-
-        f(x, w, y)
-        tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=3e-5, rtol=1e-4)
-
-    verify()
+    # validation
+    ctx = tvm.gpu(0)
+    f = tvm.build(s, [X, W, Y], "cuda", target_host="llvm", name="conv3d")
+    x_np = np.random.uniform(-1, 1, xshape).astype(data_dtype)
+    w_np = np.random.uniform(-1, 1, wshape).astype(data_dtype)
+    y_np = np.zeros(yshape).astype(data_dtype)
+    x = tvm.nd.array(x_np, ctx)
+    w = tvm.nd.array(w_np, ctx)
+    y = tvm.nd.array(y_np, ctx)
+    if tensor_format == 0:
+        c_np = topi.testing.conv3d_ncdhw_python(x_np, w_np, 1, 1, groups)
+    else:
+        raise AssertionError("For now, conv3d tensor format only support: 0(NCHW)")
+
+    f(x, w, y)
+    tvm.testing.assert_allclose(y.asnumpy(), c_np, atol=3e-5, rtol=1e-4)


 def test_conv3d():
     verify_conv3d("float32", "float32", tensor_format=0)
+    verify_conv3d("float32", "float32", tensor_format=0, groups=2)


 def verify_softmax(shape, axis, dtype="float32"):
     A = te.placeholder(shape, dtype=dtype, name='A')
topi/python/topi/cuda/conv2d.py

@@ -66,8 +66,8 @@ def schedule_conv2d_nhwc(cfg, outs):


 @autotvm.register_topi_compute("conv2d_cudnn.cuda")
-def conv2d_cudnn(cfg, data, kernel, strides, padding, dilation, layout='NCHW',
-                 out_dtype='float32'):
+def conv2d_cudnn(cfg, data, kernel, strides, padding, dilation, groups=1,
+                 layout='NCHW', out_dtype='float32'):
     """Compute conv2d using CuDNN library"""
     if layout == 'NCHW':
         tensor_format = 0  # CUDNN_TENSOR_NCHW
@@ -89,7 +89,7 @@ def conv2d_cudnn(cfg, data, kernel, strides, padding, dilation, layout='NCHW',
     pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
     OH = (H + pt + pb - KH) // stride_h + 1
     OW = (W + pl + pr - KW) // stride_w + 1
-    cfg.add_flop(2 * N * OH * OW * CO * CI * ((KH - 1) * dilation_h + 1) * \
+    cfg.add_flop(groups * 2 * N * OH * OW * CO * CI * ((KH - 1) * dilation_h + 1) * \
                  ((KW - 1) * dilation_w + 1))

     if data.dtype == "int8" or kernel.dtype == "int8":
@@ -107,7 +107,8 @@ def conv2d_cudnn(cfg, data, kernel, strides, padding, dilation, layout='NCHW',
                               conv_mode=1,
                               tensor_format=tensor_format,
                               algo=-1,  # let CUDNN choose the best algo
-                              conv_dtype=dtype)
+                              conv_dtype=dtype,
+                              groups=groups)


 @autotvm.register_topi_schedule("conv2d_cudnn.cuda")
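Because groups was inserted before layout and out_dtype, positional callers of conv2d_cudnn must now pass it explicitly, which is exactly the one-argument change to the cuDNN branch of test_topi_conv2d_nchw.py further down. A hedged sketch of a direct call (shapes illustrative; requires a CUDA build with cuDNN, and outside an autotvm tuning context it falls back to a default config):

    import tvm
    from tvm import te
    import topi

    A = te.placeholder((1, 8, 32, 32), name="A", dtype="float32")
    W = te.placeholder((16, 4, 3, 3), name="W", dtype="float32")  # OIHW with 8 // groups = 4

    with tvm.target.create("cuda -libs=cudnn"):
        # groups (here 2) now sits between dilation and layout in the signature
        C = topi.cuda.conv2d_cudnn(A, W, (1, 1), (1, 1), (1, 1), 2, "NCHW", "float32")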
topi/python/topi/testing/conv2d_nhwc_python.py

@@ -21,7 +21,7 @@ import scipy.signal
 from topi.nn.util import get_pad_tuple


-def conv2d_nhwc_python(a_np, w_np, stride, padding):
+def _conv2d_nhwc_python(a_np, w_np, stride, padding):
     """Convolution operator in NHWC layout.

     Parameters
@@ -77,3 +77,38 @@ def conv2d_nhwc_python(a_np, w_np, stride, padding):
                 apad, np.rot90(np.rot90(wt[f, c])), mode='valid')
             bt[n, f] += out[::stride_h, ::stride_w]
     return bt.transpose((0, 2, 3, 1))
+
+
+def conv2d_nhwc_python(a_np, w_np, stride, padding, groups=1):
+    """Convolution operator in NHWC layout.
+
+    Parameters
+    ----------
+    a_np : numpy.ndarray
+        4-D with shape [batch, in_height, in_width, in_channel]
+
+    w_np : numpy.ndarray
+        4-D with shape [filter_height, filter_width, in_channel // groups, num_filter]
+
+    stride : int or a list/tuple of two ints
+        Stride size, or [stride_height, stride_width]
+
+    padding : int or str or a list/tuple of 2 or 4 ints
+        Padding size, or ['VALID', 'SAME'], or
+        [pad_height, pad_width] for 2 ints, or
+        [pad_top, pad_left, pad_bottom, pad_right] for 2 ints
+
+    groups : int
+        Number of groups
+
+    Returns
+    -------
+    b_np : np.ndarray
+        4-D with shape [batch, out_height, out_width, out_channel]
+    """
+    a_slices = np.array_split(a_np, groups, axis=3)
+    w_slices = np.array_split(w_np, groups, axis=3)
+    b_slices = [_conv2d_nhwc_python(a_slice, w_slice, stride, padding)
+                for a_slice, w_slice in zip(a_slices, w_slices)]
+    b_np = np.concatenate(b_slices, axis=3)
+    return b_np
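The grouped reference is a channel-wise decomposition: split the input channels and the output filters into groups, run the existing single-group kernel on each pair, and concatenate along the channel axis. A quick pure-NumPy check of that property (no GPU needed; shapes are illustrative):

    import numpy as np
    import topi.testing

    a = np.random.uniform(size=(1, 16, 16, 8)).astype("float32")  # NHWC
    w = np.random.uniform(size=(3, 3, 4, 6)).astype("float32")    # HWIO, 8 // groups = 4

    out = topi.testing.conv2d_nhwc_python(a, w, 1, "SAME", groups=2)

    # first half of the input channels with the first half of the filters, and so on
    ref0 = topi.testing.conv2d_nhwc_python(a[..., :4], w[..., :3], 1, "SAME")
    ref1 = topi.testing.conv2d_nhwc_python(a[..., 4:], w[..., 3:], 1, "SAME")
    np.testing.assert_allclose(out, np.concatenate([ref0, ref1], axis=3), rtol=1e-5)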
topi/python/topi/testing/conv3d_ncdhw_python.py

@@ -73,6 +73,7 @@ def conv3d_ncdhw_python(a_np, w_np, stride, padding, groups=1):
     padding : int or str or a list/tuple of three ints
         Padding size, or ['VALID', 'SAME'], or [pad_depth, pad_height, pad_width]
     groups : int
+        Number of groups
topi/tests/python/test_topi_conv2d_nchw.py

@@ -75,7 +75,7 @@ def verify_conv2d_nchw(batch, in_channel, in_size, num_filter, kernel, stride, p
         with tvm.target.create(device):
             if "cudnn" in device:
-                C = fcompute(A, W, (stride, stride), padding, (dilation, dilation), "NCHW", dtype)
+                C = fcompute(A, W, (stride, stride), padding, (dilation, dilation), 1, "NCHW", dtype)
             else:
                 C = fcompute(A, W, (stride, stride), padding, (dilation, dilation), dtype)
             if add_bias: