Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
07e56b9a
Commit
07e56b9a
authored
Aug 14, 2017
by
Yuwei HU
Committed by
Tianqi Chen
Aug 14, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update depthwise_conv2d schedule and testing (#328)
parent
8edd047b
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
152 additions
and
141 deletions
+152
-141
topi/python/topi/cuda/__init__.py
+1
-1
topi/python/topi/cuda/depthwise_conv2d.py
+29
-30
topi/python/topi/nn/mapping.py
+1
-1
topi/python/topi/testing/__init__.py
+1
-0
topi/python/topi/testing/depthwise_conv2d_python.py
+62
-0
topi/recipe/conv/depthwise_conv2d_test.py
+34
-59
topi/tests/python/test_topi_depthwise_conv2d.py
+24
-50
No files found.
topi/python/topi/cuda/__init__.py
View file @
07e56b9a
...
...
@@ -4,6 +4,6 @@ from __future__ import absolute_import as _abs
from
.conv2d_nchw
import
schedule_conv2d_nchw
from
.conv2d_hwcn
import
schedule_conv2d_hwcn
from
.depthwise_conv2d
_map
import
schedule_depthwise_conv2d_map
from
.depthwise_conv2d
import
schedule_depthwise_conv2d
from
.reduction
import
schedule_reduce
from
.broadcast
import
schedule_broadcast_to
topi/python/topi/cuda/depthwise_conv2d
_map
.py
→
topi/python/topi/cuda/depthwise_conv2d.py
View file @
07e56b9a
...
...
@@ -3,25 +3,24 @@
import
tvm
from
..util
import
get_const_tuple
def
schedule_depthwise_conv2d_map
(
op
):
"""Schedule for depthwise_conv2d map ops.
This include scale-shift and relu.
def
schedule_depthwise_conv2d
(
outs
):
"""Schedule for depthwise_conv2d.
Parameters
----------
o
p: Operation
The
symbolic description of the operation, should be depthwise_conv2d or
depthwise_conv2d followed by a sequence of one-to-one-mapping operat
ors.
o
uts: Array of Tensor
The
computation graph description of depthwise_conv2d
in the format of an array of tens
ors.
Returns
-------
s: Schedule
The computation schedule for
the op
.
The computation schedule for
depthwise_conv2d
.
"""
s
=
tvm
.
create_schedule
(
op
)
def
schedule_depthwise_conv2d
(
PaddedInput
,
Filter
,
DepthwiseConv2d
):
"""Schedule for depthwise_conv2d declared in topi.nn.conv"""
outs
=
[
outs
]
if
isinstance
(
outs
,
tvm
.
tensor
.
Tensor
)
else
outs
s
=
tvm
.
create_schedule
([
x
.
op
for
x
in
outs
])
def
_schedule
(
PaddedInput
,
Filter
,
DepthwiseConv2d
):
out_shape
=
get_const_tuple
(
DepthwiseConv2d
.
shape
)
out_height
=
out_shape
[
2
]
out_width
=
out_shape
[
3
]
...
...
@@ -35,27 +34,27 @@ def schedule_depthwise_conv2d_map(op):
Output
=
DepthwiseConv2d
CL
=
s
.
cache_write
(
DepthwiseConv2d
,
"local"
)
else
:
Output
=
op
.
output
(
0
)
Output
=
o
uts
[
0
]
.
o
p
.
output
(
0
)
s
[
DepthwiseConv2d
]
.
set_scope
(
"local"
)
# schedule parameters
num_thread
=
8
num_thread_x
=
8
num_thread_y
=
8
num_vthread_x
=
1
num_vthread_y
=
1
blocking_h
=
out_height
blocking_w
=
out_width
if
out_height
%
48
==
0
:
blocking_h
=
48
elif
out_height
%
32
==
0
:
if
out_height
%
32
==
0
:
blocking_h
=
32
if
out_width
%
48
==
0
:
blocking_w
=
48
num_vthread_y
=
3
elif
out_width
%
32
==
0
:
num_thread_x
=
2
num_vthread_x
=
2
if
out_width
%
32
==
0
:
blocking_w
=
32
num_thread_y
=
16
num_vthread_y
=
2
block_x
=
tvm
.
thread_axis
(
"blockIdx.x"
)
block_y
=
tvm
.
thread_axis
(
"blockIdx.y"
)
thread_x
=
tvm
.
thread_axis
((
0
,
num_thread
),
"threadIdx.x"
)
thread_y
=
tvm
.
thread_axis
((
0
,
num_thread
),
"threadIdx.y"
)
thread_x
=
tvm
.
thread_axis
((
0
,
num_thread
_x
),
"threadIdx.x"
)
thread_y
=
tvm
.
thread_axis
((
0
,
num_thread
_y
),
"threadIdx.y"
)
thread_vx
=
tvm
.
thread_axis
((
0
,
num_vthread_x
),
"vthread"
,
name
=
"vx"
)
thread_vy
=
tvm
.
thread_axis
((
0
,
num_vthread_y
),
"vthread"
,
name
=
"vy"
)
# split and bind
...
...
@@ -65,10 +64,10 @@ def schedule_depthwise_conv2d_map(op):
s
[
Output
]
.
bind
(
bx
,
block_x
)
by1
,
y1i
=
s
[
Output
]
.
split
(
Output
.
op
.
axis
[
2
],
factor
=
blocking_h
)
tvx
,
vxi
=
s
[
Output
]
.
split
(
y1i
,
nparts
=
num_vthread_x
)
tx
,
xi
=
s
[
Output
]
.
split
(
vxi
,
nparts
=
num_thread
)
tx
,
xi
=
s
[
Output
]
.
split
(
vxi
,
nparts
=
num_thread
_x
)
by2
,
y2i
=
s
[
Output
]
.
split
(
Output
.
op
.
axis
[
3
],
factor
=
blocking_w
)
tvy
,
vyi
=
s
[
Output
]
.
split
(
y2i
,
nparts
=
num_vthread_y
)
ty
,
yi
=
s
[
Output
]
.
split
(
vyi
,
nparts
=
num_thread
)
ty
,
yi
=
s
[
Output
]
.
split
(
vyi
,
nparts
=
num_thread
_y
)
s
[
Output
]
.
reorder
(
by1
,
by2
,
tvx
,
tvy
,
tx
,
ty
,
xi
,
yi
)
by
=
s
[
Output
]
.
fuse
(
by1
,
by2
)
s
[
Output
]
.
bind
(
tvx
,
thread_vx
)
...
...
@@ -85,21 +84,21 @@ def schedule_depthwise_conv2d_map(op):
s
[
DepthwiseConv2d
]
.
compute_at
(
s
[
Output
],
ty
)
# input's shared memory load
s
[
IS
]
.
compute_at
(
s
[
Output
],
by
)
tx
,
xi
=
s
[
IS
]
.
split
(
IS
.
op
.
axis
[
2
],
nparts
=
num_thread
)
ty
,
yi
=
s
[
IS
]
.
split
(
IS
.
op
.
axis
[
3
],
nparts
=
num_thread
)
tx
,
xi
=
s
[
IS
]
.
split
(
IS
.
op
.
axis
[
2
],
nparts
=
num_thread
_x
)
ty
,
yi
=
s
[
IS
]
.
split
(
IS
.
op
.
axis
[
3
],
nparts
=
num_thread
_y
)
s
[
IS
]
.
bind
(
tx
,
thread_x
)
s
[
IS
]
.
bind
(
ty
,
thread_y
)
# filter's shared memory load
s
[
FS
]
.
compute_at
(
s
[
Output
],
by
)
s
[
FS
]
.
reorder
(
FS
.
op
.
axis
[
2
],
FS
.
op
.
axis
[
3
],
FS
.
op
.
axis
[
1
])
tx
,
xi
=
s
[
FS
]
.
split
(
FS
.
op
.
axis
[
2
],
nparts
=
num_thread
)
ty
,
yi
=
s
[
FS
]
.
split
(
FS
.
op
.
axis
[
3
],
nparts
=
num_thread
)
tx
,
xi
=
s
[
FS
]
.
split
(
FS
.
op
.
axis
[
2
],
nparts
=
num_thread
_x
)
ty
,
yi
=
s
[
FS
]
.
split
(
FS
.
op
.
axis
[
3
],
nparts
=
num_thread
_y
)
s
[
FS
]
.
bind
(
tx
,
thread_x
)
s
[
FS
]
.
bind
(
ty
,
thread_y
)
def
traverse
(
OP
):
# inline all one-to-one-mapping operators except the last stage (output)
if
OP
.
tag
==
'ewise'
or
OP
.
tag
==
'scale_shift'
:
if
'ewise'
in
OP
.
tag
or
'bcast'
in
OP
.
tag
:
if
OP
not
in
s
.
outputs
:
s
[
OP
]
.
compute_inline
()
for
tensor
in
OP
.
input_tensors
:
...
...
@@ -110,7 +109,7 @@ def schedule_depthwise_conv2d_map(op):
PaddedInput
=
OP
.
input_tensors
[
0
]
Filter
=
OP
.
input_tensors
[
1
]
DepthwiseConv2d
=
OP
.
output
(
0
)
schedule_depthwise_conv2d
(
PaddedInput
,
Filter
,
DepthwiseConv2d
)
_schedule
(
PaddedInput
,
Filter
,
DepthwiseConv2d
)
traverse
(
op
)
traverse
(
o
uts
[
0
]
.
o
p
)
return
s
topi/python/topi/nn/mapping.py
View file @
07e56b9a
...
...
@@ -3,7 +3,7 @@
from
__future__
import
absolute_import
as
_abs
import
tvm
@tvm.tag_scope
(
tag
=
"scale_shift"
)
@tvm.tag_scope
(
tag
=
"
bcast_
scale_shift"
)
def
scale_shift
(
Input
,
Scale
,
Shift
):
"""Batch normalization operator in inference.
...
...
topi/python/topi/testing/__init__.py
View file @
07e56b9a
...
...
@@ -6,4 +6,5 @@ from __future__ import absolute_import as _abs
from
.conv2d_hwcn_python
import
conv2d_hwcn_python
from
.conv2d_nchw_python
import
conv2d_nchw_python
from
.depthwise_conv2d_python
import
depthwise_conv2d_python
from
.dilate_python
import
dilate_python
topi/python/topi/testing/depthwise_conv2d_python.py
0 → 100644
View file @
07e56b9a
# pylint: disable=invalid-name, unused-variable, line-too-long
"""Depthwise convolution in python"""
import
numpy
as
np
from
scipy
import
signal
def
depthwise_conv2d_python
(
input_np
,
filter_np
,
stride
,
padding
):
"""Depthwise convolution operator in NCHW layout.
Parameters
----------
input_np : numpy.ndarray
4-D with shape [batch, in_channel, in_height, in_width]
filter_np : numpy.ndarray
4-D with shape [in_channel, channel_multiplier, filter_height, filter_width]
stride : list / tuple of 2 ints
[stride_height, stride_width]
padding : str
'VALID' or 'SAME'
Returns
-------
output_np : np.ndarray
4-D with shape [batch, out_channel, out_height, out_width]
"""
batch
,
in_channel
,
in_height
,
in_width
=
input_np
.
shape
_
,
channel_multiplier
,
filter_height
,
filter_width
=
filter_np
.
shape
stride_h
,
stride_w
=
stride
# calculate output shape
if
padding
==
'VALID'
:
out_channel
=
in_channel
*
channel_multiplier
out_height
=
(
in_height
-
filter_height
)
//
stride_h
+
1
out_width
=
(
in_width
-
filter_width
)
//
stride_w
+
1
output_np
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
))
for
i
in
range
(
batch
):
for
j
in
range
(
out_channel
):
output_np
[
i
,
j
,
:,
:]
=
signal
.
convolve2d
(
input_np
[
i
,
j
//
channel_multiplier
,
:,
:],
\
np
.
rot90
(
filter_np
[
j
//
channel_multiplier
,
j
%
channel_multiplier
,
:,
:],
2
),
\
mode
=
'valid'
)[
0
:(
in_height
-
filter_height
+
1
):
stride_h
,
0
:(
in_width
-
filter_height
+
1
):
stride_w
]
if
padding
==
'SAME'
:
out_channel
=
in_channel
*
channel_multiplier
out_height
=
np
.
int
(
np
.
ceil
(
float
(
in_height
)
/
float
(
stride_h
)))
out_width
=
np
.
int
(
np
.
ceil
(
float
(
in_width
)
/
float
(
stride_w
)))
output_np
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
))
pad_along_height
=
np
.
int
(
np
.
max
((
out_height
-
1
)
*
stride_h
+
filter_height
-
in_height
,
0
))
pad_along_width
=
np
.
int
(
np
.
max
((
out_width
-
1
)
*
stride_w
+
filter_width
-
in_width
,
0
))
pad_top_tvm
=
np
.
int
(
np
.
ceil
(
float
(
pad_along_height
)
/
2
))
pad_left_tvm
=
np
.
int
(
np
.
ceil
(
float
(
pad_along_width
)
/
2
))
pad_top_scipy
=
np
.
int
(
np
.
ceil
(
float
(
filter_height
-
1
)
/
2
))
pad_left_scipy
=
np
.
int
(
np
.
ceil
(
float
(
filter_width
-
1
)
/
2
))
index_h
=
pad_top_scipy
-
pad_top_tvm
index_w
=
pad_left_scipy
-
pad_left_tvm
for
i
in
range
(
batch
):
for
j
in
range
(
out_channel
):
output_np
[
i
,
j
,
:,
:]
=
signal
.
convolve2d
(
input_np
[
i
,
j
//
channel_multiplier
,
:,
:],
\
np
.
rot90
(
filter_np
[
j
//
channel_multiplier
,
j
%
channel_multiplier
,
:,
:],
2
),
\
mode
=
'same'
)[
index_h
:
in_height
:
stride_h
,
index_w
:
in_width
:
stride_w
]
return
output_np
topi/recipe/conv/depthwise_conv2d_
map_
test.py
→
topi/recipe/conv/depthwise_conv2d_test.py
View file @
07e56b9a
...
...
@@ -5,10 +5,10 @@ from scipy import signal
from
tvm.contrib
import
nvcc
import
topi
from
topi.
nn.
util
import
get_const_tuple
from
topi.cuda.depthwise_conv2d
_map
import
schedule_depthwise_conv2d_map
from
topi.util
import
get_const_tuple
from
topi.cuda.depthwise_conv2d
import
schedule_depthwise_conv2d
TASK
=
"depthwise_conv2d
_map
"
TASK
=
"depthwise_conv2d"
USE_MANUAL_CODE
=
False
@tvm.register_func
...
...
@@ -29,20 +29,20 @@ def tvm_callback_cuda_postproc(code):
code
=
open
(
"perf/
%
s_manual.cu"
%
TASK
)
.
read
()
return
code
def
test_depthwise_conv2d
_map
():
def
test_depthwise_conv2d
():
"""You may test different settings."""
batch
=
2
batch
=
1
in_channel
=
256
in_height
=
32
in_width
=
32
in_height
=
96
in_width
=
96
filter_channel
=
in_channel
channel_multiplier
=
2
filter_height
=
5
filter_width
=
5
channel_multiplier
=
1
filter_height
=
3
filter_width
=
3
stride_h
=
2
stride_w
=
2
stride_h
=
1
stride_w
=
1
padding
=
'SAME'
# or 'VALID'
...
...
@@ -57,40 +57,14 @@ def test_depthwise_conv2d_map():
ScaleShift
=
topi
.
nn
.
scale_shift
(
DepthwiseConv2d
,
Scale
,
Shift
)
Relu
=
topi
.
nn
.
relu
(
ScaleShift
)
# Schedule
s1
=
schedule_depthwise_conv2d
_map
(
DepthwiseConv2d
.
op
)
s2
=
schedule_depthwise_conv2d
_map
(
ScaleShift
.
op
)
s3
=
schedule_depthwise_conv2d
_map
(
Relu
.
op
)
s1
=
schedule_depthwise_conv2d
(
DepthwiseConv2d
)
s2
=
schedule_depthwise_conv2d
(
ScaleShift
)
s3
=
schedule_depthwise_conv2d
(
Relu
)
def
depthwise_conv2d_map_scipy
(
input_np
,
filter_np
,
scale_np
,
shift_np
):
out_shape
=
get_const_tuple
(
DepthwiseConv2d
.
shape
)
out_channel
=
out_shape
[
1
]
out_height
=
out_shape
[
2
]
out_width
=
out_shape
[
3
]
depthwise_conv2d_scipy
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
),
dtype
=
DepthwiseConv2d
.
dtype
)
scale_shift_scipy
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
),
dtype
=
ScaleShift
.
dtype
)
relu_scipy
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
),
dtype
=
Relu
.
dtype
)
if
padding
==
'SAME'
:
pad_top_tvm
=
np
.
int
(
np
.
ceil
(
float
(
np
.
max
((
out_height
-
1
)
*
stride_h
+
filter_height
-
in_height
,
0
))
/
2
))
pad_left_tvm
=
np
.
int
(
np
.
ceil
(
float
(
np
.
max
((
out_width
-
1
)
*
stride_w
+
filter_width
-
in_width
,
0
))
/
2
))
pad_top_scipy
=
np
.
int
(
np
.
ceil
(
float
(
filter_height
-
1
)
/
2
))
pad_left_scipy
=
np
.
int
(
np
.
ceil
(
float
(
filter_width
-
1
)
/
2
))
index_h
=
pad_top_scipy
-
pad_top_tvm
index_w
=
pad_left_scipy
-
pad_left_tvm
for
i
in
range
(
batch
):
for
j
in
range
(
out_channel
):
depthwise_conv2d_scipy
[
i
,
j
,:,:]
=
signal
.
convolve2d
(
input_np
[
i
,
j
//
channel_multiplier
,:,:],
np
.
rot90
(
filter_np
[
j
//
channel_multiplier
,
j
%
channel_multiplier
,:,:],
2
),
mode
=
'same'
)[
index_h
:
in_height
:
stride_h
,
index_w
:
in_width
:
stride_w
]
if
padding
==
'VALID'
:
for
i
in
range
(
batch
):
for
j
in
range
(
out_channel
):
depthwise_conv2d_scipy
[
i
,
j
,:,:]
=
signal
.
convolve2d
(
input_np
[
i
,
j
//
channel_multiplier
,:,:],
np
.
rot90
(
filter_np
[
j
//
channel_multiplier
,
j
%
channel_multiplier
,:,:],
2
),
mode
=
'valid'
)[
0
:(
in_height
-
filter_height
+
1
):
stride_h
,
0
:(
in_width
-
filter_height
+
1
):
stride_w
]
for
c
in
range
(
out_channel
):
scale_shift_scipy
[:,
c
,:,:]
=
depthwise_conv2d_scipy
[:,
c
,:,:]
*
scale_np
[
c
]
+
shift_np
[
c
]
relu_scipy
[:,:,:,:]
=
np
.
maximum
(
scale_shift_scipy
[:,:,:,:],
0
)
return
depthwise_conv2d_scipy
,
scale_shift_scipy
,
relu_scipy
input_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Input
.
shape
))
.
astype
(
Input
.
dtype
)
filter_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Filter
.
shape
))
.
astype
(
Filter
.
dtype
)
scale_np
=
np
.
random
.
uniform
(
size
=
(
in_channel
*
channel_multiplier
))
.
astype
(
Scale
.
dtype
)
shift_np
=
np
.
random
.
uniform
(
size
=
(
in_channel
*
channel_multiplier
))
.
astype
(
Shift
.
dtype
)
def
check_device
(
device
):
if
not
tvm
.
module
.
enabled
(
device
):
...
...
@@ -102,35 +76,36 @@ def test_depthwise_conv2d_map():
f2
=
tvm
.
build
(
s2
,
[
Input
,
Filter
,
Scale
,
Shift
,
ScaleShift
],
device
)
f3
=
tvm
.
build
(
s3
,
[
Input
,
Filter
,
Scale
,
Shift
,
Relu
],
device
)
# Prepare data
input_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Input
.
shape
))
.
astype
(
Input
.
dtype
)
filter_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Filter
.
shape
))
.
astype
(
Filter
.
dtype
)
input_tvm
=
tvm
.
nd
.
array
(
input_np
,
ctx
)
filter_tvm
=
tvm
.
nd
.
array
(
filter_np
,
ctx
)
scale_np
=
np
.
random
.
uniform
(
size
=
(
in_channel
*
channel_multiplier
))
.
astype
(
Scale
.
dtype
)
shift_np
=
np
.
random
.
uniform
(
size
=
(
in_channel
*
channel_multiplier
))
.
astype
(
Shift
.
dtype
)
scale_tvm
=
tvm
.
nd
.
array
(
scale_np
,
ctx
)
shift_tvm
=
tvm
.
nd
.
array
(
shift_np
,
ctx
)
depthwise_conv2d_tvm
=
tvm
.
nd
.
array
(
np
.
zeros
(
shape
=
get_const_tuple
(
DepthwiseConv2d
.
shape
),
dtype
=
DepthwiseConv2d
.
dtype
),
ctx
)
depthwise_conv2d_tvm
=
tvm
.
nd
.
array
(
np
.
zeros
(
shape
=
get_const_tuple
(
DepthwiseConv2d
.
shape
),
dtype
=
DepthwiseConv2d
.
dtype
),
ctx
)
scale_shift_tvm
=
tvm
.
nd
.
array
(
np
.
zeros
(
shape
=
get_const_tuple
(
ScaleShift
.
shape
),
dtype
=
ScaleShift
.
dtype
),
ctx
)
relu_tvm
=
tvm
.
nd
.
array
(
np
.
zeros
(
shape
=
get_const_tuple
(
Relu
.
shape
),
dtype
=
Relu
.
dtype
),
ctx
)
# Measure time cost of kernel 1 (depthwise_conv2d)
timer_1
=
f1
.
time_evaluator
(
f1
.
entry_name
,
ctx
,
number
=
1000
0
)
timer_1
=
f1
.
time_evaluator
(
f1
.
entry_name
,
ctx
,
number
=
1000
)
tcost_1
=
timer_1
(
input_tvm
,
filter_tvm
,
depthwise_conv2d_tvm
)
.
mean
# Measure time cost of kernel 2 (depthwise_conv2d + scale_shift)
timer_2
=
f2
.
time_evaluator
(
f2
.
entry_name
,
ctx
,
number
=
1000
0
)
timer_2
=
f2
.
time_evaluator
(
f2
.
entry_name
,
ctx
,
number
=
1000
)
tcost_2
=
timer_2
(
input_tvm
,
filter_tvm
,
scale_tvm
,
shift_tvm
,
scale_shift_tvm
)
.
mean
# Measure time cost of kernel 3 (depthwise_conv2d + scale_shift + relu)
timer_3
=
f3
.
time_evaluator
(
f3
.
entry_name
,
ctx
,
number
=
1000
0
)
timer_3
=
f3
.
time_evaluator
(
f3
.
entry_name
,
ctx
,
number
=
1000
)
tcost_3
=
timer_3
(
input_tvm
,
filter_tvm
,
scale_tvm
,
shift_tvm
,
relu_tvm
)
.
mean
print
(
"Input shape = "
+
str
(
get_const_tuple
(
Input
.
shape
)))
print
(
"Filter shape = "
+
str
(
get_const_tuple
(
Filter
.
shape
)))
print
(
"Stride = (
%
d,
%
d)"
%
(
stride_h
,
stride_w
))
print
(
"padding =
%
s
\n
"
%
padding
)
print
(
"Output shape = "
+
str
(
get_const_tuple
(
DepthwiseConv2d
.
shape
)))
print
(
"average time cost of 10000 runs (depthwise_conv2d) =
%
g sec"
%
tcost_1
)
print
(
"average time cost of 10000 runs (depthwise_conv2d + scale_shift) =
%
g sec"
%
tcost_2
)
print
(
"average time cost of 10000 runs (depthwise_conv2d + scale_shift + relu) =
%
g sec"
%
tcost_3
)
depthwise_conv2d_scipy
,
scale_shift_scipy
,
relu_scipy
=
depthwise_conv2d_map_scipy
(
input_np
,
filter_np
,
scale_np
,
shift_np
)
print
(
"average time cost of 1000 runs (depthwise_conv2d) =
%
g sec"
%
tcost_1
)
print
(
"average time cost of 1000 runs (depthwise_conv2d + scale_shift) =
%
g sec"
%
tcost_2
)
print
(
"average time cost of 1000 runs (depthwise_conv2d + scale_shift + relu) =
%
g sec"
%
tcost_3
)
# correctness
depthwise_conv2d_scipy
=
topi
.
testing
.
depthwise_conv2d_python
(
input_np
,
filter_np
,
stride
=
[
stride_h
,
stride_w
],
padding
=
padding
)
scale_shift_scipy
=
np
.
zeros
(
shape
=
get_const_tuple
(
ScaleShift
.
shape
))
for
c
in
range
(
in_channel
*
channel_multiplier
):
scale_shift_scipy
[:,
c
,:,:]
=
depthwise_conv2d_scipy
[:,
c
,:,:]
*
scale_np
[
c
]
+
shift_np
[
c
]
relu_scipy
=
np
.
maximum
(
scale_shift_scipy
,
0
)
np
.
testing
.
assert_allclose
(
depthwise_conv2d_tvm
.
asnumpy
(),
depthwise_conv2d_scipy
,
rtol
=
1e-5
)
np
.
testing
.
assert_allclose
(
scale_shift_tvm
.
asnumpy
(),
scale_shift_scipy
,
rtol
=
1e-5
)
np
.
testing
.
assert_allclose
(
relu_tvm
.
asnumpy
(),
relu_scipy
,
rtol
=
1e-5
)
...
...
@@ -138,10 +113,10 @@ def test_depthwise_conv2d_map():
with
tvm
.
build_config
(
auto_unroll_max_step
=
32
,
auto_unroll_min_depth
=
0
,
unroll_explicit
=
Tru
e
,
unroll_explicit
=
Fals
e
,
detect_global_barrier
=
False
,
restricted_func
=
True
):
check_device
(
"cuda"
)
if
__name__
==
"__main__"
:
test_depthwise_conv2d
_map
()
test_depthwise_conv2d
()
topi/tests/python/test_topi_depthwise_conv2d
_map
.py
→
topi/tests/python/test_topi_depthwise_conv2d.py
View file @
07e56b9a
...
...
@@ -3,9 +3,9 @@ import topi
import
numpy
as
np
from
scipy
import
signal
from
topi.util
import
get_const_tuple
from
topi.cuda.depthwise_conv2d
_map
import
schedule_depthwise_conv2d_map
from
topi.cuda.depthwise_conv2d
import
schedule_depthwise_conv2d
def
depthwise_conv2d_
map_
with_workload
(
batch
,
in_channel
,
in_height
,
channel_multiplier
,
filter_height
,
stride_h
,
padding
):
def
depthwise_conv2d_with_workload
(
batch
,
in_channel
,
in_height
,
channel_multiplier
,
filter_height
,
stride_h
,
padding
):
in_width
=
in_height
filter_channel
=
in_channel
filter_width
=
filter_height
...
...
@@ -21,40 +21,14 @@ def depthwise_conv2d_map_with_workload(batch, in_channel, in_height, channel_mul
ScaleShift
=
topi
.
nn
.
scale_shift
(
DepthwiseConv2d
,
Scale
,
Shift
)
Relu
=
topi
.
nn
.
relu
(
ScaleShift
)
# schedule
s1
=
schedule_depthwise_conv2d
_map
(
DepthwiseConv2d
.
op
)
s2
=
schedule_depthwise_conv2d
_map
(
ScaleShift
.
op
)
s3
=
schedule_depthwise_conv2d
_map
(
Relu
.
op
)
s1
=
schedule_depthwise_conv2d
(
DepthwiseConv2d
)
s2
=
schedule_depthwise_conv2d
(
ScaleShift
)
s3
=
schedule_depthwise_conv2d
(
Relu
)
def
depthwise_conv2d_map_scipy
(
input_np
,
filter_np
,
scale_np
,
shift_np
):
out_shape
=
get_const_tuple
(
DepthwiseConv2d
.
shape
)
out_channel
=
out_shape
[
1
]
out_height
=
out_shape
[
2
]
out_width
=
out_shape
[
3
]
depthwise_conv2d_scipy
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
),
dtype
=
DepthwiseConv2d
.
dtype
)
scale_shift_scipy
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
),
dtype
=
ScaleShift
.
dtype
)
relu_scipy
=
np
.
zeros
((
batch
,
out_channel
,
out_height
,
out_width
),
dtype
=
Relu
.
dtype
)
if
padding
==
'SAME'
:
pad_top_tvm
=
np
.
int
(
np
.
ceil
(
float
(
np
.
max
((
out_height
-
1
)
*
stride_h
+
filter_height
-
in_height
,
0
))
/
2
))
pad_left_tvm
=
np
.
int
(
np
.
ceil
(
float
(
np
.
max
((
out_width
-
1
)
*
stride_w
+
filter_width
-
in_width
,
0
))
/
2
))
pad_top_scipy
=
np
.
int
(
np
.
ceil
(
float
(
filter_height
-
1
)
/
2
))
pad_left_scipy
=
np
.
int
(
np
.
ceil
(
float
(
filter_width
-
1
)
/
2
))
index_h
=
pad_top_scipy
-
pad_top_tvm
index_w
=
pad_left_scipy
-
pad_left_tvm
for
i
in
range
(
batch
):
for
j
in
range
(
out_channel
):
depthwise_conv2d_scipy
[
i
,
j
,:,:]
=
signal
.
convolve2d
(
input_np
[
i
,
j
//
channel_multiplier
,:,:],
np
.
rot90
(
filter_np
[
j
//
channel_multiplier
,
j
%
channel_multiplier
,:,:],
2
),
mode
=
'same'
)[
index_h
:
in_height
:
stride_h
,
index_w
:
in_width
:
stride_w
]
if
padding
==
'VALID'
:
for
i
in
range
(
batch
):
for
j
in
range
(
out_channel
):
depthwise_conv2d_scipy
[
i
,
j
,:,:]
=
signal
.
convolve2d
(
input_np
[
i
,
j
//
channel_multiplier
,:,:],
np
.
rot90
(
filter_np
[
j
//
channel_multiplier
,
j
%
channel_multiplier
,:,:],
2
),
mode
=
'valid'
)[
0
:(
in_height
-
filter_height
+
1
):
stride_h
,
0
:(
in_width
-
filter_height
+
1
):
stride_w
]
for
c
in
range
(
out_channel
):
scale_shift_scipy
[:,
c
,:,:]
=
depthwise_conv2d_scipy
[:,
c
,:,:]
*
scale_np
[
c
]
+
shift_np
[
c
]
relu_scipy
[:,:,:,:]
=
np
.
maximum
(
scale_shift_scipy
[:,:,:,:],
0
)
return
depthwise_conv2d_scipy
,
scale_shift_scipy
,
relu_scipy
input_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Input
.
shape
))
.
astype
(
Input
.
dtype
)
filter_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Filter
.
shape
))
.
astype
(
Filter
.
dtype
)
scale_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Scale
.
shape
))
.
astype
(
Scale
.
dtype
)
shift_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Shift
.
shape
))
.
astype
(
Shift
.
dtype
)
def
check_device
(
device
):
if
not
tvm
.
module
.
enabled
(
device
):
...
...
@@ -66,12 +40,8 @@ def depthwise_conv2d_map_with_workload(batch, in_channel, in_height, channel_mul
f2
=
tvm
.
build
(
s2
,
[
Input
,
Filter
,
Scale
,
Shift
,
ScaleShift
],
device
)
f3
=
tvm
.
build
(
s3
,
[
Input
,
Filter
,
Scale
,
Shift
,
Relu
],
device
)
# prepare data
input_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Input
.
shape
))
.
astype
(
Input
.
dtype
)
filter_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Filter
.
shape
))
.
astype
(
Filter
.
dtype
)
input_tvm
=
tvm
.
nd
.
array
(
input_np
,
ctx
)
filter_tvm
=
tvm
.
nd
.
array
(
filter_np
,
ctx
)
scale_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Scale
.
shape
))
.
astype
(
Scale
.
dtype
)
shift_np
=
np
.
random
.
uniform
(
size
=
get_const_tuple
(
Shift
.
shape
))
.
astype
(
Shift
.
dtype
)
scale_tvm
=
tvm
.
nd
.
array
(
scale_np
,
ctx
)
shift_tvm
=
tvm
.
nd
.
array
(
shift_np
,
ctx
)
depthwise_conv2d_tvm
=
tvm
.
nd
.
array
(
np
.
zeros
(
shape
=
get_const_tuple
(
DepthwiseConv2d
.
shape
),
dtype
=
DepthwiseConv2d
.
dtype
),
ctx
)
...
...
@@ -87,7 +57,11 @@ def depthwise_conv2d_map_with_workload(batch, in_channel, in_height, channel_mul
timer_3
=
f3
.
time_evaluator
(
f3
.
entry_name
,
ctx
,
number
=
1
)
tcost_3
=
timer_3
(
input_tvm
,
filter_tvm
,
scale_tvm
,
shift_tvm
,
relu_tvm
)
.
mean
# correctness with scipy
depthwise_conv2d_scipy
,
scale_shift_scipy
,
relu_scipy
=
depthwise_conv2d_map_scipy
(
input_np
,
filter_np
,
scale_np
,
shift_np
)
depthwise_conv2d_scipy
=
topi
.
testing
.
depthwise_conv2d_python
(
input_np
,
filter_np
,
stride
=
[
stride_h
,
stride_w
],
padding
=
padding
)
scale_shift_scipy
=
np
.
zeros
(
shape
=
get_const_tuple
(
ScaleShift
.
shape
))
for
c
in
range
(
in_channel
*
channel_multiplier
):
scale_shift_scipy
[:,
c
,:,:]
=
depthwise_conv2d_scipy
[:,
c
,:,:]
*
scale_np
[
c
]
+
shift_np
[
c
]
relu_scipy
=
np
.
maximum
(
scale_shift_scipy
,
0
)
np
.
testing
.
assert_allclose
(
depthwise_conv2d_tvm
.
asnumpy
(),
depthwise_conv2d_scipy
,
rtol
=
1e-5
)
np
.
testing
.
assert_allclose
(
scale_shift_tvm
.
asnumpy
(),
scale_shift_scipy
,
rtol
=
1e-5
)
np
.
testing
.
assert_allclose
(
relu_tvm
.
asnumpy
(),
relu_scipy
,
rtol
=
1e-5
)
...
...
@@ -97,16 +71,16 @@ def depthwise_conv2d_map_with_workload(batch, in_channel, in_height, channel_mul
check_device
(
"metal"
)
def
test_depthwise_conv2d
_map
():
depthwise_conv2d_
map_
with_workload
(
1
,
728
,
64
,
1
,
3
,
1
,
"SAME"
)
depthwise_conv2d_
map_
with_workload
(
1
,
728
,
32
,
1
,
3
,
1
,
"SAME"
)
depthwise_conv2d_
map_
with_workload
(
4
,
256
,
64
,
2
,
5
,
2
,
"SAME"
)
depthwise_conv2d_
map_
with_workload
(
4
,
256
,
32
,
2
,
5
,
2
,
"SAME"
)
depthwise_conv2d_
map_
with_workload
(
1
,
728
,
64
,
1
,
3
,
1
,
"VALID"
)
depthwise_conv2d_
map_
with_workload
(
1
,
728
,
32
,
1
,
3
,
1
,
"VALID"
)
depthwise_conv2d_
map_
with_workload
(
4
,
256
,
64
,
2
,
5
,
2
,
"VALID"
)
depthwise_conv2d_
map_
with_workload
(
4
,
256
,
32
,
2
,
5
,
2
,
"VALID"
)
def
test_depthwise_conv2d
():
depthwise_conv2d_with_workload
(
1
,
728
,
64
,
1
,
3
,
1
,
"SAME"
)
depthwise_conv2d_with_workload
(
1
,
728
,
32
,
1
,
3
,
1
,
"SAME"
)
depthwise_conv2d_with_workload
(
4
,
256
,
64
,
2
,
5
,
2
,
"SAME"
)
depthwise_conv2d_with_workload
(
4
,
256
,
32
,
2
,
5
,
2
,
"SAME"
)
depthwise_conv2d_with_workload
(
1
,
728
,
64
,
1
,
3
,
1
,
"VALID"
)
depthwise_conv2d_with_workload
(
1
,
728
,
32
,
1
,
3
,
1
,
"VALID"
)
depthwise_conv2d_with_workload
(
4
,
256
,
64
,
2
,
5
,
2
,
"VALID"
)
depthwise_conv2d_with_workload
(
4
,
256
,
32
,
2
,
5
,
2
,
"VALID"
)
if
__name__
==
"__main__"
:
test_depthwise_conv2d
_map
()
test_depthwise_conv2d
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment