wenyuanbo / tic / Commits / 866d458c

Commit 866d458c, authored Oct 30, 2018 by Lianmin Zheng, committed by Yizhi Liu on Oct 30, 2018
[TOPI][AUTOTVM] Improve style (#2034)
* [TOPI] Improve the style of using autotvm
* fix
parent bc48811f
Showing 4 changed files with 128 additions and 135 deletions
topi/python/topi/arm_cpu/conv2d.py  +76 -81
topi/python/topi/mali/conv2d.py     +48 -39
topi/python/topi/nn/conv2d.py       +0  -11
topi/python/topi/x86/conv2d.py      +4  -4
topi/python/topi/arm_cpu/conv2d.py
@@ -12,34 +12,40 @@ from ..util import traverse_inline, get_const_tuple, const_matrix
 from ..nn import pad, conv2d, conv2d_alter_layout, conv2d_winograd_without_weight_transform
 from ..nn.util import get_const_int, get_pad_tuple

-def _conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype):
-    """convert argument to workload"""
-    if len(kernel.shape) == 4:
-        raw_kernel = kernel
-    else:  # the input kernel is transformed by alter_op_layout
-        shape = get_const_tuple(kernel.shape)
-        raw_kernel = tvm.placeholder((shape[0] * shape[4], shape[1], shape[2], shape[3]),
-                                     dtype=kernel.dtype)
-    return ('conv2d', ) + autotvm.task.args_to_workload(
-        [data, raw_kernel, strides, padding, layout, out_dtype])
-
-@conv2d.register('arm_cpu')
-@autotvm.task.dispatcher
-def conv2d_arm_cpu(data, kernel, strides, padding, layout, out_dtype):
-    """TOPI compute callback. Mark this function as a dispatcher, so
-    this template can assign config according to workload
+@autotvm.register_topi_compute(conv2d, 'arm_cpu', ['direct'])
+def conv2d_arm_cpu(cfg, data, kernel, strides, padding, layout, out_dtype):
+    """TOPI compute callback for conv2d
+
+    Parameters
+    ----------
+    cfg: ConfigEntity
+        The config for this template
+
+    data : tvm.Tensor
+        4-D with shape [batch, in_channel, in_height, in_width]
+
+    kernel : tvm.Tensor
+        4-D with shape [num_filter, in_channel, filter_height, filter_width] or
+        pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
+        filter_width, num_filter_block]
+
+    strides : list of two ints
+        [stride_height, stride_width]
+
+    padding : list of two ints
+        [pad_height, pad_width]
+
+    layout : str
+        layout of data
+
+    out_dtype: str
+        The output type. This is used for mixed precision.

     Returns
     -------
-    workload: Tuple
-        Dispatcher will use this workload to query corresponding config.
-        Then use cfg.template_key to call a registered template.
+    output : tvm.Tensor
+        4-D with shape [batch, out_channel, out_height, out_width]
     """
-    return _conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype)
-
-@conv2d_arm_cpu.register(['direct'])
-def decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype):
-    """spatial packing template"""
     return _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, num_tile=2)

 @autotvm.register_topi_schedule(schedule_conv2d_nchw, 'arm_cpu', ['direct', 'winograd'])
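Note: this hunk is the heart of the style change. The old pattern needed three pieces per backend: a hand-written _conv_arg_to_workload helper to build the dispatch key, a @conv2d.register('arm_cpu') plus @autotvm.task.dispatcher pair, and one .register([...]) sub-function per template. The new @autotvm.register_topi_compute(conv2d, 'arm_cpu', ['direct']) decorator does all of that itself: it derives the workload from the call arguments and injects cfg. A minimal sketch of how such a template is reached at compile time; the shapes and target string below are illustrative assumptions, not taken from the diff:

import tvm
import topi

# hypothetical NCHW workload
data = tvm.placeholder((1, 32, 56, 56), name='data')
kernel = tvm.placeholder((64, 32, 3, 3), name='kernel')

with tvm.target.create('llvm -device=arm_cpu'):
    # topi.nn.conv2d dispatches to conv2d_arm_cpu under this target;
    # autotvm supplies cfg (a fallback config if the workload is untuned)
    out = topi.nn.conv2d(data, kernel, strides=(1, 1), padding=(1, 1),
                         layout='NCHW', out_dtype='float32')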
@@ -93,8 +99,6 @@ def schedule_conv2d_nchw_arm_cpu(cfg, outs):

 def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, num_tile):
     assert layout == "NCHW", "Only support NCHW"
-    # create workload according to raw arguments
-    wkl = _conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype)
     out_dtype = out_dtype or data.dtype
     N, CI, IH, IW = get_const_tuple(data.shape)
     if len(kernel.shape) == 4:
@@ -177,8 +181,7 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n
     output = tvm.compute(oshape, lambda n, co, h, w:
                          conv[n][co//VC][h//VH][w//VW][h%VH][w%VW][co%VC],
-                         name='output_unpack', tag='spatial_conv2d_output',
-                         attrs={'workload': wkl})
+                         name='output_unpack', tag='spatial_conv2d_output')
     return output
 def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec,
@@ -238,16 +241,13 @@ def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec,
     return s
-@conv2d_arm_cpu.register('winograd')
-def decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype):
+@autotvm.register_topi_compute(conv2d, 'arm_cpu', ['winograd'])
+def conv2d_arm_cpu_winograd(cfg, data, kernel, strides, padding, layout, out_dtype):
+    """ TOPI compute callback. Use winograd template """
     tile_size = 4
     return _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size)
 def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size):
-    # create workload according to raw arguments
-    wkl = _winograd_conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype, tile_size)
     N, CI, IH, IW = get_const_tuple(data.shape)
     if len(kernel.shape) == 4:
         pre_computed = False
@@ -368,8 +368,7 @@ def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_
     # unpack output
     output = tvm.compute((N, K, H, W), lambda n, k, h, w:
                          Y[k][n * nH * nW + (h//m) * nW + w//m][h % m][w % m],
-                         name='output', tag='winograd_conv2d_output',
-                         attrs={'workload': wkl})
+                         name='output', tag='winograd_conv2d_output')

     # we have to manually assign effective GFLOP for winograd
     cfg.add_flop(2 * N * K * H * W * KH * KW * C)
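Note: the manual cfg.add_flop is kept because winograd computes through transformed tensors, so counting multiply-adds in the actual compute expression would not reflect the effective convolution work. The formula charges two operations (one multiply, one add) per reduction element per output element. A worked example with assumed shapes:

# assumed workload, for illustration only
N, K, H, W = 1, 64, 56, 56    # batch, output channels, output height/width
KH, KW, C = 3, 3, 32          # kernel size, input channels
print(2 * N * K * H * W * KH * KW * C)   # 115605504, about 0.12 GFLOP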
@@ -458,36 +457,11 @@ def _schedule_winograd(cfg, s, output, last):
     s[output].compute_inline()

-def _winograd_conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype, tile_size):
-    """convert argument to workload"""
-    K = 3
-    shape = get_const_tuple(kernel.shape)
-    alpha = tile_size + K - 1
-    if len(kernel.shape) == 4:
-        assert shape[2:] == (K, K)
-        CO, CI = shape[:2]
-    else:
-        assert shape[:2] == (alpha, alpha)
-        CO, CI, VCO = shape[2:]
-        CO *= VCO
-    raw_kernel = tvm.placeholder((CO, CI, K, K), dtype=kernel.dtype)
-    return ('conv2d', ) + autotvm.task.args_to_workload(
-        [data, raw_kernel, strides, padding, layout, out_dtype])
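Note: with the helper deleted, every workload key now comes from autotvm.task.args_to_workload. A sketch of what that key looks like; the exact tuple layout is an assumption inferred from how this diff uses it, not a documented contract:

import tvm
from tvm import autotvm

data = tvm.placeholder((1, 32, 56, 56), name='data')
kernel = tvm.placeholder((64, 32, 3, 3), name='kernel')

# tensor arguments are normalized to hashable ('TENSOR', shape, dtype)
# entries, so the resulting tuple can index a table of tuned configs
wkl = autotvm.task.args_to_workload(
    [data, kernel, (1, 1), (1, 1), 'NCHW', 'float32'])
print(wkl)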
 ##### REGISTER TOPI COMPUTE / SCHEDULE FOR WINOGRAD WITH WEIGHT TRANSFORM #####
-@conv2d_winograd_without_weight_transform.register(['arm_cpu'])
-@autotvm.task.dispatcher
-def winograd_ww_config_dispatcher_(data, kernel, strides, padding, layout, out_dtype, tile_size):
-    return _winograd_conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype,
-                                          tile_size)
-
-@winograd_ww_config_dispatcher_.register(['winograd'])
-def decl_winograd_ww(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size):
-    return _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size)
+@autotvm.register_topi_compute(conv2d_winograd_without_weight_transform, 'arm_cpu', ['winograd'])
+def conv2d_winograd_ww(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size):
+    """TOPI compute callback"""
+    return _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size)

 @autotvm.register_topi_schedule(schedule_conv2d_winograd_without_weight_transform,
@@ -514,8 +488,7 @@ def _alter_conv2d_layout_arm(attrs, inputs, tinfos):
     new_attrs = {k: attrs[k] for k in attrs.keys()}

-    assert attrs.get_int_tuple("dilation") == (1, 1), "Does not support dilation " \
-        "when alter_op_layout is enabled"
+    dilation = attrs.get_int_tuple("dilation")
     strides = attrs.get_int_tuple("strides")
     padding = attrs.get_int_tuple("padding")
     groups = attrs.get_int('groups')
@@ -523,21 +496,38 @@ def _alter_conv2d_layout_arm(attrs, inputs, tinfos):
     out_dtype = attrs["out_dtype"]
     out_dtype = tinfos[0].dtype if out_dtype == "same" else out_dtype

-    if groups == 1:
+    if layout != 'NCHW' or groups != 1 or dilation != (1, 1):
+        return None
+
+    data, kernel = tinfos[0:2]
+    N, CI, H, W = get_const_tuple(data.shape)
+    CO, _, KH, KW = get_const_tuple(kernel.shape)
+
     # query config of this workload
-    workload = _conv_arg_to_workload(tinfos[0], tinfos[1], strides, padding,
-                                     layout, out_dtype)
-    cfg = autotvm.DispatchContext.current.query(tvm.target.current_target(), workload)
+    workload = autotvm.task.args_to_workload(
+        [data, kernel, strides, padding, layout, out_dtype], conv2d)
+    target = tvm.target.current_target()
+    dispatch_ctx = autotvm.DispatchContext.current
+    cfg = dispatch_ctx.query(target, workload)

     if cfg.is_fallback:  # if is fallback, clear query cache and return None
-        autotvm.task.clear_fallback_cache(tvm.target.current_target(), workload)
+        autotvm.task.clear_fallback_cache(target, workload)
         return None

-    if cfg.template_key == 'direct':  # packing weight tensor
-        new_attrs['kernel_layout'] = 'OIHW%do' % (cfg['tile_co'].size[-1])
+    if cfg.template_key == 'direct':  # pack weight tensor
+        VC = cfg['tile_co'].size[-1]
+        new_attrs['kernel_layout'] = 'OIHW%do' % VC
+
+        # Store the same config for the altered operator (workload)
+        new_data = data
+        new_kernel = tvm.placeholder((CO // VC, CI, KH, KW, VC), dtype=kernel.dtype)
+        new_workload = autotvm.task.args_to_workload(
+            [new_data, new_kernel, strides, padding, 'NCHW', out_dtype], conv2d)
+        dispatch_ctx.update(target, new_workload, cfg)
+
         return sym.conv2d(*copy_inputs, **new_attrs)
     else:  # pre-compute weight transformation in winograd
-        if "-device=arm_cpu" in tvm.target.current_target().options:
+        if "-device=arm_cpu" in target.options:
             tile_size = 4
             VC = cfg['tile_k'].size[-1]
         else:
@@ -545,16 +535,21 @@ def _alter_conv2d_layout_arm(attrs, inputs, tinfos):
             tile_size = _pick_tile_size(tinfos[0], tinfos[1])
             VC = cfg['tile_bna'].val

         weight = sym.contrib.conv2d_winograd_weight_transform(copy_inputs[1],
                                                               tile_size=tile_size)
-        CO, CI, KH, KW = get_const_tuple(tinfos[1].shape)
         weight = sym.reshape(weight,
                              shape=(KH + tile_size - 1, KW + tile_size - 1, CO // VC, VC, CI))
         weight = sym.transpose(weight, axes=[0, 1, 2, 4, 3])

         copy_inputs[1] = weight
         new_attrs['tile_size'] = tile_size

-        return sym.contrib.conv2d_winograd_without_weight_transform(*copy_inputs, **new_attrs)
-
-    # do nothing for depthwise convolution
-    return None
+        # Store the same config for the altered operator (workload)
+        new_data = data
+        new_weight = tvm.placeholder((KH + tile_size - 1, KH + tile_size - 1, CO // VC, CI, VC),
+                                     kernel.dtype)
+        new_workload = autotvm.task.args_to_workload(
+            [new_data, new_weight, strides, padding, new_attrs['layout'], out_dtype, tile_size],
+            conv2d_winograd_without_weight_transform)
+        dispatch_ctx.update(target, new_workload, cfg)
+
+        return sym.contrib.conv2d_winograd_without_weight_transform(*copy_inputs, **new_attrs)
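Note: both branches above now end with dispatch_ctx.update(target, new_workload, cfg). Because alter_op_layout replaces the kernel with a packed or pre-transformed tensor, the operator that actually runs has a different workload key than the one that was tuned; re-registering the queried cfg under the new key lets the later lookup hit the same config instead of falling back. The idea, distilled into a standalone sketch (remap_config and its parameters are hypothetical names; only the autotvm calls come from the diff):

from tvm import autotvm

def remap_config(target, old_args, new_args, topi_func):
    """Re-key a tuned config: query it under the original workload,
    then store it under the workload of the transformed operator."""
    dispatch_ctx = autotvm.DispatchContext.current
    old_workload = autotvm.task.args_to_workload(old_args, topi_func)
    cfg = dispatch_ctx.query(target, old_workload)
    new_workload = autotvm.task.args_to_workload(new_args, topi_func)
    dispatch_ctx.update(target, new_workload, cfg)
    return cfg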
topi/python/topi/mali/conv2d.py
@@ -12,27 +12,43 @@ from ..nn import conv2d, conv2d_winograd_without_weight_transform, \
     get_pad_tuple, pad, conv2d_alter_layout

 # reuse some compute declarations from ARM CPU
-from ..arm_cpu.conv2d import _conv_arg_to_workload, _decl_spatial_pack, \
-    _winograd_conv_arg_to_workload, _alter_conv2d_layout_arm
+from ..arm_cpu.conv2d import _decl_spatial_pack, _alter_conv2d_layout_arm

-@conv2d.register('mali')
-@autotvm.task.dispatcher
-def conv2d_mali(data, kernel, strides, padding, layout, out_dtype):
-    """TOPI compute callback. Mark this function as a dispatcher, so
-    this template can assign config according to workload
+@autotvm.register_topi_compute(conv2d, 'mali', ['direct'])
+def conv2d_mali(cfg, data, kernel, strides, padding, layout, out_dtype):
+    """TOPI compute callback for conv2d
+
+    Parameters
+    ----------
+    cfg: ConfigEntity
+        The config for this template
+
+    data : tvm.Tensor
+        4-D with shape [batch, in_channel, in_height, in_width]
+
+    kernel : tvm.Tensor
+        4-D with shape [num_filter, in_channel, filter_height, filter_width] or
+        pre-packed 5-D with shape [num_filter_chunk, in_channel, filter_height,
+        filter_width, num_filter_block]
+
+    strides : list of two ints
+        [stride_height, stride_width]
+
+    padding : list of two ints
+        [pad_height, pad_width]
+
+    layout : str
+        layout of data
+
+    out_dtype: str
+        The output type. This is used for mixed precision.

     Returns
     -------
-    workload: Tuple
-        Dispatcher will use this workload to query corresponding config.
-        Then use cfg.template_key to call a registered template.
+    output : tvm.Tensor
+        4-D with shape [batch, out_channel, out_height, out_width]
     """
-    return _conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype)
-
-@conv2d_mali.register(['direct'])
-def decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype):
-    """spatial packing template"""
     return _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, num_tile=3)

 @autotvm.register_topi_schedule(schedule_conv2d_nchw, 'mali', ['direct', 'winograd'])
@@ -158,8 +174,8 @@ def _pick_tile_size(data, kernel):
     else:
         return 2

-@conv2d_mali.register('winograd')
-def decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype):
+@autotvm.register_topi_compute(conv2d, 'mali', ['winograd'])
+def conv2d_mali_winograd(cfg, data, kernel, strides, padding, layout, out_dtype):
     tile_size = _pick_tile_size(data, kernel)
     return _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size)
@@ -305,9 +321,7 @@ def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_
                          # thw following term is used to make the padding effective,
                          # otherwise the padding will be eliminated by bound inference
                          + tvm.const(0, out_dtype) * M[alpha-1][alpha-1][CO-1][P_round-1],
-                         name='output', tag='winograd_conv2d_output',
-                         attrs={'workload': _winograd_conv_arg_to_workload(
-                             data, kernel, strides, padding, layout, out_dtype, tile_size)})
+                         name='output', tag='winograd_conv2d_output')

     # we have to manually assign effective GFLOP for winograd
     cfg.add_flop(2 * N * CO * H * W * KH * KW * CI)
@@ -410,28 +424,14 @@ def _schedule_winograd(cfg, s, op):
             s[Y].compute_at(s[output], tt)

-@conv2d_alter_layout.register(["mali"])
-def _alter_conv2d_layout(attrs, inputs, tinfos):
-    try:
-        return _alter_conv2d_layout_arm(attrs, inputs, tinfos)
-    except KeyError:  # to filter out fallback opencl templates
-        return None
-
 ##### REGISTER TOPI COMPUTE / SCHEDULE FOR WINOGRAD WITH WEIGHT TRANSFORM #####
-@conv2d_winograd_without_weight_transform.register(['mali'])
-@autotvm.task.dispatcher
-def winograd_ww_config_dispatcher_(data, kernel, strides, padding, layout, out_dtype, tile_size):
-    return _winograd_conv_arg_to_workload(data, kernel, strides, padding, layout, out_dtype,
-                                          tile_size)
-
-@winograd_ww_config_dispatcher_.register(['winograd'])
-def decl_winograd_ww(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size):
-    return _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size)
+@autotvm.register_topi_compute(conv2d_winograd_without_weight_transform, 'mali', ['winograd'])
+def conv2d_winograd_ww(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size):
+    """TOPI compute callback"""
+    return _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_size)

-@autotvm.task.register_topi_schedule(schedule_conv2d_winograd_without_weight_transform,
+@autotvm.register_topi_schedule(schedule_conv2d_winograd_without_weight_transform,
                                      'mali', ['winograd'])
 def schedule_conv2d_winograd_without_weight_transform_(cfg, outs):
     """TOPI schedule callback"""
@@ -445,6 +445,15 @@ def schedule_conv2d_winograd_without_weight_transform_(cfg, outs):
     return s

+##### REGISTER ALTER OP LAYOUT #####
+@conv2d_alter_layout.register(["mali"])
+def _alter_conv2d_layout(attrs, inputs, tinfos):
+    try:
+        return _alter_conv2d_layout_arm(attrs, inputs, tinfos)
+    except KeyError:  # to filter out fallback opencl templates
+        return None
+
 ##### SCHECULE UTILITIES #####
 def tile_and_bind(s, tensor, y, x, y_factor, x_factor=None):
     """ tile and bind to GPU threads """
topi/python/topi/nn/conv2d.py
@@ -85,17 +85,6 @@ def _get_workload(data, kernel, stride, padding, out_dtype):
     return Workload(data.dtype, out_dtype, IH, IW, CI, CO, KH, KW, HPAD, WPAD, HSTR, WSTR)

-@tvm.target.generic_func
-def _get_schedule(wkl):
-    # pylint: disable=unreachable
-    """ Get the platform specific schedule. """
-    target = tvm.target.current_target()
-    raise RuntimeError(
-        "No schedule for current target:{}".format(target))
-    # This return has no use, merely to supress pylint warning
-    return wkl

 def conv2d_nchw(Input, Filter, stride, padding, out_dtype=None):
     """Convolution operator in NCHW layout.
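Note: the deleted _get_schedule was a tvm.target.generic_func stub that every backend had to override; the autotvm schedule registry makes it dead code. For reference, a minimal sketch of the generic_func mechanism itself (my_schedule is a hypothetical name, not part of the diff):

import tvm

@tvm.target.generic_func
def my_schedule(outs):
    # default implementation, used when no target override is registered
    raise RuntimeError("no schedule for target: %s" % tvm.target.current_target())

@my_schedule.register(["cpu"])
def _my_schedule_cpu(outs):
    # naive override: schedule the output ops without further transformation
    return tvm.create_schedule([x.op for x in outs])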
topi/python/topi/x86/conv2d.py
@@ -3,7 +3,7 @@
 import tvm
 from tvm import autotvm
 from tvm.autotvm.task.nnvm_integration import deserialize_args
-from tvm.autotvm.task import register, get_config
+from tvm.autotvm.task import get_config
 from .. import generic, tag
 from .. import nn
 from ..util import get_const_tuple
@@ -145,7 +145,7 @@ def _declaration_conv_impl(cfg, data, kernel, strides, padding, layout, out_dtyp
     return unpack

-@autotvm.task.register_topi_schedule(generic.schedule_conv2d_nchw, 'cpu', ['direct'])
+@autotvm.register_topi_schedule(generic.schedule_conv2d_nchw, 'cpu', ['direct'])
 def schedule_conv2d(cfg, outs):
     """Create schedule for tensors"""
     s = tvm.create_schedule([x.op for x in outs])
@@ -248,7 +248,7 @@ def schedule_conv2d_nhwc(outs):
 # We define schedule template in this function instead of
 # declaration function since actual input arguments need
 # to be altered by the schedule selected.
-@register("topi_x86_conv2d_NCHWc")
+@autotvm.task.register("topi_x86_conv2d_NCHWc")
 def _topi_nn_conv2d_NCHWc(*args, **kwargs):
     assert not kwargs, "Do not support kwargs in template function call"
     data, kernel, strides, padding, origin_layout, dtype = deserialize_args(args)
@@ -311,7 +311,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfo):
     # (oc, ic, h, w) -> (OC, IC, h, w, ic, oc)
     new_attrs['kernel_layout'] = 'OIHW%di%do' % (ic_bn, oc_bn)

-    # Store altered operator's config
+    # Store the same config for the altered operator (workload)
     new_data = tvm.placeholder((batch_size, in_channel // ic_bn, height, width, ic_bn),
                                dtype=data.dtype)
     new_kernel = tvm.placeholder((out_channel // oc_bn, in_channel // ic_bn, kh, kw, ic_bn, oc_bn),
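Note: the kernel_layout strings seen in this commit encode the blocking factors chosen by the tuned config directly in the layout name. A worked example, assuming block sizes ic_bn=8 and oc_bn=16 (in practice both come from cfg):

ic_bn, oc_bn = 8, 16
print('OIHW%di%do' % (ic_bn, oc_bn))   # -> OIHW8i16o (x86 NCHWc packing)
print('OIHW%do' % oc_bn)               # -> OIHW16o  (the arm_cpu variant)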