Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
T
tic
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
wenyuanbo
tic
Commits
672147c8
Commit
672147c8
authored
Aug 09, 2018
by
Lianmin Zheng
Committed by
Tianqi Chen
Aug 09, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add conv2d transpose and fix bugs (#1566)
parent
6d4cf448
Hide whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
299 additions
and
80 deletions
+299
-80
nnvm/include/nnvm/top/nn.h
+5
-0
nnvm/python/nnvm/testing/dcgan.py
+11
-8
nnvm/python/nnvm/top/nn.py
+5
-1
nnvm/src/top/nn/convolution.cc
+1
-1
nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py
+11
-8
python/tvm/autotvm/measure/measure_methods.py
+3
-1
python/tvm/autotvm/task/dispatcher.py
+28
-3
python/tvm/autotvm/task/nnvm_integration.py
+12
-1
python/tvm/autotvm/tophub.py
+15
-3
python/tvm/autotvm/tuner/callback.py
+2
-2
topi/python/topi/arm_cpu/__init__.py
+1
-0
topi/python/topi/arm_cpu/conv2d.py
+13
-11
topi/python/topi/arm_cpu/conv2d_transpose.py
+144
-0
topi/python/topi/arm_cpu/depthwise_conv2d.py
+11
-6
topi/python/topi/nn/conv2d_transpose.py
+6
-2
topi/tests/python/test_topi_conv2d.py
+1
-1
topi/tests/python/test_topi_conv2d_transpose_nchw.py
+1
-1
tutorials/autotvm/tune_nnvm_arm.py
+28
-30
tutorials/nnvm_quick_start.py
+1
-1
No files found.
nnvm/include/nnvm/top/nn.h
View file @
672147c8
...
...
@@ -254,6 +254,7 @@ struct Conv2DTransposeParam : public dmlc::Parameter<Conv2DTransposeParam> {
int
groups
;
std
::
string
layout
;
std
::
string
kernel_layout
;
int
out_dtype
;
bool
use_bias
;
DMLC_DECLARE_PARAMETER
(
Conv2DTransposeParam
)
{
...
...
@@ -286,6 +287,10 @@ struct Conv2DTransposeParam : public dmlc::Parameter<Conv2DTransposeParam> {
.
describe
(
"Dimension ordering of data and weight. Can be 'OIHW', 'OIHW16o16i', etc."
"'O', 'I', 'H', 'W' stands for num_filter, input_channel, height, and width"
"dimensions respectively."
);
DMLC_DECLARE_DTYPE_FIELD
(
out_dtype
)
.
add_enum
(
"same"
,
-
1
)
.
set_default
(
-
1
)
.
describe
(
"Output data type, set to explicit type under mixed precision setting"
);
DMLC_DECLARE_FIELD
(
use_bias
).
set_default
(
true
)
.
describe
(
"Whether the layer uses a bias vector."
);
}
...
...
nnvm/python/nnvm/testing/dcgan.py
View file @
672147c8
...
...
@@ -42,28 +42,31 @@ def deconv2d_bn_relu(data, prefix, **kwargs):
def
get_symbol
(
oshape
,
ngf
=
128
,
code
=
None
):
"""get symbol of dcgan generator"""
assert
oshape
[
-
1
]
==
32
,
"Only support 32x32
image"
assert
oshape
[
-
2
]
==
32
,
"Only support 32x32
image"
assert
oshape
[
-
1
]
==
64
,
"Only support 64x64
image"
assert
oshape
[
-
2
]
==
64
,
"Only support 64x64
image"
code
=
sym
.
Variable
(
"data"
)
if
code
is
None
else
code
net
=
sym
.
dense
(
code
,
name
=
"g1"
,
units
=
4
*
4
*
ngf
*
4
,
use_bias
=
False
)
net
=
sym
.
dense
(
code
,
name
=
"g1"
,
units
=
4
*
4
*
ngf
*
8
,
use_bias
=
False
)
net
=
sym
.
relu
(
net
)
# 4 x 4
net
=
sym
.
reshape
(
net
,
shape
=
(
-
1
,
ngf
*
4
,
4
,
4
))
net
=
sym
.
reshape
(
net
,
shape
=
(
-
1
,
ngf
*
8
,
4
,
4
))
# 8 x 8
net
=
deconv2d_bn_relu
(
net
,
ishape
=
(
ngf
*
4
,
4
,
4
),
oshape
=
(
ngf
*
2
,
8
,
8
),
kshape
=
(
4
,
4
),
prefix
=
"g2"
)
net
,
ishape
=
(
ngf
*
8
,
4
,
4
),
oshape
=
(
ngf
*
4
,
8
,
8
),
kshape
=
(
4
,
4
),
prefix
=
"g2"
)
# 16x16
net
=
deconv2d_bn_relu
(
net
,
ishape
=
(
ngf
*
2
,
8
,
8
),
oshape
=
(
ngf
,
16
,
16
),
kshape
=
(
4
,
4
),
prefix
=
"g3"
)
net
,
ishape
=
(
ngf
*
4
,
8
,
8
),
oshape
=
(
ngf
*
2
,
16
,
16
),
kshape
=
(
4
,
4
),
prefix
=
"g3"
)
# 32x32
net
=
deconv2d_bn_relu
(
net
,
ishape
=
(
ngf
*
2
,
16
,
16
),
oshape
=
(
ngf
,
32
,
32
),
kshape
=
(
4
,
4
),
prefix
=
"g4"
)
# 64x64
net
=
deconv2d
(
net
,
ishape
=
(
ngf
,
16
,
16
),
oshape
=
oshape
[
-
3
:],
kshape
=
(
4
,
4
),
name
=
"g4
_deconv"
)
net
,
ishape
=
(
ngf
,
32
,
32
),
oshape
=
oshape
[
-
3
:],
kshape
=
(
4
,
4
),
name
=
"g5
_deconv"
)
net
=
sym
.
tanh
(
net
)
return
net
def
get_workload
(
batch_size
,
oshape
=
(
3
,
32
,
32
),
ngf
=
128
,
random_len
=
100
,
dtype
=
"float32"
):
def
get_workload
(
batch_size
,
oshape
=
(
3
,
64
,
64
),
ngf
=
128
,
random_len
=
100
,
dtype
=
"float32"
):
"""Get benchmark workload for a DCGAN generator
Parameters
...
...
nnvm/python/nnvm/top/nn.py
View file @
672147c8
...
...
@@ -251,11 +251,15 @@ def compute_conv2d_transpose(attrs, inputs, _):
strides
=
attrs
.
get_int_tuple
(
"strides"
)
dilation
=
attrs
.
get_int_tuple
(
"dilation"
)
groups
=
attrs
.
get_int
(
"groups"
)
out_dtype
=
attrs
.
get_string
(
"out_dtype"
)
layout
=
attrs
[
"layout"
]
out_dtype
=
inputs
[
0
]
.
dtype
if
out_dtype
==
"same"
else
out_dtype
assert
layout
==
"NCHW"
,
"only support nchw for now"
assert
dilation
==
(
1
,
1
),
"not support dilate now"
assert
groups
==
1
,
"only support groups == 1 for now"
out
=
topi
.
nn
.
conv2d_transpose_nchw
(
inputs
[
0
],
inputs
[
1
],
strides
,
padding
)
out
=
topi
.
nn
.
conv2d_transpose_nchw
(
inputs
[
0
],
inputs
[
1
],
strides
,
padding
,
out_dtype
)
if
attrs
.
get_bool
(
"use_bias"
):
bias
=
inputs
[
2
]
bias
=
topi
.
expand_dims
(
bias
,
axis
=
1
,
num_newaxis
=
2
)
...
...
nnvm/src/top/nn/convolution.cc
View file @
672147c8
...
...
@@ -556,7 +556,7 @@ v (batch_size, channels, out_height, out_width) if `layout` is `NCHW`
.
set_attr
<
FGetAttrDict
>
(
"FGetAttrDict"
,
ParamGetAttrDict
<
Conv2DTransposeParam
>
)
.
set_attr
<
FListInputNames
>
(
"FListInputNames"
,
UseBiasListInputNames
<
Conv2DTransposeParam
>
)
.
set_attr
<
FInferShape
>
(
"FInferShape"
,
Conv2DTransposeInferShape
)
.
set_attr
<
FInferType
>
(
"FInferType"
,
ElemwiseType
<-
1
,
1
>
)
.
set_attr
<
FInferType
>
(
"FInferType"
,
Conv2DInferType
<
Conv2DTransposeParam
>
)
.
set_attr
<
FCorrectLayout
>
(
"FCorrectLayout"
,
Conv2DTransposeCorrectLayout
)
.
set_num_outputs
(
1
)
.
set_num_inputs
(
UseBiasNumInputs
<
Conv2DTransposeParam
>
)
...
...
nnvm/tests/python/frontend/mxnet/model_zoo/dcgan.py
View file @
672147c8
...
...
@@ -40,24 +40,27 @@ def deconv2d_bn_relu(data, prefix, **kwargs):
net
=
mx
.
sym
.
Activation
(
net
,
name
=
"
%
s_act"
%
prefix
,
act_type
=
'relu'
)
return
net
def
get_symbol
(
oshape
=
(
3
,
32
,
32
),
ngf
=
128
,
code
=
None
):
def
get_symbol
(
oshape
=
(
3
,
64
,
64
),
ngf
=
128
,
code
=
None
):
"""get symbol of dcgan generator"""
assert
oshape
[
-
1
]
==
32
,
"Only support 32x32
image"
assert
oshape
[
-
2
]
==
32
,
"Only support 32x32
image"
assert
oshape
[
-
1
]
==
64
,
"Only support 64x64
image"
assert
oshape
[
-
2
]
==
64
,
"Only support 64x64
image"
code
=
mx
.
sym
.
Variable
(
"data"
)
if
code
is
None
else
code
net
=
mx
.
sym
.
FullyConnected
(
code
,
name
=
"g1"
,
num_hidden
=
4
*
4
*
ngf
*
4
,
no_bias
=
True
,
flatten
=
False
)
net
=
mx
.
sym
.
FullyConnected
(
code
,
name
=
"g1"
,
num_hidden
=
ngf
*
8
*
4
*
4
,
no_bias
=
True
,
flatten
=
False
)
net
=
mx
.
sym
.
Activation
(
net
,
act_type
=
'relu'
)
# 4 x 4
net
=
mx
.
sym
.
reshape
(
net
,
shape
=
(
-
1
,
ngf
*
4
,
4
,
4
))
net
=
mx
.
sym
.
reshape
(
net
,
shape
=
(
-
1
,
ngf
*
8
,
4
,
4
))
# 8 x 8
net
=
deconv2d_bn_relu
(
net
,
ishape
=
(
ngf
*
4
,
4
,
4
),
oshape
=
(
ngf
*
2
,
8
,
8
),
kshape
=
(
4
,
4
),
prefix
=
"g2"
)
net
,
ishape
=
(
ngf
*
8
,
4
,
4
),
oshape
=
(
ngf
*
4
,
8
,
8
),
kshape
=
(
4
,
4
),
prefix
=
"g2"
)
# 16x16
net
=
deconv2d_bn_relu
(
net
,
ishape
=
(
ngf
*
2
,
8
,
8
),
oshape
=
(
ngf
,
16
,
16
),
kshape
=
(
4
,
4
),
prefix
=
"g3"
)
net
,
ishape
=
(
ngf
*
4
,
8
,
8
),
oshape
=
(
ngf
*
2
,
16
,
16
),
kshape
=
(
4
,
4
),
prefix
=
"g3"
)
# 32x32
net
=
deconv2d_bn_relu
(
net
,
ishape
=
(
ngf
*
2
,
16
,
16
),
oshape
=
(
ngf
,
32
,
32
),
kshape
=
(
4
,
4
),
prefix
=
"g4"
)
# 64x64
net
=
deconv2d
(
net
,
ishape
=
(
ngf
,
16
,
16
),
oshape
=
oshape
[
-
3
:],
kshape
=
(
4
,
4
),
name
=
"g4
_deconv"
)
net
,
ishape
=
(
ngf
,
32
,
32
),
oshape
=
oshape
[
-
3
:],
kshape
=
(
4
,
4
),
name
=
"g5
_deconv"
)
net
=
mx
.
sym
.
Activation
(
net
,
act_type
=
'tanh'
)
return
net
python/tvm/autotvm/measure/measure_methods.py
View file @
672147c8
...
...
@@ -345,7 +345,9 @@ def _measure_common(input_pack, build_func, build_kwargs, number, repeat,
msg
=
msg
.
split
(
'
\n
'
)[
-
2
]
.
split
(
": "
)[
1
]
except
Exception
:
# pylint: disable=broad-except
pass
raise
InstantiationError
(
msg
)
res_pack
.
append
(
MeasureResult
((
InstantiationError
(
msg
),),
MeasureErrorNo
.
INSTANTIATION_ERROR
,
tstamp
-
tic
,
tstamp
))
else
:
res_pack
.
append
(
MeasureResult
((
RuntimeError
(
msg
),),
MeasureErrorNo
.
COMPILE_HOST
,
...
...
python/tvm/autotvm/task/dispatcher.py
View file @
672147c8
...
...
@@ -21,6 +21,8 @@ import numpy as np
from
tvm
import
target
as
_target
from
.space
import
ConfigSpace
logger
=
logging
.
getLogger
(
'autotvm'
)
class
DispatchContext
(
object
):
...
...
@@ -120,7 +122,12 @@ def dispatcher(fworkload):
raise
RuntimeError
(
"DispatchContext is not initialized"
)
workload
=
func
(
*
args
,
**
kwargs
)
cfg
=
context
.
query
(
tgt
,
workload
)
return
dispatch_dict
[
cfg
.
template_key
](
cfg
,
*
args
,
**
kwargs
)
if
cfg
.
template_key
:
return
dispatch_dict
[
cfg
.
template_key
](
cfg
,
*
args
,
**
kwargs
)
else
:
assert
dispatch_dict
,
"No func registered for this dispatcher"
for
v
in
dispatch_dict
.
values
():
return
v
(
cfg
,
*
args
,
**
kwargs
)
fdecorate
=
decorate
(
fworkload
,
dispatch_func
)
fdecorate
.
register
=
register
...
...
@@ -159,13 +166,18 @@ class ApplyHistoryBest(DispatchContext):
Otherwise, it is an iterator.
default: ConfigEntity, optional
The default config to return when no history records
allow_fallback: bool
Whether allow to use a fallback configuration if cannot find
tuned result.
"""
def
__init__
(
self
,
records
,
default
=
None
):
def
__init__
(
self
,
records
,
default
=
None
,
allow_fallback
=
False
):
super
(
ApplyHistoryBest
,
self
)
.
__init__
()
self
.
best_by_targetkey
=
{}
self
.
best_by_model
=
{}
self
.
_default
=
default
self
.
_allow_fallback
=
allow_fallback
self
.
fallback
=
{}
if
records
:
self
.
load
(
records
)
...
...
@@ -244,5 +256,18 @@ class ApplyHistoryBest(DispatchContext):
if
self
.
_default
:
return
self
.
_default
if
self
.
_allow_fallback
:
key
=
(
target
,
workload
)
if
key
in
self
.
fallback
:
return
self
.
fallback
[
key
]
logger
.
warning
(
"Cannot find config for target=
%
s, workload=
%
s. A fallback configuration "
"is used, which may bring great performance regression."
,
target
,
workload
)
cfg
=
ConfigSpace
()
self
.
fallback
[
key
]
=
cfg
return
cfg
raise
RuntimeError
(
"Cannot find config for target=
%
s, workload=
%
s"
%
(
target
,
workload
))
"Cannot find config for target=
%
s, workload=
%
s. You need to do tuning "
"for this workload to get the config."
%
(
target
,
workload
))
python/tvm/autotvm/task/nnvm_integration.py
View file @
672147c8
...
...
@@ -53,12 +53,14 @@ class TaskExtractEnv:
import
nnvm
self
.
symbol2topi
=
{
nnvm
.
sym
.
conv2d
:
[
topi
.
nn
.
conv2d
,
topi
.
nn
.
depthwise_conv2d_nchw
]
nnvm
.
sym
.
conv2d
:
[
topi
.
nn
.
conv2d
,
topi
.
nn
.
depthwise_conv2d_nchw
],
nnvm
.
sym
.
conv2d_transpose
:
[
topi
.
nn
.
conv2d_transpose
],
}
self
.
topi_to_task
=
{
topi
.
nn
.
conv2d
:
"topi_nn_conv2d"
,
topi
.
nn
.
depthwise_conv2d_nchw
:
"topi_nn_depthwise_conv2d_nchw"
,
topi
.
nn
.
conv2d_transpose_nchw
:
"topi_nn_conv2d_transpose_nchw"
,
}
self
.
_register_dummy
()
...
...
@@ -110,6 +112,15 @@ class TaskExtractEnv:
s
=
topi
.
generic
.
schedule_depthwise_conv2d_nchw
([
C
])
return
s
,
[
A
,
W
,
C
]
@register
(
"topi_nn_conv2d_transpose_nchw"
)
def
_topi_nn_conv2d_transpose_nchw
(
*
args
,
**
kwargs
):
assert
not
kwargs
,
"Do not support kwargs in template function call"
args
=
deserialize_args
(
args
)
A
,
W
=
args
[:
2
]
C
=
topi
.
nn
.
conv2d_transpose_nchw
(
*
args
,
**
kwargs
)
s
=
topi
.
generic
.
schedule_conv2d_transpose_nchw
([
C
])
return
s
,
[
A
,
W
,
C
]
def
reset
(
self
):
"""Reset task collections"""
self
.
task_collection
=
[]
...
...
python/tvm/autotvm/tophub.py
View file @
672147c8
...
...
@@ -9,6 +9,7 @@ TVM will download these parameters for you when you create the target for the fi
import
logging
import
os
import
json
import
sys
from
.task
import
ApplyHistoryBest
from
..
import
target
as
_target
...
...
@@ -27,7 +28,7 @@ def _alias(name):
return
table
.
get
(
name
,
name
)
def
context
(
target
,
extra_files
=
None
):
def
context
(
target
,
extra_files
=
None
,
allow_fallback
=
False
):
"""Return the dispatch context with pre-tuned parameters.
The corresponding downloaded *.log files under tophub root path will be loaded.
Users can also add their own files in argument `extra_files`.
...
...
@@ -38,9 +39,12 @@ def context(target, extra_files=None):
The compilation target
extra_files: list of str, optional
Extra log files to load
allow_fallback: bool
Whether allow to use a fallback configuration if cannot find
tuned result.
"""
rootpath
=
AUTOTVM_TOPHUB_ROOT_PATH
best_context
=
ApplyHistoryBest
([])
best_context
=
ApplyHistoryBest
([]
,
allow_fallback
=
allow_fallback
)
if
isinstance
(
target
,
str
):
target
=
_target
.
create
(
target
)
...
...
@@ -99,7 +103,15 @@ def check_package(backend):
if
os
.
path
.
isfile
(
os
.
path
.
join
(
AUTOTVM_TOPHUB_ROOT_PATH
,
backend
+
".log"
)):
return
download_package
(
backend
)
if
sys
.
version_info
>=
(
3
,):
import
urllib.request
as
urllib2
else
:
import
urllib2
try
:
download_package
(
backend
)
except
urllib2
.
URLError
:
logging
.
warning
(
"Failed to download tophub package for
%
s"
,
backend
)
def
list_packages
():
...
...
python/tvm/autotvm/tuner/callback.py
View file @
672147c8
...
...
@@ -118,8 +118,8 @@ def progress_bar(total, prefix=''):
ctx
.
cur_flops
=
flops
ctx
.
best_flops
=
tuner
.
best_flops
sys
.
stdout
.
write
(
'
%
s Current/Best:
%7.2
f/
%7.2
f GFLOPS | Progress: (
%
d/
%
d) '
'|
%.2
f s
\r
'
%
sys
.
stdout
.
write
(
'
\r
%
s Current/Best:
%7.2
f/
%7.2
f GFLOPS | Progress: (
%
d/
%
d) '
'|
%.2
f s'
%
(
prefix
,
ctx
.
cur_flops
/
1e9
,
ctx
.
best_flops
/
1e9
,
ctx
.
ct
,
ctx
.
total
,
time
.
time
()
-
tic
))
sys
.
stdout
.
flush
()
...
...
topi/python/topi/arm_cpu/__init__.py
View file @
672147c8
...
...
@@ -2,4 +2,5 @@
from
.
import
conv2d
from
.
import
depthwise_conv2d
from
.
import
conv2d_transpose
from
.
import
bitserial_conv2d
topi/python/topi/arm_cpu/conv2d.py
View file @
672147c8
...
...
@@ -42,7 +42,7 @@ def schedule_conv2d_nchw_arm_cpu(cfg, outs):
def
_callback
(
op
):
# schedule conv2d
if
'spatial_conv_output'
in
op
.
tag
:
if
'spatial_conv
2d
_output'
in
op
.
tag
:
output
=
op
.
output
(
0
)
conv
=
op
.
input_tensors
[
0
]
...
...
@@ -60,7 +60,7 @@ def schedule_conv2d_nchw_arm_cpu(cfg, outs):
_schedule_spatial_pack
(
cfg
,
s
,
data_vec
,
kernel_vec
,
conv
,
output
,
outs
[
0
])
if
'winograd_conv_output'
in
op
.
tag
:
if
'winograd_conv
2d
_output'
in
op
.
tag
:
output
=
op
.
output
(
0
)
_schedule_winograd
(
cfg
,
s
,
output
,
outs
[
0
])
...
...
@@ -72,7 +72,7 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n
assert
layout
==
"NCHW"
,
"Only support NCHW"
out_dtype
=
out_dtype
or
data
.
dtype
_
,
CI
,
IH
,
IW
=
get_const_tuple
(
data
.
shape
)
N
,
CI
,
IH
,
IW
=
get_const_tuple
(
data
.
shape
)
if
len
(
kernel
.
shape
)
==
4
:
pre_packed
=
False
CO
,
_
,
KH
,
KW
=
get_const_tuple
(
kernel
.
shape
)
...
...
@@ -81,13 +81,12 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n
CO
,
_
,
KH
,
KW
,
VC
=
get_const_tuple
(
kernel
.
shape
)
CO
=
CO
*
VC
pad_top
,
pad_left
,
pad_
down
,
pad_right
=
get_pad_tuple
(
padding
,
(
KH
,
KW
))
pad_top
,
pad_left
,
pad_
bottom
,
pad_right
=
get_pad_tuple
(
padding
,
(
KH
,
KW
))
HSTR
,
WSTR
=
strides
if
isinstance
(
strides
,
(
tuple
,
list
))
else
(
strides
,
strides
)
N
=
1
OH
=
(
IH
+
pad_top
+
pad_down
-
KH
)
//
HSTR
+
1
OH
=
(
IH
+
pad_top
+
pad_bottom
-
KH
)
//
HSTR
+
1
OW
=
(
IW
+
pad_left
+
pad_right
-
KW
)
//
WSTR
+
1
data_pad
=
pad
(
data
,
[
0
,
0
,
pad_top
,
pad_left
],
[
0
,
0
,
pad_
down
,
pad_right
])
data_pad
=
pad
(
data
,
[
0
,
0
,
pad_top
,
pad_left
],
[
0
,
0
,
pad_
bottom
,
pad_right
])
# ==================== define configuration space ====================
n
,
co
,
oh
,
ow
=
cfg
.
axis
(
N
),
cfg
.
axis
(
CO
),
cfg
.
axis
(
OH
),
cfg
.
axis
(
OW
)
...
...
@@ -145,7 +144,7 @@ def _decl_spatial_pack(cfg, data, kernel, strides, padding, layout, out_dtype, n
output
=
tvm
.
compute
(
oshape
,
lambda
n
,
co
,
h
,
w
:
conv
[
n
][
co
//
VC
][
h
//
VH
][
w
//
VW
][
h
%
VH
][
w
%
VW
][
co
%
VC
],
name
=
'output_unpack'
,
tag
=
'spatial_conv_output'
,
name
=
'output_unpack'
,
tag
=
'spatial_conv
2d
_output'
,
attrs
=
{
'workload'
:
_conv_arg_to_workload
(
data
,
kernel
,
strides
,
padding
,
layout
,
out_dtype
)})
return
output
...
...
@@ -195,11 +194,14 @@ def _schedule_spatial_pack(cfg, s, data_vec, kernel_vec,
if
kernel_vec
.
op
.
name
==
'kernel_vec'
:
co
,
_
,
_
,
_
,
_
=
s
[
kernel_vec
]
.
op
.
axis
if
autotvm
.
GLOBAL_SCOPE
.
in_tuning
:
# kernel packing will be pre-computed during comp
li
ation, so we skip
# kernel packing will be pre-computed during comp
il
ation, so we skip
# this part to make tuning records correct
s
[
kernel_vec
]
.
pragma
(
co
,
'debug_skip_region'
)
else
:
s
[
kernel_vec
]
.
parallel
(
co
)
elif
kernel_vec
.
op
.
name
==
'kernel_vec_conv2d_transpose'
:
# for conv2d transpose
co
,
_
,
_
,
_
,
_
=
s
[
kernel_vec
]
.
op
.
axis
s
[
kernel_vec
]
.
parallel
(
co
)
return
s
...
...
@@ -330,7 +332,7 @@ def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_
# unpack output
output
=
tvm
.
compute
((
N
,
K
,
H
,
W
),
lambda
n
,
k
,
h
,
w
:
Y
[
k
][
n
*
nH
*
nW
+
(
h
//
m
)
*
nW
+
w
//
m
][
h
%
m
][
w
%
m
],
name
=
'output'
,
tag
=
'winograd_conv_output'
,
name
=
'output'
,
tag
=
'winograd_conv
2d
_output'
,
attrs
=
{
'workload'
:
_winograd_conv_arg_to_workload
(
data
,
kernel
,
strides
,
padding
,
layout
,
out_dtype
,
tile_size
)})
...
...
@@ -462,7 +464,7 @@ def schedule_conv2d_winograd_without_weight_transform_(cfg, outs):
s
=
tvm
.
create_schedule
([
x
.
op
for
x
in
outs
])
def
_callback
(
op
):
if
'winograd_conv_output'
in
op
.
tag
:
if
'winograd_conv
2d
_output'
in
op
.
tag
:
output
=
op
.
output
(
0
)
_schedule_winograd
(
cfg
,
s
,
output
,
outs
[
0
])
...
...
topi/python/topi/arm_cpu/conv2d_transpose.py
0 → 100644
View file @
672147c8
# pylint: disable=invalid-name, unused-variable
"""Transposed 2D convolution operators (sometimes called Deconvolution)."""
from
__future__
import
absolute_import
as
_abs
import
tvm
from
tvm
import
autotvm
from
..generic
import
schedule_conv2d_transpose_nchw
from
..nn
import
conv2d_transpose_nchw
,
dilate
,
pad
,
get_pad_tuple
from
..util
import
get_const_tuple
,
traverse_inline
from
.conv2d
import
_schedule_spatial_pack
@autotvm.task.register_topi_compute
(
conv2d_transpose_nchw
,
"arm_cpu"
,
"direct"
)
def
conv2d_transpose_nchw_arm
(
cfg
,
Input
,
Filter
,
strides
,
padding
,
out_dtype
):
"""Transposed 2D convolution nchw forward operator.
Parameters
----------
Input : tvm.Tensor
4-D with shape [batch, in_channel, in_height, in_width]
Filter : tvm.Tensor
4-D with shape [in_channel, num_filter, filter_height, filter_width]
strides : tuple of two ints
The spatial stride along height and width
padding : int or str
Padding size, or ['VALID', 'SAME']
out_dtype: str
The output data type. This is used for mixed precision.
Returns
-------
Output : tvm.Tensor
4-D with shape [batch, out_channel, out_height, out_width]
"""
return
_decl_spatial_pack
(
cfg
,
Input
,
Filter
,
strides
,
padding
,
"NCHW"
,
out_dtype
,
2
)
def
_decl_spatial_pack
(
cfg
,
data
,
kernel
,
strides
,
padding
,
layout
,
out_dtype
,
num_tile
):
assert
layout
==
"NCHW"
,
"Only support NCHW"
out_dtype
=
out_dtype
or
data
.
dtype
N
,
CI
,
IH
,
IW
=
get_const_tuple
(
data
.
shape
)
_
,
CO
,
KH
,
KW
=
get_const_tuple
(
kernel
.
shape
)
pad_top
,
pad_left
,
pad_bottom
,
pad_right
=
get_pad_tuple
(
padding
,
(
KH
,
KW
))
bpad_top
,
bpad_bottom
=
KH
-
1
-
pad_top
,
KH
-
1
-
pad_bottom
bpad_left
,
bpad_right
=
KW
-
1
-
pad_left
,
KW
-
1
-
pad_right
HSTR
,
WSTR
=
strides
if
isinstance
(
strides
,
(
tuple
,
list
))
else
(
strides
,
strides
)
OH
=
(
IH
-
1
)
*
HSTR
-
pad_top
-
pad_bottom
+
KH
OW
=
(
IW
-
1
)
*
WSTR
-
pad_left
-
pad_right
+
KW
dilated_input
=
dilate
(
data
,
[
1
,
1
,
HSTR
,
WSTR
])
data_pad
=
pad
(
dilated_input
,
[
0
,
0
,
bpad_top
,
bpad_left
],
[
0
,
0
,
bpad_bottom
,
bpad_right
])
# ==================== define configuration space ====================
n
,
co
,
oh
,
ow
=
cfg
.
axis
(
N
),
cfg
.
axis
(
CO
),
cfg
.
axis
(
OH
),
cfg
.
axis
(
OW
)
ci
,
kh
,
kw
=
cfg
.
reduce_axis
(
CI
),
cfg
.
reduce_axis
(
KH
),
cfg
.
reduce_axis
(
KW
)
if
num_tile
==
2
:
# for arm cpu
co
,
vc
=
cfg
.
define_split
(
'tile_co'
,
co
,
num_outputs
=
2
)
oh
,
vh
=
cfg
.
define_split
(
'tile_oh'
,
oh
,
num_outputs
=
2
)
ow
,
vw
=
cfg
.
define_split
(
'tile_ow'
,
ow
,
num_outputs
=
2
)
elif
num_tile
==
3
:
# for mali gpu
co
,
_
,
vc
=
cfg
.
define_split
(
'tile_co'
,
co
,
num_outputs
=
3
)
oh
,
_
,
vh
=
cfg
.
define_split
(
'tile_oh'
,
oh
,
num_outputs
=
3
)
ow
,
_
,
vw
=
cfg
.
define_split
(
'tile_ow'
,
ow
,
num_outputs
=
3
)
else
:
raise
RuntimeError
(
"Invalid num_tile"
)
cfg
.
define_reorder
(
"reorder_0"
,
[
n
,
co
,
oh
,
ow
,
ci
,
kh
,
kw
,
vh
,
vw
,
vc
],
policy
=
'candidate'
,
candidate
=
[
[
n
,
co
,
oh
,
ow
,
ci
,
kh
,
kw
,
vh
,
vw
,
vc
],
[
n
,
co
,
oh
,
ow
,
ci
,
kh
,
kw
,
vc
,
vh
,
vw
]])
cfg
.
define_annotate
(
"ann_reduce"
,
[
kh
,
kw
],
policy
=
'try_unroll'
)
cfg
.
define_annotate
(
"ann_spatial"
,
[
vh
,
vw
,
vc
],
policy
=
'try_unroll_vec'
)
# ====================================================================
VC
=
cfg
[
"tile_co"
]
.
size
[
-
1
]
VH
=
cfg
[
"tile_oh"
]
.
size
[
-
1
]
VW
=
cfg
[
"tile_ow"
]
.
size
[
-
1
]
dvshape
=
(
N
,
OH
//
VH
,
OW
//
VW
,
CI
,
VH
+
KH
-
1
,
VW
+
KW
-
1
)
kvshape
=
(
CO
//
VC
,
CI
,
KH
,
KW
,
VC
)
ovshape
=
(
N
,
CO
//
VC
,
OH
//
VH
,
OW
//
VW
,
VH
,
VW
,
VC
)
oshape
=
(
N
,
CO
,
OH
,
OW
)
data_vec
=
tvm
.
compute
(
dvshape
,
lambda
n
,
h
,
w
,
ci
,
vh
,
vw
:
data_pad
[
n
][
ci
][
h
*
VH
+
vh
][
w
*
VW
+
vw
],
name
=
'data_vec'
)
kernel_vec
=
tvm
.
compute
(
kvshape
,
lambda
co
,
ci
,
kh
,
kw
,
vc
:
kernel
[
ci
][
co
*
VC
+
vc
][
kh
][
kw
],
name
=
'kernel_vec_conv2d_transpose'
)
ci
=
tvm
.
reduce_axis
((
0
,
CI
),
name
=
'ci'
)
kh
=
tvm
.
reduce_axis
((
0
,
KH
),
name
=
'kh'
)
kw
=
tvm
.
reduce_axis
((
0
,
KW
),
name
=
'kw'
)
conv
=
tvm
.
compute
(
ovshape
,
lambda
n
,
co
,
h
,
w
,
vh
,
vw
,
vc
:
\
tvm
.
sum
(
data_vec
[
n
,
h
,
w
,
ci
,
vh
+
kh
,
vw
+
kw
]
.
astype
(
out_dtype
)
*
kernel_vec
[
co
,
ci
,
KH
-
1
-
kh
,
KW
-
1
-
kw
,
vc
]
.
astype
(
out_dtype
),
axis
=
[
ci
,
kh
,
kw
]),
name
=
'conv'
)
output
=
tvm
.
compute
(
oshape
,
lambda
n
,
co
,
h
,
w
:
conv
[
n
][
co
//
VC
][
h
//
VH
][
w
//
VW
][
h
%
VH
][
w
%
VW
][
co
%
VC
],
name
=
'output_unpack'
,
tag
=
'spatial_conv2d_transpose_output'
)
return
output
# register customized schedule for arm cpu.
@autotvm.task.register_topi_schedule
(
schedule_conv2d_transpose_nchw
,
"arm_cpu"
,
"direct"
)
def
schedule_conv2d_transpose_arm
(
cfg
,
outs
):
"""Schedule conv2d transpose for arm cpu"""
s
=
tvm
.
create_schedule
([
x
.
op
for
x
in
outs
])
def
_callback
(
op
):
if
'spatial_conv2d_transpose_output'
in
op
.
tag
:
output
=
op
.
output
(
0
)
conv
=
op
.
input_tensors
[
0
]
data_vec
=
conv
.
op
.
input_tensors
[
0
]
data_pad
=
data_vec
.
op
.
input_tensors
[
0
]
dilated_input
=
data_pad
.
op
.
input_tensors
[
0
]
s
[
data_pad
]
.
compute_inline
()
s
[
dilated_input
]
.
compute_inline
()
kernel_vec
=
conv
.
op
.
input_tensors
[
1
]
if
kernel_vec
.
op
.
name
==
'kernel_vec'
:
kernel
=
kernel_vec
.
op
.
input_tensors
[
0
]
else
:
kernel
=
kernel_vec
if
isinstance
(
kernel
.
op
,
tvm
.
tensor
.
ComputeOp
)
and
"dilate"
in
kernel
.
op
.
tag
:
s
[
kernel
]
.
compute_inline
()
_schedule_spatial_pack
(
cfg
,
s
,
data_vec
,
kernel_vec
,
conv
,
output
,
outs
[
0
])
traverse_inline
(
s
,
outs
[
0
]
.
op
,
_callback
)
return
s
topi/python/topi/arm_cpu/depthwise_conv2d.py
View file @
672147c8
...
...
@@ -15,7 +15,16 @@ autotvm.task.register_topi_compute(depthwise_conv2d_nchw, 'arm_cpu', 'direct',
# register customized schedule for arm cpu.
@autotvm.task.register_topi_schedule
(
schedule_depthwise_conv2d_nchw
,
'arm_cpu'
,
'direct'
)
def
schedule_depthwise_conv2d_nchw_
(
cfg
,
outs
):
"""Schedule depthwise conv2d"""
"""Schedule depthwise conv2d
Parameters
----------
cfg: ConfigEntity
The configuration of this tempalte
outs: Array of Tensor
The computation graph description of depthwise convolution2d
in the format of an array of tensors.
"""
outs
=
[
outs
]
if
isinstance
(
outs
,
tvm
.
tensor
.
Tensor
)
else
outs
s
=
tvm
.
create_schedule
([
x
.
op
for
x
in
outs
])
...
...
@@ -79,10 +88,8 @@ def schedule_depthwise_conv2d_nchw_(cfg, outs):
return
s
scheduled_ops
=
[]
def
_callback
(
op
):
if
op
.
tag
==
'depthwise_conv2d_nchw'
and
op
not
in
scheduled_ops
:
if
op
.
tag
==
'depthwise_conv2d_nchw'
:
output
=
op
.
output
(
0
)
kernel
=
op
.
input_tensors
[
1
]
data
=
op
.
input_tensors
[
0
]
...
...
@@ -92,7 +99,5 @@ def schedule_depthwise_conv2d_nchw_(cfg, outs):
data
=
data_pad
.
op
.
input_tensors
[
0
]
_schedule
(
cfg
,
s
,
data
,
data_pad
,
kernel
,
output
)
scheduled_ops
.
append
(
op
)
traverse_inline
(
s
,
outs
[
0
]
.
op
,
_callback
)
return
s
topi/python/topi/nn/conv2d_transpose.py
View file @
672147c8
...
...
@@ -10,7 +10,7 @@ from ..util import simplify
@tvm.target.generic_func
def
conv2d_transpose_nchw
(
Input
,
Filter
,
strides
,
padding
):
def
conv2d_transpose_nchw
(
Input
,
Filter
,
strides
,
padding
,
out_dtype
):
"""Transposed 2D convolution nchw forward operator.
Parameters
...
...
@@ -27,6 +27,9 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding):
padding : int or str
Padding size, or ['VALID', 'SAME']
out_dtype : str
The output data type. This is used for mixed precision.
Returns
-------
Output : tvm.Tensor
...
...
@@ -58,7 +61,8 @@ def conv2d_transpose_nchw(Input, Filter, strides, padding):
Output
=
tvm
.
compute
(
(
batch
,
out_c
,
out_h
,
out_w
),
lambda
b
,
c
,
h
,
w
:
tvm
.
sum
(
PaddedInput
[
b
,
dc
,
h
+
dh
,
w
+
dw
]
*
Filter
[
dc
,
c
,
filter_h
-
1
-
dh
,
filter_w
-
1
-
dw
],
PaddedInput
[
b
,
dc
,
h
+
dh
,
w
+
dw
]
.
astype
(
out_dtype
)
*
Filter
[
dc
,
c
,
filter_h
-
1
-
dh
,
filter_w
-
1
-
dw
]
.
astype
(
out_dtype
),
axis
=
[
dc
,
dh
,
dw
]),
tag
=
"conv2d_transpose_nchw"
)
return
Output
topi/tests/python/test_topi_conv2d.py
View file @
672147c8
...
...
@@ -40,7 +40,7 @@ def verify_conv2d(batch, in_size, in_channel, num_filter, kernel, stride, paddin
np
.
testing
.
assert_allclose
(
b
.
asnumpy
(),
b_np
,
rtol
=
1e-5
)
def
test_conv2d
():
with
autotvm
.
tophub
.
context
(
tvm
.
target
.
arm_cpu
(
'rasp3b'
)):
with
autotvm
.
tophub
.
context
(
tvm
.
target
.
arm_cpu
(
'rasp3b'
)
,
allow_fallback
=
True
):
verify_conv2d
(
1
,
56
,
64
,
64
,
3
,
1
,
1
)
if
__name__
==
"__main__"
:
...
...
topi/tests/python/test_topi_conv2d_transpose_nchw.py
View file @
672147c8
...
...
@@ -12,7 +12,7 @@ def verify_conv2d_transpose_nchw(batch, in_channel, in_size, num_filter, kernel,
A
=
tvm
.
placeholder
((
batch
,
in_channel
,
in_height
,
in_width
),
name
=
'A'
)
W
=
tvm
.
placeholder
((
in_channel
,
num_filter
,
kernel
,
kernel
),
name
=
'W'
)
B
=
topi
.
nn
.
conv2d_transpose_nchw
(
A
,
W
,
[
stride
,
stride
],
padding
)
B
=
topi
.
nn
.
conv2d_transpose_nchw
(
A
,
W
,
[
stride
,
stride
],
padding
,
A
.
dtype
)
C
=
topi
.
nn
.
relu
(
B
)
a_shape
=
get_const_tuple
(
A
.
shape
)
...
...
tutorials/autotvm/tune_nnvm_arm.py
View file @
672147c8
...
...
@@ -62,7 +62,7 @@ import tvm.contrib.graph_runtime as runtime
def
get_network
(
name
,
batch_size
):
"""Get the symbol definition and random weight of a network"""
shape
=
{
"data"
:
(
batch_size
,
3
,
224
,
224
)}
input_shape
=
(
batch_size
,
3
,
224
,
224
)
output_shape
=
(
batch_size
,
1000
)
if
name
==
'resnet-18'
:
...
...
@@ -90,7 +90,7 @@ def get_network(name, batch_size):
else
:
raise
ValueError
(
"Unsupported network: "
+
name
)
return
net
,
params
,
shape
,
output_shape
return
net
,
params
,
input_
shape
,
output_shape
#################################################################
# Start RPC Tracker
...
...
@@ -226,8 +226,8 @@ tuning_option = {
def
tune_tasks
(
tasks
,
measure_option
,
tuner
=
'xgb'
,
n_trial
=
5
00
,
early_stopping
=
200
,
n_trial
=
10
00
,
early_stopping
=
None
,
log_filename
=
'tuning.log'
,
use_transfer_learning
=
True
,
try_winograd
=
True
):
...
...
@@ -283,10 +283,10 @@ def tune_tasks(tasks,
def
tune_and_evaluate
():
# extract workloads from nnvm graph
print
(
"Extract tasks..."
)
net
,
params
,
shape
,
out_shape
=
get_network
(
network
,
batch_size
=
1
)
tasks
=
autotvm
.
task
.
extract_from_graph
(
net
,
shape
=
shape
,
dtype
=
dtype
,
s
ymbols
=
(
nnvm
.
sym
.
conv2d
,)
,
target
=
target
)
net
,
params
,
input_
shape
,
out_shape
=
get_network
(
network
,
batch_size
=
1
)
tasks
=
autotvm
.
task
.
extract_from_graph
(
net
,
target
=
target
,
s
hape
=
{
'data'
:
input_shape
},
dtype
=
dtype
,
symbols
=
(
nnvm
.
sym
.
conv2d
,)
)
# run tuning tasks
print
(
"Tuning..."
)
...
...
@@ -298,7 +298,7 @@ def tune_and_evaluate():
with
nnvm
.
compiler
.
build_config
(
opt_level
=
2
,
add_pass
=
[
'AlterOpLayout'
]):
graph
,
lib
,
params
=
nnvm
.
compiler
.
build
(
net
,
target
=
target
,
shape
=
shape
,
params
=
params
,
dtype
=
dtype
)
shape
=
{
'data'
:
input_shape
}
,
params
=
params
,
dtype
=
dtype
)
# export library
tmp
=
tempdir
()
...
...
@@ -319,7 +319,7 @@ def tune_and_evaluate():
# upload parameters to device
ctx
=
remote
.
context
(
str
(
target
),
0
)
rparams
=
{
k
:
tvm
.
nd
.
array
(
v
,
ctx
)
for
k
,
v
in
params
.
items
()}
data_tvm
=
tvm
.
nd
.
array
((
np
.
random
.
uniform
(
size
=
shape
[
'data'
]
))
.
astype
(
dtype
))
data_tvm
=
tvm
.
nd
.
array
((
np
.
random
.
uniform
(
size
=
input_shape
))
.
astype
(
dtype
))
module
=
runtime
.
create
(
graph
,
rlib
,
ctx
)
module
.
set_input
(
'data'
,
data_tvm
)
module
.
set_input
(
**
rparams
)
...
...
@@ -341,35 +341,33 @@ def tune_and_evaluate():
# -------------
# The tuning needs to train xgboost models and use them for prediction.
# So a high performance CPU is recommended.
# It takes about
1.5 hour
on a 32T AMD Ryzen CPU.
# It takes about
2 hours
on a 32T AMD Ryzen CPU.
# One sample output is
#
# .. code-block:: bash
#
# Extract tasks...
# Tuning...
# [Task 1/16] Current/Best: 1
3.15/ 20.49 GFLOPS | Progress: (297/1000) | 348.51
s Done.
# [Task 2/16] Current/Best: 16.
66/ 22.64 GFLOPS | Progress: (475/1000) | 415.42
s Done.
# [Task 3/16] Current/Best:
10.33/ 14.19 GFLOPS | Progress: (306/1000) | 239.61
s Done.
# [Task 4/16] Current/Best: 1
3.29/ 20.88 GFLOPS | Progress: (242/1000) | 227.48
s Done.
# [Task 5/16] Current/Best: 1
3.28/ 15.61 GFLOPS | Progress: (237/1000) | 191.56
s Done.
# [Task 6/16] Current/Best:
20.16/ 23.86 GFLOPS | Progress: (315/1000) | 304.31
s Done.
# [Task 7/16] Current/Best:
9.22/ 22.00 GFLOPS | Progress: (458/1000) | 433.2
6 s Done.
# [Task 8/16] Current/Best:
14.12/ 17.80 GFLOPS | Progress: (270/1000) | 240.73
s Done.
# [Task 9/16] Current/Best: 1
4.59/ 24.02 GFLOPS | Progress: (209/1000) | 213.61
s Done.
# [Task 10/16] Current/Best:
9.86/ 21.74 GFLOPS | Progress: (367/1000) | 359.93
s Done.
# [Task 11/16] Current/Best:
5.01/ 18.86 GFLOPS | Progress: (202/1000) | 191.18
s Done.
# [Task 12/16] Current/Best:
8.61/ 25.23 GFLOPS | Progress: (220/1000) | 220.74
s Done.
# [Task 13/16] Current/Best: 1
0.87/ 25.79 GFLOPS | Progress: (465/1000) | 902.14
s Done.
# [Task 14/16] Current/Best: 1
5.33/ 29.38 GFLOPS | Progress: (239/1000) | 481.33
s Done.
# [Task 15/16] Current/Best:
12.09/ 38.60 GFLOPS | Progress: (476/1000) | 928.3
5 s Done.
# [Task 16/16] Current/Best:
16.77/ 47.08 GFLOPS | Progress: (255/1000) | 439.91
s Done.
# [Task 1/16] Current/Best: 1
8.85/ 19.67 GFLOPS | Progress: (353/1000) | 387.05
s Done.
# [Task 2/16] Current/Best: 16.
10/ 23.50 GFLOPS | Progress: (444/1000) | 379.99
s Done.
# [Task 3/16] Current/Best:
5.49/ 13.96 GFLOPS | Progress: (610/1000) | 485.87
s Done.
# [Task 4/16] Current/Best: 1
0.07/ 20.48 GFLOPS | Progress: (430/1000) | 391.66
s Done.
# [Task 5/16] Current/Best: 1
1.50/ 15.50 GFLOPS | Progress: (374/1000) | 356.03
s Done.
# [Task 6/16] Current/Best:
10.76/ 23.77 GFLOPS | Progress: (526/1000) | 526.42
s Done.
# [Task 7/16] Current/Best:
12.71/ 22.03 GFLOPS | Progress: (341/1000) | 322.9
6 s Done.
# [Task 8/16] Current/Best:
8.60/ 17.91 GFLOPS | Progress: (272/1000) | 236.08
s Done.
# [Task 9/16] Current/Best: 1
5.37/ 23.62 GFLOPS | Progress: (275/1000) | 275.18
s Done.
# [Task 10/16] Current/Best:
6.62/ 23.01 GFLOPS | Progress: (330/1000) | 315.02
s Done.
# [Task 11/16] Current/Best:
1.85/ 21.39 GFLOPS | Progress: (281/1000) | 239.19
s Done.
# [Task 12/16] Current/Best:
15.41/ 24.02 GFLOPS | Progress: (258/1000) | 270.82
s Done.
# [Task 13/16] Current/Best: 1
7.96/ 25.79 GFLOPS | Progress: (380/1000) | 738.29
s Done.
# [Task 14/16] Current/Best: 1
4.81/ 31.17 GFLOPS | Progress: (413/1000) | 799.21
s Done.
# [Task 15/16] Current/Best:
24.39/ 40.97 GFLOPS | Progress: (355/1000) | 700.2
5 s Done.
# [Task 16/16] Current/Best:
9.42/ 49.90 GFLOPS | Progress: (348/1000) | 603.84
s Done.
# Compile...
# Upload...
# Evaluate inference time cost...
# Mean inference time (std dev): 156.51 ms (0.89 ms)
#
# Mean inference time (std dev): 157.29 ms (1.74 ms)
######################################################################
#
...
...
tutorials/nnvm_quick_start.py
View file @
672147c8
...
...
@@ -109,7 +109,7 @@ print(out.asnumpy().flatten()[0:10])
# Save and Load Compiled Module
# -----------------------------
# We can also save the graph, lib and parameters into files and load them
# back in de
velopment
environment.
# back in de
ploy
environment.
####################################################
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment