Commit 10279098
Authored Oct 12, 2018 by Lianmin Zheng; committed by Tianqi Chen, Oct 12, 2018

[TOPI] Update pre-tuned parameters for TX2 and fp16 on Mali (#1892)

Parent: bd71efc2
Showing 12 changed files with 42 additions and 35 deletions (+42 −35):

apps/benchmark/README.md                     +7  -0
apps/benchmark/arm_cpu_imagenet_bench.py     +4  -4
apps/benchmark/gpu_imagenet_bench.py         +3  -3
apps/benchmark/mobile_gpu_imagenet_bench.py  +6  -8
apps/benchmark/util.py                       +12 -10
python/tvm/autotvm/measure/executor.py       +1  -1
python/tvm/autotvm/tophub.py                 +2  -2
python/tvm/exec/autotvm_log_editor.py        +2  -2
topi/python/topi/mali/conv2d.py              +2  -2
tutorials/autotvm/tune_nnvm_arm.py           +1  -1
tutorials/autotvm/tune_nnvm_cuda.py          +1  -1
tutorials/autotvm/tune_nnvm_mobile_gpu.py    +1  -1
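The common thread through the Python changes below: every benchmark script and tutorial switches `time_evaluator` from averaging `number` runs per result with a fixed `repeat=3` to `number=1` with a configurable `repeat`, so each reported result is an individual end-to-end run. A minimal sketch of the new measurement pattern, assuming `module` and `ctx` were created with the NNVM graph runtime as in the scripts below:

```python
import numpy as np

# Each call to ftimer() returns `repeat` wall-clock results (in seconds);
# with number=1 every result is a single run rather than an average of many.
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
prof_res = np.array(ftimer().results) * 1000  # convert to milliseconds
print("Mean inference time (std dev): %.2f ms (%.2f ms)"
      % (np.mean(prof_res), np.std(prof_res)))
```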
apps/benchmark/README.md

@@ -28,6 +28,10 @@ Build TVM with LLVM and CUDA enabled. [Help](https://docs.tvm.ai/install/from_so
```bash
python3 gpu_imagenet_bench.py --model 1080ti
python3 gpu_imagenet_bench.py --model titanx

# For NVIDIA Jetson TX2, you can run the following command directly on the board,
# or use cross compilation and RPC like what we do for ARM CPU.
python3 gpu_imagenet_bench.py --model tx2
```

### ARM CPU & Mali GPU

@@ -87,13 +91,16 @@ python3 -m tvm.exec.rpc_tracker
python3 arm_cpu_imagenet_bench.py --model pixel2 --rpc-key pixel2
python3 arm_cpu_imagenet_bench.py --model p20pro --rpc-key p20pro
python3 arm_cpu_imagenet_bench.py --model mate10pro --rpc-key mate10pro
```

```bash
# Mali GPU
# NOTE: To make the test environment more stable, we close GUI and lock the frequency
sudo /etc/init.d/lightdm stop
sudo -i
echo performance > /sys/class/misc/mali0/device/devfreq/ff9a0000.gpu/governor

python3 mobile_gpu_imagenet_bench.py --model rk3399 --rpc-key rk3399
python3 mobile_gpu_imagenet_bench.py --model rk3399 --rpc-key rk3399 --dtype float16
```

### AMD GPU
apps/benchmark/arm_cpu_imagenet_bench.py

@@ -14,7 +14,7 @@ import nnvm.testing
 from util import get_network, print_progress

-def evaluate_network(network, target, target_host, number):
+def evaluate_network(network, target, target_host, repeat):
     # connect to remote device
     tracker = tvm.rpc.connect_tracker(args.host, args.port)
     remote = tracker.request(args.rpc_key)

@@ -50,7 +50,7 @@ def evaluate_network(network, target, target_host, number):
     # evaluate
     print_progress("%-20s evaluating..." % network)
-    ftimer = module.module.time_evaluator("run", ctx, number=args.number, repeat=3)
+    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=repeat)
     prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond
     print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))

@@ -70,7 +70,7 @@ if __name__ == "__main__":
     parser.add_argument("--host", type=str, default='localhost')
     parser.add_argument("--port", type=int, default=9190)
     parser.add_argument("--rpc-key", type=str, required=True)
-    parser.add_argument("--number", type=int, default=3)
+    parser.add_argument("--repeat", type=int, default=10)
     args = parser.parse_args()

     dtype = 'float32'

@@ -87,5 +87,5 @@ if __name__ == "__main__":
     print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)"))
     print("--------------------------------------------------")
     for network in networks:
-        evaluate_network(network, target, target_host, args.number)
+        evaluate_network(network, target, target_host, args.repeat)
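For orientation: `evaluate_network` leases a device from the RPC tracker before building and timing each model. A minimal sketch of that handshake, with the tracker address and device key as illustrative assumptions matching the script's defaults:

```python
from tvm import rpc

# Ask the tracker (started via `python3 -m tvm.exec.rpc_tracker`) for a free
# device registered under the given key; the values here are assumptions.
tracker = rpc.connect_tracker('localhost', 9190)  # --host / --port defaults
remote = tracker.request('pixel2')                # blocks until a device is free
ctx = remote.context('llvm', 0)                   # device context on the remote board
```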
apps/benchmark/gpu_imagenet_bench.py

@@ -22,10 +22,10 @@ if __name__ == "__main__":
                                  'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'],
                         help='The name of neural network')
     parser.add_argument("--model", type=str,
-                        choices=['1080ti', 'titanx', 'gfx900'], default='1080ti',
+                        choices=['1080ti', 'titanx', 'tx2', 'gfx900'], default='1080ti',
                         help="The model of the test device. If your device is not listed in "
                              "the choices list, pick the most similar one as argument.")
-    parser.add_argument("--number", type=int, default=500)
+    parser.add_argument("--repeat", type=int, default=600)
     parser.add_argument("--target", type=str,
                         choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal'], default='cuda',
                         help="The tvm compilation target")

@@ -58,6 +58,6 @@ if __name__ == "__main__":
     module.set_input(**params)

     # evaluate
-    ftimer = module.module.time_evaluator("run", ctx, number=args.number, repeat=3)
+    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=args.repeat)
     prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond
     print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
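The new `tx2` choice matters insofar as it reaches the compilation target string (that part of the script is outside this hunk), where AutoTVM matches the model against the tophub pre-tuned schedules. A hedged sketch of that mapping; the exact construction in the script may differ:

```python
import tvm

# Attach the device model to the compilation target so pre-tuned schedules
# for that model can be looked up; 'tx2' per the new --model choice.
target = tvm.target.create('cuda -model=tx2')
print(target)  # e.g. "cuda -model=tx2"
```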
apps/benchmark/mobile_gpu_imagenet_bench.py

@@ -13,13 +13,13 @@ import nnvm.testing
 from util import get_network, print_progress

-def evaluate_network(network, target, target_host, number):
+def evaluate_network(network, target, target_host, dtype, repeat):
     # connect to remote device
     tracker = tvm.rpc.connect_tracker(args.host, args.port)
     remote = tracker.request(args.rpc_key)

     print_progress(network)
-    net, params, input_shape, output_shape = get_network(network, batch_size=1)
+    net, params, input_shape, output_shape = get_network(network, batch_size=1, dtype=dtype)

     print_progress("%-20s building..." % network)
     with nnvm.compiler.build_config(opt_level=3):

@@ -40,7 +40,6 @@ def evaluate_network(network, target, target_host, number):
     print_progress("%-20s uploading..." % network)
     ctx = remote.context(str(target), 0)
     remote.upload(tmp.relpath(filename))

-    rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
     rlib = remote.load_module(filename)
     module = runtime.create(graph, rlib, ctx)

@@ -50,7 +49,7 @@ def evaluate_network(network, target, target_host, number):
     # evaluate
     print_progress("%-20s evaluating..." % network)
-    ftimer = module.module.time_evaluator("run", ctx, number=number, repeat=3)
+    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=repeat)
     prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond
     print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))

@@ -69,11 +68,10 @@ if __name__ == "__main__":
     parser.add_argument("--host", type=str, default='localhost')
     parser.add_argument("--port", type=int, default=9190)
     parser.add_argument("--rpc-key", type=str, required=True)
-    parser.add_argument("--number", type=int, default=30)
+    parser.add_argument("--repeat", type=int, default=30)
+    parser.add_argument("--dtype", type=str, default='float32')
     args = parser.parse_args()

-    dtype = 'float32'
     if args.network is None:
         networks = ['squeezenet_v1.1', 'mobilenet', 'resnet-18', 'vgg-16']
     else:

@@ -87,4 +85,4 @@ if __name__ == "__main__":
     print("--------------------------------------------------")
     for network in networks:
-        evaluate_network(network, target, target_host, args.number)
+        evaluate_network(network, target, target_host, args.dtype, args.repeat)
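With `dtype` threaded through, an fp16 Mali run is driven entirely by flags, e.g. `python3 mobile_gpu_imagenet_bench.py --model rk3399 --rpc-key rk3399 --dtype float16` as in the README hunk above. Equivalently from Python, a sketch against the new signature; the target strings below are illustrative assumptions:

```python
# evaluate_network(network, target, target_host, dtype, repeat) per the new
# signature; the Mali target and host strings are assumptions for an RK3399.
target = 'opencl -device=mali -model=rk3399'
target_host = 'llvm -target=aarch64-linux-gnu'
for network in ['mobilenet', 'squeezenet_v1.1']:
    evaluate_network(network, target, target_host, 'float16', 30)
```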
apps/benchmark/util.py

@@ -3,15 +3,17 @@
 import sys
 import nnvm

-def get_network(name, batch_size):
+def get_network(name, batch_size, dtype='float32'):
     """Get the symbol definition and random weight of a network

     Parameters
     ----------
     name: str
         The name of the network, can be 'resnet-18', 'resnet-50', 'vgg-16', 'inception_v3', 'mobilenet', ...
-    batch_size:
+    batch_size: int
         batch size
+    dtype: str
+        Data type

     Returns
     -------

@@ -28,24 +30,24 @@ def get_network(name, batch_size):
     output_shape = (batch_size, 1000)

     if name == 'mobilenet':
-        net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
+        net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)
     elif name == 'mobilenet_v2':
-        net, params = nnvm.testing.mobilenet_v2.get_workload(batch_size=batch_size)
+        net, params = nnvm.testing.mobilenet_v2.get_workload(batch_size=batch_size, dtype=dtype)
     elif name == 'inception_v3':
         input_shape = (1, 3, 299, 299)
-        net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
+        net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
     elif "resnet" in name:
         n_layer = int(name.split('-')[1])
-        net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
+        net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
     elif "vgg" in name:
         n_layer = int(name.split('-')[1])
-        net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
+        net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
     elif "densenet" in name:
         n_layer = int(name.split('-')[1])
-        net, params = nnvm.testing.densenet.get_workload(num_layers=n_layer, batch_size=batch_size)
+        net, params = nnvm.testing.densenet.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
     elif "squeezenet" in name:
         version = name.split("_v")[1]
-        net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version=version)
+        net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version=version, dtype=dtype)
     elif name == 'custom':
         # an example for custom network
         from nnvm.testing import utils

@@ -53,7 +55,7 @@ def get_network(name, batch_size):
         net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
         net = nnvm.sym.flatten(net)
         net = nnvm.sym.dense(net, units=1000)
-        net, params = utils.create_workload(net, batch_size, (3, 224, 224))
+        net, params = utils.create_workload(net, batch_size, (3, 224, 224), dtype=dtype)
     elif name == 'mxnet':
         # an example for mxnet model
         from mxnet.gluon.model_zoo.vision import get_model
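A quick usage sketch of the extended helper; the fp16 request is an assumed example:

```python
from util import get_network  # apps/benchmark/util.py

# dtype is forwarded to the nnvm.testing workload constructors shown above,
# so the returned weights are generated in the requested precision.
net, params, input_shape, output_shape = get_network(
    'resnet-18', batch_size=1, dtype='float16')
print(input_shape, output_shape)  # (1, 3, 224, 224) (1, 1000)
```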
python/tvm/autotvm/measure/executor.py

@@ -6,7 +6,7 @@ class Executor(object):
     Allows submit asynchronous jobs and returns the Future object.
     """
     # timeout for jobs that may hang
-    DEFAULT_TIMEOUT = 60
+    DEFAULT_TIMEOUT = 120

     def submit(self, func, *args, **kwargs):
         """
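Doubling `DEFAULT_TIMEOUT` gives slow measurement jobs (large kernels, busy boards) more headroom before they are treated as hung. A hedged sketch of how an executor is typically driven; `LocalExecutor` and `Future.get` follow the AutoTVM measure API of this era, but treat the exact names as assumptions:

```python
from tvm.autotvm.measure.executor import Executor
from tvm.autotvm.measure.local_executor import LocalExecutor

def measure_job(x):
    return x * x  # stand-in for a real measurement that may hang on device

executor = LocalExecutor()
future = executor.submit(measure_job, 7)              # returns a Future immediately
print(future.get(timeout=Executor.DEFAULT_TIMEOUT))   # now waits up to 120 s
```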
python/tvm/autotvm/tophub.py

@@ -22,10 +22,10 @@ AUTOTVM_TOPHUB_ROOT_PATH = os.path.join(os.path.expanduser('~'), ".tvm", "tophub
 PACKAGE_VERSION = {
     'arm_cpu': "v0.03",
-    'cuda': "v0.02",
+    'cuda': "v0.03",
     'rocm': "v0.01",
     'opencl': "v0.01",
-    'mali': "v0.02",
+    'mali': "v0.03",
     'vta': "v0.01",
 }
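Bumping the `cuda` and `mali` entries invalidates the cached packages under `~/.tvm/tophub`, so clients re-download the logs that now include the TX2 and fp16 Mali parameters. They are applied the usual way, as in the tutorials (a sketch; the target is an illustrative assumption):

```python
import tvm
from tvm import autotvm

# Compiling inside a tophub context transparently uses the downloaded
# pre-tuned schedules for the target; 'cuda -model=tx2' is an example.
with autotvm.tophub.context(tvm.target.create('cuda -model=tx2')):
    pass  # nnvm.compiler.build(net, target, ...) would go here
```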
python/tvm/exec/autotvm_log_editor.py

@@ -10,9 +10,9 @@ from .. import autotvm
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("--act", type=str, choices=['pick-best'],
+    parser.add_argument("--act", type=str, choices=['pick-best'], required=True,
                         help="The action")
-    parser.add_argument("--i", type=str, help="The input file or directory")
+    parser.add_argument("--i", type=str, help="The input file or directory", required=True)
     parser.add_argument("--o", type=str, help="The output file")
     args = parser.parse_args()
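With both flags now required, a typical invocation is `python3 -m tvm.exec.autotvm_log_editor --act pick-best --i tuning.log --o best.log`. The same pick-best operation is exposed in Python; a sketch with hypothetical file names:

```python
from tvm import autotvm

# Keep only the best configuration per workload from a tuning log;
# "tuning.log" and "best.log" are hypothetical paths.
autotvm.record.pick_best("tuning.log", "best.log")
```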
topi/python/topi/mali/conv2d.py

@@ -187,7 +187,7 @@ def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_
                        [-1 / 6.0, 1 / 6.0, -1 / 6.0],
                        [1 / 24.0, 1 / 12.0, 1 / 6.0],
                        [1 / 24.0, -1 / 12.0, 1 / 6.0],
-                       [0, 0, 1]], dtype=np.float32)
+                       [0, 0, 1]], out_dtype)

     B_data = np.array([
         [4, 0, 0, 0, 0, 0],

@@ -209,7 +209,7 @@ def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_
                        [1, 0, 0],
                        [1.0/2, 1.0/2, 1.0/2],
                        [1.0/2, -1.0/2, 1.0/2],
-                       [0, 0, 1]], np.float32)
+                       [0, 0, 1]], out_dtype)

     B_data = np.array([
         [1, 0, 0, 0],
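This change lets the Winograd transform matrices follow the requested output dtype instead of being pinned to float32, keeping fp16 Mali schedules consistent end to end. A minimal numpy illustration; the 'float16' value is an assumed example of what the fp16 schedules would pass in:

```python
import numpy as np

out_dtype = 'float16'  # assumed; supplied to _decl_winograd by the caller
G_data = np.array([[1, 0, 0],
                   [1.0/2, 1.0/2, 1.0/2],
                   [1.0/2, -1.0/2, 1.0/2],
                   [0, 0, 1]], out_dtype)
print(G_data.dtype)  # float16, matching the conv2d compute dtype
```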
tutorials/autotvm/tune_nnvm_arm.py

@@ -334,7 +334,7 @@ def tune_and_evaluate(tuning_opt):
         # evaluate
         print("Evaluate inference time cost...")
-        ftimer = module.module.time_evaluator("run", ctx, number=8, repeat=3)
+        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
         prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
         print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
tutorials/autotvm/tune_nnvm_cuda.py

@@ -236,7 +236,7 @@ def tune_and_evaluate(tuning_opt):
         # evaluate
         print("Evaluate inference time cost...")
-        ftimer = module.module.time_evaluator("run", ctx, number=400, repeat=3)
+        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
         prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
         print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
tutorials/autotvm/tune_nnvm_mobile_gpu.py

@@ -335,7 +335,7 @@ def tune_and_evaluate(tuning_opt):
         # evaluate
         print("Evaluate inference time cost...")
-        ftimer = module.module.time_evaluator("run", ctx, number=50, repeat=3)
+        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30)
         prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
         print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
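Why the tutorials also move to `number=1`: with the old `number=N, repeat=3` setting each of the three results was already an average of N runs, so the reported standard deviation understated run-to-run variance. A small self-contained illustration with synthetic latencies (the lognormal distribution and its parameters are assumptions for demonstration only):

```python
import numpy as np

np.random.seed(0)
runs = np.random.lognormal(mean=0.0, sigma=0.1, size=24)  # synthetic latencies
old_style = runs.reshape(3, 8).mean(axis=1)  # old: 3 results, each an 8-run average
new_style = runs[:10]                        # new: 10 individual measurements
print("std over averages: %.4f, std over runs: %.4f"
      % (np.std(old_style), np.std(new_style)))  # the latter is larger/more honest
```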