Commit dfb101a0 by Wu Zhao Committed by Lianmin Zheng

[Doc][Tutorial] Add the instructions how to use contrib_spatial_pack (#2427)

* [Doc][Tutorial] Add the instructions how to use contrib_spatial_pack

* Update the code according to suggestions
parent ac54577f
""" """
Auto-tuning a convolutional network for ARM CPU Auto-tuning a convolutional network for ARM CPU
==================================================== ====================================================
**Author**: `Lianmin Zheng <https://https://github.com/merrymercy>`_ **Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Zhao Wu <https://github.com/FrozenGene>`_
Auto-tuning for a specific ARM device is critical for getting the best Auto-tuning for a specific ARM device is critical for getting the best
performance. This is a tutorial about how to tune a whole convolutional performance. This is a tutorial about how to tune a whole convolutional
...@@ -196,8 +196,8 @@ tuning_option = { ...@@ -196,8 +196,8 @@ tuning_option = {
'log_filename': log_file, 'log_filename': log_file,
'tuner': 'xgb', 'tuner': 'xgb',
'n_trial': 1000, 'n_trial': 2000,
'early_stopping': 400, 'early_stopping': 800,
'measure_option': autotvm.measure_option( 'measure_option': autotvm.measure_option(
builder=autotvm.LocalBuilder( builder=autotvm.LocalBuilder(
...@@ -220,6 +220,10 @@ tuning_option = { ...@@ -220,6 +220,10 @@ tuning_option = {
# If your device runs very slowly or your conv2d operators have many GFLOPs, consider # If your device runs very slowly or your conv2d operators have many GFLOPs, consider
# setting a larger timeout. # setting a larger timeout.
# #
# If your model has depthwise convolutions, you could consider setting
# :code:`try_spatial_pack_depthwise` to :code:`True`, which generally performs better than
# the default optimization. For example, on ARM CPU A53 2.0GHz, we found it could boost the
# performance of depthwise convolution on the Mobilenet V1 model by 1.6x.
################################################################### ###################################################################
# Begin Tuning # Begin Tuning
...@@ -237,7 +241,8 @@ def tune_tasks(tasks, ...@@ -237,7 +241,8 @@ def tune_tasks(tasks,
early_stopping=None, early_stopping=None,
log_filename='tuning.log', log_filename='tuning.log',
use_transfer_learning=True, use_transfer_learning=True,
try_winograd=True): try_winograd=True,
try_spatial_pack_depthwise=False):
if try_winograd: if try_winograd:
for i in range(len(tasks)): for i in range(len(tasks)):
try: # try winograd template try: # try winograd template
...@@ -249,6 +254,16 @@ def tune_tasks(tasks, ...@@ -249,6 +254,16 @@ def tune_tasks(tasks,
except Exception: except Exception:
pass pass
# if we want to use spatial pack for depthwise convolution
if try_spatial_pack_depthwise:
tuner = 'xgb_knob'
for i in range(len(tasks)):
if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
tasks[i].target, tasks[i].target_host,
'contrib_spatial_pack')
tasks[i] = tsk
# create tmp log file # create tmp log file
tmp_log_file = log_filename + ".tmp" tmp_log_file = log_filename + ".tmp"
if os.path.exists(tmp_log_file): if os.path.exists(tmp_log_file):
...@@ -260,6 +275,8 @@ def tune_tasks(tasks, ...@@ -260,6 +275,8 @@ def tune_tasks(tasks,
# create tuner # create tuner
if tuner == 'xgb' or tuner == 'xgb-rank': if tuner == 'xgb' or tuner == 'xgb-rank':
tuner_obj = XGBTuner(tsk, loss_type='rank') tuner_obj = XGBTuner(tsk, loss_type='rank')
elif tuner == 'xgb_knob':
tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
elif tuner == 'ga': elif tuner == 'ga':
tuner_obj = GATuner(tsk, pop_size=50) tuner_obj = GATuner(tsk, pop_size=50)
elif tuner == 'random': elif tuner == 'random':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment