[Doc][Tutorial] Add the instructions how to use contrib_spatial_pack (#2427)

* [Doc][Tutorial] Add the instructions how to use contrib_spatial_pack * Update the code according suggestions

[Doc][Tutorial] Add the instructions how to use contrib_spatial_pack (#2427)
* [Doc][Tutorial] Add the instructions how to use contrib_spatial_pack * Update the code according suggestions
dfb101a0 · Wu Zhao · Lianmin Zheng · ac54577f · dfb101a0
Commit dfb101a0 authored Jan 15, 2019 by Wu Zhao Committed by Lianmin Zheng Jan 15, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 4 deletions

tutorials/autotvm/tune_nnvm_arm.py
+21 -4

No files found.
--- a/tutorials/autotvm/tune_nnvm_arm.py
+++ b/tutorials/autotvm/tune_nnvm_arm.py
 """
 Auto-tuning a convolutional network for ARM CPU
 ====================================================
-**Author**: `Lianmin Zheng <https://https://github.com/merrymercy>`_
+**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Zhao Wu <https://github.com/FrozenGene>`_
 Auto-tuning for a specific ARM device is critical for getting the best
 performance. This is a tutorial about how to tune a whole convolutional
@@ -196,8 +196,8 @@ tuning_option = {
    'log_filename': log_file,
    'tuner': 'xgb',
-    'n_trial': 1000,
+    'n_trial': 2000,
-    'early_stopping': 400,
+    'early_stopping': 800,
    'measure_option': autotvm.measure_option(
        builder=autotvm.LocalBuilder(
@@ -220,6 +220,10 @@ tuning_option = {
 #   If your device runs very slow or your conv2d operators have many GFLOPs, considering to
 #   set timeout larger.
 #
+#   If your model has depthwise convolution, you could consider setting
+#   :code:`try_spatial_pack_depthwise` be :code:`True`, which perform better than default
+#   optimization in general. For example, on ARM CPU A53 2.0GHz, we find it could boost 1.6x
+#   performance of depthwise convolution on Mobilenet V1 model.
 ###################################################################
 # Begin Tuning
@@ -237,7 +241,8 @@ def tune_tasks(tasks,
               early_stopping=None,
               log_filename='tuning.log',
               use_transfer_learning=True,
-               try_winograd=True):
+               try_winograd=True,
+               try_spatial_pack_depthwise=False):
    if try_winograd:
        for i in range(len(tasks)):
            try:  # try winograd template
@@ -249,6 +254,16 @@ def tune_tasks(tasks,
            except Exception:
                pass
+    # if we want to use spatial pack for depthwise convolution
+    if try_spatial_pack_depthwise:
+        tuner = 'xgb_knob'
+        for i in range(len(tasks)):
+            if tasks[i].name == 'topi_nn_depthwise_conv2d_nchw':
+                tsk = autotvm.task.create(tasks[i].name, tasks[i].args,
+                                          tasks[i].target, tasks[i].target_host,
+                                          'contrib_spatial_pack')
+                tasks[i] = tsk
    # create tmp log file
    tmp_log_file = log_filename + ".tmp"
    if os.path.exists(tmp_log_file):
@@ -260,6 +275,8 @@ def tune_tasks(tasks,
        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(tsk, loss_type='rank')
+        elif tuner == 'xgb_knob':
+            tuner_obj = XGBTuner(tsk, loss_type='rank', feature_type='knob')
        elif tuner == 'ga':
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == 'random':