[VTA][HotFix] Relay->VTA quantization fix (#4433)

* Relay -> VTA fix
* Set opt_level to 3 for quantization so that batch norm is folded

[VTA][HotFix] Relay->VTA quantization fix (#4433)
* Relay -> VTA fix
* Set opt_level to 3 for quantization so that batch norm is folded
651bdf2f · Thierry Moreau · Yizhi Liu · abe8708f · 651bdf2f · 651bdf2f
Commit 651bdf2f authored Nov 26, 2019 by Thierry Moreau Committed by Yizhi Liu Nov 26, 2019
Show whitespace changes
Inline Side-by-side

Showing with 10 additions and 4 deletions

vta/scripts/tune_resnet.py
+2 -0

vta/tutorials/autotvm/tune_relay_vta.py
+4 -2

vta/tutorials/frontend/deploy_vision_on_vta.py
+4 -2

No files found.
--- a/vta/scripts/tune_resnet.py
+++ b/vta/scripts/tune_resnet.py
@@ -125,6 +125,8 @@ def compile_network(opt, env, target):
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Perform quantization in Relay
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
        with relay.quantize.qconfig(global_scale=8.0,
                                    skip_conv_layers=[0]):
            relay_prog = relay.quantize.quantize(mod["main"], params=params)

--- a/vta/tutorials/autotvm/tune_relay_vta.py
+++ b/vta/tutorials/autotvm/tune_relay_vta.py
@@ -89,15 +89,17 @@ def compile_network(env, target, model, start_pack, stop_pack):
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Perform quantization in Relay
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
        with relay.quantize.qconfig(global_scale=8.0,
                                    skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
+            mod = relay.quantize.quantize(mod, params=params)

    # Perform graph packing and constant folding for VTA target
    if target.device_name == "vta":
        assert env.BLOCK_IN == env.BLOCK_OUT
        relay_prog = graph_pack(
-            relay_prog,
+            mod["main"],
            env.BATCH,
            env.BLOCK_OUT,
            env.WGT_WIDTH,

--- a/vta/tutorials/frontend/deploy_vision_on_vta.py
+++ b/vta/tutorials/frontend/deploy_vision_on_vta.py
@@ -168,13 +168,15 @@ with autotvm.tophub.context(target):

    if target.device_name == "vta":
        # Perform quantization in Relay
+        # Note: We set opt_level to 3 in order to fold batch norm
+        with relay.build_config(opt_level=3):
            with relay.quantize.qconfig(global_scale=8.0,
                                        skip_conv_layers=[0]):
-            relay_prog = relay.quantize.quantize(mod["main"], params=params)
+                mod = relay.quantize.quantize(mod, params=params)
            # Perform graph packing and constant folding for VTA target
            assert env.BLOCK_IN == env.BLOCK_OUT
            relay_prog = graph_pack(
-            relay_prog,
+                mod["main"],
                env.BATCH,
                env.BLOCK_OUT,
                env.WGT_WIDTH,