Commit 651bdf2f by Thierry Moreau, committed by Yizhi Liu

[VTA][HotFix] Relay->VTA quantization fix (#4433)

* relay -> vta fix

* setting opt_level to 3 for quantization to fold batch norm
parent abe8708f
@@ -125,9 +125,11 @@ def compile_network(opt, env, target):
     dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
 
     # Perform quantization in Relay
-    with relay.quantize.qconfig(global_scale=8.0,
-                                skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
+        with relay.quantize.qconfig(global_scale=8.0,
+                                    skip_conv_layers=[0]):
+            relay_prog = relay.quantize.quantize(mod["main"], params=params)
 
     # Perform graph packing and constant folding for VTA target
     if target.device_name == "vta":
...
@@ -89,15 +89,17 @@ def compile_network(env, target, model, start_pack, stop_pack):
     dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
 
     # Perform quantization in Relay
-    with relay.quantize.qconfig(global_scale=8.0,
-                                skip_conv_layers=[0]):
-        relay_prog = relay.quantize.quantize(mod["main"], params=params)
+    # Note: We set opt_level to 3 in order to fold batch norm
+    with relay.build_config(opt_level=3):
+        with relay.quantize.qconfig(global_scale=8.0,
+                                    skip_conv_layers=[0]):
+            mod = relay.quantize.quantize(mod, params=params)
 
     # Perform graph packing and constant folding for VTA target
     if target.device_name == "vta":
         assert env.BLOCK_IN == env.BLOCK_OUT
         relay_prog = graph_pack(
-            relay_prog,
+            mod["main"],
             env.BATCH,
             env.BLOCK_OUT,
             env.WGT_WIDTH,
...
@@ -168,18 +168,20 @@ with autotvm.tophub.context(target):
     if target.device_name == "vta":
         # Perform quantization in Relay
-        with relay.quantize.qconfig(global_scale=8.0,
-                                    skip_conv_layers=[0]):
-            relay_prog = relay.quantize.quantize(mod["main"], params=params)
+        # Note: We set opt_level to 3 in order to fold batch norm
+        with relay.build_config(opt_level=3):
+            with relay.quantize.qconfig(global_scale=8.0,
+                                        skip_conv_layers=[0]):
+                mod = relay.quantize.quantize(mod, params=params)
         # Perform graph packing and constant folding for VTA target
         assert env.BLOCK_IN == env.BLOCK_OUT
         relay_prog = graph_pack(
-            relay_prog,
+            mod["main"],
            env.BATCH,
             env.BLOCK_OUT,
             env.WGT_WIDTH,
             start_name=pack_dict[model][0],
             stop_name=pack_dict[model][1])
     else:
         relay_prog = mod["main"]
...
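
All three hunks apply the same fix, so the sketch below shows the resulting quantize-then-pack flow in one place. It is a minimal illustration, not part of the commit: the get_network() helper and the start_name/stop_name operator strings are hypothetical placeholders, and it assumes the TVM/VTA Python API of this commit's era (relay.build_config, relay.quantize, vta.top.graph_pack).

    # Minimal sketch of the fixed flow (illustrative only, not the committed code).
    import vta
    from tvm import relay
    from vta.top import graph_pack

    env = vta.get_env()
    target = env.target

    # mod (a Relay module) and params would come from a Relay front-end import;
    # get_network() is a hypothetical stand-in for that loading step.
    mod, params = get_network()

    # The fix: run quantization inside an opt_level=3 build config so that
    # batch norm is folded before quantization.
    with relay.build_config(opt_level=3):
        with relay.quantize.qconfig(global_scale=8.0,
                                    skip_conv_layers=[0]):
            mod = relay.quantize.quantize(mod, params=params)

    # Pack the quantized graph to match VTA's tensor intrinsic shape.
    if target.device_name == "vta":
        assert env.BLOCK_IN == env.BLOCK_OUT
        relay_prog = graph_pack(
            mod["main"],
            env.BATCH,
            env.BLOCK_OUT,
            env.WGT_WIDTH,
            start_name="nn.max_pool2d",        # placeholder; the scripts use pack_dict[model][0]
            stop_name="nn.global_avg_pool2d")  # placeholder; the scripts use pack_dict[model][1]
    else:
        relay_prog = mod["main"]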