Commit 10279098 by Lianmin Zheng Committed by Tianqi Chen

[TOPI] Update pre-tuned parameters for TX2 and fp16 on Mali (#1892)

parent bd71efc2
......@@ -28,6 +28,10 @@ Build TVM with LLVM and CUDA enabled. [Help](https://docs.tvm.ai/install/from_so
```bash
python3 gpu_imagenet_bench.py --model 1080ti
python3 gpu_imagenet_bench.py --model titanx
# For NVIDIA Jetson TX2, you can run the following command directly on the board,
# or use cross compilation and RPC like what we do for ARM CPU.
python3 gpu_imagenet_bench.py --model tx2
```
### ARM CPU & Mali GPU
......@@ -87,13 +91,16 @@ python3 -m tvm.exec.rpc_tracker
python3 arm_cpu_imagenet_bench.py --model pixel2 --rpc-key pixel2
python3 arm_cpu_imagenet_bench.py --model p20pro --rpc-key p20pro
python3 arm_cpu_imagenet_bench.py --model mate10pro --rpc-key mate10pro
```
```bash
# Mali GPU
# NOTE: To make the test environment more stable, we close GUI and lock the frequency
sudo /etc/init.d/lightdm stop
sudo -i
echo performance > /sys/class/misc/mali0/device/devfreq/ff9a0000.gpu/governor
python3 mobile_gpu_imagenet_bench.py --model rk3399 --rpc-key rk3399
python3 mobile_gpu_imagenet_bench.py --model rk3399 --rpc-key rk3399 --dtype float16
```
### AMD GPU
......
......@@ -14,7 +14,7 @@ import nnvm.testing
from util import get_network, print_progress
def evaluate_network(network, target, target_host, number):
def evaluate_network(network, target, target_host, repeat):
# connect to remote device
tracker = tvm.rpc.connect_tracker(args.host, args.port)
remote = tracker.request(args.rpc_key)
......@@ -50,7 +50,7 @@ def evaluate_network(network, target, target_host, number):
# evaluate
print_progress("%-20s evaluating..." % network)
ftimer = module.module.time_evaluator("run", ctx, number=args.number, repeat=3)
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=repeat)
prof_res = np.array(ftimer().results) * 1000 # multiply 1000 for converting to millisecond
print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
......@@ -70,7 +70,7 @@ if __name__ == "__main__":
parser.add_argument("--host", type=str, default='localhost')
parser.add_argument("--port", type=int, default=9190)
parser.add_argument("--rpc-key", type=str, required=True)
parser.add_argument("--number", type=int, default=3)
parser.add_argument("--repeat", type=int, default=10)
args = parser.parse_args()
dtype = 'float32'
......@@ -87,5 +87,5 @@ if __name__ == "__main__":
print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)"))
print("--------------------------------------------------")
for network in networks:
evaluate_network(network, target, target_host, args.number)
evaluate_network(network, target, target_host, args.repeat)
......@@ -22,10 +22,10 @@ if __name__ == "__main__":
'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'],
help='The name of neural network')
parser.add_argument("--model", type=str,
choices=['1080ti', 'titanx', 'gfx900'], default='1080ti',
choices=['1080ti', 'titanx', 'tx2', 'gfx900'], default='1080ti',
help="The model of the test device. If your device is not listed in "
"the choices list, pick the most similar one as argument.")
parser.add_argument("--number", type=int, default=500)
parser.add_argument("--repeat", type=int, default=600)
parser.add_argument("--target", type=str,
choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal'], default='cuda',
help="The tvm compilation target")
......@@ -58,6 +58,6 @@ if __name__ == "__main__":
module.set_input(**params)
# evaluate
ftimer = module.module.time_evaluator("run", ctx, number=args.number, repeat=3)
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=args.repeat)
prof_res = np.array(ftimer().results) * 1000 # multiply 1000 for converting to millisecond
print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
......@@ -13,13 +13,13 @@ import nnvm.testing
from util import get_network, print_progress
def evaluate_network(network, target, target_host, number):
def evaluate_network(network, target, target_host, dtype, repeat):
# connect to remote device
tracker = tvm.rpc.connect_tracker(args.host, args.port)
remote = tracker.request(args.rpc_key)
print_progress(network)
net, params, input_shape, output_shape = get_network(network, batch_size=1)
net, params, input_shape, output_shape = get_network(network, batch_size=1, dtype=dtype)
print_progress("%-20s building..." % network)
with nnvm.compiler.build_config(opt_level=3):
......@@ -40,7 +40,6 @@ def evaluate_network(network, target, target_host, number):
print_progress("%-20s uploading..." % network)
ctx = remote.context(str(target), 0)
remote.upload(tmp.relpath(filename))
rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}
rlib = remote.load_module(filename)
module = runtime.create(graph, rlib, ctx)
......@@ -50,7 +49,7 @@ def evaluate_network(network, target, target_host, number):
# evaluate
print_progress("%-20s evaluating..." % network)
ftimer = module.module.time_evaluator("run", ctx, number=number, repeat=3)
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=repeat)
prof_res = np.array(ftimer().results) * 1000 # multiply 1000 for converting to millisecond
print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
......@@ -69,11 +68,10 @@ if __name__ == "__main__":
parser.add_argument("--host", type=str, default='localhost')
parser.add_argument("--port", type=int, default=9190)
parser.add_argument("--rpc-key", type=str, required=True)
parser.add_argument("--number", type=int, default=30)
parser.add_argument("--repeat", type=int, default=30)
parser.add_argument("--dtype", type=str, default='float32')
args = parser.parse_args()
dtype = 'float32'
if args.network is None:
networks = ['squeezenet_v1.1', 'mobilenet', 'resnet-18', 'vgg-16']
else:
......@@ -87,4 +85,4 @@ if __name__ == "__main__":
print("--------------------------------------------------")
for network in networks:
evaluate_network(network, target, target_host, args.number)
evaluate_network(network, target, target_host, args.dtype, args.repeat)
......@@ -3,15 +3,17 @@
import sys
import nnvm
def get_network(name, batch_size):
def get_network(name, batch_size, dtype='float32'):
"""Get the symbol definition and random weight of a network
Parameters
----------
name: str
The name of the network, can be 'resnet-18', 'resnet-50', 'vgg-16', 'inception_v3', 'mobilenet', ...
batch_size:
batch_size: int
batch size
dtype: str
Data type
Returns
-------
......@@ -28,24 +30,24 @@ def get_network(name, batch_size):
output_shape = (batch_size, 1000)
if name == 'mobilenet':
net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size)
net, params = nnvm.testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)
elif name == 'mobilenet_v2':
net, params = nnvm.testing.mobilenet_v2.get_workload(batch_size=batch_size)
net, params = nnvm.testing.mobilenet_v2.get_workload(batch_size=batch_size, dtype=dtype)
elif name == 'inception_v3':
input_shape = (1, 3, 299, 299)
net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size)
net, params = nnvm.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
elif "resnet" in name:
n_layer = int(name.split('-')[1])
net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size)
net, params = nnvm.testing.resnet.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
elif "vgg" in name:
n_layer = int(name.split('-')[1])
net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size)
net, params = nnvm.testing.vgg.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
elif "densenet" in name:
n_layer = int(name.split('-')[1])
net, params = nnvm.testing.densenet.get_workload(num_layers=n_layer, batch_size=batch_size)
net, params = nnvm.testing.densenet.get_workload(num_layers=n_layer, batch_size=batch_size, dtype=dtype)
elif "squeezenet" in name:
version = name.split("_v")[1]
net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version=version)
net, params = nnvm.testing.squeezenet.get_workload(batch_size=batch_size, version=version, dtype=dtype)
elif name == 'custom':
# an example for custom network
from nnvm.testing import utils
......@@ -53,7 +55,7 @@ def get_network(name, batch_size):
net = nnvm.sym.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
net = nnvm.sym.flatten(net)
net = nnvm.sym.dense(net, units=1000)
net, params = utils.create_workload(net, batch_size, (3, 224, 224))
net, params = utils.create_workload(net, batch_size, (3, 224, 224), dtype=dtype)
elif name == 'mxnet':
# an example for mxnet model
from mxnet.gluon.model_zoo.vision import get_model
......
......@@ -6,7 +6,7 @@ class Executor(object):
Allows submit asynchronous jobs and returns the Future object.
"""
# timeout for jobs that may hang
DEFAULT_TIMEOUT = 60
DEFAULT_TIMEOUT = 120
def submit(self, func, *args, **kwargs):
"""
......
......@@ -22,10 +22,10 @@ AUTOTVM_TOPHUB_ROOT_PATH = os.path.join(os.path.expanduser('~'), ".tvm", "tophub
PACKAGE_VERSION = {
'arm_cpu': "v0.03",
'cuda': "v0.02",
'cuda': "v0.03",
'rocm': "v0.01",
'opencl': "v0.01",
'mali': "v0.02",
'mali': "v0.03",
'vta': "v0.01",
}
......
......@@ -10,9 +10,9 @@ from .. import autotvm
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--act", type=str, choices=['pick-best'],
parser.add_argument("--act", type=str, choices=['pick-best'], required=True,
help="The action")
parser.add_argument("--i", type=str, help="The input file or directory")
parser.add_argument("--i", type=str, help="The input file or directory", required=True)
parser.add_argument("--o", type=str, help="The output file")
args = parser.parse_args()
......
......@@ -187,7 +187,7 @@ def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_
[-1 / 6.0, 1 / 6.0, -1 / 6.0],
[1 / 24.0, 1 / 12.0, 1 / 6.0],
[1 / 24.0, -1 / 12.0, 1 / 6.0],
[0, 0, 1]], dtype=np.float32)
[0, 0, 1]], out_dtype)
B_data = np.array([
[4, 0, 0, 0, 0, 0],
......@@ -209,7 +209,7 @@ def _decl_winograd(cfg, data, kernel, strides, padding, layout, out_dtype, tile_
[1, 0, 0],
[1.0/2, 1.0/2, 1.0/2],
[1.0/2, -1.0/2, 1.0/2],
[0, 0, 1]], np.float32)
[0, 0, 1]], out_dtype)
B_data = np.array([
[1, 0, 0, 0],
......
......@@ -334,7 +334,7 @@ def tune_and_evaluate(tuning_opt):
# evaluate
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, number=8, repeat=3)
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=10)
prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
(np.mean(prof_res), np.std(prof_res)))
......
......@@ -236,7 +236,7 @@ def tune_and_evaluate(tuning_opt):
# evaluate
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, number=400, repeat=3)
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
(np.mean(prof_res), np.std(prof_res)))
......
......@@ -335,7 +335,7 @@ def tune_and_evaluate(tuning_opt):
# evaluate
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, number=50, repeat=3)
ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30)
prof_res = np.array(ftimer().results) * 1000 # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
(np.mean(prof_res), np.std(prof_res)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment