"""Benchmark script for ImageNet models on ARM CPU.
See README.md for the usage and results of this script.
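Example invocation (a sketch; it assumes an RPC tracker is already running on
localhost:9190 and that an ARM device is registered with it under the RPC key
'rk3399'):

    python3 arm_cpu_imagenet_bench.py --model rk3399 --rpc-key rk3399

Pass --network to benchmark a single network; without it, a default list of
networks is evaluated.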
"""
import argparse

import numpy as np

import tvm
from tvm.contrib.util import tempdir
import tvm.contrib.graph_runtime as runtime
import nnvm.compiler
import nnvm.testing

from util import get_network, print_progress


def evaluate_network(network, target, target_host, repeat):
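    # note: `args` and `dtype` are module-level globals assigned in the
    # __main__ block below, before this function is called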
    # connect to remote device
    tracker = tvm.rpc.connect_tracker(args.host, args.port)
    remote = tracker.request(args.rpc_key)

    print_progress(network)
    net, params, input_shape, output_shape = get_network(network, batch_size=1)

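    # compile the graph with NNVM; opt_level=3 enables all graph-level
    # optimization passes for the given ARM CPU target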
    print_progress("%-20s building..." % network)
    with nnvm.compiler.build_config(opt_level=3):
        graph, lib, params = nnvm.compiler.build(
            net, target=target, target_host=target_host,
            shape={'data': input_shape}, params=params, dtype=dtype)

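    # export the compiled library to a temporary file: Android targets need a
    # shared object built with the NDK toolchain, other targets use the
    # default .tar export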
    tmp = tempdir()
    if 'android' in str(target):
        from tvm.contrib import ndk
        filename = "%s.so" % network
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "%s.tar" % network
        lib.export_library(tmp.relpath(filename))

    # upload library and params
    print_progress("%-20s uploading..." % network)
    ctx = remote.context(str(target), 0)
    remote.upload(tmp.relpath(filename))

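    # load the uploaded library on the device, create a graph runtime module
    # on the remote context, and feed a random input tensor of the expected shape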
    rlib = remote.load_module(filename)
    module = runtime.create(graph, rlib, ctx)
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input('data', data_tvm)
    module.set_input(**params)

    # evaluate
    print_progress("%-20s evaluating..." % network)
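    # each call to ftimer runs the graph once per measurement (number=1) and
    # repeats `repeat` times, returning per-measurement latencies in seconds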
    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=repeat)
    prof_res = np.array(ftimer().results) * 1000  # convert to milliseconds
    print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--network", type=str, choices=
                        ['resnet-18', 'resnet-34', 'resnet-50',
                         'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3',
                         'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'],
                        help='The name of the neural network')
    parser.add_argument("--model", type=str, choices=
                        ['rk3399', 'mate10', 'mate10pro', 'p20', 'p20pro',
                         'pixel2', 'rasp3b', 'pynq'], default='rk3399',
                        help="The model of the test device. If your device is not listed "
                             "in the choices, pick the most similar one.")
    parser.add_argument("--host", type=str, default='localhost')
    parser.add_argument("--port", type=int, default=9190)
    parser.add_argument("--rpc-key", type=str, required=True)
    parser.add_argument("--repeat", type=int, default=10)
    args = parser.parse_args()

    dtype = 'float32'

    if args.network is None:
        networks = ['squeezenet_v1.1', 'mobilenet', 'resnet-18', 'vgg-16']
    else:
        networks = [args.network]

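    # map the device model name to a pre-configured ARM CPU compilation target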
    target = tvm.target.arm_cpu(model=args.model)
    target_host = None

    print("--------------------------------------------------")
    print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)"))
    print("--------------------------------------------------")
    for network in networks:
        evaluate_network(network, target, target_host, args.repeat)