gpu_imagenet_bench.py 3.14 KB
Newer Older
1 2
"""Benchmark script for ImageNet models on GPU.
See README.md for the usage and results of this script.
"""
4
import argparse
5
import threading
6

7
import numpy as np
8

9
import tvm
10 11
from tvm.contrib.util import tempdir
import tvm.contrib.graph_runtime as runtime
12 13 14
import nnvm.compiler
import nnvm.testing

15 16
from util import get_network

17

18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
def benchmark(network, target):
    """Compile one network, run it on the target device and print its timing.

    The network is fetched with ``get_network`` at batch size 1, compiled by
    NNVM at ``opt_level=3`` and executed through the graph runtime on device
    index 0 of ``target``, fed with uniformly random input data.

    This function reads two script-level globals that are set in the
    ``__main__`` section: ``dtype`` (element type used for compilation and
    for the input tensor) and ``args.repeat`` (number of timed repetitions).

    Parameters
    ----------
    network : str
        Name of the network, forwarded to ``get_network``.
    target : object
        Compilation target handed to ``nnvm.compiler.build``; its string
        form is also used to pick the runtime context.

    Returns
    -------
    None
        One formatted line (name, mean, standard deviation in milliseconds)
        is printed to stdout.
    """
    # The output shape returned by get_network is not needed for timing.
    symbol, weights, input_shape, _ = get_network(network, batch_size=1)

    with nnvm.compiler.build_config(opt_level=3):
        graph, lib, weights = nnvm.compiler.build(
            symbol,
            target=target,
            shape={'data': input_shape},
            params=weights,
            dtype=dtype)

    # Set up the graph runtime on the first device of the requested target.
    device = tvm.context(str(target), 0)
    executor = runtime.create(graph, lib, device)
    random_input = np.random.uniform(size=input_shape).astype(dtype)
    executor.set_input('data', tvm.nd.array(random_input))
    executor.set_input(**weights)

    # Time the "run" function: one call per measurement, args.repeat measurements.
    timer = executor.module.time_evaluator("run", device, number=1, repeat=args.repeat)
    # Results are scaled by 1000 to report milliseconds.
    timings_ms = np.array(timer().results) * 1000
    mean_text = "%.2f ms" % np.mean(timings_ms)
    std_text = "%.2f ms" % np.std(timings_ms)
    print("%-20s %-19s (%s)" % (network, mean_text, std_text))


38
if __name__ == "__main__":
    # Script entry point: parse the command line, build the TVM target and
    # benchmark each selected network, optionally from several threads at once.
    parser = argparse.ArgumentParser()
    parser.add_argument("--network", type=str,
                        choices=['resnet-18', 'resnet-34', 'resnet-50',
                                 'vgg-16', 'vgg-19', 'densenet-121', 'inception_v3',
                                 'mobilenet', 'mobilenet_v2', 'squeezenet_v1.0', 'squeezenet_v1.1'],
                        help='The name of neural network')
    parser.add_argument("--model", type=str,
                        choices=['1080ti', 'titanx', 'tx2', 'gfx900'], default='1080ti',
                        help="The model of the test device. If your device is not listed in "
                             "the choices list, pick the most similar one as argument.")
    parser.add_argument("--repeat", type=int, default=600)
    parser.add_argument("--target", type=str,
                        choices=['cuda', 'opencl', 'rocm', 'nvptx', 'metal'], default='cuda',
                        help="The tvm compilation target")
    parser.add_argument("--thread", type=int, default=1, help="The number of threads to be run.")

    # `args` and `dtype` must stay module-level names: benchmark() reads both
    # as globals (args.repeat and dtype).
    args = parser.parse_args()
    dtype = 'float32'

    # Without --network, fall back to a fixed default set of networks.
    if args.network is None:
        networks = ['resnet-50', 'mobilenet', 'vgg-19', 'inception_v3']
    else:
        networks = [args.network]

    target = tvm.target.create('%s -model=%s' % (args.target, args.model))

    print("--------------------------------------------------")
    print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)"))
    print("--------------------------------------------------")
    for network in networks:
        if args.thread == 1:
            benchmark(network, target)
        else:
            # Run the same benchmark concurrently in args.thread threads;
            # each thread prints its own result line.
            threads = [
                threading.Thread(target=benchmark, args=(network, target), name="thread%d" % n)
                for n in range(args.thread)
            ]

            for thread in threads:
                thread.start()

            for thread in threads:
                thread.join()