""" Introduction to TOPI ==================== **Author**: `Ehsan M. Kermani <https://github.com/ehsanmok>`_ This is an introductory tutorial to TVM Operator Inventory (TOPI). TOPI provides numpy-style generic operations and schedules with higher abstractions than TVM. In this tutorial, we will see how TOPI can save us from writing boilerplates code in TVM. """ from __future__ import absolute_import, print_function import tvm import topi import numpy as np ###################################################################### # Basic example # ------------- # Let's revisit the sum of rows operation (equivalent to :code:`B = numpy.sum(A, axis=1)`') \ # To compute the sum of rows of a two dimensional TVM tensor A, we should # specify the symbolic operation as well as schedule as follows # n = tvm.var("n") m = tvm.var("m") A = tvm.placeholder((n, m), name='A') k = tvm.reduce_axis((0, m), "k") B = tvm.compute((n,), lambda i: tvm.sum(A[i, k], axis=k), name="B") s = tvm.create_schedule(B.op) ###################################################################### # and to examine the IR code in human readable format, we can do # print(tvm.lower(s, [A], simple_mode=True)) ###################################################################### # However, for such a common operation we had to define the reduce axis ourselves as well as explicit computation with # :code: `tvm.compute`. Imagine for more complicated operations how much details we need to provide. # Fortunately, we can replace those two lines with simple :code:`topi.sum` much like :code`numpy.sum` # C = topi.sum(A, axis=1) ts = tvm.create_schedule(C.op) print(tvm.lower(ts, [A], simple_mode=True)) ###################################################################### # Numpy-style operator overloading # -------------------------------- # We can add two tensors using :code:`topi.broadcast_add` that have correct (broadcastable with specific) shapes. # Even shorter, TOPI provides operator overloading for such common operations. For example, # x, y = 100, 10 a = tvm.placeholder((x, y, y), name="a") b = tvm.placeholder((y, y), name="b") c = a + b # same as topi.broadcast_add d = a * b # same as topi.broadcast_mul ###################################################################### # Overloaded with the same syntax, TOPI handles broadcasting a primitive (`int`, `float`) to a tensor :code:`d - 3.14`. ###################################################################### # Generic schedules and fusing operations # --------------------------------------- # Up to now, we have seen an example of how TOPI can save us from writing explicit computations in lower level API. # But it doesn't stop here. Still we did the scheduling as before. TOPI also provides higher level # scheduling recipes depending on a given context. 
######################################################################
# Generic schedules and fusing operations
# ---------------------------------------
# So far, we have seen how TOPI can save us from writing explicit computations
# in the lower level API. But it does not stop there: we still wrote the schedule
# ourselves. TOPI also provides higher level scheduling recipes depending on a
# given context. For example, for CUDA,
# we can schedule the following series of operations ending with :code:`topi.sum` using only
# :code:`topi.generic.schedule_reduce`
#
e = topi.elemwise_sum([c, d])
f = e / 2.0
g = topi.sum(f)
with tvm.target.cuda():
    sg = topi.generic.schedule_reduce(g)
    print(tvm.lower(sg, [a, b], simple_mode=True))

######################################################################
# As you can see, the scheduled stages of computation have been accumulated and we can examine them by
#
print(sg.stages)

######################################################################
# We can test the correctness by comparing with the :code:`numpy` result as follows
#
func = tvm.build(sg, [a, b, g], 'cuda')
ctx = tvm.gpu(0)
a_np = np.random.uniform(size=(x, y, y)).astype(a.dtype)
b_np = np.random.uniform(size=(y, y)).astype(b.dtype)
g_np = np.sum(np.add(a_np + b_np, a_np * b_np) / 2.0)
a_nd = tvm.nd.array(a_np, ctx)
b_nd = tvm.nd.array(b_np, ctx)
g_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), ctx)
func(a_nd, b_nd, g_nd)
tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5)

######################################################################
# TOPI also provides common neural network operations such as :code:`softmax` with an optimized schedule
#
tarray = tvm.placeholder((512, 512), name="tarray")
softmax_topi = topi.nn.softmax(tarray)
with tvm.target.create("cuda"):
    sst = topi.generic.schedule_softmax(softmax_topi)
    print(tvm.lower(sst, [tarray], simple_mode=True))

######################################################################
# Fusing convolutions
# -------------------
# We can fuse :code:`topi.nn.conv2d` and :code:`topi.nn.relu` together.
#
# .. note::
#
#    TOPI functions are all generic functions. They have different implementations
#    for different backends to optimize for performance.
#    For each backend, it is necessary to call them under a target scope for both
#    compute declaration and schedule. TVM will choose the right function to call with
#    the target information.
#
data = tvm.placeholder((1, 3, 224, 224))
kernel = tvm.placeholder((10, 3, 5, 5))

with tvm.target.create("cuda"):
    conv = topi.nn.conv2d(data, kernel, strides=1, padding=2)
    out = topi.nn.relu(conv)
    sconv = topi.generic.nn.schedule_conv2d_nchw(out)
    print(tvm.lower(sconv, [data, kernel], simple_mode=True))

######################################################################
# Summary
# -------
# In this tutorial, we have seen
#
# - How to use the TOPI API for common operations with numpy-style operators.
# - How TOPI facilitates generic schedules and operator fusion for a context, to generate optimized kernel code.
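######################################################################
# Finally, as an optional follow-up to the fused convolution above, the cell
# below is a minimal sketch of how the compiled kernel could be run. It assumes
# a CUDA-capable GPU is available as device 0 (the :code:`ctx` created earlier).
# With a 5x5 kernel, stride 1 and padding 2, the spatial size is preserved
# ((224 + 2 * 2 - 5) + 1 = 224), so the output shape is (1, 10, 224, 224).
#
fconv = tvm.build(sconv, [data, kernel, out], 'cuda')
data_np = np.random.uniform(size=(1, 3, 224, 224)).astype(data.dtype)
kernel_np = np.random.uniform(size=(10, 3, 5, 5)).astype(kernel.dtype)
data_nd = tvm.nd.array(data_np, ctx)
kernel_nd = tvm.nd.array(kernel_np, ctx)
out_nd = tvm.nd.array(np.zeros((1, 10, 224, 224), dtype=out.dtype), ctx)
fconv(data_nd, kernel_nd, out_nd)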