Commit c0e5ec56 by ziheng Committed by Tianqi Chen

[DOC] Add schedule_computaion (#92)

* [DOC] Add schedule_computaion

* Finetune the doc

* Finetune the doc

* Finetune the doc

* Set max_unroll_step=0 by default
parent 93057c8a
@@ -18,7 +18,7 @@ def lower(sch,
           name="default_function",
           binds=None,
           with_api_wrapper=True,
-          max_auto_unroll_step=8):
+          max_auto_unroll_step=0):
     """Lowering step before build into target.
     Parameters
 ...
@@ -4,7 +4,7 @@ Get Started with TVM
 **Author**: `Tianqi Chen <https://tqchen.github.io>`_
 This is an introduction tutorial to TVM.
-TVM is a domain specifric language for efficient kernel construction.
+TVM is a domain specific language for efficient kernel construction.
 In this tutorial, we will demonstrate the basic workflow in TVM.
 """
 ...
"""
Schedule Primitives in TVM
==========================
**Author**: `Ziheng Jiang <https://github.com/ZihengJiang>`_
TVM is a domain specific language for efficient kernel construction.
In this tutorial, we will show you how to schedule the computation using
the various primitives provided by TVM.
"""
from __future__ import absolute_import, print_function
import tvm
import numpy as np
######################################################################
#
# There often exist several methods to compute the same result;
# however, different methods result in different locality and
# performance. TVM therefore asks the user to describe how the
# computation should be executed, and this description is called a
# **Schedule**.
#
# A **Schedule** is a set of transformations that changes how the
# loops of the computation in the program are executed.
# declare some variables for use later
n = tvm.var('n')
m = tvm.var('m')
######################################################################
# A schedule can be created from a list of ops; by default the
# schedule computes the tensors in a serial manner in row-major order.
# declare a matrix element-wise multiply
A = tvm.placeholder((m, n), name='A')
B = tvm.placeholder((m, n), name='B')
C = tvm.compute((m, n), lambda i, j: A[i, j] * B[i, j], name='C')
s = tvm.create_schedule([C.op])
# lower will transform the computation from its definition into a real
# callable function. With the argument `with_api_wrapper=False`, it
# returns a readable C-like statement; we use it here to print the
# schedule result.
print(tvm.lower(s, [A, B, C], with_api_wrapper=False))
######################################################################
# A schedule is composed of multiple stages, and a
# **Stage** represents the schedule for one operation. We provide
# various methods to schedule every stage.
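######################################################################
# As a small added sketch (not part of the original tutorial, reusing
# the schedule ``s`` created above): a stage can be looked up by
# indexing the schedule with a tensor or with its operation, which is
# what the ``s[B]``-style accesses below rely on.
stage_c = s[C]        # stage for the operation that produces C
stage_c_op = s[C.op]  # the same stage, looked up by the operation itself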
######################################################################
# split
# --------------------------
# :code:`split` can split a specified axis into two axes by
# :code:`factor`.
A = tvm.placeholder((m,), name='A')
B = tvm.compute((m,), lambda i: A[i]*2, name='B')
s = tvm.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=32)
print(tvm.lower(s, [A, B], with_api_wrapper=False))
######################################################################
# You can also split an axis by :code:`nparts`, which splits the axis
# the opposite way to :code:`factor`: it fixes the extent of the outer
# axis, whereas :code:`factor` fixes the extent of the inner axis.
A = tvm.placeholder((m,), name='A')
B = tvm.compute((m,), lambda i: A[i], name='B')
s = tvm.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], nparts=32)
print(tvm.lower(s, [A, B], with_api_wrapper=False))
######################################################################
# tile
# --------------------------
# :code:`tile` helps you execute the computation tile by tile over two
# axes.
A = tvm.placeholder((m, n), name='A')
B = tvm.compute((m, n), lambda i, j: A[i, j], name='B')
s = tvm.create_schedule(B.op)
xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5)
print(tvm.lower(s, [A, B], with_api_wrapper=False))
######################################################################
# fuse
# --------------------------
# :code:`fuse` can fuse two consecutive axes of one computation.
A = tvm.placeholder((m, n), name='A')
B = tvm.compute((m, n), lambda i, j: A[i, j], name='B')
s = tvm.create_schedule(B.op)
# tile to four axes first: (i.outer, j.outer, i.inner, j.inner)
xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5)
# then fuse (i.inner, j.inner) into one axis: (i.inner.j.inner.fused)
fused = s[B].fuse(yi, xi)
print(tvm.lower(s, [A, B], with_api_wrapper=False))
######################################################################
# reorder
# --------------------------
# :code:`reorder` can reorder the axes in the specified order.
A = tvm.placeholder((m, n), name='A')
B = tvm.compute((m, n), lambda i, j: A[i, j], name='B')
s = tvm.create_schedule(B.op)
# tile to four axes first: (i.outer, j.outer, i.inner, j.inner)
xo, yo, xi, yi = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5)
# then reorder the axes: (i.inner, j.outer, i.outer, j.inner)
s[B].reorder(xi, yo, xo, yi)
print(tvm.lower(s, [A, B], with_api_wrapper=False))
######################################################################
# bind
# --------------------------
# :code:`bind` can bind a specified axis to a thread axis; this is
# often used in GPU programming.
A = tvm.placeholder((n,), name='A')
B = tvm.compute(A.shape, lambda i: A[i] * 2, name='B')
s = tvm.create_schedule(B.op)
bx, tx = s[B].split(B.op.axis[0], factor=64)
s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
print(tvm.lower(s, [A, B], with_api_wrapper=False))
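######################################################################
# The following is an addition to the tutorial: a hedged sketch of how
# the bound schedule above could be compiled and run, assuming a
# CUDA-enabled build of TVM and an available GPU device. Flip
# ``run_cuda`` to try it; the name ``fmul2`` is illustrative only.
run_cuda = False
if run_cuda:
    # compile the bound schedule for the GPU
    fmul2 = tvm.build(s, [A, B], "cuda", name="fmul2")
    ctx = tvm.gpu(0)
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(1024, dtype=B.dtype), ctx)
    fmul2(a, b)
    # B[i] = A[i] * 2, so the result should be twice the input
    np.testing.assert_allclose(b.asnumpy(), a.asnumpy() * 2)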
######################################################################
# compute_at
# --------------------------
# For a schedule that consists of multiple operators, TVM will compute
# each tensor at the root, in its own loop nest, by default.
A = tvm.placeholder((m,), name='A')
B = tvm.compute((m,), lambda i: A[i]+1, name='B')
C = tvm.compute((m,), lambda i: B[i]*2, name='C')
s = tvm.create_schedule(C.op)
print(tvm.lower(s, [A, B, C], with_api_wrapper=False))
######################################################################
# :code:`compute_at` can move the computation of `B` into the first
# axis of the computation of `C`.
A = tvm.placeholder((m,), name='A')
B = tvm.compute((m,), lambda i: A[i]+1, name='B')
C = tvm.compute((m,), lambda i: B[i]*2, name='C')
s = tvm.create_schedule(C.op)
s[B].compute_at(s[C], C.op.axis[0])
print(tvm.lower(s, [A, B, C], with_api_wrapper=False))
######################################################################
# compute_inline
# --------------------------
# :code:`compute_inline` can mark one stage as inline; the body of the
# computation will then be expanded and inserted in place wherever the
# tensor is required.
A = tvm.placeholder((m,), name='A')
B = tvm.compute((m,), lambda i: A[i]+1, name='B')
C = tvm.compute((m,), lambda i: B[i]*2, name='C')
s = tvm.create_schedule(C.op)
s[B].compute_inline()
print(tvm.lower(s, [A, B, C], with_api_wrapper=False))
######################################################################
# compute_root
# --------------------------
# :code:`compute_root` can move the computation of one stage back to
# the root.
A = tvm.placeholder((m,), name='A')
B = tvm.compute((m,), lambda i: A[i]+1, name='B')
C = tvm.compute((m,), lambda i: B[i]*2, name='C')
s = tvm.create_schedule(C.op)
s[B].compute_at(s[C], C.op.axis[0])
s[B].compute_root()
print(tvm.lower(s, [A, B, C], with_api_wrapper=False))
######################################################################
# Summary
# -------
# This tutorial provides an introduction to the schedule primitives in
# TVM, which permit users to schedule the computation easily and
# flexibly.
#
# In order to get a kernel implementation with good performance, the
# general workflow is often:
#
# - Describe your computation via a series of operations.
# - Try to schedule the computation with primitives.
# - Compile and run to see the performance difference.
# - Adjust your schedule according to the running results.
#
# A minimal sketch of this workflow is shown below.
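######################################################################
# The following end-to-end sketch is an addition to the tutorial text.
# It assumes an LLVM-enabled build of TVM so the kernel can run on the
# CPU; the names ``num`` and ``fadd1`` are illustrative only.
num = 1024
# describe the computation
A = tvm.placeholder((num,), name='A')
B = tvm.compute((num,), lambda i: A[i] + 1, name='B')
# schedule it: split the loop into 32-element chunks
s = tvm.create_schedule(B.op)
xo, xi = s[B].split(B.op.axis[0], factor=32)
# compile to a callable function and run it on concrete data
fadd1 = tvm.build(s, [A, B], "llvm", name="fadd1")
ctx = tvm.cpu(0)
a = tvm.nd.array(np.random.uniform(size=num).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(num, dtype=B.dtype), ctx)
fadd1(a, b)
# verify the result against numpy
np.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1)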