Commit 46c56e28 by Tianqi Chen Committed by GitHub

[EXAMPLE/PYTHON] Improve extension type, add operator lib (#162)

parent 560463e9
TVM Examples
============
This folder contains various example projects on how to use TVM.
- [extension](extension) how to extend TVM C++ api along with python API.
- [operator](operator) implementation of operators.
# Perf Examples for TVM # Operator Collections
This folder contains perf examples of tvm under various settings. This folder contains collections of operators on perf tuning operators with TVM.
The collection is contributed and maintained by the community.
## GPU Perf Workflow ## Perf Workflow
Since TVM is work in progress, some optimization might not be perfect. Since TVM is work in progress, some optimization might not be perfect.
One quick way I find useful is to do codegen plus manual modification. One quick way I find useful is to do codegen plus manual modification.
The workflow is: The workflow is:
- Generate the GPU kernels, write them into a file, say ```cuda/matexp_generated.cu``` - Generate the GPU kernels, write them into a file, say ```perf/matexp_generated.cu```
- Copy the generated file into another one, say ```cuda/matexp_manual.cu```, - Copy the generated file into another one, say ```perf/matexp_manual.cu```,
do modifications according to your intuition. do modifications according to your intuition.
- Set use_manual flag in the script to continue the codegen workflow as normal, but piggy back the manual written code instead. - Set use_manual flag in the script to continue the codegen workflow as normal, but piggy back the manual written code instead.
- Observe the performance difference. - Observe the performance difference.
......
...@@ -98,12 +98,11 @@ def test_gemm(): ...@@ -98,12 +98,11 @@ def test_gemm():
max_auto_unroll_step = 8 max_auto_unroll_step = 8
# correctness # correctness
def check_device(device, host="stackvm"): def check_device(device):
if not tvm.module.enabled(host):
return
if not tvm.module.enabled(device): if not tvm.module.enabled(device):
print("Skip because %s is not enabled" % device)
return return
f = tvm.build(s, [A, B, C], device, host, f = tvm.build(s, [A, B, C], device,
max_auto_unroll_step=max_auto_unroll_step) max_auto_unroll_step=max_auto_unroll_step)
ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0) ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
# launch the kernel. # launch the kernel.
......
...@@ -18,11 +18,12 @@ from . import ir_builder ...@@ -18,11 +18,12 @@ from . import ir_builder
from . import ndarray as nd from . import ndarray as nd
from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi
from ._ffi.runtime_ctypes import TypeCode
from ._ffi.function import Function from ._ffi.function import Function
from ._ffi.base import TVMError, __version__ from ._ffi.base import TVMError, __version__
from .api import * from .api import *
from .intrin import * from .intrin import *
from .node import register_node from .node import register_node
from .ndarray import register_dltensor from .ndarray import register_extension
from .schedule import create_schedule from .schedule import create_schedule
from .build import build, lower from .build import build, lower
...@@ -95,9 +95,9 @@ def _make_tvm_args(args, temp_args): ...@@ -95,9 +95,9 @@ def _make_tvm_args(args, temp_args):
elif isinstance(arg, NDArrayBase): elif isinstance(arg, NDArrayBase):
values[i].v_handle = ctypes.cast(arg.handle, ctypes.c_void_p) values[i].v_handle = ctypes.cast(arg.handle, ctypes.c_void_p)
type_codes[i] = TypeCode.ARRAY_HANDLE type_codes[i] = TypeCode.ARRAY_HANDLE
elif isinstance(arg, _nd._DLTENSOR_COMPATS): elif isinstance(arg, _nd._TVM_COMPATS):
values[i].v_handle = ctypes.c_void_p(arg._dltensor_addr) values[i].v_handle = ctypes.c_void_p(arg._tvm_handle)
type_codes[i] = TypeCode.ARRAY_HANDLE type_codes[i] = arg._tvm_tcode
elif isinstance(arg, Integral): elif isinstance(arg, Integral):
values[i].v_int64 = arg values[i].v_int64 = arg
type_codes[i] = TypeCode.INT type_codes[i] = TypeCode.INT
......
...@@ -25,18 +25,19 @@ class NDArrayBase(object): ...@@ -25,18 +25,19 @@ class NDArrayBase(object):
check_call(_LIB.TVMArrayFree(self.handle)) check_call(_LIB.TVMArrayFree(self.handle))
@property @property
def _dltensor_addr(self): def _tvm_handle(self):
return ctypes.cast(self.handle, ctypes.c_void_p).value return ctypes.cast(self.handle, ctypes.c_void_p).value
def _make_array(handle, is_view): def _make_array(handle, is_view):
handle = ctypes.cast(handle, TVMArrayHandle) handle = ctypes.cast(handle, TVMArrayHandle)
return _CLASS_NDARRAY(handle, is_view) return _CLASS_NDARRAY(handle, is_view)
_DLTENSOR_COMPATS = () _TVM_COMPATS = ()
def _reg_dltensor(cls): def _reg_extension(cls):
global _DLTENSOR_COMPATS global _TVM_COMPATS
_DLTENSOR_COMPATS += (cls,) _TVM_COMPATS += (cls,)
_CLASS_NDARRAY = None _CLASS_NDARRAY = None
......
...@@ -4,24 +4,7 @@ from __future__ import absolute_import as _abs ...@@ -4,24 +4,7 @@ from __future__ import absolute_import as _abs
import ctypes import ctypes
from ..base import py_str, check_call, _LIB from ..base import py_str, check_call, _LIB
from ..runtime_ctypes import TVMByteArray from ..runtime_ctypes import TVMByteArray, TypeCode
class TypeCode(object):
    """Integer type codes used when passing values through the TVM C API."""
    # Assign 0..12 in declaration order with a single tuple unpack,
    # keeping the codes in sync with the C-side enum ordering.
    (INT,
     UINT,
     FLOAT,
     HANDLE,
     NULL,
     TVM_TYPE,
     TVM_CONTEXT,
     ARRAY_HANDLE,
     NODE_HANDLE,
     MODULE_HANDLE,
     FUNC_HANDLE,
     STR,
     BYTES) = range(13)
class TVMValue(ctypes.Union): class TVMValue(ctypes.Union):
"""TVMValue in C API""" """TVMValue in C API"""
......
...@@ -84,10 +84,10 @@ cdef inline void make_arg(object arg, ...@@ -84,10 +84,10 @@ cdef inline void make_arg(object arg,
elif isinstance(arg, NDArrayBase): elif isinstance(arg, NDArrayBase):
value[0].v_handle = (<NDArrayBase>arg).chandle value[0].v_handle = (<NDArrayBase>arg).chandle
tcode[0] = kArrayHandle tcode[0] = kArrayHandle
elif isinstance(arg, _DLTENSOR_COMPATS): elif isinstance(arg, _TVM_COMPATS):
ptr = arg._dltensor_addr ptr = arg._tvm_handle
value[0].v_handle = (<void*>ptr) value[0].v_handle = (<void*>ptr)
tcode[0] = kArrayHandle tcode[0] = arg._tvm_tcode
elif isinstance(arg, (int, long)): elif isinstance(arg, (int, long)):
value[0].v_int64 = arg value[0].v_int64 = arg
tcode[0] = kInt tcode[0] = kInt
......
...@@ -12,7 +12,7 @@ cdef class NDArrayBase: ...@@ -12,7 +12,7 @@ cdef class NDArrayBase:
ptr = ctypes.cast(handle, ctypes.c_void_p).value ptr = ctypes.cast(handle, ctypes.c_void_p).value
self.chandle = <DLTensor*>(ptr) self.chandle = <DLTensor*>(ptr)
property _dltensor_addr: property _tvm_handle:
def __get__(self): def __get__(self):
return <unsigned long long>self.chandle return <unsigned long long>self.chandle
...@@ -41,11 +41,11 @@ cdef c_make_array(void* chandle, is_view): ...@@ -41,11 +41,11 @@ cdef c_make_array(void* chandle, is_view):
(<NDArrayBase>ret).chandle = <DLTensor*>chandle (<NDArrayBase>ret).chandle = <DLTensor*>chandle
return ret return ret
cdef _DLTENSOR_COMPATS = () cdef _TVM_COMPATS = ()
def _reg_dltensor(cls): def _reg_extension(cls):
global _DLTENSOR_COMPATS global _TVM_COMPATS
_DLTENSOR_COMPATS += (cls,) _TVM_COMPATS += (cls,)
def _make_array(handle, is_view): def _make_array(handle, is_view):
cdef unsigned long long ptr cdef unsigned long long ptr
......
...@@ -16,14 +16,14 @@ try: ...@@ -16,14 +16,14 @@ try:
if _FFI_MODE == "ctypes": if _FFI_MODE == "ctypes":
raise ImportError() raise ImportError()
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
from ._cy3.core import _set_class_ndarray, _reg_dltensor, _make_array from ._cy3.core import _set_class_ndarray, _reg_extension, _make_array
from ._cy3.core import NDArrayBase as _NDArrayBase from ._cy3.core import NDArrayBase as _NDArrayBase
else: else:
from ._cy2.core import _set_class_ndarray, _reg_dltensor, _make_array from ._cy2.core import _set_class_ndarray, _reg_extension, _make_array
from ._cy2.core import NDArrayBase as _NDArrayBase from ._cy2.core import NDArrayBase as _NDArrayBase
except IMPORT_EXCEPT: except IMPORT_EXCEPT:
# pylint: disable=wrong-import-position # pylint: disable=wrong-import-position
from ._ctypes.ndarray import _set_class_ndarray, _reg_dltensor, _make_array from ._ctypes.ndarray import _set_class_ndarray, _reg_extension, _make_array
from ._ctypes.ndarray import NDArrayBase as _NDArrayBase from ._ctypes.ndarray import NDArrayBase as _NDArrayBase
...@@ -197,8 +197,8 @@ class NDArrayBase(_NDArrayBase): ...@@ -197,8 +197,8 @@ class NDArrayBase(_NDArrayBase):
return target return target
def register_dltensor(cls): def register_extension(cls):
"""Register a DLTensor compatible class to TVM. """Register a extensio class to TVM.
After the class is registered, the class will be able After the class is registered, the class will be able
to directly pass as Function argument generated by TVM. to directly pass as Function argument generated by TVM.
...@@ -206,12 +206,14 @@ def register_dltensor(cls): ...@@ -206,12 +206,14 @@ def register_dltensor(cls):
Parameters Parameters
---------- ----------
cls : class cls : class
The class object to be registered as DLTensor compatible. The class object to be registered as extension.
Note Note
---- ----
The registered class requires a property _dltensor_addr, The registered class requires two properties: _tvm_handle and _tvm_tcode
which returns an integer that represents the address of DLTensor.
- ```_tvm_handle``` returns an integer representing the address of the handle.
- ```_tvm_tcode``` returns an integer representing the type code of the class.
Returns Returns
------- -------
...@@ -231,8 +233,12 @@ def register_dltensor(cls): ...@@ -231,8 +233,12 @@ def register_dltensor(cls):
self.handle = _LIB.NewDLTensor() self.handle = _LIB.NewDLTensor()
@property @property
def _dltensor_addr(self): def _tvm_handle(self):
return self.handle.value return self.handle.value
@property
def _tvm_tcode(self):
return tvm.TypeCode.ARRAY_HANDLE
""" """
_reg_dltensor(cls) _reg_extension(cls)
return cls return cls
...@@ -9,6 +9,22 @@ from .. import _api_internal ...@@ -9,6 +9,22 @@ from .. import _api_internal
tvm_shape_index_t = ctypes.c_int64 tvm_shape_index_t = ctypes.c_int64
class TypeCode(object):
    """Integer type codes used when passing values through the TVM C API."""
    # Values 0..12 assigned in declaration order via one tuple unpack;
    # the ordering mirrors the corresponding C-side enum.
    (INT,
     UINT,
     FLOAT,
     HANDLE,
     NULL,
     TVM_TYPE,
     TVM_CONTEXT,
     ARRAY_HANDLE,
     NODE_HANDLE,
     MODULE_HANDLE,
     FUNC_HANDLE,
     STR,
     BYTES) = range(13)
class TVMByteArray(ctypes.Structure): class TVMByteArray(ctypes.Structure):
"""Temp data structure for byte array.""" """Temp data structure for byte array."""
_fields_ = [("data", ctypes.POINTER(ctypes.c_byte)), _fields_ = [("data", ctypes.POINTER(ctypes.c_byte)),
......
...@@ -9,7 +9,7 @@ import numpy as _np ...@@ -9,7 +9,7 @@ import numpy as _np
from ._ffi.ndarray import TVMContext, TVMType, NDArrayBase from ._ffi.ndarray import TVMContext, TVMType, NDArrayBase
from ._ffi.ndarray import context, empty from ._ffi.ndarray import context, empty
from ._ffi.ndarray import _set_class_ndarray, register_dltensor from ._ffi.ndarray import _set_class_ndarray, register_extension
class NDArray(NDArrayBase): class NDArray(NDArrayBase):
"""Lightweight NDArray class of TVM runtime. """Lightweight NDArray class of TVM runtime.
......
...@@ -25,6 +25,9 @@ def test_llvm_add_pipeline(): ...@@ -25,6 +25,9 @@ def test_llvm_add_pipeline():
assert struct.unpack(endian + 'h', arr[0x12:0x14])[0] == e_machine assert struct.unpack(endian + 'h', arr[0x12:0x14])[0] == e_machine
def build_i386(): def build_i386():
if not tvm.module.enabled("llvm"):
print("Skip because llvm is not enabled..")
return
temp = util.tempdir() temp = util.tempdir()
target = "llvm -target=i386-pc-linux-gnu" target = "llvm -target=i386-pc-linux-gnu"
f = tvm.build(s, [A, B, C], target) f = tvm.build(s, [A, B, C], target)
...@@ -33,6 +36,9 @@ def test_llvm_add_pipeline(): ...@@ -33,6 +36,9 @@ def test_llvm_add_pipeline():
verify_elf(path, 0x03) verify_elf(path, 0x03)
def build_arm(): def build_arm():
if not tvm.module.enabled("llvm"):
print("Skip because llvm is not enabled..")
return
temp = util.tempdir() temp = util.tempdir()
target = "llvm -target=arm-none-linux-gnueabihf" target = "llvm -target=arm-none-linux-gnueabihf"
f = tvm.build(s, [A, B, C], target) f = tvm.build(s, [A, B, C], target)
......
import tvm import tvm
import numpy as np import numpy as np
@tvm.register_dltensor @tvm.register_extension
class MyTensorView(object): class MyTensorView(object):
def __init__(self, arr): def __init__(self, arr):
self.arr = arr self.arr = arr
@property @property
def _dltensor_addr(self): def _tvm_handle(self):
return self.arr._dltensor_addr return self.arr._tvm_handle
@property
def _tvm_tcode(self):
return tvm.TypeCode.ARRAY_HANDLE
def test_dltensor_compatible(): def test_dltensor_compatible():
dtype = 'int64' dtype = 'int64'
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment