Commit 46c56e28 by Tianqi Chen, committed by GitHub

[EXAMPLE/PYTHON] Improve extension type, add operator lib (#162)

parent 560463e9
TVM Examples
============
This folder contains various example projects on how to use TVM.
- [extension](extension) How to extend the TVM C++ API along with the Python API.
- [operator](operator) Implementations of operators.
# Perf Examples for TVM
This folder contains perf examples of TVM under various settings.
# Operator Collections
This folder contains a collection of performance-tuned operators implemented with TVM.
The collection is contributed and maintained by the community.
## Perf Workflow
Since TVM is a work in progress, some optimizations might not be perfect.
One quick way I find useful is to do codegen plus manual modification.
The workflow is:
- Generate the GPU kernels and write them into a file, say ```perf/matexp_generated.cu```.
- Copy the generated file into another one, say ```perf/matexp_manual.cu```, and modify it according to your intuition.
- Set the use_manual flag in the script to continue the codegen workflow as normal, but piggyback the manually written code instead (see the sketch after this list).
- Observe the performance difference.
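For illustration, here is a minimal sketch of the piggyback step. This is a hypothetical pattern rather than code from this commit: the ```USE_MANUAL_CODE``` flag and file names mirror the steps above, and it assumes the ```tvm_callback_cuda_postproc``` hook registered via ```tvm.register_func```, which TVM's CUDA codegen consults when present.

```python
import os
import tvm

# Hypothetical flag corresponding to the "use_manual" step above.
USE_MANUAL_CODE = False

@tvm.register_func
def tvm_callback_cuda_postproc(code):
    # Called on every generated CUDA kernel string before compilation.
    if not os.path.exists("perf"):
        os.mkdir("perf")
    # Dump the generated kernel so it can be copied and hand-tuned.
    with open("perf/matexp_generated.cu", "w") as f:
        f.write(code)
    if USE_MANUAL_CODE:
        # Piggyback the hand-edited kernel into the normal workflow.
        with open("perf/matexp_manual.cu") as f:
            code = f.read()
    return code
```

With the flag off, the script behaves as plain codegen; flipping it on swaps in the hand-edited kernel without touching the rest of the build-and-measure loop.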
@@ -98,12 +98,11 @@ def test_gemm():
     max_auto_unroll_step = 8
     # correctness
-    def check_device(device, host="stackvm"):
-        if not tvm.module.enabled(host):
-            return
+    def check_device(device):
         if not tvm.module.enabled(device):
+            print("Skip because %s is not enabled" % device)
             return
-        f = tvm.build(s, [A, B, C], device, host,
+        f = tvm.build(s, [A, B, C], device,
                       max_auto_unroll_step=max_auto_unroll_step)
         ctx = tvm.gpu(0) if device == "cuda" else tvm.cl(0)
         # launch the kernel.
@@ -18,11 +18,12 @@ from . import ir_builder
 from . import ndarray as nd
 from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi
+from ._ffi.runtime_ctypes import TypeCode
 from ._ffi.function import Function
 from ._ffi.base import TVMError, __version__
 from .api import *
 from .intrin import *
 from .node import register_node
-from .ndarray import register_dltensor
+from .ndarray import register_extension
 from .schedule import create_schedule
 from .build import build, lower
@@ -95,9 +95,9 @@ def _make_tvm_args(args, temp_args):
         elif isinstance(arg, NDArrayBase):
             values[i].v_handle = ctypes.cast(arg.handle, ctypes.c_void_p)
             type_codes[i] = TypeCode.ARRAY_HANDLE
-        elif isinstance(arg, _nd._DLTENSOR_COMPATS):
-            values[i].v_handle = ctypes.c_void_p(arg._dltensor_addr)
-            type_codes[i] = TypeCode.ARRAY_HANDLE
+        elif isinstance(arg, _nd._TVM_COMPATS):
+            values[i].v_handle = ctypes.c_void_p(arg._tvm_handle)
+            type_codes[i] = arg._tvm_tcode
         elif isinstance(arg, Integral):
             values[i].v_int64 = arg
             type_codes[i] = TypeCode.INT
@@ -25,18 +25,19 @@ class NDArrayBase(object):
         check_call(_LIB.TVMArrayFree(self.handle))

     @property
-    def _dltensor_addr(self):
+    def _tvm_handle(self):
         return ctypes.cast(self.handle, ctypes.c_void_p).value


 def _make_array(handle, is_view):
     handle = ctypes.cast(handle, TVMArrayHandle)
     return _CLASS_NDARRAY(handle, is_view)

-_DLTENSOR_COMPATS = ()
+_TVM_COMPATS = ()

-def _reg_dltensor(cls):
-    global _DLTENSOR_COMPATS
-    _DLTENSOR_COMPATS += (cls,)
+def _reg_extension(cls):
+    global _TVM_COMPATS
+    _TVM_COMPATS += (cls,)

 _CLASS_NDARRAY = None
@@ -4,24 +4,7 @@ from __future__ import absolute_import as _abs
 import ctypes
 from ..base import py_str, check_call, _LIB
-from ..runtime_ctypes import TVMByteArray
-
-class TypeCode(object):
-    """Type code used in API calls"""
-    INT = 0
-    UINT = 1
-    FLOAT = 2
-    HANDLE = 3
-    NULL = 4
-    TVM_TYPE = 5
-    TVM_CONTEXT = 6
-    ARRAY_HANDLE = 7
-    NODE_HANDLE = 8
-    MODULE_HANDLE = 9
-    FUNC_HANDLE = 10
-    STR = 11
-    BYTES = 12
+from ..runtime_ctypes import TVMByteArray, TypeCode

 class TVMValue(ctypes.Union):
     """TVMValue in C API"""
@@ -84,10 +84,10 @@ cdef inline void make_arg(object arg,
     elif isinstance(arg, NDArrayBase):
         value[0].v_handle = (<NDArrayBase>arg).chandle
         tcode[0] = kArrayHandle
-    elif isinstance(arg, _DLTENSOR_COMPATS):
-        ptr = arg._dltensor_addr
+    elif isinstance(arg, _TVM_COMPATS):
+        ptr = arg._tvm_handle
         value[0].v_handle = (<void*>ptr)
-        tcode[0] = kArrayHandle
+        tcode[0] = arg._tvm_tcode
     elif isinstance(arg, (int, long)):
         value[0].v_int64 = arg
         tcode[0] = kInt
@@ -12,7 +12,7 @@ cdef class NDArrayBase:
         ptr = ctypes.cast(handle, ctypes.c_void_p).value
         self.chandle = <DLTensor*>(ptr)

-    property _dltensor_addr:
+    property _tvm_handle:
         def __get__(self):
             return <unsigned long long>self.chandle
@@ -41,11 +41,11 @@ cdef c_make_array(void* chandle, is_view):
     (<NDArrayBase>ret).chandle = <DLTensor*>chandle
     return ret

-cdef _DLTENSOR_COMPATS = ()
+cdef _TVM_COMPATS = ()

-def _reg_dltensor(cls):
-    global _DLTENSOR_COMPATS
-    _DLTENSOR_COMPATS += (cls,)
+def _reg_extension(cls):
+    global _TVM_COMPATS
+    _TVM_COMPATS += (cls,)

 def _make_array(handle, is_view):
     cdef unsigned long long ptr
@@ -16,14 +16,14 @@ try:
     if _FFI_MODE == "ctypes":
         raise ImportError()
     if sys.version_info >= (3, 0):
-        from ._cy3.core import _set_class_ndarray, _reg_dltensor, _make_array
+        from ._cy3.core import _set_class_ndarray, _reg_extension, _make_array
         from ._cy3.core import NDArrayBase as _NDArrayBase
     else:
-        from ._cy2.core import _set_class_ndarray, _reg_dltensor, _make_array
+        from ._cy2.core import _set_class_ndarray, _reg_extension, _make_array
         from ._cy2.core import NDArrayBase as _NDArrayBase
 except IMPORT_EXCEPT:
     # pylint: disable=wrong-import-position
-    from ._ctypes.ndarray import _set_class_ndarray, _reg_dltensor, _make_array
+    from ._ctypes.ndarray import _set_class_ndarray, _reg_extension, _make_array
     from ._ctypes.ndarray import NDArrayBase as _NDArrayBase
@@ -197,8 +197,8 @@ class NDArrayBase(_NDArrayBase):
         return target


-def register_dltensor(cls):
-    """Register a DLTensor compatible class to TVM.
+def register_extension(cls):
+    """Register an extension class to TVM.

     After the class is registered, instances of the class can
     be passed directly as arguments to Functions generated by TVM.
@@ -206,12 +206,14 @@ def register_dltensor(cls):
     Parameters
     ----------
     cls : class
-        The class object to be registered as DLTensor compatible.
+        The class object to be registered as an extension.

     Note
     ----
-    The registered class requires a property _dltensor_addr,
-    which returns an integer representing the address of the DLTensor.
+    The registered class requires two properties: _tvm_handle and _tvm_tcode.
+
+    - ```_tvm_handle``` returns an integer representing the address of the handle.
+    - ```_tvm_tcode``` returns an integer representing the type code of the class.

     Returns
     -------
@@ -231,8 +233,12 @@ def register_dltensor(cls):
             self.handle = _LIB.NewDLTensor()

         @property
-        def _dltensor_addr(self):
+        def _tvm_handle(self):
             return self.handle.value
+
+        @property
+        def _tvm_tcode(self):
+            return tvm.TypeCode.ARRAY_HANDLE
     """
-    _reg_dltensor(cls)
+    _reg_extension(cls)
     return cls
@@ -9,6 +9,22 @@ from .. import _api_internal
 tvm_shape_index_t = ctypes.c_int64

+class TypeCode(object):
+    """Type code used in API calls"""
+    INT = 0
+    UINT = 1
+    FLOAT = 2
+    HANDLE = 3
+    NULL = 4
+    TVM_TYPE = 5
+    TVM_CONTEXT = 6
+    ARRAY_HANDLE = 7
+    NODE_HANDLE = 8
+    MODULE_HANDLE = 9
+    FUNC_HANDLE = 10
+    STR = 11
+    BYTES = 12
+
 class TVMByteArray(ctypes.Structure):
     """Temp data structure for byte array."""
     _fields_ = [("data", ctypes.POINTER(ctypes.c_byte)),
@@ -9,7 +9,7 @@ import numpy as _np
 from ._ffi.ndarray import TVMContext, TVMType, NDArrayBase
 from ._ffi.ndarray import context, empty
-from ._ffi.ndarray import _set_class_ndarray, register_dltensor
+from ._ffi.ndarray import _set_class_ndarray, register_extension

 class NDArray(NDArrayBase):
     """Lightweight NDArray class of TVM runtime.
@@ -25,6 +25,9 @@ def test_llvm_add_pipeline():
         assert struct.unpack(endian + 'h', arr[0x12:0x14])[0] == e_machine

     def build_i386():
+        if not tvm.module.enabled("llvm"):
+            print("Skip because llvm is not enabled..")
+            return
         temp = util.tempdir()
         target = "llvm -target=i386-pc-linux-gnu"
         f = tvm.build(s, [A, B, C], target)

@@ -33,6 +36,9 @@ def test_llvm_add_pipeline():
         verify_elf(path, 0x03)

     def build_arm():
+        if not tvm.module.enabled("llvm"):
+            print("Skip because llvm is not enabled..")
+            return
         temp = util.tempdir()
         target = "llvm -target=arm-none-linux-gnueabihf"
         f = tvm.build(s, [A, B, C], target)
 import tvm
 import numpy as np

-@tvm.register_dltensor
+@tvm.register_extension
 class MyTensorView(object):
     def __init__(self, arr):
         self.arr = arr

     @property
-    def _dltensor_addr(self):
-        return self.arr._dltensor_addr
+    def _tvm_handle(self):
+        return self.arr._tvm_handle
+
+    @property
+    def _tvm_tcode(self):
+        return tvm.TypeCode.ARRAY_HANDLE

 def test_dltensor_compatible():
     dtype = 'int64'
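The body of ```test_dltensor_compatible``` is elided above. As a complement, here is a hedged sketch of how a registered view like ```MyTensorView``` can be exercised end to end. This is not the actual test body; the kernel construction is an assumption for illustration, mirroring era-appropriate APIs (```tvm.placeholder```, ```tvm.compute```, ```tvm.create_schedule```, ```tvm.build```).

```python
import tvm
import numpy as np

def demo_extension_call():
    if not tvm.module.enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    # Assumed setup: a trivial elementwise kernel, not from this diff.
    n = tvm.var('n')
    A = tvm.placeholder((n,), name='A', dtype='int64')
    B = tvm.compute((n,), lambda i: A[i] + 1, name='B')
    s = tvm.create_schedule(B.op)
    fadd = tvm.build(s, [A, B], "llvm")

    a = tvm.nd.array(np.arange(10, dtype='int64'))
    b = tvm.nd.array(np.zeros(10, dtype='int64'))
    # The registered view is marshalled through the FFI via
    # _tvm_handle/_tvm_tcode, exactly like the NDArray it wraps.
    fadd(MyTensorView(a), b)
    np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
```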