Commit 4361ba0d by Tianqi Chen Committed by GitHub

[RUNTIME] Simple NDArray container API in c++ (#1418)

parent 0be2b0fa
......@@ -87,6 +87,7 @@ typedef enum {
kFuncHandle = 10U,
kStr = 11U,
kBytes = 12U,
kNDArrayContainer = 13U,
// Extension codes for other frameworks to integrate TVM PackedFunc.
// To make sure each framework's id do not conflict, use first and
// last sections to mark ranges.
......@@ -121,6 +122,9 @@ typedef DLContext TVMContext;
*/
typedef DLTensor TVMArray;
/*! \brief the array handle */
typedef TVMArray* TVMArrayHandle;
/*!
* \brief Union type of values
* being passed through API and function calls.
......@@ -149,8 +153,6 @@ typedef void* TVMModuleHandle;
typedef void* TVMFunctionHandle;
/*! \brief Handle to hold return value. */
typedef void* TVMRetValueHandle;
/*! \brief the array handle */
typedef TVMArray* TVMArrayHandle;
/*!
* \brief The stream that is specific to device
* can be NULL, which indicates the default one.
......
/*!
* Copyright (c) 2017 by Contributors
* \file tvm/runtime/ndarray.h
* \brief Reference counted NDArray container API backed by DLTensor
*/
#ifndef TVM_RUNTIME_NDARRAY_H_
#define TVM_RUNTIME_NDARRAY_H_
#include <atomic>
#include <vector>
#include <utility>
#include "./c_runtime_api.h"
namespace tvm {
namespace runtime {
/*!
 * \brief Managed NDArray.
 *  The array is backed by reference counted blocks.
 *
 *  An NDArray is a handle holding a single Container pointer. A default
 *  constructed NDArray holds nullptr and is "undefined" (see defined()).
 */
class NDArray {
 public:
  // internal container type
  struct Container;
  /*! \brief default constructor, creates an undefined NDArray */
  NDArray() {}
  /*!
   * \brief construct a NDArray that refers to data
   * \param data The data this NDArray refers to
   */
  explicit inline NDArray(Container* data);
  /*!
   * \brief copy constructor
   * \param other The value to be copied
   */
  inline NDArray(const NDArray& other);  // NOLINT(*)
  /*!
   * \brief move constructor
   * \param other The value to be moved, left undefined afterwards.
   * \note Only pointers are exchanged, so this cannot throw; marking it
   *  noexcept lets standard containers move (rather than copy) on growth.
   */
  NDArray(NDArray&& other) noexcept  // NOLINT(*)
      : data_(other.data_) {
    other.data_ = nullptr;
  }
  /*! \brief destructor, releases the reference held by this handle */
  ~NDArray() {
    this->reset();
  }
  /*!
   * \brief Swap this array with another NDArray
   * \param other The other NDArray
   */
  void swap(NDArray& other) noexcept {  // NOLINT(*)
    std::swap(data_, other.data_);
  }
  /*!
   * \brief copy assignment
   * \param other The value to be assigned.
   * \return reference to self.
   */
  NDArray& operator=(const NDArray& other) {  // NOLINT(*)
    // copy-and-swap idiom
    NDArray(other).swap(*this);  // NOLINT(*)
    return *this;
  }
  /*!
   * \brief move assignment
   * \param other The value to be assigned, left undefined afterwards.
   * \return reference to self.
   * \note The previously held reference is released by the destructor of
   *  the temporary, which is already implicitly noexcept.
   */
  NDArray& operator=(NDArray&& other) noexcept {  // NOLINT(*)
    // move-and-swap idiom
    NDArray(std::move(other)).swap(*this);  // NOLINT(*)
    return *this;
  }
  /*! \return If NDArray is defined */
  bool defined() const {
    return data_ != nullptr;
  }
  /*! \return If both NDArray reference the same container */
  bool same_as(const NDArray& other) const {
    return data_ == other.data_;
  }
  /*! \brief reset the content of NDArray to be nullptr */
  inline void reset();
  /*!
   * \return the reference counter
   * \note this number is approximate in multi-threaded setting.
   */
  inline int use_count() const;
  /*! \return Pointer to content of DLTensor */
  inline const DLTensor* operator->() const;
  /*!
   * \brief Copy data content from another array.
   * \param other The source array to be copied from.
   * \note The copy may happen asynchronously if it involves a GPU context.
   *       TVMSynchronize is necessary.
   */
  inline void CopyFrom(DLTensor* other);
  inline void CopyFrom(const NDArray& other);
  /*!
   * \brief Copy data content into another array.
   * \param other The target array to be copied to.
   * \note The copy may happen asynchronously if it involves a GPU context.
   *       TVMSynchronize is necessary.
   */
  inline void CopyTo(DLTensor* other);
  inline void CopyTo(const NDArray& other);
  /*!
   * \brief Create a NDArray that shares the data memory with the current one.
   * \param shape The shape of the new array.
   * \param dtype The data type of the new array.
   * \return The created view.
   * \note The memory size of new array must not be larger than the current one.
   */
  TVM_DLL NDArray CreateView(
      std::vector<int64_t> shape, DLDataType dtype);
  /*!
   * \brief Create a reference view of NDArray that
   *  represents as DLManagedTensor.
   * \return A DLManagedTensor
   */
  TVM_DLL DLManagedTensor* ToDLPack() const;
  /*!
   * \brief Create an empty NDArray.
   * \param shape The shape of the new array.
   * \param dtype The data type of the new array.
   * \param ctx The context of the Array.
   * \return The created Array
   */
  TVM_DLL static NDArray Empty(std::vector<int64_t> shape,
                               DLDataType dtype,
                               DLContext ctx);
  /*!
   * \brief Create a NDArray backed by a dlpack tensor.
   *
   * This allows us to create a NDArray using the memory
   * allocated by an external deep learning framework
   * that is DLPack compatible.
   *
   * The memory is retained until the NDArray went out of scope.
   *
   * \param tensor The DLPack managed tensor to wrap.
   * \return The created NDArray view.
   */
  TVM_DLL static NDArray FromDLPack(DLManagedTensor* tensor);
  /*!
   * \brief Function to copy data from one array to another.
   * \param from The source array.
   * \param to The target array.
   * \param stream The stream used in copy.
   */
  TVM_DLL static void CopyFromTo(
      DLTensor* from, DLTensor* to, TVMStreamHandle stream = nullptr);
  // internal namespace
  struct Internal;

 private:
  /*! \brief Internal Data content */
  Container* data_{nullptr};
  // enable internal functions
  friend struct Internal;
  friend class TVMRetValue;
  friend class TVMArgsSetter;
};
/*!
* \brief Reference counted Container object used to back NDArray.
*
* This object is DLTensor compatible:
* the pointer to the NDArrayContainer can be directly
* interpreted as a DLTensor*
*
* \note do not use this struct directly, use NDArray.
*/
struct NDArray::Container {
public:
// NOTE: the first part of this structure is the same as
// DLManagedTensor, note that, however, the deleter
// is only called when the reference counter goes to 0
/*!
* \brief The corresponding dl_tensor field.
* \note it is important that the first field is DLTensor
* So that this data structure is DLTensor compatible.
* The head ptr of this struct can be viewed as DLTensor*.
*/
DLTensor dl_tensor;
/*!
* \brief additional context, reserved for recycling
* \note We can attach additional content here
* which the current container depends on
* (e.g. reference to original memory when creating views).
*/
void* manager_ctx{nullptr};
/*!
* \brief Customized deleter
*
* \note The customized deleter is helpful to enable
* different ways of memory allocator that are not
* currently defined by the system.
* When left as nullptr, DecRef() does not free anything.
*/
void (*deleter)(Container* self) = nullptr;
/*!
* \brief default constructor
* \note Only data, ndim, shape, strides and byte_offset are initialized here;
* dl_tensor.dtype and dl_tensor.ctx are left for the creator to fill in.
*/
Container() {
dl_tensor.data = nullptr;
dl_tensor.ndim = 0;
dl_tensor.shape = nullptr;
dl_tensor.strides = nullptr;
dl_tensor.byte_offset = 0;
}
/*! \brief developer function, increases reference counter */
void IncRef() {
ref_counter_.fetch_add(1, std::memory_order_relaxed);
}
/*!
* \brief developer function, decrease reference counter
* \note The deleter (if any) is invoked when the counter drops to zero.
*/
void DecRef() {
// fetch_sub returns the value held before the decrement,
// so 1 means this call released the last reference.
if (ref_counter_.fetch_sub(1, std::memory_order_release) == 1) {
// acquire fence pairs with the release decrements above
// before the deleter touches the container.
std::atomic_thread_fence(std::memory_order_acquire);
if (this->deleter != nullptr) {
(*this->deleter)(this);
}
}
}
private:
friend class NDArray;
/*!
* \brief The shape container,
* can be used for shape data.
*/
std::vector<int64_t> shape_;
/*! \brief The reference counter, starts at zero */
std::atomic<int> ref_counter_{0};
};
// implementations of inline functions
// the usages of functions are documented in place.
/*!
 * Wrap a raw container pointer and take one reference on it.
 * A null pointer produces an undefined NDArray instead of dereferencing
 * nullptr; this can happen when an undefined NDArray is passed through
 * a packed function argument tagged as an NDArray container.
 */
inline NDArray::NDArray(Container* data)
    : data_(data) {
  if (data_ != nullptr) {
    data_->IncRef();
  }
}
/*!
 * Copy constructor: share the container of other and add one reference.
 * Copying an undefined NDArray (data_ == nullptr) yields another
 * undefined NDArray rather than dereferencing nullptr.
 */
inline NDArray::NDArray(const NDArray& other)
    : data_(other.data_) {
  if (data_ != nullptr) {
    data_->IncRef();
  }
}
/*! Drop the reference held by this handle (if any) and become undefined. */
inline void NDArray::reset() {
  // nothing to release for an undefined array
  if (data_ == nullptr) return;
  data_->DecRef();
  data_ = nullptr;
}
/*! Copy the content of a raw DLTensor into this (defined) array. */
inline void NDArray::CopyFrom(DLTensor* other) {
  CHECK(data_ != nullptr);
  DLTensor* self_tensor = &(data_->dl_tensor);
  CopyFromTo(other, self_tensor);
}
inline void NDArray::CopyFrom(const NDArray& other) {
CHECK(data_ != nullptr);
CHECK(other.data_ != nullptr);
CopyFromTo(&(other.data_->dl_tensor), &(data_->dl_tensor));
}
/*! Copy the content of this (defined) array into a raw DLTensor. */
inline void NDArray::CopyTo(DLTensor* other) {
  CHECK(data_ != nullptr);
  DLTensor* self_tensor = &(data_->dl_tensor);
  CopyFromTo(self_tensor, other);
}
inline void NDArray::CopyTo(const NDArray& other) {
CHECK(data_ != nullptr);
CHECK(other.data_ != nullptr);
CopyFromTo(&(data_->dl_tensor), &(other.data_->dl_tensor));
}
/*! Current value of the reference counter; 0 for an undefined array. */
inline int NDArray::use_count() const {
  return data_ != nullptr
      ? data_->ref_counter_.load(std::memory_order_relaxed)
      : 0;
}
/*!
 * Access the DLTensor stored at the head of the container.
 * No null check is performed here, so the array must be defined.
 */
inline const DLTensor* NDArray::operator->() const {
  const DLTensor* tensor = &(data_->dl_tensor);
  return tensor;
}
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_NDARRAY_H_
......@@ -16,6 +16,7 @@
#include <type_traits>
#include "./c_runtime_api.h"
#include "./module.h"
#include "./ndarray.h"
namespace HalideIR {
// Forward declare type for extensions
......@@ -249,10 +250,22 @@ class TVMPODValue_ {
TVM_CHECK_TYPE_CODE(type_code_, kHandle);
return value_.v_handle;
}
operator TVMArray*() const {
if (type_code_ == kNull) return nullptr;
TVM_CHECK_TYPE_CODE(type_code_, kArrayHandle);
return static_cast<TVMArray*>(value_.v_handle);
// Convert to a DLTensor pointer.
// Accepts kArrayHandle and kNDArrayContainer (both store the tensor address
// in v_handle) and maps kNull to nullptr; any other type code is fatal.
operator DLTensor*() const {
  if (type_code_ == kNull) return nullptr;
  const bool holds_tensor =
      (type_code_ == kArrayHandle || type_code_ == kNDArrayContainer);
  if (holds_tensor) {
    return static_cast<DLTensor*>(value_.v_handle);
  }
  LOG(FATAL) << "Expected "
             << "DLTensor* or NDArray but get "
             << TypeCode2Str(type_code_);
  return nullptr;
}
// Convert to an NDArray handle.
// kNull maps to an undefined NDArray; otherwise the value must carry
// kNDArrayContainer and the stored handle is wrapped in a new NDArray.
operator NDArray() const {
  if (type_code_ == kNull) {
    return NDArray();
  }
  TVM_CHECK_TYPE_CODE(type_code_, kNDArrayContainer);
  NDArray::Container* container =
      static_cast<NDArray::Container*>(value_.v_handle);
  return NDArray(container);
}
operator TVMContext() const {
TVM_CHECK_TYPE_CODE(type_code_, kTVMContext);
......@@ -312,8 +325,10 @@ class TVMArgValue : public TVMPODValue_ {
using TVMPODValue_::operator int;
using TVMPODValue_::operator bool;
using TVMPODValue_::operator void*;
using TVMPODValue_::operator TVMArray*;
using TVMPODValue_::operator DLTensor*;
using TVMPODValue_::operator NDArray;
using TVMPODValue_::operator TVMContext;
// conversion operator.
operator std::string() const {
if (type_code_ == kTVMType) {
......@@ -394,8 +409,9 @@ class TVMRetValue : public TVMPODValue_ {
using TVMPODValue_::operator int;
using TVMPODValue_::operator bool;
using TVMPODValue_::operator void*;
using TVMPODValue_::operator TVMArray*;
using TVMPODValue_::operator DLTensor*;
using TVMPODValue_::operator TVMContext;
using TVMPODValue_::operator NDArray;
// Disable copy and assign from another value, but allow move.
TVMRetValue(const TVMRetValue& other) {
this->Assign(other);
......@@ -477,6 +493,13 @@ class TVMRetValue : public TVMPODValue_ {
this->SwitchToClass(kBytes, std::string(value.data, value.size));
return *this;
}
// Store an NDArray in the return value.
// `other` is taken by value, so this function owns one reference to the
// container; that reference is handed over to the return value below.
TVMRetValue& operator=(NDArray other) {
// release whatever this return value was holding before
this->Clear();
type_code_ = kNDArrayContainer;
value_.v_handle = other.data_;
// detach `other` so its destructor does not drop the reference
// that v_handle now stands for
other.data_ = nullptr;
return *this;
}
TVMRetValue& operator=(PackedFunc f) {
this->SwitchToClass(kFuncHandle, f);
return *this;
......@@ -559,6 +582,10 @@ class TVMRetValue : public TVMPODValue_ {
SwitchToClass<Module>(kModuleHandle, other);
break;
}
case kNDArrayContainer: {
*this = other.operator NDArray();
break;
}
case kNodeHandle: {
SwitchToClass<std::shared_ptr<Node> >(
kNodeHandle, *other.template ptr<std::shared_ptr<Node> >());
......@@ -607,6 +634,10 @@ class TVMRetValue : public TVMPODValue_ {
case kFuncHandle: delete ptr<PackedFunc>(); break;
case kModuleHandle: delete ptr<Module>(); break;
case kNodeHandle: delete ptr<std::shared_ptr<Node> >(); break;
case kNDArrayContainer: {
static_cast<NDArray::Container*>(value_.v_handle)->DecRef();
break;
}
}
if (type_code_ > kExtBegin) {
#if TVM_RUNTIME_HEADER_ONLY
......@@ -635,6 +666,7 @@ inline const char* TypeCode2Str(int type_code) {
case kTVMContext: return "TVMContext";
case kFuncHandle: return "FunctionHandle";
case kModuleHandle: return "ModuleHandle";
case kNDArrayContainer: return "NDArrayContainer";
default: LOG(FATAL) << "unknown type_code="
<< static_cast<int>(type_code); return "";
}
......@@ -776,7 +808,7 @@ class TVMArgsSetter {
values_[i].v_handle = value;
type_codes_[i] = kHandle;
}
void operator()(size_t i, TVMArray* value) const {
void operator()(size_t i, DLTensor* value) const {
values_[i].v_handle = value;
type_codes_[i] = kArrayHandle;
}
......@@ -811,6 +843,10 @@ class TVMArgsSetter {
values_[i].v_handle = const_cast<Module*>(&value);
type_codes_[i] = kModuleHandle;
}
// Pass an NDArray as argument i.
// Only the raw container pointer is stored; the reference count is
// not touched here.
void operator()(size_t i, const NDArray& value) const {  // NOLINT(*)
  type_codes_[i] = kNDArrayContainer;
  values_[i].v_handle = value.data_;
}
void operator()(size_t i, const TVMRetValue& value) const { // NOLINT(*)
if (value.type_code() == kStr) {
values_[i].v_str = value.ptr<std::string>()->c_str();
......
......@@ -94,7 +94,8 @@ def _make_tvm_args(args, temp_args):
type_codes[i] = TypeCode.NULL
elif isinstance(arg, NDArrayBase):
values[i].v_handle = ctypes.cast(arg.handle, ctypes.c_void_p)
type_codes[i] = TypeCode.ARRAY_HANDLE
type_codes[i] = (TypeCode.NDARRAY_CONTAINER
if not arg.is_view else TypeCode.ARRAY_HANDLE)
elif isinstance(arg, _nd._TVM_COMPATS):
values[i].v_handle = ctypes.c_void_p(arg._tvm_handle)
type_codes[i] = arg.__class__._tvm_tcode
......@@ -208,6 +209,7 @@ C_TO_PY_ARG_SWITCH[TypeCode.FUNC_HANDLE] = _wrap_arg_func(
C_TO_PY_ARG_SWITCH[TypeCode.MODULE_HANDLE] = _wrap_arg_func(
_return_module, TypeCode.MODULE_HANDLE)
C_TO_PY_ARG_SWITCH[TypeCode.ARRAY_HANDLE] = lambda x: _make_array(x.v_handle, True)
C_TO_PY_ARG_SWITCH[TypeCode.NDARRAY_CONTAINER] = lambda x: _make_array(x.v_handle, False)
_CLASS_MODULE = None
_CLASS_FUNCTION = None
......
......@@ -18,6 +18,7 @@ cdef enum TVMTypeCode:
kFuncHandle = 10
kStr = 11
kBytes = 12
kNDArrayContainer = 13
kExtBegin = 15
cdef extern from "tvm/runtime/c_runtime_api.h":
......
......@@ -84,7 +84,8 @@ cdef inline int make_arg(object arg,
tcode[0] = kNodeHandle
elif isinstance(arg, NDArrayBase):
value[0].v_handle = (<NDArrayBase>arg).chandle
tcode[0] = kArrayHandle
tcode[0] = (kNDArrayContainer if
not (<NDArrayBase>arg).c_is_view else kArrayHandle)
elif isinstance(arg, _TVM_COMPATS):
ptr = arg._tvm_handle
value[0].v_handle = (<void*>ptr)
......@@ -173,6 +174,8 @@ cdef inline object make_ret(TVMValue value, int tcode):
return value.v_int64
elif tcode == kFloat:
return value.v_float64
elif tcode == kNDArrayContainer:
return c_make_array(value.v_handle, False)
elif tcode == kStr:
return py_str(value.v_str)
elif tcode == kBytes:
......
......@@ -25,6 +25,7 @@ class TypeCode(object):
FUNC_HANDLE = 10
STR = 11
BYTES = 12
NDARRAY_CONTAINER = 13
EXT_BEGIN = 15
class TVMByteArray(ctypes.Structure):
......
......@@ -102,6 +102,7 @@ LoweredFunc MakeAPI(Stmt body,
msg << name << ": Expect arg[" << i << "] to be pointer";
seq_check.emplace_back(
AssertStmt::make(tcode == kHandle ||
tcode == kNDArrayContainer ||
tcode == kArrayHandle ||
tcode == kNull, msg.str(), nop));
} else if (t.is_int() || t.is_uint()) {
......
......@@ -124,54 +124,6 @@ void DeviceAPI::SyncStreamFromTo(TVMContext ctx,
TVMStreamHandle event_dst) {
LOG(FATAL) << "Device does not support stream api.";
}
inline TVMArray* TVMArrayCreate_() {
TVMArray* arr = new TVMArray();
arr->shape = nullptr;
arr->strides = nullptr;
arr->ndim = 0;
arr->data = nullptr;
return arr;
}
inline void TVMArrayFree_(TVMArray* arr) {
if (arr != nullptr) {
// ok to delete nullptr
delete[] arr->shape;
delete[] arr->strides;
if (arr->data != nullptr) {
DeviceAPIManager::Get(arr->ctx)->FreeDataSpace(
arr->ctx, arr->data);
}
}
delete arr;
}
inline void VerifyType(int dtype_code, int dtype_bits, int dtype_lanes) {
CHECK_GE(dtype_lanes, 1);
if (dtype_code == kDLFloat) {
CHECK_EQ(dtype_bits % 8, 0);
} else {
CHECK_EQ(dtype_bits % 8, 0);
}
CHECK_EQ(dtype_bits & (dtype_bits - 1), 0);
}
inline size_t GetDataSize(TVMArray* arr) {
size_t size = 1;
for (tvm_index_t i = 0; i < arr->ndim; ++i) {
size *= arr->shape[i];
}
size *= (arr->dtype.bits * arr->dtype.lanes + 7) / 8;
return size;
}
inline size_t GetDataAlignment(TVMArray* arr) {
size_t align = (arr->dtype.bits / 8) * arr->dtype.lanes;
if (align < kAllocAlignment) return kAllocAlignment;
return align;
}
} // namespace runtime
} // namespace tvm
......@@ -370,110 +322,6 @@ int TVMFuncCreateFromCFunc(TVMPackedCFunc func,
API_END();
}
int TVMArrayAlloc(const tvm_index_t* shape,
int ndim,
int dtype_code,
int dtype_bits,
int dtype_lanes,
int device_type,
int device_id,
TVMArrayHandle* out) {
TVMArray* arr = nullptr;
API_BEGIN();
// shape
arr = TVMArrayCreate_();
// ndim
arr->ndim = ndim;
// dtype
VerifyType(dtype_code, dtype_bits, dtype_lanes);
arr->dtype.code = static_cast<uint8_t>(dtype_code);
arr->dtype.bits = static_cast<uint8_t>(dtype_bits);
arr->dtype.lanes = static_cast<uint16_t>(dtype_lanes);
if (ndim != 0) {
tvm_index_t* shape_copy = new tvm_index_t[ndim];
std::copy(shape, shape + ndim, shape_copy);
arr->shape = shape_copy;
} else {
arr->shape = nullptr;
}
// ctx
arr->ctx.device_type = static_cast<DLDeviceType>(device_type);
arr->ctx.device_id = device_id;
size_t size = GetDataSize(arr);
size_t alignment = GetDataAlignment(arr);
arr->data = DeviceAPIManager::Get(arr->ctx)->AllocDataSpace(
arr->ctx, size, alignment, arr->dtype);
*out = arr;
API_END_HANDLE_ERROR(TVMArrayFree_(arr));
}
int TVMArrayFree(TVMArrayHandle handle) {
API_BEGIN();
TVMArray* arr = handle;
TVMArrayFree_(arr);
API_END();
}
int TVMArrayCopyFromTo(TVMArrayHandle from,
TVMArrayHandle to,
TVMStreamHandle stream) {
API_BEGIN();
size_t from_size = GetDataSize(from);
size_t to_size = GetDataSize(to);
CHECK_EQ(from_size, to_size)
<< "TVMArrayCopyFromTo: The size must exactly match";
CHECK(from->ctx.device_type == to->ctx.device_type
|| from->ctx.device_type == kDLCPU
|| to->ctx.device_type == kDLCPU)
<< "Can not copy across different ctx types directly";
// Use the context that is *not* a cpu context to get the correct device
// api manager.
TVMContext ctx = from->ctx.device_type != kDLCPU ? from->ctx : to->ctx;
DeviceAPIManager::Get(ctx)->CopyDataFromTo(
from->data, static_cast<size_t>(from->byte_offset),
to->data, static_cast<size_t>(to->byte_offset),
from_size, from->ctx, to->ctx, from->dtype, stream);
API_END();
}
int TVMArrayCopyFromBytes(TVMArrayHandle handle,
void* data,
size_t nbytes) {
API_BEGIN();
TVMContext cpu_ctx;
cpu_ctx.device_type = kDLCPU;
cpu_ctx.device_id = 0;
size_t arr_size = GetDataSize(handle);
CHECK_EQ(arr_size, nbytes)
<< "TVMArrayCopyFromBytes: size mismatch";
DeviceAPIManager::Get(handle->ctx)->CopyDataFromTo(
data, 0,
handle->data, static_cast<size_t>(handle->byte_offset),
nbytes, cpu_ctx, handle->ctx, handle->dtype, nullptr);
API_END();
}
int TVMArrayCopyToBytes(TVMArrayHandle handle,
void* data,
size_t nbytes) {
API_BEGIN();
TVMContext cpu_ctx;
cpu_ctx.device_type = kDLCPU;
cpu_ctx.device_id = 0;
size_t arr_size = GetDataSize(handle);
CHECK_EQ(arr_size, nbytes)
<< "TVMArrayCopyToBytes: size mismatch";
DeviceAPIManager::Get(handle->ctx)->CopyDataFromTo(
handle->data, static_cast<size_t>(handle->byte_offset),
data, 0,
nbytes, handle->ctx, cpu_ctx, handle->dtype, nullptr);
API_END();
}
int TVMStreamCreate(int device_type, int device_id, TVMStreamHandle* out) {
API_BEGIN();
TVMContext ctx;
......
/*!
* Copyright (c) 2017 by Contributors
* \file ndarray.cc
* \brief NDArray container infrastructure.
*/
#include <dmlc/logging.h>
#include <tvm/runtime/ndarray.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/device_api.h>
#include "./runtime_base.h"
// deleter for arrays used by DLPack exporter
extern "C" void NDArrayDLPackDeleter(DLManagedTensor* tensor);
namespace tvm {
namespace runtime {
/*!
 * Sanity-check a data type: at least one lane, a bit width that is a
 * multiple of 8, and a bit width with at most one bit set.
 */
inline void VerifyDataType(DLDataType dtype) {
  CHECK_GE(dtype.lanes, 1);
  // float and non-float type codes are held to the same byte-multiple rule
  CHECK_EQ(dtype.bits % 8, 0);
  CHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
}
inline size_t GetDataSize(const DLTensor& arr) {
size_t size = 1;
for (tvm_index_t i = 0; i < arr.ndim; ++i) {
size *= arr.shape[i];
}
size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8;
return size;
}
/*!
 * Alignment to request for the tensor's allocation: the element size in
 * bytes (whole bytes per lane times lanes), but never below kAllocAlignment.
 */
inline size_t GetDataAlignment(const DLTensor& arr) {
  const size_t natural = (arr.dtype.bits / 8) * arr.dtype.lanes;
  if (natural >= kAllocAlignment) return natural;
  return kAllocAlignment;
}
/*! Implementation helpers that need access to NDArray's private state. */
struct NDArray::Internal {
  // Default deleter for the container.
  // A container with manager_ctx set is treated as a view: it releases its
  // reference on the parent container. Otherwise any device memory it
  // holds is returned to the device API. The container itself is then freed.
  static void DefaultDeleter(NDArray::Container* ptr) {
    void* parent = ptr->manager_ctx;
    void* payload = ptr->dl_tensor.data;
    if (parent != nullptr) {
      static_cast<NDArray::Container*>(parent)->DecRef();
    } else if (payload != nullptr) {
      tvm::runtime::DeviceAPI::Get(ptr->dl_tensor.ctx)->FreeDataSpace(
          ptr->dl_tensor.ctx, payload);
    }
    delete ptr;
  }
  // Deleter for NDArray converted from DLPack.
  // Used for data handed in through an external DLManagedTensor that was
  // not allocated inside of TVM: the foreign deleter (if any) is invoked
  // on the DLManagedTensor, then the container itself is freed.
  static void DLPackDeleter(NDArray::Container* ptr) {
    DLManagedTensor* managed = static_cast<DLManagedTensor*>(ptr->manager_ctx);
    if (managed->deleter != nullptr) {
      (*managed->deleter)(managed);
    }
    delete ptr;
  }
  // Local create function which allocates tensor metadata
  // but does not allocate space for the data.
  static NDArray Create(std::vector<int64_t> shape,
                        DLDataType dtype,
                        DLContext ctx) {
    VerifyDataType(dtype);
    // critical zone: the raw container is unowned until wrapped below
    NDArray::Container* container = new NDArray::Container();
    container->deleter = DefaultDeleter;
    NDArray result(container);
    // RAII now in effect
    // the container keeps its own copy of the shape; dl_tensor points into it
    container->shape_ = std::move(shape);
    container->dl_tensor.ndim = static_cast<int>(container->shape_.size());
    container->dl_tensor.shape = dmlc::BeginPtr(container->shape_);
    container->dl_tensor.dtype = dtype;
    container->dl_tensor.ctx = ctx;
    return result;
  }
  // Release the handle without touching the reference count and hand the
  // container out as a plain DLTensor pointer (used by the C API).
  static DLTensor* MoveAsDLTensor(NDArray arr) {
    DLTensor* tensor = const_cast<DLTensor*>(arr.operator->());
    // the DLTensor must sit at the very start of the container
    CHECK(reinterpret_cast<DLTensor*>(arr.data_) == tensor);
    arr.data_ = nullptr;
    return tensor;
  }
};
/*!
 * Create a new NDArray that aliases this array's memory under a different
 * shape/dtype. The view keeps this array's container alive by holding one
 * reference to it in manager_ctx.
 *
 * Requires a defined, compact (strides == nullptr) array, and the view
 * must not cover more bytes than the current array.
 */
NDArray NDArray::CreateView(std::vector<int64_t> shape,
                            DLDataType dtype) {
  CHECK(data_ != nullptr);
  CHECK(data_->dl_tensor.strides == nullptr)
      << "Can only create view for compact tensor";
  // shape is a by-value sink parameter: hand it over instead of copying again
  NDArray ret = Internal::Create(std::move(shape), dtype, data_->dl_tensor.ctx);
  ret.data_->dl_tensor.byte_offset =
      this->data_->dl_tensor.byte_offset;
  size_t curr_size = GetDataSize(this->data_->dl_tensor);
  size_t view_size = GetDataSize(ret.data_->dl_tensor);
  CHECK_LE(view_size, curr_size)
      << "Tries to create a view that has bigger memory than current one";
  // increase ref count; released again by the view's deleter via manager_ctx
  this->data_->IncRef();
  ret.data_->manager_ctx = this->data_;
  ret.data_->dl_tensor.data = this->data_->dl_tensor.data;
  return ret;
}
/*!
 * Export this array as a DLManagedTensor that shares its memory.
 *
 * The returned tensor owns one reference to the underlying container,
 * which NDArrayDLPackDeleter releases. manager_ctx therefore has to be
 * the Container pointer (what the deleter casts it to), not the address
 * of this NDArray handle, which may be destroyed before the consumer
 * calls the deleter.
 */
DLManagedTensor* NDArray::ToDLPack() const {
  CHECK(data_ != nullptr);
  DLManagedTensor* ret = new DLManagedTensor();
  ret->dl_tensor = data_->dl_tensor;
  ret->manager_ctx = data_;
  data_->IncRef();
  ret->deleter = NDArrayDLPackDeleter;
  return ret;
}
/*!
 * Create an NDArray with freshly allocated, uninitialized-by-this-function
 * device memory of the given shape and dtype on ctx.
 */
NDArray NDArray::Empty(std::vector<int64_t> shape,
                       DLDataType dtype,
                       DLContext ctx) {
  // shape is a by-value sink parameter: hand it over instead of copying again
  NDArray ret = Internal::Create(std::move(shape), dtype, ctx);
  // setup memory content
  size_t size = GetDataSize(ret.data_->dl_tensor);
  size_t alignment = GetDataAlignment(ret.data_->dl_tensor);
  ret.data_->dl_tensor.data =
      DeviceAPI::Get(ret->ctx)->AllocDataSpace(
          ret->ctx, size, alignment, ret->dtype);
  return ret;
}
/*!
 * Wrap an external DLManagedTensor in an NDArray.
 * The tensor description is copied into a new container, and the managed
 * tensor is remembered in manager_ctx so DLPackDeleter can release it.
 */
NDArray NDArray::FromDLPack(DLManagedTensor* tensor) {
  NDArray::Container* container = new NDArray::Container();
  container->dl_tensor = tensor->dl_tensor;
  container->manager_ctx = tensor;
  container->deleter = Internal::DLPackDeleter;
  return NDArray(container);
}
/*!
 * Copy the bytes of one tensor into another through the device API.
 * Both tensors must have the same byte size, and at least one side must
 * be on CPU unless both share the same device type.
 */
void NDArray::CopyFromTo(DLTensor* from,
                         DLTensor* to,
                         TVMStreamHandle stream) {
  size_t from_size = GetDataSize(*from);
  size_t to_size = GetDataSize(*to);
  CHECK_EQ(from_size, to_size)
      << "TVMArrayCopyFromTo: The size must exactly match";
  CHECK(from->ctx.device_type == to->ctx.device_type
        || from->ctx.device_type == kDLCPU
        || to->ctx.device_type == kDLCPU)
      << "Can not copy across different ctx types directly";
  // Use the context that is *not* a cpu context to get the correct device
  // api manager.
  const bool source_off_cpu = (from->ctx.device_type != kDLCPU);
  TVMContext ctx = source_off_cpu ? from->ctx : to->ctx;
  const size_t src_offset = static_cast<size_t>(from->byte_offset);
  const size_t dst_offset = static_cast<size_t>(to->byte_offset);
  DeviceAPI::Get(ctx)->CopyDataFromTo(
      from->data, src_offset,
      to->data, dst_offset,
      from_size, from->ctx, to->ctx, from->dtype, stream);
}
} // namespace runtime
} // namespace tvm
using namespace tvm::runtime;
// Deleter installed on DLManagedTensor objects produced by the exporter.
// Interprets manager_ctx as an NDArray::Container*, drops one reference
// on it, then frees the DLManagedTensor wrapper.
void NDArrayDLPackDeleter(DLManagedTensor* tensor) {
  NDArray::Container* owner =
      static_cast<NDArray::Container*>(tensor->manager_ctx);
  owner->DecRef();
  delete tensor;
}
int TVMArrayAlloc(const tvm_index_t* shape,
int ndim,
int dtype_code,
int dtype_bits,
int dtype_lanes,
int device_type,
int device_id,
TVMArrayHandle* out) {
API_BEGIN();
DLDataType dtype;
dtype.code = static_cast<uint8_t>(dtype_code);
dtype.bits = static_cast<uint8_t>(dtype_bits);
dtype.lanes = static_cast<uint16_t>(dtype_lanes);
DLContext ctx;
ctx.device_type = static_cast<DLDeviceType>(device_type);
ctx.device_id = device_id;
*out = NDArray::Internal::MoveAsDLTensor(
NDArray::Empty(std::vector<int64_t>(shape, shape + ndim), dtype, ctx));
API_END();
}
// C API: release the reference represented by `handle`.
// The handle is reinterpreted as the NDArray::Container it points into.
int TVMArrayFree(TVMArrayHandle handle) {
  API_BEGIN();
  NDArray::Container* container =
      reinterpret_cast<NDArray::Container*>(handle);
  container->DecRef();
  API_END();
}
// C API: copy the content of `from` into `to` on the given stream.
// Forwards directly to NDArray::CopyFromTo.
int TVMArrayCopyFromTo(TVMArrayHandle from,
                       TVMArrayHandle to,
                       TVMStreamHandle stream) {
  API_BEGIN();
  tvm::runtime::NDArray::CopyFromTo(from, to, stream);
  API_END();
}
// C API: fill the array behind `handle` from a CPU byte buffer.
// `nbytes` must equal the byte size of the array.
int TVMArrayCopyFromBytes(TVMArrayHandle handle,
                          void* data,
                          size_t nbytes) {
  API_BEGIN();
  size_t arr_size = GetDataSize(*handle);
  CHECK_EQ(arr_size, nbytes)
      << "TVMArrayCopyFromBytes: size mismatch";
  // the raw buffer is described as living on cpu(0)
  TVMContext cpu_ctx;
  cpu_ctx.device_id = 0;
  cpu_ctx.device_type = kDLCPU;
  const size_t dst_offset = static_cast<size_t>(handle->byte_offset);
  DeviceAPI::Get(handle->ctx)->CopyDataFromTo(
      data, 0,
      handle->data, dst_offset,
      nbytes, cpu_ctx, handle->ctx, handle->dtype, nullptr);
  API_END();
}
// C API: copy the array behind `handle` out into a CPU byte buffer.
// `nbytes` must equal the byte size of the array.
int TVMArrayCopyToBytes(TVMArrayHandle handle,
                        void* data,
                        size_t nbytes) {
  API_BEGIN();
  size_t arr_size = GetDataSize(*handle);
  CHECK_EQ(arr_size, nbytes)
      << "TVMArrayCopyToBytes: size mismatch";
  // the raw buffer is described as living on cpu(0)
  TVMContext cpu_ctx;
  cpu_ctx.device_id = 0;
  cpu_ctx.device_type = kDLCPU;
  const size_t src_offset = static_cast<size_t>(handle->byte_offset);
  DeviceAPI::Get(handle->ctx)->CopyDataFromTo(
      handle->data, src_offset,
      data, 0,
      nbytes, handle->ctx, cpu_ctx, handle->dtype, nullptr);
  API_END();
}
......@@ -175,7 +175,12 @@ class RPCSession::EventHandler : public dmlc::Stream {
// send Packed sequence to writer.
void SendPackedSeq(const TVMValue* arg_values, const int* type_codes, int n) {
this->Write(n);
this->WriteArray(type_codes, n);
// only handles .
for (int i = 0; i < n; ++i) {
int tcode = type_codes[i];
if (tcode == kNDArrayContainer) tcode = kArrayHandle;
this->Write(tcode);
}
// Argument packing.
for (int i = 0; i < n; ++i) {
int tcode = type_codes[i];
......@@ -207,6 +212,7 @@ class RPCSession::EventHandler : public dmlc::Stream {
this->Write(handle);
break;
}
case kNDArrayContainer:
case kArrayHandle: {
DLTensor* arr = static_cast<DLTensor*>(value.v_handle);
TVMContext ctx = StripSessMask(arr->ctx);
......
......@@ -38,6 +38,31 @@ TEST(PackedFunc, Node) {
CHECK(t.same_as(x));
}
// Round-trips an NDArray through PackedFunc arguments and return values
// and checks that the reference count follows the number of live handles.
TEST(PackedFunc, NDArray) {
using namespace tvm;
using namespace tvm::runtime;
// empty shape: a zero-dimensional array holding one float32 element
auto x = NDArray::Empty(
{}, String2TVMType("float32"),
TVMContext{kDLCPU, 0});
reinterpret_cast<float*>(x->data)[0] = 10.0f;
CHECK(x.use_count() == 1);
// returns its first argument unchanged
PackedFunc forward([&](TVMArgs args, TVMRetValue* rv) {
*rv = args[0];
});
NDArray ret = PackedFunc([&](TVMArgs args, TVMRetValue* rv) {
// the same argument can be read both as an NDArray and as a DLTensor*
NDArray y = args[0];
DLTensor* ptr = args[0];
CHECK(ptr == x.operator->());
CHECK(x.same_as(y));
// x and y are the two live handles at this point
CHECK(x.use_count() == 2);
*rv = forward(y);
})(x);
// y is gone; x and ret remain
CHECK(ret.use_count() == 2);
CHECK(ret.same_as(x));
}
TEST(PackedFunc, str) {
using namespace tvm;
using namespace tvm::runtime;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment