Commit bde53033 by Lianmin Zheng Committed by Tianqi Chen

[RUNTIME] Add fp16/fp32 conversion functions (#1766)

parent e8d6d9aa
[submodule "dmlc-core"]
path = dmlc-core
path = 3rdparty/dmlc-core
url = https://github.com/dmlc/dmlc-core
[submodule "HalideIR"]
path = HalideIR
path = 3rdparty/HalideIR
url = https://github.com/dmlc/HalideIR
[submodule "dlpack"]
path = dlpack
path = 3rdparty/dlpack
url = https://github.com/dmlc/dlpack
Subproject commit cf6090aeaeb782d1daff54b0ca5c2c281d7008db
/*
* Copyright (c) 2009-2015 by llvm/compiler-rt contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* Copyright (c) 2018 by Contributors
* \file builtin_fp16.cc
* \brief Functions for conversion between fp32 and fp16, adopted from compiler-rt.
*/
#include <cstdint>
static inline uint32_t __clz(uint32_t x) {
// count leading zeros
int n = 32;
uint32_t y;
y = x >>16; if (y) { n = n -16; x = y; }
y = x >> 8; if (y) { n = n - 8; x = y; }
y = x >> 4; if (y) { n = n - 4; x = y; }
y = x >> 2; if (y) { n = n - 2; x = y; }
y = x >> 1; if (y) return n - 2;
return n - x;
}
template <typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
static inline DST_T __truncXfYf2__(SRC_T a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
const int srcBits = sizeof(SRC_T) * 8;
const int srcExpBits = srcBits - SRC_SIG_BITS - 1;
const int srcInfExp = (1 << srcExpBits) - 1;
const int srcExpBias = srcInfExp >> 1;
const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS;
const SRC_REP_T srcSignificandMask = srcMinNormal - 1;
const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS;
const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits);
const SRC_REP_T srcAbsMask = srcSignMask - 1;
const SRC_REP_T roundMask = (SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS)) - 1;
const SRC_REP_T halfway = SRC_REP_T(1) << (SRC_SIG_BITS - DST_SIG_BITS - 1);
const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
const SRC_REP_T srcNaNCode = srcQNaN - 1;
const int dstBits = sizeof(DST_T) * 8;
const int dstExpBits = dstBits - DST_SIG_BITS - 1;
const int dstInfExp = (1 << dstExpBits) - 1;
const int dstExpBias = dstInfExp >> 1;
const int underflowExponent = srcExpBias + 1 - dstExpBias;
const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
const SRC_REP_T underflow = (SRC_REP_T)underflowExponent << SRC_SIG_BITS;
const SRC_REP_T overflow = (SRC_REP_T)overflowExponent << SRC_SIG_BITS;
const DST_REP_T dstQNaN = DST_REP_T(1) << (DST_SIG_BITS - 1);
const DST_REP_T dstNaNCode = dstQNaN - 1;
// Break a into a sign and representation of the absolute value
const union { SRC_T f; SRC_REP_T i; } src_rep = {.f = a};
const SRC_REP_T aRep = src_rep.i;
const SRC_REP_T aAbs = aRep & srcAbsMask;
const SRC_REP_T sign = aRep & srcSignMask;
DST_REP_T absResult;
if (aAbs - underflow < aAbs - overflow) {
// The exponent of a is within the range of normal numbers in the
// destination format. We can convert by simply right-shifting with
// rounding and adjusting the exponent.
absResult = aAbs >> (SRC_SIG_BITS - DST_SIG_BITS);
absResult -= (DST_REP_T)(srcExpBias - dstExpBias) << DST_SIG_BITS;
const SRC_REP_T roundBits = aAbs & roundMask;
// Round to nearest
if (roundBits > halfway)
absResult++;
// Ties to even
else if (roundBits == halfway)
absResult += absResult & 1;
}
else if (aAbs > srcInfinity) {
// a is NaN.
// Conjure the result by beginning with infinity, setting the qNaN
// bit and inserting the (truncated) trailing NaN field.
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
absResult |= dstQNaN;
absResult |= ((aAbs & srcNaNCode) >> (SRC_SIG_BITS - DST_SIG_BITS)) & dstNaNCode;
}
else if (aAbs >= overflow) {
// a overflows to infinity.
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
}
else {
// a underflows on conversion to the destination type or is an exact
// zero. The result may be a denormal or zero. Extract the exponent
// to get the shift amount for the denormalization.
const int aExp = aAbs >> SRC_SIG_BITS;
const int shift = srcExpBias - dstExpBias - aExp + 1;
const SRC_REP_T significand = (aRep & srcSignificandMask) | srcMinNormal;
// Right shift by the denormalization amount with sticky.
if (shift > SRC_SIG_BITS) {
absResult = 0;
} else {
const bool sticky = significand << (srcBits - shift);
SRC_REP_T denormalizedSignificand = significand >> shift | sticky;
absResult = denormalizedSignificand >> (SRC_SIG_BITS - DST_SIG_BITS);
const SRC_REP_T roundBits = denormalizedSignificand & roundMask;
// Round to nearest
if (roundBits > halfway)
absResult++;
// Ties to even
else if (roundBits == halfway)
absResult += absResult & 1;
}
}
// Apply the signbit to (DST_T)abs(a).
const DST_REP_T result = absResult | sign >> (srcBits - dstBits);
const union { DST_T f; DST_REP_T i; } dst_rep = {.i = result};
return dst_rep.f;
}
template<typename SRC_T, typename SRC_REP_T, int SRC_SIG_BITS,
typename DST_T, typename DST_REP_T, int DST_SIG_BITS>
static inline DST_T __extendXfYf2__(SRC_T a) {
// Various constants whose values follow from the type parameters.
// Any reasonable optimizer will fold and propagate all of these.
const int srcBits = sizeof(SRC_T) * 8;
const int srcExpBits = srcBits - SRC_SIG_BITS - 1;
const int srcInfExp = (1 << srcExpBits) - 1;
const int srcExpBias = srcInfExp >> 1;
const SRC_REP_T srcMinNormal = SRC_REP_T(1) << SRC_SIG_BITS;
const SRC_REP_T srcInfinity = (SRC_REP_T)srcInfExp << SRC_SIG_BITS;
const SRC_REP_T srcSignMask = SRC_REP_T(1) << (SRC_SIG_BITS + srcExpBits);
const SRC_REP_T srcAbsMask = srcSignMask - 1;
const SRC_REP_T srcQNaN = SRC_REP_T(1) << (SRC_SIG_BITS - 1);
const SRC_REP_T srcNaNCode = srcQNaN - 1;
const int dstBits = sizeof(DST_T)*8;
const int dstExpBits = dstBits - DST_SIG_BITS - 1;
const int dstInfExp = (1 << dstExpBits) - 1;
const int dstExpBias = dstInfExp >> 1;
const DST_REP_T dstMinNormal = DST_REP_T(1) << DST_SIG_BITS;
// Break a into a sign and representation of the absolute value
const union { SRC_T f; SRC_REP_T i; } src_rep = {.f = a};
const SRC_REP_T aRep = src_rep.i;
const SRC_REP_T aAbs = aRep & srcAbsMask;
const SRC_REP_T sign = aRep & srcSignMask;
DST_REP_T absResult;
// If sizeof(SRC_REP_T) < sizeof(int), the subtraction result is promoted
// to (signed) int. To avoid that, explicitly cast to SRC_REP_T.
if ((SRC_REP_T)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
// a is a normal number.
// Extend to the destination type by shifting the significand and
// exponent into the proper position and rebiasing the exponent.
absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS);
absResult += (DST_REP_T)(dstExpBias - srcExpBias) << DST_SIG_BITS;
}
else if (aAbs >= srcInfinity) {
// a is NaN or infinity.
// Conjure the result by beginning with infinity, then setting the qNaN
// bit (if needed) and right-aligning the rest of the trailing NaN
// payload field.
absResult = (DST_REP_T)dstInfExp << DST_SIG_BITS;
absResult |= (DST_REP_T)(aAbs & srcQNaN) << (DST_SIG_BITS - SRC_SIG_BITS);
absResult |= (DST_REP_T)(aAbs & srcNaNCode) << (DST_SIG_BITS - SRC_SIG_BITS);
}
else if (aAbs) {
// a is denormal.
// renormalize the significand and clear the leading bit, then insert
// the correct adjusted exponent in the destination type.
const int scale = __clz(aAbs) - __clz(srcMinNormal);
absResult = (DST_REP_T)aAbs << (DST_SIG_BITS - SRC_SIG_BITS + scale);
absResult ^= dstMinNormal;
const int resultExponent = dstExpBias - srcExpBias - scale + 1;
absResult |= (DST_REP_T)resultExponent << DST_SIG_BITS;
}
else {
// a is zero.
absResult = 0;
}
// Apply the signbit to (DST_T)abs(a).
const DST_REP_T result = absResult | (DST_REP_T)sign << (dstBits - srcBits);
const union { DST_T f; DST_REP_T i; } dst_rep = {.i = result};
return dst_rep.f;
}
Subproject commit bee4d1dd8dc1ee4a1fd8fa6a96476c2f8b7492a3
Subproject commit 4f0564ec769477c66d480dd966088f172050c874
......@@ -50,8 +50,9 @@ tvm_option(USE_RANDOM "Build with random support" OFF)
# include directories
include_directories("include")
include_directories("dlpack/include")
include_directories("dmlc-core/include")
include_directories("3rdparty/dlpack/include")
include_directories("3rdparty/dmlc-core/include")
include_directories("3rdparty/compiler-rt")
# initial variables
set(TVM_LINKER_LIBS "")
......@@ -87,8 +88,8 @@ else(MSVC)
endif(MSVC)
# add source group
FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "HalideIR/src/*.cpp" "nnvm/src/*.cc")
FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h" "HalideIR/src/*.h"
FILE(GLOB_RECURSE GROUP_SOURCE "src/*.cc" "3rdparty/HalideIR/src/*.cpp" "nnvm/src/*.cc")
FILE(GLOB_RECURSE GROUP_INCLUDE "src/*.h" "include/*.h" "3rdparty/HalideIR/src/*.h"
"nnvm/src/*.h" "nnvm/include/*.h")
assign_source_group("Source" ${GROUP_SOURCE})
assign_source_group("Include" ${GROUP_INCLUDE})
......@@ -127,7 +128,7 @@ file(GLOB_RECURSE NNVM_COMPILER_SRCS
file(GLOB TOPI_SRCS
topi/src/*.cc
)
file(GLOB_RECURSE HALIDEIR_SRCS HalideIR/src/*.cpp)
file(GLOB_RECURSE HALIDEIR_SRCS 3rdparty/HalideIR/src/*.cpp)
list(APPEND COMPILER_SRCS ${HALIDEIR_SRCS})
file(GLOB RUNTIME_SRCS src/runtime/*.cc)
......@@ -194,7 +195,7 @@ target_link_libraries(nnvm_compiler tvm)
# Related headers
target_include_directories(
tvm
PUBLIC "HalideIR/src"
PUBLIC "3rdparty/HalideIR/src"
PUBLIC "topi/include")
target_include_directories(
tvm_topi
......@@ -244,12 +245,12 @@ if (INSTALL_DEV)
PATTERN "*.h"
)
install(
DIRECTORY "HalideIR/src/." DESTINATION "include/HalideIR"
DIRECTORY "3rdparty/HalideIR/src/." DESTINATION "include/HalideIR"
FILES_MATCHING
PATTERN "*.h"
)
install(
DIRECTORY "dlpack/include/." DESTINATION "include"
DIRECTORY "3rdparty/dlpack/include/." DESTINATION "include"
FILES_MATCHING
PATTERN "*.h"
)
......
Subproject commit cf6090aeaeb782d1daff54b0ca5c2c281d7008db
......@@ -4,11 +4,11 @@ ROOTDIR = $(CURDIR)
cython cython2 cython3 web runtime vta
ifndef DMLC_CORE_PATH
DMLC_CORE_PATH = $(ROOTDIR)/dmlc-core
DMLC_CORE_PATH = $(ROOTDIR)/3rdparty/dmlc-core
endif
ifndef DLPACK_PATH
DLPACK_PATH = $(ROOTDIR)/dlpack
DLPACK_PATH = $(ROOTDIR)/3rdparty/dlpack
endif
INCLUDE_FLAGS = -Iinclude -I$(DLPACK_PATH)/include -I$(DMLC_CORE_PATH)/include
......@@ -50,10 +50,10 @@ build/libtvm_web_runtime.js: build/libtvm_web_runtime.bc
# Lint scripts
cpplint:
python3 dmlc-core/scripts/lint.py vta cpp vta/include vta/src
python3 dmlc-core/scripts/lint.py topi cpp topi/include;
python3 dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src;
python3 dmlc-core/scripts/lint.py tvm cpp include src verilog\
python3 3rdparty/dmlc-core/scripts/lint.py vta cpp vta/include vta/src
python3 3rdparty/dmlc-core/scripts/lint.py topi cpp topi/include;
python3 3rdparty/dmlc-core/scripts/lint.py nnvm cpp nnvm/include nnvm/src;
python3 3rdparty/dmlc-core/scripts/lint.py tvm cpp include src verilog\
examples/extension/src examples/graph_executor/src
pylint:
......@@ -63,7 +63,7 @@ pylint:
python3 -m pylint vta/python/vta --rcfile=$(ROOTDIR)/tests/lint/pylintrc
jnilint:
python3 dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src
python3 3rdparty/dmlc-core/scripts/lint.py tvm4j-jni cpp jvm/native/src
lint: cpplint pylint jnilint
......
......@@ -20,9 +20,9 @@ LOCAL_SRC_FILES := ml_dmlc_tvm_native_c_api.cc
LOCAL_LDFLAGS := -L$(SYSROOT)/usr/lib/ -llog
LOCAL_C_INCLUDES := $(ROOT_PATH)/include \
$(ROOT_PATH)/dlpack/include \
$(ROOT_PATH)/dmlc-core/include \
$(ROOT_PATH)/HalideIR/src \
$(ROOT_PATH)/3rdparty/dlpack/include \
$(ROOT_PATH)/3rdparty/dmlc-core/include \
$(ROOT_PATH)/3rdparty/HalideIR/src \
$(ROOT_PATH)/topi/include
LOCAL_MODULE = tvm4j_runtime_packed
......
......@@ -20,9 +20,9 @@ LOCAL_SRC_FILES := ml_dmlc_tvm_native_c_api.cc
LOCAL_LDFLAGS := -L$(SYSROOT)/usr/lib/ -llog
LOCAL_C_INCLUDES := $(ROOT_PATH)/include \
$(ROOT_PATH)/dlpack/include \
$(ROOT_PATH)/dmlc-core/include \
$(ROOT_PATH)/HalideIR/src \
$(ROOT_PATH)/3rdparty/dlpack/include \
$(ROOT_PATH)/3rdparty/dmlc-core/include \
$(ROOT_PATH)/3rdparty/HalideIR/src \
$(ROOT_PATH)/topi/include
LOCAL_MODULE = tvm4j_runtime_packed
......
......@@ -2,9 +2,9 @@
TVM_ROOT=$(shell cd ../..; pwd)
PKG_CFLAGS = -std=c++11 -O2 -fPIC\
-I${TVM_ROOT}/include\
-I${TVM_ROOT}/dmlc-core/include\
-I${TVM_ROOT}/dlpack/include\
-I${TVM_ROOT}/HalideIR/src
-I${TVM_ROOT}/3rdparty/dmlc-core/include\
-I${TVM_ROOT}/3rdparty/dlpack/include\
-I${TVM_ROOT}/3rdparty/HalideIR/src
PKG_LDFLAGS =-L${TVM_ROOT}/lib
UNAME_S := $(shell uname -s)
......
# Makefile Example to deploy TVM modules.
TVM_ROOT=$(shell cd ../..; pwd)
NNVM_PATH=nnvm
DMLC_CORE=${TVM_ROOT}/dmlc-core
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
PKG_CFLAGS = -std=c++11 -O2 -fPIC\
-I${TVM_ROOT}/include\
-I${DMLC_CORE}/include\
-I${TVM_ROOT}/dlpack/include\
-I${TVM_ROOT}/3rdparty/dlpack/include\
PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -lpthread
......
......@@ -8,8 +8,8 @@
* - Compile with -std=c++11
* - Add the following include path
* - /path/to/tvm/include/
* - /path/to/tvm/dmlc-core/include/
* - /path/to/tvm/dlpack/include/
* - /path/to/tvm/3rdparty/dmlc-core/include/
* - /path/to/tvm/3rdparty/dlpack/include/
* - Add -lpthread -ldl to the linked library.
* - You are good to go.
* - See the Makefile in the same folder for example.
......
......@@ -386,8 +386,8 @@
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
HEADER_SEARCH_PATHS = (
../../include,
../../dlpack/include,
"../../dmlc-core/include",
../../3rdparty/dlpack/include,
"../../3rdparty/dmlc-core/include",
);
INFOPLIST_FILE = tvmrpc/Info.plist;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
......@@ -406,8 +406,8 @@
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
HEADER_SEARCH_PATHS = (
../../include,
../../dlpack/include,
"../../dmlc-core/include",
../../3rdparty/dlpack/include,
"../../3rdparty/dmlc-core/include",
);
INFOPLIST_FILE = tvmrpc/Info.plist;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks";
......@@ -422,9 +422,9 @@
BUNDLE_LOADER = "$(TEST_HOST)";
DEVELOPMENT_TEAM = 3FR42MXLK9;
HEADER_SEARCH_PATHS = (
../../dlpack/include,
../../3rdparty/dlpack/include,
../../include,
"../../dmlc-core/include",
"../../3rdparty/dmlc-core/include",
);
INFOPLIST_FILE = tvmrpcLauncher/Info.plist;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
......@@ -440,9 +440,9 @@
BUNDLE_LOADER = "$(TEST_HOST)";
DEVELOPMENT_TEAM = 3FR42MXLK9;
HEADER_SEARCH_PATHS = (
../../dlpack/include,
../../3rdparty/dlpack/include,
../../include,
"../../dmlc-core/include",
"../../3rdparty/dmlc-core/include",
);
INFOPLIST_FILE = tvmrpcLauncher/Info.plist;
LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks";
......
......@@ -3,12 +3,12 @@ ROCM_PATH=/opt/rocm
TVM_ROOT=$(shell cd ../..; pwd)
NNVM_PATH=nnvm
DMLC_CORE=${TVM_ROOT}/dmlc-core
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core
PKG_CFLAGS = -std=c++11 -O2 -fPIC\
-I${TVM_ROOT}/include\
-I${DMLC_CORE}/include\
-I${TVM_ROOT}/dlpack/include\
-I${TVM_ROOT}/3rdparty/dlpack/include\
-I${ROCM_PATH}/include
PKG_LDFLAGS = -L${ROCM_PATH}/lib -L${TVM_ROOT}/lib -ldl -lpthread -lhip_hcc -lMIOpen
......
......@@ -23,7 +23,7 @@ uservice_library_name := sgx_uae_service$(sgx_sim)
pkg_cflags := -std=c++11 -O2 -fPIC\
-I${TVM_ROOT}/include\
-I${DMLC_CORE}/include\
-I${TVM_ROOT}/dlpack/include\
-I${TVM_ROOT}/3rdparty/dlpack/include\
-I.\
-DDMLC_LOG_STACK_TRACE=0\
-fmax-errors=4
......
Subproject commit bee4d1dd8dc1ee4a1fd8fa6a96476c2f8b7492a3
Subproject commit 4f0564ec769477c66d480dd966088f172050c874
......@@ -13,7 +13,7 @@ TVMPATH = ..
export LDFLAGS = -pthread -lm
export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC
CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/dlpack/include -I$(TVMPATH)/HalideIR/src -I$(TVMPATH)/topi/include
CFLAGS += -I$(TVMPATH)/include -I$(TVMPATH)/3rdparty/dlpack/include -I$(TVMPATH)/3rdparty/HalideIR/src -I$(TVMPATH)/topi/include
ifdef DMLC_CORE_PATH
CFLAGS += -I$(DMLC_CORE_PATH)/include
......
......@@ -4,7 +4,7 @@ export CFLAGS = -std=c++11 -Wall -O2 -Iinclude -fPIC
ifdef DMLC_CORE_PATH
CFLAGS += -I$(DMLC_CORE_PATH)/include
else
CFLAGS += -I$(CURDIR)/../dmlc-core/include
CFLAGS += -I$(CURDIR)/../3rdparty/dmlc-core/include
endif
.PHONY: all clean
......
......@@ -74,8 +74,8 @@ def config_cython():
"tvm._ffi.%s.%s" % (subdir, fn[:-4]),
["tvm/_ffi/_cython/%s" % fn],
include_dirs=["../include/",
"../dmlc-core/include",
"../dlpack/include",
"../3rdparty/dmlc-core/include",
"../3rdparty/dlpack/include",
],
library_dirs=library_dirs,
libraries=libraries,
......
/*!
* Copyright (c) 2018 by Contributors
* \file builtin_fp16.cc
* \brief Functions for conversion between fp32 and fp16
*/
#include <builtin_fp16.h>
namespace tvm {
namespace runtime {
extern "C" uint16_t __gnu_f2h_ieee(float a) {
return __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(a);
}
extern "C" float __gnu_h2f_ieee(uint16_t a) {
return __extendXfYf2__<uint16_t, uint16_t, 10, float, uint32_t, 23>(a);
}
} // namespace runtime
} // namespace tvm
......@@ -35,5 +35,26 @@ def test_nd_create():
ctx.sync()
def test_fp16_conversion():
n = 100
for (src, dst) in [('float32', 'float16'), ('float16', 'float32')]:
A = tvm.placeholder((n,), dtype=src)
B = tvm.compute((n,), lambda i: A[i].astype(dst))
s = tvm.create_schedule([B.op])
func = tvm.build(s, [A, B], 'llvm')
x_tvm = tvm.nd.array(100 * np.random.randn(n).astype(src) - 50)
y_tvm = tvm.nd.array(100 * np.random.randn(n).astype(dst) - 50)
func(x_tvm, y_tvm)
expected = x_tvm.asnumpy().astype(dst)
real = y_tvm.asnumpy()
np.testing.assert_allclose(expected, real)
if __name__ == "__main__":
test_nd_create()
test_fp16_conversion()
......@@ -42,8 +42,8 @@ class PkgConfig(object):
self.include_path = [
"-I%s/include" % proj_root,
"-I%s/vta/include" % proj_root,
"-I%s/dlpack/include" % proj_root,
"-I%s/dmlc-core/include" % proj_root
"-I%s/3rdparty/dlpack/include" % proj_root,
"-I%s/3rdparty/dmlc-core/include" % proj_root
]
# List of source files that can be used to build standalone library.
self.lib_source = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment