Commit 589831df by Zhixun Tan, committed by Tianqi Chen

[WIP] WebGL Backend (#672)

Basic WebGL Backend
parent d4a46898
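For context, a minimal end-to-end use of the new backend looks roughly like the sketch below. This is only illustrative (it assumes TVM was built with `USE_OPENGL=1`) and mirrors the tests added under `tests/webgl/` further down in this patch.

```python
import numpy as np
import tvm

n = tvm.convert(16)
A = tvm.placeholder((n,), name='A', dtype='float32')
B = tvm.compute((n,), lambda i: A[i] + 1.0, name='B')

s = tvm.create_schedule(B.op)
s[B].opengl()                      # map each output element to a pixel

f = tvm.build(s, [A, B], "opengl", name="add_one")

ctx = tvm.opengl(0)
a = tvm.nd.array(np.random.uniform(size=16).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(16, dtype=B.dtype), ctx)
f(a, b)
np.testing.assert_allclose(b.asnumpy(), a.asnumpy() + 1.0)
```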
......@@ -22,6 +22,7 @@ endif()
tvm_option(USE_CUDA "Build with CUDA" OFF)
tvm_option(USE_OPENCL "Build with OpenCL" OFF)
tvm_option(USE_OPENGL "Build with OpenGL" OFF)
tvm_option(USE_METAL "Build with Metal" OFF)
tvm_option(USE_RPC "Build with RPC" ON)
tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON)
......@@ -61,8 +62,8 @@ if(MSVC)
else(MSVC)
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
set(CMAKE_C_FLAGS "-O3 -Wall -std=c++11 -fPIC")
set(CMAKE_CXX_FLAGS ${CMAKE_C_FLAGS})
set(CMAKE_C_FLAGS "-O3 -Wall -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -std=c++11")
endif(MSVC)
# add source group
......@@ -87,6 +88,7 @@ file(GLOB RUNTIME_SRCS src/runtime/*.cc)
file(GLOB COMPILER_LLVM_SRCS src/codegen/llvm/*.cc)
file(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc)
file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
file(GLOB RUNTIME_OPENGL_SRCS src/runtime/opengl/*.cc)
file(GLOB RUNTIME_METAL_SRCS src/runtime/metal/*.mm)
file(GLOB RUNTIME_RPC_SRCS src/runtime/rpc/*.cc)
file(GLOB RUNTIME_GRAPH_SRCS src/runtime/graph/*.cc)
......@@ -135,6 +137,18 @@ else(USE_OPENCL)
add_definitions(-DTVM_OPENCL_RUNTIME=0)
endif(USE_OPENCL)
if(USE_OPENGL)
find_package(OpenGL QUIET REQUIRED)
find_package(glfw3 QUIET REQUIRED)
message(STATUS "Build with OpenGL support")
include_directories(${OPENGL_INCLUDE_DIRS})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${OPENGL_LIBRARIES} glfw)
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENGL_SRCS})
add_definitions(-DTVM_OPENGL_RUNTIME=1)
else(USE_OPENGL)
add_definitions(-DTVM_OPENGL_RUNTIME=0)
endif(USE_OPENGL)
if(USE_METAL)
find_package(OpenCL QUIET REQUIRED)
message(STATUS "Build with Metal support")
......
......@@ -88,6 +88,7 @@ stage('Build') {
echo USE_CUDNN=1 >> config.mk
echo USE_CUDA=1 >> config.mk
echo USE_OPENCL=1 >> config.mk
echo USE_OPENGL=1 >> config.mk
echo LLVM_CONFIG=llvm-config-4.0 >> config.mk
echo USE_RPC=1 >> config.mk
echo USE_GRAPH_RUNTIME=1 >> config.mk
......@@ -120,6 +121,7 @@ stage('Build') {
echo USE_CUDA=0 >> config.mk
echo USE_OPENCL=0 >> config.mk
echo USE_RPC=0 >> config.mk
echo USE_OPENGL=1 >> config.mk
echo LLVM_CONFIG=llvm-config-4.0 >> config.mk
"""
make('cpu', '-j2')
......
......@@ -32,8 +32,8 @@ OBJCFLAGS = -fno-objc-arc
EMCC_FLAGS= -std=c++11 -DDMLC_LOG_STACK_TRACE=0\
-Oz -s RESERVED_FUNCTION_POINTERS=2 -s MAIN_MODULE=1 -s NO_EXIT_RUNTIME=1\
-s EXTRA_EXPORTED_RUNTIME_METHODS="['cwrap','getValue','setValue','addFunction']"\
-s USE_GLFW=3 -s USE_WEBGL2=1 -lglfw\
$(INCLUDE_FLAGS)
# llvm configuration
ifdef LLVM_CONFIG
LLVM_VERSION=$(shell $(LLVM_CONFIG) --version| cut -b 1,3)
......@@ -54,6 +54,7 @@ METAL_SRC = $(wildcard src/runtime/metal/*.mm)
CUDA_SRC = $(wildcard src/runtime/cuda/*.cc)
ROCM_SRC = $(wildcard src/runtime/rocm/*.cc)
OPENCL_SRC = $(wildcard src/runtime/opencl/*.cc)
OPENGL_SRC = $(wildcard src/runtime/opengl/*.cc)
RPC_SRC = $(wildcard src/runtime/rpc/*.cc)
GRAPH_SRC = $(wildcard src/runtime/graph/*.cc)
RUNTIME_SRC = $(wildcard src/runtime/*.cc)
......@@ -65,6 +66,7 @@ METAL_OBJ = $(patsubst src/%.mm, build/%.o, $(METAL_SRC))
CUDA_OBJ = $(patsubst src/%.cc, build/%.o, $(CUDA_SRC))
ROCM_OBJ = $(patsubst src/%.cc, build/%.o, $(ROCM_SRC))
OPENCL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENCL_SRC))
OPENGL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENGL_SRC))
RPC_OBJ = $(patsubst src/%.cc, build/%.o, $(RPC_SRC))
GRAPH_OBJ = $(patsubst src/%.cc, build/%.o, $(GRAPH_SRC))
CC_OBJ = $(patsubst src/%.cc, build/%.o, $(CC_SRC)) $(LLVM_OBJ)
......@@ -119,6 +121,19 @@ else
CFLAGS += -DTVM_OPENCL_RUNTIME=0
endif
ifeq ($(USE_OPENGL), 1)
CFLAGS += -DTVM_OPENGL_RUNTIME=1
EMCC_FLAGS += -DTVM_OPENGL_RUNTIME=1
ifeq ($(UNAME_S), Darwin)
FRAMEWORKS += -framework OpenGL
else
LDFLAGS += -lGL -lglfw
endif
RUNTIME_DEP += $(OPENGL_OBJ)
else
CFLAGS += -DTVM_OPENGL_RUNTIME=0
endif
ifeq ($(USE_METAL), 1)
CFLAGS += -DTVM_METAL_RUNTIME=1
LDFLAGS += -lobjc
......
......@@ -55,9 +55,11 @@ typedef int64_t tvm_index_t;
/*! \brief Extension device types in TVM */
typedef enum {
kOpenGL = 11,
// Extension DRAM type, used to quickly test extension devices.
// The device api can differ depending on the xpu driver registered.
kExtDev = 12
kExtDev = 12,
// Add extra TVM device types which are not in DLPack here.
} TVMDeviceExtType;
......
......@@ -55,11 +55,16 @@ class DeviceAPI {
/*!
* \brief Allocate a data space on device.
* \param ctx The device context to perform operation.
* \param size The size of the memory
* \param nbytes The number of bytes in memory.
* \param alignment The alignment of the memory.
* \return The allocated device pointer
* \param type_hint The type of elements. Only needed by certain backends such
* as OpenGL, as nbytes & alignment are sufficient for most backends.
* \return The allocated device pointer.
*/
virtual void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) = 0;
virtual void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) = 0;
/*!
* \brief Free a data space on device.
* \param ctx The device context to perform operation.
......
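At the Python level the new hint is simply the array's dtype: `TVMArrayAlloc` (changed later in this patch) now forwards `arr->dtype` to `AllocDataSpace`, which the OpenGL backend presumably needs in order to pick a texture format. A small sketch of what that means for a user, assuming the OpenGL runtime is enabled:

```python
import numpy as np
import tvm

if tvm.module.enabled("opengl"):
    ctx = tvm.opengl(0)
    # The array's dtype becomes the type_hint passed to AllocDataSpace.
    a = tvm.nd.array(np.zeros((16,), dtype="float32"), ctx)
```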
......@@ -214,6 +214,11 @@ class Stage : public NodeRef {
*/
Stage& double_buffer(); // NOLINT(*)
/*!
* \brief Schedule for OpenGL fragment shader.
* \return reference to self.
*/
Stage& opengl(); // NOLINT(*)
/*!
* \brief whether the stage has been scheduled.
* \return whether the stage has been scheduled.
*/
......
......@@ -17,7 +17,7 @@ from . import ir_builder
from . import target
from . import ndarray as nd
from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi, rocm, ext_dev
from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi, rocm, opengl, ext_dev
from ._ffi.runtime_ctypes import TypeCode
from ._ffi.function import Function
......
......@@ -97,6 +97,7 @@ class TVMContext(ctypes.Structure):
8 : 'metal',
9 : 'vpi',
10: 'rocm',
11: 'opengl',
12: 'ext_dev',
}
STR2MASK = {
......@@ -111,6 +112,7 @@ class TVMContext(ctypes.Structure):
'metal': 8,
'vpi': 9,
'rocm': 10,
'opengl': 11,
'ext_dev': 12,
}
def __init__(self, device_type, device_id):
......
......@@ -285,6 +285,10 @@ class RPCSession(object):
"""Construct remote Metal device."""
return self.context(8, dev_id)
def opengl(self, dev_id=0):
"""Construct remote OpenGL device."""
return self.context(11, dev_id)
def ext_dev(self, dev_id=0):
"""Construct remote extension device."""
return self.context(12, dev_id)
......
......@@ -120,6 +120,21 @@ def vpi(dev_id=0):
"""
return TVMContext(9, dev_id)
def opengl(dev_id=0):
"""Construct a OpenGL device
Parameters
----------
dev_id : int, optional
The integer device id
Returns
-------
ctx : TVMContext
The created context
"""
return TVMContext(11, dev_id)
def ext_dev(dev_id=0):
"""Construct a extension device
......
......@@ -611,4 +611,11 @@ class Stage(NodeBase):
"""
_api_internal._StageDoubleBuffer(self)
def opengl(self):
"""The special OpenGL schedule
Maps each output element to a pixel.
"""
_api_internal._StageOpenGL(self)
_init_api("tvm.schedule")
......@@ -67,7 +67,7 @@ class Target(object):
Parameters
----------
target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "ext_dev"}
target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "opengl", "ext_dev"}
The major target name.
options : list of str, optional
......@@ -119,6 +119,8 @@ class Target(object):
elif target_name in ("metal",):
self.keys += ("gpu",)
self.max_num_threads = 256
elif target_name in ("opengl",):
self.keys += ("opengl",)
elif target_name in ("stackvm", "ext_dev"):
# Do not add any keys for stackvm or ext_dev
pass
......
......@@ -399,6 +399,11 @@ TVM_REGISTER_API("_StageDoubleBuffer")
args[0].operator Stage().double_buffer();
});
TVM_REGISTER_API("_StageOpenGL")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator Stage().opengl();
});
TVM_REGISTER_API("_ScheduleNormalize")
.set_body([](TVMArgs args, TVMRetValue* ret) {
*ret = args[0].operator Schedule()
......
/*!
* Copyright (c) 2017 by Contributors
* Build opengl modules from source.
* \file build_opengl.cc
*/
#include <tvm/base.h>
#include "./codegen_opengl.h"
#include "./build_common.h"
namespace tvm {
namespace codegen {
runtime::Module BuildOpenGL(Array<LoweredFunc> funcs) {
bool output_ssa = false;
CodeGenOpenGL cg;
cg.Init(output_ssa);
for (LoweredFunc f : funcs) {
cg.AddFunction(f);
}
auto shaders = cg.Finish();
#if TVM_OPENGL_RUNTIME
return OpenGLModuleCreate(shaders, "gl", ExtractFuncInfo(funcs));
#else
LOG(WARNING) << "OpenGL runtime not enabled, return a source module...";
auto data = ToJSON(shaders);
return DeviceSourceModuleCreate(data, "gl", ExtractFuncInfo(funcs), "opengl");
#endif // TVM_OPENGL_RUNTIME
}
TVM_REGISTER_API("codegen.build_opengl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = BuildOpenGL(args[0]);
});
} // namespace codegen
} // namespace tvm
......@@ -150,7 +150,7 @@ class CodeGenC :
std::string GetStructRef(
Type t, const Expr& buffer, const Expr& index, int kind);
// print reference to a buffer as type t in index.
std::string GetBufferRef(
virtual std::string GetBufferRef(
Type t, const Variable* buffer, Expr index);
/*!
* \brief If buffer is allocated as type t.
......
/*!
* Copyright (c) 2017 by Contributors
* \file codegen_opengl.cc
*
* We are targeting OpenGL 3.3. The reason for not targeting a more recent
* version of OpenGL is better compatibility with WebGL 2.
*/
#include <tvm/runtime/config.h>
#include <tvm/packed_func_ext.h>
#include <vector>
#include <string>
#include "./codegen_opengl.h"
#include "../runtime/thread_storage_scope.h"
namespace tvm {
namespace codegen {
CodeGenOpenGL::CodeGenOpenGL()
: output_(nullptr), output_iter_var_(nullptr) {}
void CodeGenOpenGL::InitFuncState(LoweredFunc f) {
CodeGenC::InitFuncState(f);
output_ = nullptr;
inputs_.clear();
output_iter_var_ = nullptr;
thread_extent_var_ = "";
}
void CodeGenOpenGL::AddFunction(LoweredFunc f) {
// clear previous generated state.
this->InitFuncState(f);
this->decl_stream << "#version 300 es\n";
this->decl_stream << "precision highp float;\n";
// skip the first underscore, so SSA variable starts from _1
GetUniqueName("_");
// add to alloc buffer type.
for (const auto& kv : f->handle_data_type) {
RegisterHandleType(kv.first.get(), kv.second.type());
}
// Allocate argument names. Store in `var_idmap_`.
for (auto arg : f->args) {
auto arg_name = GetUniqueName(arg.get()->name_hint);
var_idmap_[arg.get()] = arg_name;
}
thread_extent_var_ = GetUniqueName("thread_extent");
this->decl_stream << "uniform int " << thread_extent_var_ << ";\n";
this->stream << "void main() {\n";
int func_scope = this->BeginScope();
this->PrintStmt(f->body);
this->EndScope(func_scope);
this->PrintIndent();
this->stream << "}\n\n";
// Declare arguments.
for (auto arg : f->args) {
if (this->inputs_.find(arg.get()) != this->inputs_.cend()) {
// Declare input texture.
// Format:
// - Float: "uniform sampler2D {name};"
// - Int: "uniform isampler2D {name};"
// - UInt: "uniform usampler2D {name};"
auto arg_name = GetVarID(arg.get());
auto type_it = this->handle_data_type_.find(arg.get());
CHECK(type_it != this->handle_data_type_.cend()) << "Cannot find type.";
auto type = Type2TVMType(type_it->second);
CHECK_EQ(type.lanes, 1) << "Vector type not supported.";
switch (type.code) {
case kDLInt:
this->decl_stream << "uniform isampler2D " << arg_name << ";\n";
break;
case kDLUInt:
this->decl_stream << "uniform usampler2D " << arg_name << ";\n";
break;
case kDLFloat:
this->decl_stream << "uniform sampler2D " << arg_name << ";\n";
break;
default:
LOG(FATAL) << "Unsupported type code.";
}
} else if (this->output_ == arg.get()) {
// Declare output texture.
// Format: "out {type} {name};"
auto arg_name = GetVarID(arg.get());
auto type_it = this->handle_data_type_.find(arg.get());
CHECK(type_it != this->handle_data_type_.cend()) << "Cannot find type.";
auto type = type_it->second;
this->decl_stream << "out ";
PrintType(type, this->decl_stream);
this->decl_stream << " " << arg_name << ";\n";
} else {
// Declare uniform value.
// Format: "uniform {type} {name};"
auto arg_name = GetVarID(arg.get());
auto type = arg.get()->type;
this->decl_stream << "uniform ";
PrintType(type, this->decl_stream);
this->decl_stream << " " << arg_name << ";\n";
}
}
std::vector<std::string> arg_names;
std::vector<runtime::OpenGLArgKind> arg_kinds;
for (auto arg : f->args) {
std::string name = GetVarID(arg.get());
runtime::OpenGLArgKind kind;
if (inputs_.find(arg.get()) != inputs_.cend()) {
kind = runtime::OpenGLArgKind::kInputTexture;
} else if (output_ == arg.get()) {
kind = runtime::OpenGLArgKind::kOutputTexture;
} else {
kind = runtime::OpenGLArgKind::kUniform;
}
arg_names.push_back(name);
arg_kinds.push_back(kind);
}
shaders_[f->name] = runtime::OpenGLShader(
this->decl_stream.str() + this->stream.str(),
std::move(arg_names), std::move(arg_kinds),
this->thread_extent_var_);
}
std::unordered_map<std::string, runtime::OpenGLShader> CodeGenOpenGL::Finish() {
return shaders_;
}
void CodeGenOpenGL::BindThreadIndex(const IterVar& iv) {
CHECK_EQ(iv->thread_tag, "threadIdx.x") << "Must be threadIdx.x";
CHECK(var_idmap_.find(iv->var.get()) == var_idmap_.end())
<< "Only support one thread iter var";
CHECK(output_iter_var_ == nullptr) << "Only support one thread iter var";
var_idmap_[iv->var.get()] = iv->thread_tag;
output_iter_var_ = iv->var.get();
// Declare threadIdx local variable.
this->PrintIndent();
this->stream << "ivec2 threadIdx = ivec2(gl_FragCoord.xy);\n";
// Return directly if threadIdx.x >= thread_extent.
this->PrintIndent();
this->stream << "if (threadIdx.x >= " << thread_extent_var_ << ") {\n";
this->PrintIndent();
this->stream << " return;\n";
this->PrintIndent();
this->stream << "}\n";
}
// GLSL texture store is special. We can only store to one output texture, and
// we must store to the index that matches the current "thread index".
void CodeGenOpenGL::VisitStmt_(const Store* op) {
auto t = op->value.type();
auto buffer = op->buffer_var.get();
auto index = op->index;
if (t.lanes() == 1) {
// Store to a scalar.
CHECK(inputs_.find(buffer) == inputs_.cend())
<< "Texture has been read from before. Must not store to it.";
if (output_ == nullptr) {
output_ = buffer; // Record that this texture is the output.
} else {
CHECK(output_ == buffer) << "GLSL can only write to 1 texture.";
}
this->PrintIndent();
this->stream << GetBufferRef(t, buffer, index) << " = "
<< PrintExpr(op->value) << ";\n";
} else {
// Store to a vector.
LOG(FATAL) << "Vectorized store not implemented.";
}
}
// texelFetch(tex, ivec2(idx, 0), 0).r
std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) {
std::ostringstream os;
os << "texelFetch(" << GetVarID(buffer) << ", ivec2(";
PrintExpr(index, os);
os << ", 0), 0).r";
return os.str();
}
// Print a reference expression to a buffer.
// Format: texelFetch(buffer, index, 0).r
std::string CodeGenOpenGL::GetBufferRef(
Type t, const Variable* buffer, Expr index) {
CHECK_EQ(t.lanes(), 1) << "Vector type not supported.";
CHECK(HandleTypeMatch(buffer, t)) << "Type mismatch not supported.";
if (buffer == this->output_) {
// This is the output texture.
CHECK_EQ(index.get(), output_iter_var_)
<< "GLSL must access corresponding elem of output texture.";
return GetVarID(buffer);
} else {
// This is an input texture.
this->inputs_.insert(buffer);
return TexelFetch(buffer, index);
}
}
void CodeGenOpenGL::PrintType(Type t, std::ostream& os) {
switch (t.code()) {
case halideir_type_int:
CHECK_EQ(t.bits(), 32) << "Only support 32-bit int.";
os << "int";
break;
case halideir_type_uint:
CHECK_EQ(t.bits(), 32) << "Only support 32-bit uint.";
os << "uint";
break;
case halideir_type_float:
CHECK_EQ(t.bits(), 32) << "Only support 32-bit float.";
os << "float";
break;
default:
LOG(FATAL) << "Unsupported type code.";
}
}
// Codegen for immediate values
void CodeGenOpenGL::VisitExpr_(const IntImm* op, std::ostream& os) {
CHECK_EQ(op->type, Int(32)) << "GLSL 3.0 only supports 32-bit ints.";
CodeGenC::VisitExpr_(op, os);
}
void CodeGenOpenGL::VisitExpr_(const UIntImm* op, std::ostream& os) {
CHECK_EQ(op->type, UInt(32)) << "GLSL 3.0 only supports 32-bit uints.";
CodeGenC::VisitExpr_(op, os);
}
void CodeGenOpenGL::VisitExpr_(const FloatImm* op, std::ostream& os) {
CHECK_EQ(op->type, Float(32)) << "GLSL 3.0 only supports 32-bit floats.";
CodeGenC::VisitExpr_(op, os);
}
void CodeGenOpenGL::VisitExpr_(const StringImm*, std::ostream& os) {
LOG(FATAL) << "GLSL 3.0 doesn't support strings.";
}
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file codegen_opengl.h
* \brief Generate OpenGL device code.
*/
#ifndef TVM_CODEGEN_CODEGEN_OPENGL_H_
#define TVM_CODEGEN_CODEGEN_OPENGL_H_
#include <tvm/codegen.h>
#include <tvm/packed_func_ext.h>
#include <string>
#include "./codegen_c.h"
#include "../runtime/opengl/opengl_module.h"
namespace tvm {
namespace codegen {
class CodeGenOpenGL final : public CodeGenC {
public:
CodeGenOpenGL();
void AddFunction(LoweredFunc f);
std::unordered_map<std::string, runtime::OpenGLShader> Finish();
void InitFuncState(LoweredFunc f) final;
void BindThreadIndex(const IterVar& iv) final;
void VisitStmt_(const Store* op) final;
std::string TexelFetch(const Variable* buffer, Expr index);
std::string GetBufferRef(Type t, const Variable* buffer, Expr index) final;
void PrintType(Type t, std::ostream& os) final; // NOLINT(*)
// Codegen for immediate values
void VisitExpr_(const IntImm* op, std::ostream& os) final; // NOLINT(*)
void VisitExpr_(const UIntImm* op, std::ostream& os) final; // NOLINT(*)
void VisitExpr_(const FloatImm* op, std::ostream& os) final; // NOLINT(*)
void VisitExpr_(const StringImm* op, std::ostream& os) final; // NOLINT(*)
private:
const Variable* output_{nullptr};
std::unordered_set<const Variable*> inputs_;
const Variable* output_iter_var_{nullptr};
std::unordered_map<std::string, runtime::OpenGLShader> shaders_;
std::string thread_extent_var_;
};
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_CODEGEN_OPENGL_H_
......@@ -49,7 +49,10 @@ class VPIDeviceAPI final : public runtime::DeviceAPI {
*rv = 1;
}
}
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final {
void* AllocDataSpace(TVMContext ctx,
size_t size,
size_t alignment,
TVMType type_hint) final {
// always align to 32 bytes at least.
CHECK_LE(alignment, runtime::kAllocAlignment);
alignment = runtime::kAllocAlignment;
......
......@@ -31,6 +31,7 @@ inline std::string DeviceName(int type) {
case kDLMetal: return "metal";
case kDLVPI: return "vpi";
case kDLROCM: return "rocm";
case kOpenGL: return "opengl";
case kExtDev: return "ext_dev";
default: LOG(FATAL) << "unknown type =" << type; return "Unknown";
}
......@@ -95,7 +96,8 @@ DeviceAPI* DeviceAPI::Get(TVMContext ctx, bool allow_missing) {
}
void* DeviceAPI::AllocWorkspace(TVMContext ctx, size_t size) {
return AllocDataSpace(ctx, size, kTempAllocaAlignment);
TVMType type_hint{kDLUInt, 8, 1};
return AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint);
}
void DeviceAPI::FreeWorkspace(TVMContext ctx, void* ptr) {
......@@ -365,7 +367,7 @@ int TVMArrayAlloc(const tvm_index_t* shape,
size_t size = GetDataSize(arr);
size_t alignment = GetDataAlignment(arr);
arr->data = DeviceAPIManager::Get(arr->ctx)->AllocDataSpace(
arr->ctx, size, alignment);
arr->ctx, size, alignment, arr->dtype);
*out = arr;
API_END_HANDLE_ERROR(TVMArrayFree_(arr));
}
......
......@@ -20,13 +20,16 @@ class CPUDeviceAPI final : public DeviceAPI {
*rv = 1;
}
}
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final {
void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
void* ptr;
#if _MSC_VER
ptr = _aligned_malloc(size, alignment);
ptr = _aligned_malloc(nbytes, alignment);
if (ptr == nullptr) throw std::bad_alloc();
#else
int ret = posix_memalign(&ptr, alignment, size);
int ret = posix_memalign(&ptr, alignment, nbytes);
if (ret != 0) throw std::bad_alloc();
#endif
return ptr;
......
......@@ -54,12 +54,15 @@ class CUDADeviceAPI final : public DeviceAPI {
}
*rv = value;
}
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final {
void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
CUDA_CALL(cudaSetDevice(ctx.device_id));
CHECK_EQ(256 % alignment, 0U)
<< "CUDA space is aligned at 256 bytes";
void *ret;
CUDA_CALL(cudaMalloc(&ret, size));
CUDA_CALL(cudaMalloc(&ret, nbytes));
return ret;
}
......
......@@ -63,7 +63,10 @@ class MetalWorkspace final : public DeviceAPI {
// override device API
void SetDevice(TVMContext ctx) final;
void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final;
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final;
void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final;
void FreeDataSpace(TVMContext ctx, void* ptr) final;
void CopyDataFromTo(const void* from,
size_t from_size,
......
......@@ -123,12 +123,12 @@ void MetalWorkspace::SetDevice(TVMContext ctx) {
}
void* MetalWorkspace::AllocDataSpace(
TVMContext ctx, size_t size, size_t alignment) {
TVMContext ctx, size_t nbytes, size_t alignment, TVMType type_hint) {
this->Init();
id<MTLDevice> dev = GetDevice(ctx);
// allocate buffer in GPU only mode.
id<MTLBuffer> buf = [
dev newBufferWithLength:size
dev newBufferWithLength:nbytes
options:MTLResourceStorageModePrivate];
CHECK(buf != nil);
return (__bridge void*)([buf retain]);
......
......@@ -115,6 +115,8 @@ bool RuntimeEnabled(const std::string& target) {
f_name = "device_api.gpu";
} else if (target == "cl" || target == "opencl") {
f_name = "device_api.opencl";
} else if (target == "gl" || target == "opengl") {
f_name = "device_api.opengl";
} else if (target == "mtl" || target == "metal") {
f_name = "device_api.metal";
} else if (target == "stackvm") {
......
......@@ -142,7 +142,10 @@ class OpenCLWorkspace final : public DeviceAPI {
// override device API
void SetDevice(TVMContext ctx) final;
void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final;
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final;
void* AllocDataSpace(TVMContext ctx,
size_t size,
size_t alignment,
TVMType type_hint) final;
void FreeDataSpace(TVMContext ctx, void* ptr) final;
void CopyDataFromTo(const void* from,
size_t from_offset,
......
......@@ -51,7 +51,7 @@ void OpenCLWorkspace::GetAttr(
}
void* OpenCLWorkspace::AllocDataSpace(
TVMContext ctx, size_t size, size_t alignment) {
TVMContext ctx, size_t size, size_t alignment, TVMType type_hint) {
this->Init();
CHECK(context != nullptr) << "No OpenCL device";
cl_int err_code;
......
/*!
* Copyright (c) 2017 by Contributors
* \file opengl_module.cc
*/
#include <utility>
#include "./opengl_common.h"
#include "./opengl_module.h"
#if TVM_OPENGL_RUNTIME
#include <tvm/runtime/registry.h>
#include "../pack_args.h"
#include "../thread_storage_scope.h"
#include "../file_util.h"
namespace tvm {
namespace runtime {
class OpenGLModuleNode final : public ModuleNode {
public:
OpenGLModuleNode(std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap);
~OpenGLModuleNode() override = default;
const char* type_key() const final { return "opengl"; }
PackedFunc GetFunction(const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) final;
std::string GetSource(const std::string& format) final;
void SaveToFile(const std::string& file_name,
const std::string& format) final;
void SaveToBinary(dmlc::Stream* stream) final;
const gl::Program& GetProgram(const std::string& func_name) const;
const OpenGLShader& GetShader(const std::string& func_name) const;
const FunctionInfo& GetFunctionInfo(const std::string& func_name) const;
gl::OpenGLWorkspace& workspace() const { return *workspace_; }
private:
std::shared_ptr<gl::OpenGLWorkspace> workspace_;
std::unordered_map<std::string, OpenGLShader> shaders_;
std::string fmt_;
std::unordered_map<std::string, FunctionInfo> fmap_;
std::unordered_map<std::string, gl::Program> programs_;
DISALLOW_COPY_AND_ASSIGN(OpenGLModuleNode);
};
class OpenGLWrappedFunc {
public:
OpenGLWrappedFunc(OpenGLModuleNode* m,
std::shared_ptr<ModuleNode> sptr,
std::string func_name,
std::vector<size_t> arg_size,
const std::vector<std::string>& thread_axis_tags);
void operator()(TVMArgs args, TVMRetValue* rv, void** void_args) const;
private:
// The module
OpenGLModuleNode* m_;
// resource handle
std::shared_ptr<ModuleNode> sptr_;
// The name of the function.
std::string func_name_;
// convert code for void argument
std::vector<size_t> arg_size_;
// thread axis config
ThreadAxisConfig thread_axis_cfg_;
};
OpenGLModuleNode::OpenGLModuleNode(
std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap)
: workspace_(gl::OpenGLWorkspace::Global()), shaders_(std::move(shaders)),
fmt_(std::move(fmt)), fmap_(std::move(fmap)), programs_() {
CHECK_EQ(fmt_, "gl") << "Unknown OpenGL format " << fmt_;
for (auto &pair : shaders_) {
auto &func_name = pair.first;
auto &shader = pair.second;
programs_.emplace(func_name,
workspace_->CreateProgram(shader.source.c_str()));
}
}
PackedFunc OpenGLModuleNode::GetFunction(
const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) {
CHECK_EQ(sptr_to_self.get(), this);
CHECK_NE(name, symbol::tvm_module_main) << "Device functions do not have main";
auto func_info_it = fmap_.find(name);
if (func_info_it == fmap_.end()) { return PackedFunc(); }
auto &func_info = func_info_it->second;
std::vector<size_t> arg_size(func_info.arg_types.size());
for (size_t i = 0; i < func_info.arg_types.size(); ++i) {
TVMType t = func_info.arg_types[i];
CHECK_EQ(t.lanes, 1U);
uint32_t bits = t.bits;
CHECK_EQ(bits % 8, 0U);
arg_size[i] = bits / 8;
}
// Initialize the wrapped func.
OpenGLWrappedFunc f(this, sptr_to_self, name, arg_size,
func_info.thread_axis_tags);
return PackFuncVoidAddr(f, func_info.arg_types);
}
std::string OpenGLModuleNode::GetSource(const std::string& format) {
if (format != fmt_ && fmt_ != "gl") { return ""; }
std::ostringstream os;
for (auto &pair : shaders_) {
auto &name = pair.first;
auto &shader = pair.second;
os << "[" << name << "]" << "\n";
os << shader.source <<"\n";
}
return os.str();
}
void OpenGLModuleNode::SaveToFile(const std::string& file_name,
const std::string& format) {
std::string fmt = GetFileFormat(file_name, format);
CHECK_EQ(fmt, fmt_) << "Can only save to format=" << fmt_;
std::string meta_file = GetMetaFilePath(file_name);
SaveMetaDataToFile(meta_file, fmap_);
SaveBinaryToFile(file_name, ToJSON(shaders_));
}
void OpenGLModuleNode::SaveToBinary(dmlc::Stream* stream) {
stream->Write(fmt_);
stream->Write(fmap_);
stream->Write(ToJSON(shaders_));
}
const gl::Program& OpenGLModuleNode::GetProgram(
const std::string& func_name) const {
auto it = programs_.find(func_name);
if (it == programs_.end()) {
LOG(FATAL) << "Cannot find program";
}
return it->second;
}
const OpenGLShader& OpenGLModuleNode::GetShader(
const std::string& func_name) const {
auto it = shaders_.find(func_name);
if (it == shaders_.end()) {
LOG(FATAL) << "Cannot find shader";
}
return it->second;
}
const FunctionInfo& OpenGLModuleNode::GetFunctionInfo(
const std::string& func_name) const {
auto it = fmap_.find(func_name);
if (it == fmap_.end()) {
LOG(FATAL) << "Cannot find shader";
}
return it->second;
}
OpenGLWrappedFunc::OpenGLWrappedFunc(
OpenGLModuleNode* m,
std::shared_ptr<ModuleNode> sptr,
std::string func_name,
std::vector<size_t> arg_size,
const std::vector<std::string>& thread_axis_tags)
: m_(m), sptr_(std::move(sptr)), func_name_(std::move(func_name)),
arg_size_(std::move(arg_size)) {
thread_axis_cfg_.Init(arg_size_.size(), thread_axis_tags);
}
void OpenGLWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv,
void** void_args) const {
auto &shader = m_->GetShader(func_name_);
auto &program = m_->GetProgram(func_name_);
auto &func_info = m_->GetFunctionInfo(func_name_);
size_t nargs = shader.arg_kinds.size();
// Must call this function before setting uniforms & input textures.
m_->workspace().SetCurrentProgram(program);
// Set all arguments.
GLuint texture_unit = 0;
gl::Texture* output = nullptr;
for (size_t i = 0; i != nargs; ++i) {
auto &name = shader.arg_names.at(i);
auto kind = shader.arg_kinds.at(i);
auto type = func_info.arg_types.at(i);
switch (kind) {
case OpenGLArgKind::kUniform: {
m_->workspace().SetUniform(program, name, type, void_args[i]);
break;
}
case OpenGLArgKind::kInputTexture: {
CHECK_EQ(type.code, kHandle) << "Type is not handle?";
auto texture = *static_cast<gl::Texture**>(void_args[i]);
m_->workspace().SetInputTexture(program, name, texture_unit, texture);
++texture_unit;
break;
}
case OpenGLArgKind::kOutputTexture: {
CHECK_EQ(type.code, kHandle) << "Type is not handle?";
CHECK(output == nullptr) << "Can only have one output texture.";
output = *static_cast<gl::Texture**>(void_args[i]);
break;
}
}
}
// Set "thread_extent" uniform.
ThreadWorkLoad wl = thread_axis_cfg_.Extract(args);
std::unique_ptr<GLint> thread_extent(new GLint(wl.block_dim(0)));
m_->workspace().SetUniform(program, shader.thread_extent_var,
TVMType{kDLInt, 32, 1},
static_cast<void*>(thread_extent.get()));
m_->workspace().Render(output);
}
Module OpenGLModuleCreate(std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap) {
auto n = std::make_shared<OpenGLModuleNode>(std::move(shaders),
std::move(fmt),
std::move(fmap));
return Module(n);
}
Module OpenGLModuleLoadFile(const std::string& file_name,
const std::string& format) {
std::string data;
std::unordered_map<std::string, FunctionInfo> fmap;
std::string fmt = GetFileFormat(file_name, format);
std::string meta_file = GetMetaFilePath(file_name);
LoadBinaryFromFile(file_name, &data);
LoadMetaDataFromFile(meta_file, &fmap);
return OpenGLModuleCreate(FromJSON(data), fmt, fmap);
}
Module OpenGLModuleLoadBinary(void* strm) {
auto stream = static_cast<dmlc::Stream*>(strm);
std::string data;
std::unordered_map<std::string, FunctionInfo> fmap;
std::string fmt;
stream->Read(&fmt);
stream->Read(&fmap);
stream->Read(&data);
return OpenGLModuleCreate(FromJSON(data), fmt, fmap);
}
TVM_REGISTER_GLOBAL("module.loadfile_gl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenGLModuleLoadFile(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("module.loadfile_glbin")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenGLModuleLoadFile(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("module.loadbinary_opengl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenGLModuleLoadBinary(args[0]);
});
} // namespace runtime
} // namespace tvm
#endif // TVM_OPENGL_RUNTIME
/*!
* Copyright (c) 2017 by Contributors
* \file opengl_module.h
* \brief Execution handling of OpenGL kernels
*/
#ifndef TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_
#define TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_
#include <tvm/runtime/config.h>
#include <tvm/runtime/packed_func.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "../meta_data.h"
namespace tvm {
namespace runtime {
/*!
* \brief Determines how we supply arguments.
*/
enum class OpenGLArgKind {
kInputTexture = 0, // Bind to "gsampler2D" in GLSL.
kOutputTexture = 1, // Bind to "out" in GLSL.
kUniform = 2, // Bind to "uniform" in GLSL.
};
std::string OpenGLArgKind2String(OpenGLArgKind kind);
OpenGLArgKind String2OpenGLArgKind(const std::string& str);
/*!
* \brief The output of OpenGL codegen.
* Contains necessary information to build a fragment shader and bind arguments.
*/
struct OpenGLShader {
OpenGLShader() = default;
OpenGLShader(std::string source,
std::vector<std::string> arg_names,
std::vector<OpenGLArgKind> arg_kinds,
std::string thread_extent_var)
: source(std::move(source)), arg_names(std::move(arg_names)),
arg_kinds(std::move(arg_kinds)),
thread_extent_var(std::move(thread_extent_var)) {
CHECK_EQ(this->arg_names.size(), this->arg_kinds.size()) << "Invalid input";
}
std::string source;
std::vector<std::string> arg_names; // Matches FunctionInfo.
std::vector<OpenGLArgKind> arg_kinds; // Matches FunctionInfo.
std::string thread_extent_var; // Stores the output length.
void Save(dmlc::JSONWriter* writer) const;
void Load(dmlc::JSONReader* reader);
};
std::string ToJSON(const std::unordered_map<std::string, OpenGLShader>& shaders);
std::unordered_map<std::string, OpenGLShader> FromJSON(const std::string& str);
/*!
* \brief Create an OpenGL module from data.
*
* \param shaders The generated shaders, keyed by function name.
* \param fmt The format of the module.
* \param fmap The function information map of each function.
*/
Module OpenGLModuleCreate(std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap);
inline std::string OpenGLArgKind2String(OpenGLArgKind kind) {
switch (kind) {
case OpenGLArgKind::kOutputTexture:
return "output_texture";
case OpenGLArgKind::kInputTexture:
return "input_texture";
case OpenGLArgKind::kUniform:
return "uniform";
}
assert(false);
}
inline OpenGLArgKind String2OpenGLArgKind(const std::string& str) {
if (str == "output_texture") {
return OpenGLArgKind::kOutputTexture;
} else if (str == "input_texture") {
return OpenGLArgKind::kInputTexture;
} else if (str == "uniform") {
return OpenGLArgKind::kUniform;
} else {
LOG(FATAL) << "Invalid OpenGL arg kind.";
assert(false);
}
}
inline void OpenGLShader::Save(dmlc::JSONWriter* writer) const {
std::vector<std::string> arg_kind_strs;
for (auto kind : arg_kinds) {
arg_kind_strs.push_back(OpenGLArgKind2String(kind));
}
writer->BeginObject();
writer->WriteObjectKeyValue("arg_names", arg_names);
writer->WriteObjectKeyValue("arg_kinds", arg_kind_strs);
writer->WriteObjectKeyValue("source", source);
writer->WriteObjectKeyValue("thread_extent_var", thread_extent_var);
writer->EndObject();
}
inline void OpenGLShader::Load(dmlc::JSONReader* reader) {
std::vector<std::string> arg_kind_strs;
dmlc::JSONObjectReadHelper helper;
helper.DeclareField("arg_names", &arg_names);
helper.DeclareField("arg_kinds", &arg_kind_strs);
helper.DeclareField("source", &source);
helper.DeclareField("thread_extent_var", &thread_extent_var);
helper.ReadAllFields(reader);
arg_kinds.clear();
for (auto& str : arg_kind_strs) {
arg_kinds.push_back(String2OpenGLArgKind(str));
}
}
inline std::string ToJSON(
const std::unordered_map<std::string, OpenGLShader>& shaders) {
std::ostringstream os;
dmlc::JSONWriter writer(&os);
writer.BeginObject();
writer.WriteObjectKeyValue("shaders", shaders);
writer.EndObject();
return os.str();
}
inline std::unordered_map<std::string, OpenGLShader> FromJSON(
const std::string& str) {
std::unordered_map<std::string, OpenGLShader> shaders;
std::istringstream is(str);
dmlc::JSONReader reader(&is);
dmlc::JSONObjectReadHelper helper;
helper.DeclareField("shaders", &shaders);
helper.ReadAllFields(&reader);
return shaders;
}
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_
......@@ -52,12 +52,15 @@ class ROCMDeviceAPI final : public DeviceAPI {
}
*rv = value;
}
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final {
void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
ROCM_CALL(hipSetDevice(ctx.device_id));
CHECK_EQ(256 % alignment, 0U)
<< "ROCM space is aligned at 256 bytes";
void *ret;
ROCM_CALL(hipMalloc(&ret, size));
ROCM_CALL(hipMalloc(&ret, nbytes));
return ret;
}
......
......@@ -20,10 +20,13 @@ class RPCDeviceAPI final : public DeviceAPI {
*rv = GetSess(ctx)->CallRemote(
RPCCode::kDevGetAttr, ctx, static_cast<int>(kind));
}
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final {
void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
auto sess = GetSess(ctx);
void *data = sess->CallRemote(
RPCCode::kDevAllocData, ctx, size, alignment);
RPCCode::kDevAllocData, ctx, nbytes, alignment, type_hint);
RemoteSpace* space = new RemoteSpace();
space->data = data;
space->sess = std::move(sess);
......
......@@ -887,9 +887,11 @@ void RPCDevGetAttr(TVMArgs args, TVMRetValue *rv) {
void RPCDevAllocData(TVMArgs args, TVMRetValue *rv) {
TVMContext ctx = args[0];
uint64_t size = args[1];
uint64_t nbytes = args[1];
uint64_t alignment = args[2];
void* data = DeviceAPI::Get(ctx)->AllocDataSpace(ctx, size, alignment);
TVMType type_hint = args[3];
void* data = DeviceAPI::Get(ctx)->AllocDataSpace(
ctx, nbytes, alignment, type_hint);
*rv = data;
}
......
......@@ -23,28 +23,29 @@ class WorkspacePool::Pool {
allocated_.push_back(e);
}
// allocate from pool
void* Alloc(TVMContext ctx, DeviceAPI* device, size_t size) {
void* Alloc(TVMContext ctx, DeviceAPI* device, size_t nbytes) {
// Allocate align to page.
size = (size + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize;
if (size == 0) size = kWorkspacePageSize;
nbytes = (nbytes + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize;
if (nbytes == 0) nbytes = kWorkspacePageSize;
Entry e;
TVMType type = {.code = kDLUInt, .bits = 8, .lanes = 1};
if (free_list_.size() == 2) {
e = free_list_.back();
free_list_.pop_back();
if (e.size < size) {
if (e.size < nbytes) {
// resize the page
device->FreeDataSpace(ctx, e.data);
e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment);
e.size = size;
e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
e.size = nbytes;
}
} else if (free_list_.size() == 1) {
e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment);
e.size = size;
e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
e.size = nbytes;
} else {
if (free_list_.back().size >= size) {
if (free_list_.back().size >= nbytes) {
// find smallest fit
auto it = free_list_.end() - 2;
for (; it->size >= size; --it) {}
for (; it->size >= nbytes; --it) {}
e = *(it + 1);
free_list_.erase(it + 1);
} else {
......@@ -52,8 +53,8 @@ class WorkspacePool::Pool {
e = free_list_.back();
free_list_.pop_back();
device->FreeDataSpace(ctx, e.data);
e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment);
e.size = size;
e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
e.size = nbytes;
}
}
allocated_.push_back(e);
......
......@@ -397,6 +397,45 @@ Stage& Stage::double_buffer() {
return *this;
}
Stage& Stage::opengl() {
CHECK(!is_scheduled()) << "Must be a fresh schedule";
StageNode *self = operator->();
auto all_iter_vars = self->all_iter_vars; // curr version of all_iter_vars
CHECK(!all_iter_vars.empty()) << "At least one iter var";
// Fuse all data parallel dimensions to 1.
IterVar fused = all_iter_vars[0];
for (size_t i = 1; i != all_iter_vars.size(); ++i) {
auto iter_var = all_iter_vars[i];
switch (iter_var->iter_type) {
case IterVarType::kDataPar: {
fuse(fused, all_iter_vars[i], &fused);
break;
}
case IterVarType::kThreadIndex: {
LOG(ERROR) << "A fresh schedule shouldn't have thread index iter var";
break;
}
case IterVarType::kCommReduce:
case IterVarType::kOrdered:
case IterVarType::kOpaque: {
break;
}
default: {
LOG(ERROR) << "Invalid iter var type "
<< IterVarType2String(iter_var->iter_type);
break;
}
}
}
// Bind the only dimension to threadIdx.x.
bind(fused, thread_axis(Range(nullptr), "threadIdx.x"));
return *this;
}
Stage CopyStage(const Stage& s) {
std::shared_ptr<StageNode> n =
std::make_shared<StageNode>(*s.operator->());
......
......@@ -20,3 +20,6 @@ RUN bash /install/ubuntu_install_java.sh
COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh
RUN bash /install/ubuntu_install_llvm.sh
COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh
RUN bash /install/ubuntu_install_opengl.sh
......@@ -37,6 +37,9 @@ RUN bash /install/ubuntu_install_nodejs.sh
COPY install/ubuntu_install_rocm.sh /install/ubuntu_install_rocm.sh
RUN bash /install/ubuntu_install_rocm.sh
COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh
RUN bash /install/ubuntu_install_opengl.sh
# Enable doxygen for c++ doc build
RUN apt-get install -y doxygen graphviz
......
apt-get update --fix-missing
apt-get install -y --no-install-recommends --force-yes \
libgl1-mesa-dev libglfw3-dev
\ No newline at end of file
......@@ -7,7 +7,8 @@ def enabled_ctx_list():
('cl', tvm.opencl(0)),
('metal', tvm.metal(0)),
('rocm', tvm.rocm(0)),
('vpi', tvm.vpi(0))]
('vpi', tvm.vpi(0)),
('opengl', tvm.opengl(0))]
for k, v in ctx_list:
assert tvm.context(k, 0) == v
ctx_list = [x[1] for x in ctx_list if x[1].exist]
......@@ -19,7 +20,8 @@ print("Testing using contexts:", ENABLED_CTX_LIST)
def test_nd_create():
for ctx in ENABLED_CTX_LIST:
for dtype in ["float32", "int8", "uint16"]:
for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32",
"float32"]:
x = np.random.randint(0, 10, size=(3, 4))
x = np.array(x, dtype=dtype)
y = tvm.nd.array(x, ctx=ctx)
......
......@@ -17,3 +17,5 @@ TVM_FFI=cython python -m nose -v tests/python/integration || exit -1
TVM_FFI=ctypes python3 -m nose -v tests/python/integration || exit -1
TVM_FFI=cython python -m nose -v tests/python/contrib || exit -1
TVM_FFI=ctypes python3 -m nose -v tests/python/contrib || exit -1
TVM_FFI=cython python -m nose -v tests/webgl || exit -1
TVM_FFI=ctypes python3 -m nose -v tests/webgl || exit -1
## Test cases for the WebGL backend
Any test case whose name starts with `test_local_...` tests the C++ OpenGL backend
on the local OS and can be executed automatically.
Any test case whose name starts with `test_remote_...` tests the WebGL backend within
the browser and must be run manually. See the instructions within the test.
import tvm
import numpy as np
def test_local_gemm():
if not tvm.module.enabled("opengl"):
return
if not tvm.module.enabled("llvm"):
return
nn = 2
n = tvm.var('n')
n = tvm.convert(nn)
m = n
l = n
A = tvm.placeholder((n, l), name='A', dtype='int32')
B = tvm.placeholder((m, l), name='B', dtype='int32')
k = tvm.reduce_axis((0, l), name='k')
C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
name='CC')
s = tvm.create_schedule(C.op)
s[C].opengl()
print(tvm.lower(s, [A, B, C], simple_mode=True))
f = tvm.build(s, [A, B, C], "opengl", name="gemm")
print("------opengl code------")
print(f.imported_modules[0].get_source(fmt="gl"))
ctx = tvm.opengl()
n, m, l = nn, nn, nn
a_np = np.random.uniform(low=0, high=10, size=(n, l)).astype(A.dtype)
b_np = np.random.uniform(low=0, high=10, size=(m, l)).astype(B.dtype)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(b_np, ctx)
c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
f(a, b, c)
np.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T))
if __name__ == "__main__":
test_local_gemm()
import numpy as np
import tvm
from tvm.contrib import rpc, util, emscripten
def test_local_save_load():
if not tvm.module.enabled("opengl"):
return
if not tvm.module.enabled("llvm"):
return
n = tvm.var("n")
A = tvm.placeholder((n,), name='A', dtype='int32')
B = tvm.placeholder((n,), name='B', dtype='int32')
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = tvm.create_schedule(C.op)
s[C].opengl()
f = tvm.build(s, [A, B, C], "opengl", target_host="llvm", name="myadd")
ctx = tvm.opengl(0)
n = 10
a = tvm.nd.array(np.random.uniform(high=10, size=(n)).astype(A.dtype), ctx)
b = tvm.nd.array(np.random.uniform(high=10, size=(n)).astype(B.dtype), ctx)
c = tvm.nd.array(np.zeros((n), dtype=C.dtype), ctx)
f(a, b, c)
temp = util.tempdir()
path_so = temp.relpath("myadd.so")
f.export_library(path_so)
f1 = tvm.module.load(path_so)
f1(a, b, c)
np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
if __name__ == "__main__":
test_local_save_load()
"""
The following instructions are based on web/README.md.
Setup an RPC server:
$ python -m tvm.exec.rpc_proxy --example-rpc=1
Go to http://localhost:9190 in browser.
Click "Connect To Proxy".
Run this test script:
$ python tests/webgl/test_remote_save_load.py
"""
import numpy as np
import tvm
from tvm.contrib import rpc, util, emscripten
proxy_host = "localhost"
proxy_port = 9090
def try_remote_save_load():
if not tvm.module.enabled("rpc"):
return
if not tvm.module.enabled("opengl"):
return
if not tvm.module.enabled("llvm"):
return
# Build the module.
n = tvm.var("n")
A = tvm.placeholder((n,), name='A')
B = tvm.placeholder((n,), name='B')
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = tvm.create_schedule(C.op)
s[C].opengl()
target_host = "llvm -target=asmjs-unknown-emscripten -system-lib"
f = tvm.build(s, [A, B, C], "opengl", target_host=target_host, name="myadd")
remote = rpc.connect(proxy_host, proxy_port, key="js")
temp = util.tempdir()
ctx = remote.opengl(0)
path_obj = temp.relpath("myadd.bc")
path_dso = temp.relpath("myadd.js")
path_gl = temp.relpath("myadd.gl")
path_json = temp.relpath("myadd.tvm_meta.json")
f.save(path_obj)
emscripten.create_js(path_dso, path_obj, side_module=True)
f.imported_modules[0].save(path_gl)
remote.upload(path_dso, "myadd.dso")
remote.upload(path_gl)
remote.upload(path_json)
remote.download("myadd.dso")
remote.download("myadd.gl")
remote.download("myadd.tvm_meta.json")
print('Loading myadd.dso')
fhost = remote.load_module("myadd.dso")
print('Loading myadd.gl')
fdev = remote.load_module("myadd.gl")
print('import_module')
fhost.import_module(fdev)
print('running...')
a = tvm.nd.array(np.random.uniform(size=16).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(16, dtype=A.dtype), ctx)
c = tvm.nd.array(np.zeros(16, dtype=C.dtype), ctx)
fhost(a, b, c)
np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
if __name__ == "__main__":
try_remote_save_load()
......@@ -36,5 +36,9 @@
<button onclick="connect_rpc()">Connect To Proxy</button>
<button onclick="clear_log()">Clear Log</button>
<div id="log"></div>
<canvas id="canvas"></canvas>
<script>
Module["canvas"] = document.getElementById("canvas");
</script>
</body>
</html>
......@@ -686,7 +686,8 @@ var tvm_runtime = tvm_runtime || {};
2 : "gpu",
4 : "opencl",
8 : "metal",
9 : "vpi"
9 : "vpi",
11 : "opengl",
};
var CTX_STR2MASK = {
"cpu": 1,
......@@ -695,7 +696,8 @@ var tvm_runtime = tvm_runtime || {};
"cl": 4,
"opencl": 4,
"metal": 8,
"vpi": 9
"vpi": 9,
"opengl": 11,
};
TVMContext.prototype = {
toString : function() {
......
......@@ -18,6 +18,8 @@
#include "../src/runtime/rpc/rpc_event_impl.cc"
#include "../src/runtime/rpc/rpc_server_env.cc"
#include "../src/runtime/graph/graph_runtime.cc"
#include "../src/runtime/opengl/opengl_device_api.cc"
#include "../src/runtime/opengl/opengl_module.cc"
namespace tvm {
namespace contrib {
......