Commit 589831df by Zhixun Tan Committed by Tianqi Chen

[WIP] WebGL Backend (#672)

Basic WebGL Backend
parent d4a46898
...@@ -22,6 +22,7 @@ endif() ...@@ -22,6 +22,7 @@ endif()
tvm_option(USE_CUDA "Build with CUDA" OFF) tvm_option(USE_CUDA "Build with CUDA" OFF)
tvm_option(USE_OPENCL "Build with OpenCL" OFF) tvm_option(USE_OPENCL "Build with OpenCL" OFF)
tvm_option(USE_OPENGL "Build with OpenGL" OFF)
tvm_option(USE_METAL "Build with Metal" OFF) tvm_option(USE_METAL "Build with Metal" OFF)
tvm_option(USE_RPC "Build with RPC" ON) tvm_option(USE_RPC "Build with RPC" ON)
tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON) tvm_option(USE_GRAPH_RUNTIME "Build with tiny graph runtime" ON)
...@@ -61,8 +62,8 @@ if(MSVC) ...@@ -61,8 +62,8 @@ if(MSVC)
else(MSVC) else(MSVC)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11) check_cxx_compiler_flag("-std=c++11" SUPPORT_CXX11)
set(CMAKE_C_FLAGS "-O3 -Wall -std=c++11 -fPIC") set(CMAKE_C_FLAGS "-O3 -Wall -fPIC")
set(CMAKE_CXX_FLAGS ${CMAKE_C_FLAGS}) set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -std=c++11")
endif(MSVC) endif(MSVC)
# add source group # add source group
...@@ -87,6 +88,7 @@ file(GLOB RUNTIME_SRCS src/runtime/*.cc) ...@@ -87,6 +88,7 @@ file(GLOB RUNTIME_SRCS src/runtime/*.cc)
file(GLOB COMPILER_LLVM_SRCS src/codegen/llvm/*.cc) file(GLOB COMPILER_LLVM_SRCS src/codegen/llvm/*.cc)
file(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc) file(GLOB RUNTIME_CUDA_SRCS src/runtime/cuda/*.cc)
file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc) file(GLOB RUNTIME_OPENCL_SRCS src/runtime/opencl/*.cc)
file(GLOB RUNTIME_OPENGL_SRCS src/runtime/opengl/*.cc)
file(GLOB RUNTIME_METAL_SRCS src/runtime/metal/*.mm) file(GLOB RUNTIME_METAL_SRCS src/runtime/metal/*.mm)
file(GLOB RUNTIME_RPC_SRCS src/runtime/rpc/*.cc) file(GLOB RUNTIME_RPC_SRCS src/runtime/rpc/*.cc)
file(GLOB RUNTIME_GRAPH_SRCS src/runtime/graph/*.cc) file(GLOB RUNTIME_GRAPH_SRCS src/runtime/graph/*.cc)
...@@ -135,6 +137,18 @@ else(USE_OPENCL) ...@@ -135,6 +137,18 @@ else(USE_OPENCL)
add_definitions(-DTVM_OPENCL_RUNTIME=0) add_definitions(-DTVM_OPENCL_RUNTIME=0)
endif(USE_OPENCL) endif(USE_OPENCL)
if(USE_OPENGL)
find_package(OpenGL QUIET REQUIRED)
find_package(glfw3 QUIET REQUIRED)
message(STATUS "Build with OpenGL support")
include_directories(${OPENGL_INCLUDE_DIRS})
list(APPEND TVM_RUNTIME_LINKER_LIBS ${OpenGL_LIBRARIES} glfw)
list(APPEND RUNTIME_SRCS ${RUNTIME_OPENGL_SRCS})
add_definitions(-DTVM_OPENGL_RUNTIME=1)
else(USE_OPENGL)
add_definitions(-DTVM_OPENGL_RUNTIME=0)
endif(USE_OPENGL)
if(USE_METAL) if(USE_METAL)
find_package(OpenCL QUIET REQUIRED) find_package(OpenCL QUIET REQUIRED)
message(STATUS "Build with Metal support") message(STATUS "Build with Metal support")
......
...@@ -88,6 +88,7 @@ stage('Build') { ...@@ -88,6 +88,7 @@ stage('Build') {
echo USE_CUDNN=1 >> config.mk echo USE_CUDNN=1 >> config.mk
echo USE_CUDA=1 >> config.mk echo USE_CUDA=1 >> config.mk
echo USE_OPENCL=1 >> config.mk echo USE_OPENCL=1 >> config.mk
echo USE_OPENGL=1 >> config.mk
echo LLVM_CONFIG=llvm-config-4.0 >> config.mk echo LLVM_CONFIG=llvm-config-4.0 >> config.mk
echo USE_RPC=1 >> config.mk echo USE_RPC=1 >> config.mk
echo USE_GRAPH_RUNTIME=1 >> config.mk echo USE_GRAPH_RUNTIME=1 >> config.mk
...@@ -120,6 +121,7 @@ stage('Build') { ...@@ -120,6 +121,7 @@ stage('Build') {
echo USE_CUDA=0 >> config.mk echo USE_CUDA=0 >> config.mk
echo USE_OPENCL=0 >> config.mk echo USE_OPENCL=0 >> config.mk
echo USE_RPC=0 >> config.mk echo USE_RPC=0 >> config.mk
echo USE_OPENGL=1 >> config.mk
echo LLVM_CONFIG=llvm-config-4.0 >> config.mk echo LLVM_CONFIG=llvm-config-4.0 >> config.mk
""" """
make('cpu', '-j2') make('cpu', '-j2')
......
...@@ -32,8 +32,8 @@ OBJCFLAGS = -fno-objc-arc ...@@ -32,8 +32,8 @@ OBJCFLAGS = -fno-objc-arc
EMCC_FLAGS= -std=c++11 -DDMLC_LOG_STACK_TRACE=0\ EMCC_FLAGS= -std=c++11 -DDMLC_LOG_STACK_TRACE=0\
-Oz -s RESERVED_FUNCTION_POINTERS=2 -s MAIN_MODULE=1 -s NO_EXIT_RUNTIME=1\ -Oz -s RESERVED_FUNCTION_POINTERS=2 -s MAIN_MODULE=1 -s NO_EXIT_RUNTIME=1\
-s EXTRA_EXPORTED_RUNTIME_METHODS="['cwrap','getValue','setValue','addFunction']"\ -s EXTRA_EXPORTED_RUNTIME_METHODS="['cwrap','getValue','setValue','addFunction']"\
-s USE_GLFW=3 -s USE_WEBGL2=1 -lglfw\
$(INCLUDE_FLAGS) $(INCLUDE_FLAGS)
# llvm configuration # llvm configuration
ifdef LLVM_CONFIG ifdef LLVM_CONFIG
LLVM_VERSION=$(shell $(LLVM_CONFIG) --version| cut -b 1,3) LLVM_VERSION=$(shell $(LLVM_CONFIG) --version| cut -b 1,3)
...@@ -54,6 +54,7 @@ METAL_SRC = $(wildcard src/runtime/metal/*.mm) ...@@ -54,6 +54,7 @@ METAL_SRC = $(wildcard src/runtime/metal/*.mm)
CUDA_SRC = $(wildcard src/runtime/cuda/*.cc) CUDA_SRC = $(wildcard src/runtime/cuda/*.cc)
ROCM_SRC = $(wildcard src/runtime/rocm/*.cc) ROCM_SRC = $(wildcard src/runtime/rocm/*.cc)
OPENCL_SRC = $(wildcard src/runtime/opencl/*.cc) OPENCL_SRC = $(wildcard src/runtime/opencl/*.cc)
OPENGL_SRC = $(wildcard src/runtime/opengl/*.cc)
RPC_SRC = $(wildcard src/runtime/rpc/*.cc) RPC_SRC = $(wildcard src/runtime/rpc/*.cc)
GRAPH_SRC = $(wildcard src/runtime/graph/*.cc) GRAPH_SRC = $(wildcard src/runtime/graph/*.cc)
RUNTIME_SRC = $(wildcard src/runtime/*.cc) RUNTIME_SRC = $(wildcard src/runtime/*.cc)
...@@ -65,6 +66,7 @@ METAL_OBJ = $(patsubst src/%.mm, build/%.o, $(METAL_SRC)) ...@@ -65,6 +66,7 @@ METAL_OBJ = $(patsubst src/%.mm, build/%.o, $(METAL_SRC))
CUDA_OBJ = $(patsubst src/%.cc, build/%.o, $(CUDA_SRC)) CUDA_OBJ = $(patsubst src/%.cc, build/%.o, $(CUDA_SRC))
ROCM_OBJ = $(patsubst src/%.cc, build/%.o, $(ROCM_SRC)) ROCM_OBJ = $(patsubst src/%.cc, build/%.o, $(ROCM_SRC))
OPENCL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENCL_SRC)) OPENCL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENCL_SRC))
OPENGL_OBJ = $(patsubst src/%.cc, build/%.o, $(OPENGL_SRC))
RPC_OBJ = $(patsubst src/%.cc, build/%.o, $(RPC_SRC)) RPC_OBJ = $(patsubst src/%.cc, build/%.o, $(RPC_SRC))
GRAPH_OBJ = $(patsubst src/%.cc, build/%.o, $(GRAPH_SRC)) GRAPH_OBJ = $(patsubst src/%.cc, build/%.o, $(GRAPH_SRC))
CC_OBJ = $(patsubst src/%.cc, build/%.o, $(CC_SRC)) $(LLVM_OBJ) CC_OBJ = $(patsubst src/%.cc, build/%.o, $(CC_SRC)) $(LLVM_OBJ)
...@@ -119,6 +121,19 @@ else ...@@ -119,6 +121,19 @@ else
CFLAGS += -DTVM_OPENCL_RUNTIME=0 CFLAGS += -DTVM_OPENCL_RUNTIME=0
endif endif
ifeq ($(USE_OPENGL), 1)
CFLAGS += -DTVM_OPENGL_RUNTIME=1
EMCC_FLAGS += -DTVM_OPENGL_RUNTIME=1
ifeq ($(UNAME_S), Darwin)
FRAMEWORKS += -framework OpenGL
else
LDFLAGS += -lGL -lglfw
endif
RUNTIME_DEP += $(OPENGL_OBJ)
else
CFLAGS += -DTVM_OPENGL_RUNTIME=0
endif
ifeq ($(USE_METAL), 1) ifeq ($(USE_METAL), 1)
CFLAGS += -DTVM_METAL_RUNTIME=1 CFLAGS += -DTVM_METAL_RUNTIME=1
LDFLAGS += -lobjc LDFLAGS += -lobjc
......
...@@ -55,9 +55,11 @@ typedef int64_t tvm_index_t; ...@@ -55,9 +55,11 @@ typedef int64_t tvm_index_t;
/*! \brief Extension device types in TVM */ /*! \brief Extension device types in TVM */
typedef enum { typedef enum {
kOpenGL = 11,
// Extension DRAM type, used for quickly test extension device // Extension DRAM type, used for quickly test extension device
// The device api can differ depending on the xpu driver registered. // The device api can differ depending on the xpu driver registered.
kExtDev = 12 kExtDev = 12,
// AddExtraTVMType which is not in DLPack here // AddExtraTVMType which is not in DLPack here
} TVMDeviceExtType; } TVMDeviceExtType;
......
...@@ -55,11 +55,16 @@ class DeviceAPI { ...@@ -55,11 +55,16 @@ class DeviceAPI {
/*! /*!
* \brief Allocate a data space on device. * \brief Allocate a data space on device.
* \param ctx The device context to perform operation. * \param ctx The device context to perform operation.
* \param size The size of the memory * \param nbytes The number of bytes in memory.
* \param alignment The alignment of the memory. * \param alignment The alignment of the memory.
* \return The allocated device pointer * \param type_hint The type of elements. Only needed by certain backends such
* as OpenGL, as nbytes & alignment are sufficient for most backends.
* \return The allocated device pointer.
*/ */
virtual void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) = 0; virtual void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) = 0;
/*! /*!
* \brief Free a data space on device. * \brief Free a data space on device.
* \param ctx The device context to perform operation. * \param ctx The device context to perform operation.
......
...@@ -214,6 +214,11 @@ class Stage : public NodeRef { ...@@ -214,6 +214,11 @@ class Stage : public NodeRef {
*/ */
Stage& double_buffer(); // NOLINT(*) Stage& double_buffer(); // NOLINT(*)
/*! /*!
* \brief Schedule for OpenGL fragment shader.
* \return reference to self.
*/
Stage& opengl(); // NOLINT(*)
/*!
* \brief whether the stage has been scheduled. * \brief whether the stage has been scheduled.
* \return whether the stage has been scheduled. * \return whether the stage has been scheduled.
*/ */
......
...@@ -17,7 +17,7 @@ from . import ir_builder ...@@ -17,7 +17,7 @@ from . import ir_builder
from . import target from . import target
from . import ndarray as nd from . import ndarray as nd
from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi, rocm, ext_dev from .ndarray import context, cpu, gpu, opencl, cl, metal, mtl, vpi, rocm, opengl, ext_dev
from ._ffi.runtime_ctypes import TypeCode from ._ffi.runtime_ctypes import TypeCode
from ._ffi.function import Function from ._ffi.function import Function
......
...@@ -97,6 +97,7 @@ class TVMContext(ctypes.Structure): ...@@ -97,6 +97,7 @@ class TVMContext(ctypes.Structure):
8 : 'metal', 8 : 'metal',
9 : 'vpi', 9 : 'vpi',
10: 'rocm', 10: 'rocm',
11: 'opengl',
12: 'ext_dev', 12: 'ext_dev',
} }
STR2MASK = { STR2MASK = {
...@@ -111,6 +112,7 @@ class TVMContext(ctypes.Structure): ...@@ -111,6 +112,7 @@ class TVMContext(ctypes.Structure):
'metal': 8, 'metal': 8,
'vpi': 9, 'vpi': 9,
'rocm': 10, 'rocm': 10,
'opengl': 11,
'ext_dev': 12, 'ext_dev': 12,
} }
def __init__(self, device_type, device_id): def __init__(self, device_type, device_id):
......
...@@ -285,6 +285,10 @@ class RPCSession(object): ...@@ -285,6 +285,10 @@ class RPCSession(object):
"""Construct remote Metal device.""" """Construct remote Metal device."""
return self.context(8, dev_id) return self.context(8, dev_id)
def opengl(self, dev_id=0):
"""Construct remote OpenGL device."""
return self.context(11, dev_id)
def ext_dev(self, dev_id=0): def ext_dev(self, dev_id=0):
"""Construct remote extension device.""" """Construct remote extension device."""
return self.context(12, dev_id) return self.context(12, dev_id)
......
...@@ -120,6 +120,21 @@ def vpi(dev_id=0): ...@@ -120,6 +120,21 @@ def vpi(dev_id=0):
""" """
return TVMContext(9, dev_id) return TVMContext(9, dev_id)
def opengl(dev_id=0):
"""Construct a OpenGL device
Parameters
----------
dev_id : int, optional
The integer device id
Returns
-------
ctx : TVMContext
The created context
"""
return TVMContext(11, dev_id)
def ext_dev(dev_id=0): def ext_dev(dev_id=0):
"""Construct a extension device """Construct a extension device
......
...@@ -611,4 +611,11 @@ class Stage(NodeBase): ...@@ -611,4 +611,11 @@ class Stage(NodeBase):
""" """
_api_internal._StageDoubleBuffer(self) _api_internal._StageDoubleBuffer(self)
def opengl(self):
"""The special OpenGL schedule
Maps each output element to a pixel.
"""
_api_internal._StageOpenGL(self)
_init_api("tvm.schedule") _init_api("tvm.schedule")
...@@ -67,7 +67,7 @@ class Target(object): ...@@ -67,7 +67,7 @@ class Target(object):
Parameters Parameters
---------- ----------
target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "ext_dev"} target_name : {"llvm", "cuda", "opencl", "metal", "rocm", "stackvm", "opengl", "ext_dev"}
The major target name. The major target name.
options : list of str, optional options : list of str, optional
...@@ -119,6 +119,8 @@ class Target(object): ...@@ -119,6 +119,8 @@ class Target(object):
elif target_name in ("metal",): elif target_name in ("metal",):
self.keys += ("gpu",) self.keys += ("gpu",)
self.max_num_threads = 256 self.max_num_threads = 256
elif target_name in ("opengl",):
self.keys += ("opengl",)
elif target_name in ("stackvm", "ext_dev"): elif target_name in ("stackvm", "ext_dev"):
# Do not now class for stacvm or ext_dev # Do not now class for stacvm or ext_dev
pass pass
......
...@@ -399,6 +399,11 @@ TVM_REGISTER_API("_StageDoubleBuffer") ...@@ -399,6 +399,11 @@ TVM_REGISTER_API("_StageDoubleBuffer")
args[0].operator Stage().double_buffer(); args[0].operator Stage().double_buffer();
}); });
TVM_REGISTER_API("_StageOpenGL")
.set_body([](TVMArgs args, TVMRetValue *ret) {
args[0].operator Stage().opengl();
});
TVM_REGISTER_API("_ScheduleNormalize") TVM_REGISTER_API("_ScheduleNormalize")
.set_body([](TVMArgs args, TVMRetValue* ret) { .set_body([](TVMArgs args, TVMRetValue* ret) {
*ret = args[0].operator Schedule() *ret = args[0].operator Schedule()
......
/*!
* Copyright (c) 2017 by Contributors
* Build opengl modules from source.
* \file build_opengl.cc
*/
#include <tvm/base.h>
#include "./codegen_opengl.h"
#include "./build_common.h"
namespace tvm {
namespace codegen {
runtime::Module BuildOpenGL(Array<LoweredFunc> funcs) {
bool output_ssa = false;
CodeGenOpenGL cg;
cg.Init(output_ssa);
for (LoweredFunc f : funcs) {
cg.AddFunction(f);
}
auto shaders = cg.Finish();
#if TVM_OPENGL_RUNTIME
return OpenGLModuleCreate(shaders, "gl", ExtractFuncInfo(funcs));
#else
LOG(WARNING) << "OpenGL runtime not enabled, return a source module...";
auto data = ToJSON(shaders);
return DeviceSourceModuleCreate(data, "gl", ExtractFuncInfo(funcs), "opengl");
#endif // TVM_OPENGL_RUNTIME
}
TVM_REGISTER_API("codegen.build_opengl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = BuildOpenGL(args[0]);
});
} // namespace codegen
} // namespace tvm
...@@ -150,7 +150,7 @@ class CodeGenC : ...@@ -150,7 +150,7 @@ class CodeGenC :
std::string GetStructRef( std::string GetStructRef(
Type t, const Expr& buffer, const Expr& index, int kind); Type t, const Expr& buffer, const Expr& index, int kind);
// print reference to a buffer as type t in index. // print reference to a buffer as type t in index.
std::string GetBufferRef( virtual std::string GetBufferRef(
Type t, const Variable* buffer, Expr index); Type t, const Variable* buffer, Expr index);
/*! /*!
* \brief If buffer is allocated as type t. * \brief If buffer is allocated as type t.
......
/*!
* Copyright (c) 2017 by Contributors
* \file codegen_opengl.cc
*
* We are targeting OpenGL 3.3. The reason of not targeting a recent version
* of OpenGL is to have better compatibility of WebGL 2.
*/
#include <tvm/runtime/config.h>
#include <tvm/packed_func_ext.h>
#include <vector>
#include <string>
#include "./codegen_opengl.h"
#include "../runtime/thread_storage_scope.h"
namespace tvm {
namespace codegen {
CodeGenOpenGL::CodeGenOpenGL()
: output_(nullptr), output_iter_var_(nullptr) {}
void CodeGenOpenGL::InitFuncState(LoweredFunc f) {
CodeGenC::InitFuncState(f);
output_ = nullptr;
inputs_.clear();
output_iter_var_ = nullptr;
thread_extent_var_ = "";
}
void CodeGenOpenGL::AddFunction(LoweredFunc f) {
// clear previous generated state.
this->InitFuncState(f);
this->decl_stream << "#version 300 es\n";
this->decl_stream << "precision highp float;\n";
// skip the first underscore, so SSA variable starts from _1
GetUniqueName("_");
// add to alloc buffer type.
for (const auto& kv : f->handle_data_type) {
RegisterHandleType(kv.first.get(), kv.second.type());
}
// Allocate argument names. Store in `var_idmap_`.
for (auto arg : f->args) {
auto arg_name = GetUniqueName(arg.get()->name_hint);
var_idmap_[arg.get()] = arg_name;
}
thread_extent_var_ = GetUniqueName("thread_extent");
this->decl_stream << "uniform int " << thread_extent_var_ << ";\n";
this->stream << "void main() {\n";
int func_scope = this->BeginScope();
this->PrintStmt(f->body);
this->EndScope(func_scope);
this->PrintIndent();
this->stream << "}\n\n";
// Declare arguments.
for (auto arg : f->args) {
if (this->inputs_.find(arg.get()) != this->inputs_.cend()) {
// Declare input texture.
// Format:
// - Float: "uniform sampler2D {name};"
// - Int: "uniform isampler2D {name};"
// - UInt: "uniform usampler2D {name};"
auto arg_name = GetVarID(arg.get());
auto type_it = this->handle_data_type_.find(arg.get());
CHECK(type_it != this->handle_data_type_.cend()) << "Cannot find type.";
auto type = Type2TVMType(type_it->second);
CHECK_EQ(type.lanes, 1) << "Vector type not supported.";
switch (type.code) {
case kDLInt:
this->decl_stream << "uniform isampler2D " << arg_name << ";\n";
break;
case kDLUInt:
this->decl_stream << "uniform usampler2D " << arg_name << ";\n";
break;
case kDLFloat:
this->decl_stream << "uniform sampler2D " << arg_name << ";\n";
break;
default:
LOG(FATAL) << "Unsupported type code.";
}
} else if (this->output_ == arg.get()) {
// Declare output texture.
// Format: "out {type} {name};"
auto arg_name = GetVarID(arg.get());
auto type_it = this->handle_data_type_.find(arg.get());
CHECK(type_it != this->handle_data_type_.cend()) << "Cannot find type.";
auto type = type_it->second;
this->decl_stream << "out ";
PrintType(type, this->decl_stream);
this->decl_stream << " " << arg_name << ";\n";
} else {
// Declare uniform value.
// Format: "uniform {type} {name};"
auto arg_name = GetVarID(arg.get());
auto type = arg.get()->type;
this->decl_stream << "uniform ";
PrintType(type, this->decl_stream);
this->decl_stream << " " << arg_name << ";\n";
}
}
std::vector<std::string> arg_names;
std::vector<runtime::OpenGLArgKind> arg_kinds;
for (auto arg : f->args) {
std::string name = GetVarID(arg.get());
runtime::OpenGLArgKind kind;
if (inputs_.find(arg.get()) != inputs_.cend()) {
kind = runtime::OpenGLArgKind::kInputTexture;
} else if (output_ == arg.get()) {
kind = runtime::OpenGLArgKind::kOutputTexture;
} else {
kind = runtime::OpenGLArgKind::kUniform;
}
arg_names.push_back(name);
arg_kinds.push_back(kind);
}
shaders_[f->name] = runtime::OpenGLShader(
this->decl_stream.str() + this->stream.str(),
std::move(arg_names), std::move(arg_kinds),
this->thread_extent_var_);
}
std::unordered_map<std::string, runtime::OpenGLShader> CodeGenOpenGL::Finish() {
return shaders_;
}
void CodeGenOpenGL::BindThreadIndex(const IterVar& iv) {
CHECK_EQ(iv->thread_tag, "threadIdx.x") << "Must be threadIdx.x";
CHECK(var_idmap_.find(iv->var.get()) == var_idmap_.end())
<< "Only support one thread iter var";
CHECK(output_iter_var_ == nullptr) << "Only support one thread iter var";
var_idmap_[iv->var.get()] = iv->thread_tag;
output_iter_var_ = iv->var.get();
// Declare threadIdx local variable.
this->PrintIndent();
this->stream << "ivec2 threadIdx = ivec2(gl_FragCoord.xy);\n";
// Return directly if threadIdx.x >= thread_extent.
this->PrintIndent();
this->stream << "if (threadIdx.x >= " << thread_extent_var_ << ") {\n";
this->PrintIndent();
this->stream << " return;\n";
this->PrintIndent();
this->stream << "}\n";
}
// GLSL texture store is special. We can only store to one output texture, and
// we must store to the index that matches the current "thread index".
void CodeGenOpenGL::VisitStmt_(const Store* op) {
auto t = op->value.type();
auto buffer = op->buffer_var.get();
auto index = op->index;
if (t.lanes() == 1) {
// Store to a scalar.
CHECK(inputs_.find(buffer) == inputs_.cend())
<< "Texture has been read from before. Must not store to it.";
if (output_ == nullptr) {
output_ = buffer; // Record that this texture is the output.
} else {
CHECK(output_ == buffer) << "GLSL can only write to 1 texture.";
}
this->PrintIndent();
this->stream << GetBufferRef(t, buffer, index) << " = "
<< PrintExpr(op->value) << ";\n";
} else {
// Store to a vector.
LOG(FATAL) << "Vectorized store not implemented.";
}
}
// texelFetch(tex, ivec2(idx, 0), 0).r
std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) {
std::ostringstream os;
os << "texelFetch(" << GetVarID(buffer) << ", ivec2(";
PrintExpr(index, os);
os << ", 0), 0).r";
return os.str();
}
// Print a reference expression to a buffer.
// Format: texelFetch(buffer, index, 0).r
std::string CodeGenOpenGL::GetBufferRef(
Type t, const Variable* buffer, Expr index) {
CHECK_EQ(t.lanes(), 1) << "Vector type not supported.";
CHECK(HandleTypeMatch(buffer, t)) << "Type mismatch not supported.";
if (buffer == this->output_) {
// This is the output texture.
CHECK_EQ(index.get(), output_iter_var_)
<< "GLSL must access corresponding elem of output texture.";
return GetVarID(buffer);
} else {
// This is an input texture.
this->inputs_.insert(buffer);
return TexelFetch(buffer, index);
}
}
void CodeGenOpenGL::PrintType(Type t, std::ostream& os) {
switch (t.code()) {
case halideir_type_int:
CHECK_EQ(t.bits(), 32) << "Only support 32-bit int.";
os << "int";
break;
case halideir_type_uint:
CHECK_EQ(t.bits(), 32) << "Only support 32-bit uint.";
os << "uint";
break;
case halideir_type_float:
CHECK_EQ(t.bits(), 32) << "Only support 32-bit float.";
os << "float";
break;
default:
LOG(FATAL) << "Unsupported type code.";
}
}
// Codegen for immediate values
void CodeGenOpenGL::VisitExpr_(const IntImm* op, std::ostream& os) {
CHECK_EQ(op->type, Int(32)) << "GLSL 3.0 only supports 32-bit ints.";
CodeGenC::VisitExpr_(op, os);
}
void CodeGenOpenGL::VisitExpr_(const UIntImm* op, std::ostream& os) {
CHECK_EQ(op->type, UInt(32)) << "GLSL 3.0 only supports 32-bit uints.";
CodeGenC::VisitExpr_(op, os);
}
void CodeGenOpenGL::VisitExpr_(const FloatImm* op, std::ostream& os) {
CHECK_EQ(op->type, Float(32)) << "GLSL 3.0 only supports 32-bit floats.";
CodeGenC::VisitExpr_(op, os);
}
void CodeGenOpenGL::VisitExpr_(const StringImm*, std::ostream& os) {
LOG(FATAL) << "GLSL 3.0 doesn't support strings.";
}
} // namespace codegen
} // namespace tvm
/*!
* Copyright (c) 2017 by Contributors
* \file codegen_opengl.h
* \brief Generate OpenGL device code.
*/
#ifndef TVM_CODEGEN_CODEGEN_OPENGL_H_
#define TVM_CODEGEN_CODEGEN_OPENGL_H_
#include <tvm/codegen.h>
#include <tvm/packed_func_ext.h>
#include <string>
#include "./codegen_c.h"
#include "../runtime/opengl/opengl_module.h"
namespace tvm {
namespace codegen {
class CodeGenOpenGL final : public CodeGenC {
public:
CodeGenOpenGL();
void AddFunction(LoweredFunc f);
std::unordered_map<std::string, runtime::OpenGLShader> Finish();
void InitFuncState(LoweredFunc f) final;
void BindThreadIndex(const IterVar& iv) final;
void VisitStmt_(const Store* op) final;
std::string TexelFetch(const Variable* buffer, Expr index);
std::string GetBufferRef(Type t, const Variable* buffer, Expr index) final;
void PrintType(Type t, std::ostream& os) final; // NOLINT(*)
// Codegen for immediate values
void VisitExpr_(const IntImm* op, std::ostream& os) final; // NOLINT(*)
void VisitExpr_(const UIntImm* op, std::ostream& os) final; // NOLINT(*)
void VisitExpr_(const FloatImm* op, std::ostream& os) final; // NOLINT(*)
void VisitExpr_(const StringImm* op, std::ostream& os) final; // NOLINT(*)
private:
const Variable* output_{nullptr};
std::unordered_set<const Variable*> inputs_;
const Variable* output_iter_var_{nullptr};
std::unordered_map<std::string, runtime::OpenGLShader> shaders_;
std::string thread_extent_var_;
};
} // namespace codegen
} // namespace tvm
#endif // TVM_CODEGEN_CODEGEN_OPENGL_H_
...@@ -49,7 +49,10 @@ class VPIDeviceAPI final : public runtime::DeviceAPI { ...@@ -49,7 +49,10 @@ class VPIDeviceAPI final : public runtime::DeviceAPI {
*rv = 1; *rv = 1;
} }
} }
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { void* AllocDataSpace(TVMContext ctx,
size_t size,
size_t alignment,
TVMType type_hint) final {
// always align to 32 bytes at least. // always align to 32 bytes at least.
CHECK_LE(alignment, runtime::kAllocAlignment); CHECK_LE(alignment, runtime::kAllocAlignment);
alignment = runtime::kAllocAlignment; alignment = runtime::kAllocAlignment;
......
...@@ -31,6 +31,7 @@ inline std::string DeviceName(int type) { ...@@ -31,6 +31,7 @@ inline std::string DeviceName(int type) {
case kDLMetal: return "metal"; case kDLMetal: return "metal";
case kDLVPI: return "vpi"; case kDLVPI: return "vpi";
case kDLROCM: return "rocm"; case kDLROCM: return "rocm";
case kOpenGL: return "opengl";
case kExtDev: return "ext_dev"; case kExtDev: return "ext_dev";
default: LOG(FATAL) << "unknown type =" << type; return "Unknown"; default: LOG(FATAL) << "unknown type =" << type; return "Unknown";
} }
...@@ -95,7 +96,8 @@ DeviceAPI* DeviceAPI::Get(TVMContext ctx, bool allow_missing) { ...@@ -95,7 +96,8 @@ DeviceAPI* DeviceAPI::Get(TVMContext ctx, bool allow_missing) {
} }
void* DeviceAPI::AllocWorkspace(TVMContext ctx, size_t size) { void* DeviceAPI::AllocWorkspace(TVMContext ctx, size_t size) {
return AllocDataSpace(ctx, size, kTempAllocaAlignment); TVMType type_hint{kDLUInt, 8, 1};
return AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint);
} }
void DeviceAPI::FreeWorkspace(TVMContext ctx, void* ptr) { void DeviceAPI::FreeWorkspace(TVMContext ctx, void* ptr) {
...@@ -365,7 +367,7 @@ int TVMArrayAlloc(const tvm_index_t* shape, ...@@ -365,7 +367,7 @@ int TVMArrayAlloc(const tvm_index_t* shape,
size_t size = GetDataSize(arr); size_t size = GetDataSize(arr);
size_t alignment = GetDataAlignment(arr); size_t alignment = GetDataAlignment(arr);
arr->data = DeviceAPIManager::Get(arr->ctx)->AllocDataSpace( arr->data = DeviceAPIManager::Get(arr->ctx)->AllocDataSpace(
arr->ctx, size, alignment); arr->ctx, size, alignment, arr->dtype);
*out = arr; *out = arr;
API_END_HANDLE_ERROR(TVMArrayFree_(arr)); API_END_HANDLE_ERROR(TVMArrayFree_(arr));
} }
......
...@@ -20,13 +20,16 @@ class CPUDeviceAPI final : public DeviceAPI { ...@@ -20,13 +20,16 @@ class CPUDeviceAPI final : public DeviceAPI {
*rv = 1; *rv = 1;
} }
} }
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
void* ptr; void* ptr;
#if _MSC_VER #if _MSC_VER
ptr = _aligned_malloc(size, alignment); ptr = _aligned_malloc(nbytes, alignment);
if (ptr == nullptr) throw std::bad_alloc(); if (ptr == nullptr) throw std::bad_alloc();
#else #else
int ret = posix_memalign(&ptr, alignment, size); int ret = posix_memalign(&ptr, alignment, nbytes);
if (ret != 0) throw std::bad_alloc(); if (ret != 0) throw std::bad_alloc();
#endif #endif
return ptr; return ptr;
......
...@@ -54,12 +54,15 @@ class CUDADeviceAPI final : public DeviceAPI { ...@@ -54,12 +54,15 @@ class CUDADeviceAPI final : public DeviceAPI {
} }
*rv = value; *rv = value;
} }
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
CUDA_CALL(cudaSetDevice(ctx.device_id)); CUDA_CALL(cudaSetDevice(ctx.device_id));
CHECK_EQ(256 % alignment, 0U) CHECK_EQ(256 % alignment, 0U)
<< "CUDA space is aligned at 256 bytes"; << "CUDA space is aligned at 256 bytes";
void *ret; void *ret;
CUDA_CALL(cudaMalloc(&ret, size)); CUDA_CALL(cudaMalloc(&ret, nbytes));
return ret; return ret;
} }
......
...@@ -63,7 +63,10 @@ class MetalWorkspace final : public DeviceAPI { ...@@ -63,7 +63,10 @@ class MetalWorkspace final : public DeviceAPI {
// override device API // override device API
void SetDevice(TVMContext ctx) final; void SetDevice(TVMContext ctx) final;
void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final; void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final;
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final; void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final;
void FreeDataSpace(TVMContext ctx, void* ptr) final; void FreeDataSpace(TVMContext ctx, void* ptr) final;
void CopyDataFromTo(const void* from, void CopyDataFromTo(const void* from,
size_t from_size, size_t from_size,
......
...@@ -123,12 +123,12 @@ void MetalWorkspace::SetDevice(TVMContext ctx) { ...@@ -123,12 +123,12 @@ void MetalWorkspace::SetDevice(TVMContext ctx) {
} }
void* MetalWorkspace::AllocDataSpace( void* MetalWorkspace::AllocDataSpace(
TVMContext ctx, size_t size, size_t alignment) { TVMContext ctx, size_t nbytes, size_t alignment, TVMType type_hint) {
this->Init(); this->Init();
id<MTLDevice> dev = GetDevice(ctx); id<MTLDevice> dev = GetDevice(ctx);
// allocate buffer in GPU only mode. // allocate buffer in GPU only mode.
id<MTLBuffer> buf = [ id<MTLBuffer> buf = [
dev newBufferWithLength:size dev newBufferWithLength:nbytes
options:MTLResourceStorageModePrivate]; options:MTLResourceStorageModePrivate];
CHECK(buf != nil); CHECK(buf != nil);
return (__bridge void*)([buf retain]); return (__bridge void*)([buf retain]);
......
...@@ -115,6 +115,8 @@ bool RuntimeEnabled(const std::string& target) { ...@@ -115,6 +115,8 @@ bool RuntimeEnabled(const std::string& target) {
f_name = "device_api.gpu"; f_name = "device_api.gpu";
} else if (target == "cl" || target == "opencl") { } else if (target == "cl" || target == "opencl") {
f_name = "device_api.opencl"; f_name = "device_api.opencl";
} else if (target == "gl" || target == "opengl") {
f_name = "device_api.opengl";
} else if (target == "mtl" || target == "metal") { } else if (target == "mtl" || target == "metal") {
f_name = "device_api.metal"; f_name = "device_api.metal";
} else if (target == "stackvm") { } else if (target == "stackvm") {
......
...@@ -142,7 +142,10 @@ class OpenCLWorkspace final : public DeviceAPI { ...@@ -142,7 +142,10 @@ class OpenCLWorkspace final : public DeviceAPI {
// override device API // override device API
void SetDevice(TVMContext ctx) final; void SetDevice(TVMContext ctx) final;
void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final; void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final;
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final; void* AllocDataSpace(TVMContext ctx,
size_t size,
size_t alignment,
TVMType type_hint) final;
void FreeDataSpace(TVMContext ctx, void* ptr) final; void FreeDataSpace(TVMContext ctx, void* ptr) final;
void CopyDataFromTo(const void* from, void CopyDataFromTo(const void* from,
size_t from_offset, size_t from_offset,
......
...@@ -51,7 +51,7 @@ void OpenCLWorkspace::GetAttr( ...@@ -51,7 +51,7 @@ void OpenCLWorkspace::GetAttr(
} }
void* OpenCLWorkspace::AllocDataSpace( void* OpenCLWorkspace::AllocDataSpace(
TVMContext ctx, size_t size, size_t alignment) { TVMContext ctx, size_t size, size_t alignment, TVMType type_hint) {
this->Init(); this->Init();
CHECK(context != nullptr) << "No OpenCL device"; CHECK(context != nullptr) << "No OpenCL device";
cl_int err_code; cl_int err_code;
......
/*!
* Copyright (c) 2017 by Contributors
* \file opengl_common.h
* \brief OpenGL common header
*/
#ifndef TVM_RUNTIME_OPENGL_OPENGL_COMMON_H_
#define TVM_RUNTIME_OPENGL_OPENGL_COMMON_H_
#include <tvm/runtime/config.h>
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/device_api.h>
#include <dmlc/logging.h>
#include <GL/gl.h>
#include <GLFW/glfw3.h>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
namespace tvm {
namespace runtime {
namespace gl {
// This file contains the following classes.
class GLFunctionPointers;
class OpenGLWorkspace;
class Texture;
class Program;
inline GLFWglproc GetProcAddress(const char* procname) {
GLFWglproc proc = glfwGetProcAddress(procname);
CHECK(proc != nullptr) << "Cannot get function \"" << procname << "\"";
return proc;
}
#define SetGLFunctionPointer(NAME) \
NAME(decltype(NAME)(GetProcAddress("gl" #NAME)))
/*!
* \brief The function pointers of all OpenGL APIs that are used.
* Must be constructed after creating an OpenGL context.
*/
class GLFunctionPointers {
public:
GLFunctionPointers()
: SetGLFunctionPointer(ActiveTexture),
SetGLFunctionPointer(AttachShader),
SetGLFunctionPointer(BindBuffer),
SetGLFunctionPointer(BindFramebuffer),
SetGLFunctionPointer(BindTexture),
SetGLFunctionPointer(BindVertexArray),
SetGLFunctionPointer(BufferData),
SetGLFunctionPointer(CheckFramebufferStatus),
SetGLFunctionPointer(Clear),
SetGLFunctionPointer(CompileShader),
SetGLFunctionPointer(CreateProgram),
SetGLFunctionPointer(CreateShader),
SetGLFunctionPointer(DeleteFramebuffers),
SetGLFunctionPointer(DeleteProgram),
SetGLFunctionPointer(DeleteShader),
SetGLFunctionPointer(DeleteTextures),
SetGLFunctionPointer(DetachShader),
SetGLFunctionPointer(DrawArrays),
SetGLFunctionPointer(DrawBuffers),
SetGLFunctionPointer(EnableVertexAttribArray),
SetGLFunctionPointer(Finish),
SetGLFunctionPointer(FramebufferTexture2D),
SetGLFunctionPointer(GenBuffers),
SetGLFunctionPointer(GenFramebuffers),
SetGLFunctionPointer(GenTextures),
SetGLFunctionPointer(GenVertexArrays),
SetGLFunctionPointer(GetAttribLocation),
SetGLFunctionPointer(GetError),
SetGLFunctionPointer(GetIntegerv),
SetGLFunctionPointer(GetProgramInfoLog),
SetGLFunctionPointer(GetProgramiv),
SetGLFunctionPointer(GetShaderInfoLog),
SetGLFunctionPointer(GetShaderiv),
SetGLFunctionPointer(GetString),
SetGLFunctionPointer(GetUniformLocation),
SetGLFunctionPointer(LinkProgram),
SetGLFunctionPointer(ReadPixels),
SetGLFunctionPointer(ShaderSource),
SetGLFunctionPointer(TexImage2D),
SetGLFunctionPointer(TexParameteri),
SetGLFunctionPointer(TexSubImage2D),
SetGLFunctionPointer(Uniform1f),
SetGLFunctionPointer(Uniform1i),
SetGLFunctionPointer(UseProgram),
SetGLFunctionPointer(VertexAttribPointer),
SetGLFunctionPointer(Viewport) {}
void (*ActiveTexture)(GLenum texture);
void (*AttachShader)(GLuint program, GLuint shader);
void (*BindBuffer)(GLenum target, GLuint buffer);
void (*BindFramebuffer)(GLenum target, GLuint framebuffer);
void (*BindTexture)(GLenum target, GLuint texture);
void (*BindVertexArray)(GLuint array);
void (*BufferData)(GLenum target, GLsizeiptr size, const GLvoid* data,
GLenum usage);
GLenum (*CheckFramebufferStatus)(GLenum target);
void (*Clear)(GLbitfield mask);
void (*CompileShader)(GLuint shader);
GLuint (*CreateProgram)();
GLuint (*CreateShader)(GLenum shader_type);
void (*DeleteFramebuffers)(GLsizei n, const GLuint* framebuffers);
void (*DeleteProgram)(GLuint program);
void (*DeleteShader)(GLuint shader);
void (*DeleteTextures)(GLsizei n, const GLuint* textures);
void (*DetachShader)(GLuint program, GLuint shader);
void (*DrawArrays)(GLenum mode, GLint first, GLsizei count);
void (*DrawBuffers)(GLsizei n, const GLenum* bufs);
void (*EnableVertexAttribArray)(GLuint index);
void (*Finish)();
void (*FramebufferTexture2D)(GLenum target, GLenum attachment,
GLenum textarget, GLuint texture, GLint level);
void (*GenBuffers)(GLsizei n, GLuint* buffers);
void (*GenFramebuffers)(GLsizei n, GLuint* ids);
void (*GenTextures)(GLsizei n, GLuint* textures);
void (*GenVertexArrays)(GLsizei n, GLuint* arrays);
GLint (*GetAttribLocation)(GLuint program, const GLchar* name);
GLenum (*GetError)();
void (*GetIntegerv)(GLenum pname, GLint* data);
void (*GetProgramInfoLog)(GLuint program, GLsizei maxLength, GLsizei* length,
GLchar* info_log);
void (*GetProgramiv)(GLuint program, GLenum pname, GLint* params);
void (*GetShaderInfoLog)(GLuint shader, GLsizei max_length, GLsizei* length,
GLchar* info_log);
void (*GetShaderiv)(GLuint shader, GLenum pname, GLint* params);
const GLubyte *(*GetString)(GLenum name);
GLint (*GetUniformLocation)(GLuint program, const GLchar* name);
void (*LinkProgram)(GLuint program);
void (*ReadPixels)(GLint x, GLint y, GLsizei width, GLsizei height,
GLenum format, GLenum type, GLvoid* data);
void (*ShaderSource)(GLuint shader, GLsizei count, const GLchar** string,
const GLint* length);
void (*TexImage2D)(GLenum target, GLint level, GLint internal_format,
GLsizei width, GLsizei height, GLint border, GLenum format,
GLenum type, const GLvoid* data);
void (*TexParameteri)(GLenum target, GLenum pname, GLint param);
void (*TexSubImage2D)(GLenum target, GLint level, GLint xoffset,
GLint yoffset, GLsizei width, GLsizei height,
GLenum format, GLenum type, const GLvoid* data);
void (*Uniform1f)(GLint location, GLfloat v0);
void (*Uniform1i)(GLint location, GLint v0);
void (*UseProgram)(GLuint program);
void (*VertexAttribPointer)(GLuint index, GLint size, GLenum type,
GLboolean normalized, GLsizei stride,
const GLvoid* pointer);
void (*Viewport)(GLint x, GLint y, GLsizei width, GLsizei height);
};
/*!
* \brief Process global OpenGL workspace.
*/
class OpenGLWorkspace final : public DeviceAPI {
public:
~OpenGLWorkspace() final;
// override device API
void SetDevice(TVMContext ctx) final;
void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) final;
void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final;
void FreeDataSpace(TVMContext ctx, void* ptr) final;
void CopyDataFromTo(const void* from,
size_t from_offset,
void* to,
size_t to_offset,
size_t size,
TVMContext ctx_from,
TVMContext ctx_to,
TVMStreamHandle stream) final;
void StreamSync(TVMContext ctx, TVMStreamHandle stream) final;
void* AllocWorkspace(TVMContext ctx, size_t size) final;
void FreeWorkspace(TVMContext ctx, void* data) final;
/*!
* \brief Get the global OpenGL workspace.
* \return The global OpenGL workspace.
*/
static const std::shared_ptr<OpenGLWorkspace>& Global();
/*!
* \brief Create an OpenGL program that uses the given fragment shader.
* \param fragment_shader The fragment shader **source**.
* \return The OpenGL program.
*/
Program CreateProgram(const char* fragment_shader_src);
/*!
* \brief Create an OpenGL texture that stores an array.
* \param type Element type.
* \param nbytes Number of bytes in the array.
* \return The OpenGL texture.
*/
Texture CreateTexture(TVMType type, size_t nbytes);
/*!
* \brief Upload user data into a sub-region of an OpenGL texture.
* \param texture The texture to be written to.
* \param begin The index of the first element to be written to.
* \param nelems The number of elements to be written to.
* \param data The user data.
*/
void PutTextureData(Texture* texture,
GLint begin,
GLsizei nelems,
const GLvoid* data);
/*!
* \brief Download a sub-region of an OpenGL texture.
* \param texture The texture to download from.
* \param begin The index of first element to download from.
* \param nelems The number of elements to download from.
* \param data The user buffer.
*/
void GetTextureData(const Texture* texture,
GLint begin,
GLsizei nelems,
GLvoid* data);
/*!
* \brief Set currently used OpenGL program.
*/
void SetCurrentProgram(const Program& program);
/*!
* \brief Set uniform values for an OpenGL program.
* Must call SetCurrentProgram before calling this.
* \param program The OpenGL program.
* \param name The uniform argument name.
* \param type The type of the uniform.
* \param value The value to pass in.
*/
void SetUniform(const Program& program,
const std::string& name,
TVMType type,
void* value);
/*!
* \brief Set input texture for an OpenGL program.
* Must call SetCurrentProgram before calling this.
* \param program The OpenGL program.
* \param name The texture uniform argument name.
* \param unit The texture unit to use. Each input texture must occupy a
* different unit.
* \param texture The OpenGL texture to pass in.
*/
void SetInputTexture(const Program& program,
const std::string& name,
GLuint unit,
Texture* texture);
/*!
* \brief Render to a texture.
* \param output The output texture.
*/
void Render(Texture* output);
private:
friend class Texture;
friend class Program;
// Global singleton. Hide constructor.
OpenGLWorkspace();
GLFWwindow* window_;
std::unique_ptr<GLFunctionPointers> gl;
GLuint vertex_shader_;
static const int kWindowWidth = 640;
static const int kWindowHeight = 480;
struct Vertex {
float x, y;
};
static constexpr size_t kNumVertices = 6;
static const Vertex vertices[kNumVertices];
static const char* vertex_shader_text_;
/*!
* \brief Bind a texture to a "texture unit".
* After calling this function, the "texture unit" becomes "active", and the
* texture is bound to GL_TEXTURE_2D in that "texture unit".
* \param unit The texture unit to activate.
* \param texture The texture to bind.
*/
void BindTextureUnit(GLuint unit, GLuint texture);
/*!
* \brief Callback in Texture's destructor.
*/
void OnDeleteTexture(GLuint texture);
/*!
* \brief Callback in Program's destructor.
*/
void OnDeleteProgram(GLuint program);
/*!
* \brief Check if there is any outstanding OpenGL error. If there is, crash.
*/
void CheckOpenGLError();
/*!
* \brief Get the maximum number of texture units.
*/
GLuint NumTextureUnits();
/*!
* \brief Create and compile a shader from a source string.
* \param shader_kind The kind of shader.
* Could be GL_VERTEX_SHADER or GL_FRAGMENT_SHADER.
* \param shader_src The source string of the shader.
* \return The compiled shader ID.
*/
GLuint CreateShader(GLenum shader_kind, const char* shader_src);
/*!
* \brief Create an OpenGL program that uses the given fragment shader.
* \param fragment_shader The **compiled** fragment shader.
* \return The OpenGL program.
*/
Program CreateProgram(GLuint fragment_shader);
};
/*!
* \brief An OpenGL program, composed of a vertex shader and a fragment shader.
* In TVM, every program has the same vertex shader.
* So a program just corresponds to a fragment shader.
* A program can only be created by the workspace.
* This class is just a wrapper over an OpenGL program ID.
*/
class Program {
public:
// Move constructor.
Program(Program&& other) noexcept
: workspace_(other.workspace_), program_(other.program_) {
other.program_ = kInvalidProgram;
}
// Move assignment.
Program& operator=(Program&& other) noexcept {
workspace_ = other.workspace_;
program_ = other.program_;
other.program_ = kInvalidProgram;
return *this;
}
// Disallow copy.
Program(const Program& other) = delete;
Program& operator=(const Program& other) = delete;
// Destructor.
~Program() {
if (program_ != kInvalidProgram) {
workspace_->OnDeleteProgram(program_);
program_ = kInvalidProgram;
}
}
private:
friend class OpenGLWorkspace;
// Only OpenGLWorkspace can create a Program.
// We enforce this to make sure OpenGL is initialized.
explicit Program(OpenGLWorkspace* workspace, GLuint program)
: workspace_(workspace), program_(program) {}
// The internal OpenGL program ID.
GLuint program() const { return program_; }
static constexpr GLuint kInvalidProgram = static_cast<GLuint>(-1);
OpenGLWorkspace* workspace_;
GLuint program_;
};
/*!
* \brief The storage format of a texture.
* The members match the API of glTexImage2D.
*/
struct TextureFormat {
TextureFormat(GLint internal_format, GLenum format, GLenum type)
: internal_format(internal_format), format(format), type(type) {}
GLsizei elemsz() const {
switch (type) {
case GL_BYTE: case GL_UNSIGNED_BYTE:
return 1;
case GL_SHORT: case GL_UNSIGNED_SHORT:
return 2;
case GL_INT: case GL_UNSIGNED_INT:
return 4;
case GL_FLOAT:
return 4;
default:
LOG(FATAL) << "Unsupported type";
return -1;
}
}
bool operator==(const TextureFormat& other) const {
return std::make_tuple(internal_format, format, type) ==
std::make_tuple(other.internal_format, other.format, other.type);
}
GLint internal_format; // OpenGL says this is GLint, not GLenum.
GLenum format;
GLenum type;
};
/*!
* \brief An OpenGL texture represents a chunk of GPU memory.
* This is the way we represent tensors.
* We always use 2D textures.
*/
class Texture {
public:
// Move constructor.
Texture(Texture&& other) noexcept
: workspace_(other.workspace_), texture_(other.texture_),
format_(other.format_), width_(other.width_), height_(other.height_) {
other.texture_ = kInvalidTexture;
}
// Move assignment.
Texture& operator=(Texture&& other) noexcept {
workspace_ = other.workspace_;
texture_ = other.texture_;
format_ = other.format_;
width_ = other.width_;
height_ = other.height_;
other.texture_ = kInvalidTexture;
return *this;
}
// Disallow copy.
Texture(const Texture& other) = delete;
Texture& operator=(const Texture& other) = delete;
// Destructor.
~Texture() {
if (texture_ != kInvalidTexture) {
workspace_->OnDeleteTexture(texture_);
texture_ = kInvalidTexture;
}
}
/*!
* \brief The width of the texture in number of pixels.
*/
GLsizei width() const { return width_; }
/*!
* \brief The height of the texture in number of pixels.
*/
GLsizei height() const { return height_; }
/*!
* \brief The number of bytes of each element in the array.
*/
GLsizei elemsz() const { return format_.elemsz(); }
private:
friend class OpenGLWorkspace;
// Only OpenGLWorkspace can create a Texture.
// We enforce this to make sure OpenGL is initialized.
// Always only use the first dimension of a 2D texture.
// The reason is that texelFetch only supports 2D textures.
explicit Texture(OpenGLWorkspace* workspace, GLuint texture,
TextureFormat format,
GLsizei width, GLsizei height)
: workspace_(workspace), texture_(texture), format_(format),
width_(width), height_(height) {}
// The internal texture ID.
GLuint texture() const { return texture_; }
static constexpr GLuint kInvalidTexture = static_cast<GLuint>(-1);
OpenGLWorkspace* workspace_;
GLuint texture_;
TextureFormat format_;
GLsizei width_;
GLsizei height_;
};
} // namespace gl
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENGL_OPENGL_COMMON_H_
/*!
* Copyright (c) 2017 by Contributors
* \file opengl_device_api.cc
*/
#include "./opengl_common.h"
#if TVM_OPENGL_RUNTIME
#include <tvm/runtime/registry.h>
#include <cstring>
namespace tvm {
namespace runtime {
namespace gl {
/*!
* \brief Turn OpenGL error enum to string.
*/
static const char* GLGetErrorString(GLenum error) {
switch (error) {
case GL_NO_ERROR:
return "GL_NO_ERROR";
case GL_INVALID_ENUM:
return "GL_INVALID_ENUM";
case GL_INVALID_VALUE:
return "GL_INVALID_VALUE";
case GL_INVALID_OPERATION:
return "GL_INVALID_OPERATION";
case GL_STACK_OVERFLOW:
return "GL_STACK_OVERFLOW";
case GL_STACK_UNDERFLOW:
return "GL_STACK_UNDERFLOW";
case GL_OUT_OF_MEMORY:
return "GL_OUT_OF_MEMORY";
default:
return "Unknown OpenGL error code";
}
}
/*!
* \brief Get the latest error.
*/
void OpenGLWorkspace::CheckOpenGLError() {
GLenum err = gl->GetError();
CHECK_EQ(err, GL_NO_ERROR) << "OpenGL error, code=" << err << ": "
<< gl::GLGetErrorString(err);
}
/*!
* \brief Protected OpenGL call.
* \param func Expression to call.
*/
#define OPENGL_CALL(func) \
{ \
(func); \
CheckOpenGLError(); \
}
/*!
* \brief The error handling callback passed to GLFW.
*/
void GlfwErrorCallback(int err, const char* str) {
LOG(FATAL) << "Error: [" << err << "] " << str;
}
const std::shared_ptr<OpenGLWorkspace>& OpenGLWorkspace::Global() {
static std::shared_ptr<OpenGLWorkspace> inst(new OpenGLWorkspace);
return inst;
}
void OpenGLWorkspace::SetDevice(TVMContext ctx) {
CHECK_EQ(ctx.device_type, static_cast<int>(kOpenGL))
<< "Device type must be OpenGL.";
CHECK_EQ(ctx.device_id, 0) << "Only support 1 OpenGL \"device\".";
}
void OpenGLWorkspace::GetAttr(
TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) {
switch (kind) {
case kExist: {
*rv = static_cast<int>(ctx.device_id == 0);
break;
}
case kMaxThreadsPerBlock: {
GLint max_texture_size;
OPENGL_CALL(gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size));
break;
}
case kWarpSize: {
*rv = 1;
break;
}
case kComputeVersion: {
break;
}
}
}
void* OpenGLWorkspace::AllocDataSpace(
TVMContext ctx, size_t nbytes, size_t alignment, TVMType type_hint) {
return reinterpret_cast<void*>(new Texture(CreateTexture(type_hint, nbytes)));
}
void OpenGLWorkspace::FreeDataSpace(TVMContext ctx, void* ptr) {
delete reinterpret_cast<Texture*>(ptr);
}
void OpenGLWorkspace::CopyDataFromTo(const void* from,
size_t from_offset,
void* to,
size_t to_offset,
size_t size,
TVMContext ctx_from,
TVMContext ctx_to,
TVMStreamHandle stream) {
CHECK(stream == nullptr);
// TODO(zhixunt): This is a nasty hack to avoid comparison between
// incompatible enums. We should add kOpenGL to dlpack.
constexpr int gl_devtype = kOpenGL;
std::tuple<int, int> type_from_to(ctx_from.device_type, ctx_to.device_type);
if (type_from_to == std::make_tuple(gl_devtype, gl_devtype)) {
auto from_texture = static_cast<const Texture*>(from);
auto to_texture = static_cast<Texture*>(to);
auto temp_buffer = std::unique_ptr<char[]>(new char[size]);
CHECK(from_texture->format_ == to_texture->format_);
auto elemsz = from_texture->elemsz();
auto from_begin = static_cast<GLint>(from_offset / elemsz);
auto to_begin = static_cast<GLint>(to_offset / elemsz);
auto nelems = static_cast<GLsizei>(size / elemsz);
GetTextureData(from_texture, from_begin, nelems, temp_buffer.get());
PutTextureData(to_texture, to_begin, nelems, temp_buffer.get());
} else if (type_from_to == std::make_tuple(gl_devtype, kDLCPU)) {
auto texture = static_cast<const Texture*>(from);
void *data = static_cast<char *>(to) + to_offset;
auto elemsz = texture->elemsz();
auto begin = static_cast<GLint>(from_offset / elemsz);
auto nelems = static_cast<GLsizei>(size / elemsz);
GetTextureData(texture, begin, nelems, data);
} else if (type_from_to == std::make_tuple(kDLCPU, gl_devtype)) {
auto texture = reinterpret_cast<Texture*>(to);
const void* data = static_cast<const char*>(from) + from_offset;
auto elemsz = texture->elemsz();
auto begin = static_cast<GLint>(to_offset / elemsz);
auto nelems = static_cast<GLsizei>(size / elemsz);
PutTextureData(texture, begin, nelems, data);
} else {
LOG(FATAL) << "Expect copy from/to OpenGL or between OpenGL";
}
}
void OpenGLWorkspace::StreamSync(TVMContext ctx, TVMStreamHandle stream) {}
void* OpenGLWorkspace::AllocWorkspace(TVMContext ctx, size_t size) {
LOG(FATAL) << "Cannot allocate OpenGL workspace.";
return nullptr;
}
void OpenGLWorkspace::FreeWorkspace(TVMContext ctx, void* data) {
LOG(FATAL) << "Cannot free OpenGL workspace.";
}
OpenGLWorkspace::OpenGLWorkspace() {
// Set an error handler.
// This can be called before glfwInit().
glfwSetErrorCallback(&GlfwErrorCallback);
// Initialize GLFW.
if (glfwInit() != GL_TRUE) {
LOG(FATAL) << "glfwInit() failed!";
}
// Create a window.
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT, GL_TRUE);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_VISIBLE, GL_FALSE);
window_ = glfwCreateWindow(kWindowWidth, kWindowHeight, "", nullptr, nullptr);
if (window_ == nullptr) {
LOG(FATAL) << "glfwCreateWindow() failed!";
}
// Before using any OpenGL API, we must specify a context.
glfwMakeContextCurrent(window_);
// Load all OpenGL API function pointers.
gl = std::unique_ptr<GLFunctionPointers>(new GLFunctionPointers);
CheckOpenGLError();
// We always render the same vertices and triangles.
GLuint vertex_buffer;
OPENGL_CALL(gl->GenBuffers(1, &vertex_buffer));
OPENGL_CALL(gl->BindBuffer(GL_ARRAY_BUFFER, vertex_buffer));
OPENGL_CALL(gl->BufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices,
GL_STATIC_DRAW));
GLuint vertex_array;
OPENGL_CALL(gl->GenVertexArrays(1, &vertex_array));
OPENGL_CALL(gl->BindVertexArray(vertex_array));
OPENGL_CALL(gl->BindBuffer(GL_ARRAY_BUFFER, vertex_buffer));
// We always use the same vertex shader.
vertex_shader_ = CreateShader(GL_VERTEX_SHADER, vertex_shader_text_);
LOG(INFO) << "OpenGL initialized, version = " << gl->GetString(GL_VERSION);
}
OpenGLWorkspace::~OpenGLWorkspace() {
// Paired with glfwCreateWindow().
glfwDestroyWindow(window_);
// Paired with glfwInit().
glfwTerminate();
}
void OpenGLWorkspace::BindTextureUnit(GLuint unit, GLuint texture) {
OPENGL_CALL(gl->ActiveTexture(GL_TEXTURE0 + unit));
OPENGL_CALL(gl->BindTexture(GL_TEXTURE_2D, texture));
}
void OpenGLWorkspace::OnDeleteTexture(GLuint texture) {
OPENGL_CALL(gl->DeleteTextures(1, &texture));
}
void OpenGLWorkspace::OnDeleteProgram(GLuint program) {
OPENGL_CALL(gl->DeleteProgram(program));
}
GLuint OpenGLWorkspace::NumTextureUnits() {
GLint num_units;
OPENGL_CALL(gl->GetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &num_units));
return static_cast<GLuint>(num_units);
}
const OpenGLWorkspace::Vertex OpenGLWorkspace::vertices[OpenGLWorkspace::kNumVertices] = {
{-1.f, -1.f},
{1.0f, -1.f},
{1.0f, 1.0f},
{-1.f, -1.f},
{-1.f, 1.0f},
{1.0f, 1.0f},
};
// Don't need to change this.
// The vertex shader only needs to take in the triangle points.
// No need for point transformations.
const char* OpenGLWorkspace::vertex_shader_text_ = "#version 300 es\n"
"in vec2 point; // input to vertex shader\n"
"void main() {\n"
" gl_Position = vec4(point, 0.0, 1.0);\n"
"}\n";
Program OpenGLWorkspace::CreateProgram(
const char* fragment_shader_src) {
// Create and compile the shaders.
GLuint fragment_shader = CreateShader(GL_FRAGMENT_SHADER,
fragment_shader_src);
// Link the shaders and create the program.
Program program = CreateProgram(fragment_shader);
OPENGL_CALL(gl->DeleteShader(fragment_shader));
return program;
}
GLuint OpenGLWorkspace::CreateShader(GLenum shader_kind,
const char* shader_src) {
// Create the shader.
GLuint shader = gl->CreateShader(shader_kind);
gl->ShaderSource(shader, 1, &shader_src, nullptr);
gl->CompileShader(shader);
// Check compile errors.
GLint err;
gl->GetShaderiv(shader, GL_COMPILE_STATUS, &err);
GLint info_log_len;
gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &info_log_len);
if (err != GL_TRUE) {
std::unique_ptr<char[]> err_msg(new char[info_log_len + 1]);
gl->GetShaderInfoLog(shader, info_log_len, nullptr, err_msg.get());
LOG(FATAL) << err_msg.get();
assert(false);
}
CheckOpenGLError();
return shader;
}
static TextureFormat GetTextureFormat(TVMType type) {
CHECK_EQ(type.lanes, 1) << "Not supporting multi-lane types.";
switch (type.code) {
case kDLInt: {
switch (type.bits) {
case 8:
return {GL_R8I, GL_RED_INTEGER, GL_BYTE};
case 16:
return {GL_R16I, GL_RED_INTEGER, GL_SHORT};
case 32:
return {GL_R32I, GL_RED_INTEGER, GL_INT};
default:
LOG(FATAL) << "Unsupported type bits " << type.bits;
}
}
case kDLUInt: {
switch (type.bits) {
case 8:
return {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE};
case 16:
return {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT};
case 32:
return {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT};
default:
LOG(FATAL) << "Unsupported type bits " << type.bits;
}
}
case kDLFloat: {
switch (type.bits) {
case 32:
return {GL_R32F, GL_RED, GL_FLOAT};
default:
LOG(FATAL) << "Unsupported type bits " << type.bits;
}
}
default:
LOG(FATAL) << "Unsupported type code" << type.code;
}
assert(false);
}
Texture OpenGLWorkspace::CreateTexture(TVMType type, size_t nbytes) {
// Create a texture.
GLuint texture;
OPENGL_CALL(gl->GenTextures(1, &texture));
BindTextureUnit(NumTextureUnits() - 1, texture);
// Use glTexImage2D with nullptr data to specify GPU data storage.
auto texture_format = GetTextureFormat(type);
auto width = static_cast<GLsizei>(nbytes / (type.bits / 8));
auto height = GLsizei(1);
OPENGL_CALL(gl->TexImage2D(GL_TEXTURE_2D, /*level=*/0,
texture_format.internal_format,
width, height, /*border=*/0,
texture_format.format, texture_format.type,
/*data=*/nullptr));
OPENGL_CALL(
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE));
OPENGL_CALL(
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE));
OPENGL_CALL(
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST));
OPENGL_CALL(
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST));
return Texture(this, texture, texture_format, width, height);
}
Program OpenGLWorkspace::CreateProgram(GLuint fragment_shader) {
// Create the program and link the shaders.
GLuint program = gl->CreateProgram();
gl->AttachShader(program, vertex_shader_);
gl->AttachShader(program, fragment_shader);
gl->LinkProgram(program);
// Check link errors.
GLint err;
gl->GetProgramiv(program, GL_LINK_STATUS, &err);
GLint info_log_len;
gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &info_log_len);
if (err != GL_TRUE) {
std::unique_ptr<char[]> err_msg(new char[info_log_len + 1]);
gl->GetProgramInfoLog(program, info_log_len, nullptr, err_msg.get());
LOG(FATAL) << err_msg.get();
assert(false);
}
CheckOpenGLError();
OPENGL_CALL(gl->DetachShader(program, vertex_shader_));
OPENGL_CALL(gl->DetachShader(program, fragment_shader));
auto point_attrib = GLuint(gl->GetAttribLocation(program, "point"));
OPENGL_CALL(gl->EnableVertexAttribArray(point_attrib));
OPENGL_CALL(gl->VertexAttribPointer(point_attrib, 2, GL_FLOAT, GL_FALSE,
sizeof(Vertex), nullptr));
return Program(this, program);
}
void OpenGLWorkspace::PutTextureData(Texture *texture,
GLint begin,
GLsizei nelems,
const GLvoid* data) {
// Bind to temporary unit.
BindTextureUnit(NumTextureUnits() - 1, texture->texture());
// Similar to cudaMemcpy.
OPENGL_CALL(gl->TexSubImage2D(GL_TEXTURE_2D, /*level=*/0,
/*xoffset=*/begin, /*yoffset=*/0,
/*width=*/nelems, /*height=*/1,
texture->format_.format, texture->format_.type,
data));
}
void OpenGLWorkspace::GetTextureData(const Texture *texture,
GLint begin,
GLsizei nelems,
GLvoid* data) {
BindTextureUnit(NumTextureUnits() - 1, texture->texture());
// Create frame buffer.
GLuint frame_buffer;
OPENGL_CALL(gl->GenFramebuffers(1, &frame_buffer));
OPENGL_CALL(gl->BindFramebuffer(GL_FRAMEBUFFER, frame_buffer));
// Bind texture to framebuffer's attachment 0.
OPENGL_CALL(gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D, texture->texture(), 0));
// Always check that our framebuffer is okay.
if (gl->CheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG(FATAL) << "Framebuffer not complete.";
}
#ifdef __EMSCRIPTEN__
// WebGL2's glReadPixels API doesn't allow GL_RED user buffer format.
// Instead, We must use GL_RGBA. This means the data we retrieve has useless
// GBA channels. Here we are applying a dirty hack.
// TODO(zhixunt): We really want to utilize all RGBA channels in textures.
//
// WebGL2's glReadPixels API also doesn't allow GL_RED_INTEGER or
// GL_RGB_INTEGER user buffer format, which means we cannot retrieve integer
// texture data? (need to confirm)
CHECK_EQ(texture->format_.internal_format, GL_R32F)
<< "Retrieving integer texture not supported yet.";
auto elemsz = texture->format_.elemsz();
auto nchannels = 4;
auto padded_data_size = nchannels * nelems * elemsz;
auto padded_data = std::unique_ptr<char[]>(new char[padded_data_size]);
OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems,
/*height=*/1, GL_RGBA, GL_FLOAT,
padded_data.get()));
for (GLsizei i = 0; i != nelems; ++i) {
auto dst = reinterpret_cast<char *>(data) + i * elemsz;
auto src = padded_data.get() + nchannels * i * elemsz;
std::memcpy(dst, src, elemsz);
}
#else
OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems,
/*height=*/1, texture->format_.format,
texture->format_.type, data));
#endif
OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer));
}
void OpenGLWorkspace::SetCurrentProgram(const Program& program) {
OPENGL_CALL(gl->UseProgram(program.program()));
}
void OpenGLWorkspace::SetUniform(const Program& program,
const std::string& name,
TVMType type,
void* value) {
GLint location = gl->GetUniformLocation(program.program(), name.c_str());
switch (type.code) {
case kDLInt: {
CHECK_EQ(type.bits, 32) << "Only support 32-bit int for uniform.";
GLint uniform_value = *reinterpret_cast<GLint*>(value);
OPENGL_CALL(gl->Uniform1i(location, uniform_value));
break;
}
case kDLUInt: {
LOG(FATAL) << "Strangely, emcc WebGL does not support glUniform1ui.";
break;
}
case kDLFloat: {
CHECK_EQ(type.bits, 32) << "Only support 32-bit float for uniform.";
GLfloat uniform_value = *reinterpret_cast<GLfloat*>(value);
OPENGL_CALL(gl->Uniform1f(location, uniform_value));
break;
}
default: {
LOG(FATAL) << "Unsupported type code for uniform.";
break;
}
}
}
void OpenGLWorkspace::SetInputTexture(const Program& program,
const std::string& name,
GLuint unit,
Texture* texture) {
// We always use the last texture unit as temporary.
// Therefore, we can have "NumTextureUnits() - 1" input textures.
CHECK_LT(unit, NumTextureUnits() - 1) << "Too many textures.";
BindTextureUnit(unit, texture->texture());
GLint location = gl->GetUniformLocation(program.program_, name.c_str());
OPENGL_CALL(gl->Uniform1i(location, unit));
}
void OpenGLWorkspace::Render(Texture* output) {
// Create frame buffer.
GLuint frame_buffer;
OPENGL_CALL(gl->GenFramebuffers(1, &frame_buffer));
OPENGL_CALL(gl->BindFramebuffer(GL_FRAMEBUFFER, frame_buffer));
// Set "renderedTexture" as our colour attachement 0.
OPENGL_CALL(gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D, output->texture(), 0));
// Specify that we will render to color attachment 0.
GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
OPENGL_CALL(gl->DrawBuffers(1, DrawBuffers));
// Always check that our framebuffer is okay.
if (gl->CheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
LOG(FATAL) << "Framebuffer not complete.";
}
// Perform rendering.
OPENGL_CALL(gl->Viewport(0, 0, output->width(), output->height()));
OPENGL_CALL(gl->Clear(GL_COLOR_BUFFER_BIT));
OPENGL_CALL(gl->DrawArrays(GL_TRIANGLES, 0, 6));
OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer));
}
TVM_REGISTER_GLOBAL("device_api.opengl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
DeviceAPI* ptr = OpenGLWorkspace::Global().get();
*rv = static_cast<void*>(ptr);
});
} // namespace gl
} // namespace runtime
} // namespace tvm
#endif // TVM_OPENGL_RUNTIME
/*!
* Copyright (c) 2017 by Contributors
* \file opengl_module.cc
*/
#include <utility>
#include "./opengl_common.h"
#include "./opengl_module.h"
#if TVM_OPENGL_RUNTIME
#include <tvm/runtime/registry.h>
#include "../pack_args.h"
#include "../thread_storage_scope.h"
#include "../file_util.h"
namespace tvm {
namespace runtime {
class OpenGLModuleNode final : public ModuleNode {
public:
OpenGLModuleNode(std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap);
~OpenGLModuleNode() override = default;
const char* type_key() const final { return "opengl"; }
PackedFunc GetFunction(const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) final;
std::string GetSource(const std::string& format) final;
void SaveToFile(const std::string& file_name,
const std::string& format) final;
void SaveToBinary(dmlc::Stream* stream) final;
const gl::Program& GetProgram(const std::string& func_name) const;
const OpenGLShader& GetShader(const std::string& func_name) const;
const FunctionInfo& GetFunctionInfo(const std::string& func_name) const;
gl::OpenGLWorkspace& workspace() const { return *workspace_; }
private:
std::shared_ptr<gl::OpenGLWorkspace> workspace_;
std::unordered_map<std::string, OpenGLShader> shaders_;
std::string fmt_;
std::unordered_map<std::string, FunctionInfo> fmap_;
std::unordered_map<std::string, gl::Program> programs_;
DISALLOW_COPY_AND_ASSIGN(OpenGLModuleNode);
};
class OpenGLWrappedFunc {
public:
OpenGLWrappedFunc(OpenGLModuleNode* m,
std::shared_ptr<ModuleNode> sptr,
std::string func_name,
std::vector<size_t> arg_size,
const std::vector<std::string>& thread_axis_tags);
void operator()(TVMArgs args, TVMRetValue* rv, void** void_args) const;
private:
// The module
OpenGLModuleNode* m_;
// resource handle
std::shared_ptr<ModuleNode> sptr_;
// The name of the function.
std::string func_name_;
// convert code for void argument
std::vector<size_t> arg_size_;
// thread axis config
ThreadAxisConfig thread_axis_cfg_;
};
OpenGLModuleNode::OpenGLModuleNode(
std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap)
: workspace_(gl::OpenGLWorkspace::Global()), shaders_(std::move(shaders)),
fmt_(std::move(fmt)), fmap_(std::move(fmap)), programs_() {
CHECK_EQ(fmt_, "gl") << "Unknown OpenGL format " << fmt_;
for (auto &pair : shaders_) {
auto &func_name = pair.first;
auto &shader = pair.second;
programs_.emplace(func_name,
workspace_->CreateProgram(shader.source.c_str()));
}
}
PackedFunc OpenGLModuleNode::GetFunction(
const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) {
CHECK_EQ(sptr_to_self.get(), this);
CHECK_NE(name, symbol::tvm_module_main) << "Device function do not have main";
auto func_info_it = fmap_.find(name);
if (func_info_it == fmap_.end()) { return PackedFunc(); }
auto &func_info = func_info_it->second;
std::vector<size_t> arg_size(func_info.arg_types.size());
for (size_t i = 0; i < func_info.arg_types.size(); ++i) {
TVMType t = func_info.arg_types[i];
CHECK_EQ(t.lanes, 1U);
uint32_t bits = t.bits;
CHECK_EQ(bits % 8, 0U);
arg_size[i] = bits / 8;
}
// Initialize the wrapped func.
OpenGLWrappedFunc f(this, sptr_to_self, name, arg_size,
func_info.thread_axis_tags);
return PackFuncVoidAddr(f, func_info.arg_types);
}
std::string OpenGLModuleNode::GetSource(const std::string& format) {
if (format != fmt_ && fmt_ != "gl") { return ""; }
std::ostringstream os;
for (auto &pair : shaders_) {
auto &name = pair.first;
auto &shader = pair.second;
os << "[" << name << "]" << "\n";
os << shader.source <<"\n";
}
return os.str();
}
void OpenGLModuleNode::SaveToFile(const std::string& file_name,
const std::string& format) {
std::string fmt = GetFileFormat(file_name, format);
CHECK_EQ(fmt, fmt_) << "Can only save to format=" << fmt_;
std::string meta_file = GetMetaFilePath(file_name);
SaveMetaDataToFile(meta_file, fmap_);
SaveBinaryToFile(file_name, ToJSON(shaders_));
}
void OpenGLModuleNode::SaveToBinary(dmlc::Stream* stream) {
stream->Write(fmt_);
stream->Write(fmap_);
stream->Write(ToJSON(shaders_));
}
const gl::Program& OpenGLModuleNode::GetProgram(
const std::string& func_name) const {
auto it = programs_.find(func_name);
if (it == programs_.end()) {
LOG(FATAL) << "Cannot find program";
}
return it->second;
}
const OpenGLShader& OpenGLModuleNode::GetShader(
const std::string& func_name) const {
auto it = shaders_.find(func_name);
if (it == shaders_.end()) {
LOG(FATAL) << "Cannot find shader";
}
return it->second;
}
const FunctionInfo& OpenGLModuleNode::GetFunctionInfo(
const std::string& func_name) const {
auto it = fmap_.find(func_name);
if (it == fmap_.end()) {
LOG(FATAL) << "Cannot find shader";
}
return it->second;
}
OpenGLWrappedFunc::OpenGLWrappedFunc(
OpenGLModuleNode* m,
std::shared_ptr<ModuleNode> sptr,
std::string func_name,
std::vector<size_t> arg_size,
const std::vector<std::string>& thread_axis_tags)
: m_(m), sptr_(std::move(sptr)), func_name_(std::move(func_name)),
arg_size_(std::move(arg_size)) {
thread_axis_cfg_.Init(arg_size_.size(), thread_axis_tags);
}
void OpenGLWrappedFunc::operator()(TVMArgs args, TVMRetValue* rv,
void** void_args) const {
auto &shader = m_->GetShader(func_name_);
auto &program = m_->GetProgram(func_name_);
auto &func_info = m_->GetFunctionInfo(func_name_);
size_t nargs = shader.arg_kinds.size();
// Must call this function before setting uniforms & input textures.
m_->workspace().SetCurrentProgram(program);
// Set all arguments.
GLuint texture_unit = 0;
gl::Texture* output = nullptr;
for (size_t i = 0; i != nargs; ++i) {
auto &name = shader.arg_names.at(i);
auto kind = shader.arg_kinds.at(i);
auto type = func_info.arg_types.at(i);
switch (kind) {
case OpenGLArgKind::kUniform: {
m_->workspace().SetUniform(program, name, type, void_args[i]);
break;
}
case OpenGLArgKind::kInputTexture: {
CHECK_EQ(type.code, kHandle) << "Type is not handle?";
auto texture = *static_cast<gl::Texture**>(void_args[i]);
m_->workspace().SetInputTexture(program, name, texture_unit, texture);
++texture_unit;
break;
}
case OpenGLArgKind::kOutputTexture: {
CHECK_EQ(type.code, kHandle) << "Type is not handle?";
CHECK(output == nullptr) << "Can only have one output texture.";
output = *static_cast<gl::Texture**>(void_args[i]);
break;
}
}
}
// Set "thread_extent" uniform.
ThreadWorkLoad wl = thread_axis_cfg_.Extract(args);
std::unique_ptr<GLint> thread_extent(new GLint(wl.block_dim(0)));
m_->workspace().SetUniform(program, shader.thread_extent_var,
TVMType{kDLInt, 32, 1},
static_cast<void*>(thread_extent.get()));
m_->workspace().Render(output);
}
Module OpenGLModuleCreate(std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap) {
auto n = std::make_shared<OpenGLModuleNode>(std::move(shaders),
std::move(fmt),
std::move(fmap));
return Module(n);
}
Module OpenGLModuleLoadFile(const std::string& file_name,
const std::string& format) {
std::string data;
std::unordered_map<std::string, FunctionInfo> fmap;
std::string fmt = GetFileFormat(file_name, format);
std::string meta_file = GetMetaFilePath(file_name);
LoadBinaryFromFile(file_name, &data);
LoadMetaDataFromFile(meta_file, &fmap);
return OpenGLModuleCreate(FromJSON(data), fmt, fmap);
}
Module OpenGLModuleLoadBinary(void* strm) {
auto stream = static_cast<dmlc::Stream*>(strm);
std::string data;
std::unordered_map<std::string, FunctionInfo> fmap;
std::string fmt;
stream->Read(&fmt);
stream->Read(&fmap);
stream->Read(&data);
return OpenGLModuleCreate(FromJSON(data), fmt, fmap);
}
TVM_REGISTER_GLOBAL("module.loadfile_gl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenGLModuleLoadFile(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("module.loadfile_glbin")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenGLModuleLoadFile(args[0], args[1]);
});
TVM_REGISTER_GLOBAL("module.loadbinary_opengl")
.set_body([](TVMArgs args, TVMRetValue* rv) {
*rv = OpenGLModuleLoadBinary(args[0]);
});
} // namespace runtime
} // namespace tvm
#endif // TVM_OPENGL_RUNTIME
/*!
* Copyright (c) 2017 by Contributors
* \file opengl_module.h
* \brief Execution handling of OpenGL kernels
*/
#ifndef TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_
#define TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_
#include <tvm/runtime/config.h>
#include <tvm/runtime/packed_func.h>
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "../meta_data.h"
namespace tvm {
namespace runtime {
/*!
* \brief Determines how we supply arguments.
*/
enum class OpenGLArgKind {
kInputTexture = 0, // Bind to "gsampler2D" in GLSL.
kOutputTexture = 1, // Bind to "out" in GLSL.
kUniform = 2, // Bind to "uniform" in GLSL.
};
std::string OpenGLArgKind2String(OpenGLArgKind kind);
OpenGLArgKind String2OpenGLArgKind(const std::string& str);
/*!
* \brief The output of OpenGL codegen.
* Contains necessary information to build a fragment shader and bind arguments.
*/
struct OpenGLShader {
OpenGLShader() = default;
OpenGLShader(std::string source,
std::vector<std::string> arg_names,
std::vector<OpenGLArgKind> arg_kinds,
std::string thread_extent_var)
: source(std::move(source)), arg_names(std::move(arg_names)),
arg_kinds(std::move(arg_kinds)),
thread_extent_var(std::move(thread_extent_var)) {
CHECK_EQ(this->arg_names.size(), this->arg_kinds.size()) << "Invalid input";
}
std::string source;
std::vector<std::string> arg_names; // Matches FunctionInfo.
std::vector<OpenGLArgKind> arg_kinds; // Matches FunctionInfo.
std::string thread_extent_var; // Stores the output length.
void Save(dmlc::JSONWriter* writer) const;
void Load(dmlc::JSONReader* reader);
};
std::string ToJSON(const std::unordered_map<std::string, OpenGLShader>& shaders);
std::unordered_map<std::string, OpenGLShader> FromJSON(const std::string& str);
/*!
* \brief Create an OpenGL module from data.
*
* \param data The module data.
* \param fmt The format of the data,
* \param fmap The map function information map of each function.
*/
Module OpenGLModuleCreate(std::unordered_map<std::string, OpenGLShader> shaders,
std::string fmt,
std::unordered_map<std::string, FunctionInfo> fmap);
inline std::string OpenGLArgKind2String(OpenGLArgKind kind) {
switch (kind) {
case OpenGLArgKind::kOutputTexture:
return "output_texture";
case OpenGLArgKind::kInputTexture:
return "input_texture";
case OpenGLArgKind::kUniform:
return "uniform";
}
assert(false);
}
inline OpenGLArgKind String2OpenGLArgKind(const std::string& str) {
if (str == "output_texture") {
return OpenGLArgKind::kOutputTexture;
} else if (str == "input_texture") {
return OpenGLArgKind::kInputTexture;
} else if (str == "uniform") {
return OpenGLArgKind::kUniform;
} else {
LOG(FATAL) << "Invalid OpenGL arg kind.";
assert(false);
}
}
inline void OpenGLShader::Save(dmlc::JSONWriter* writer) const {
std::vector<std::string> arg_kind_strs;
for (auto kind : arg_kinds) {
arg_kind_strs.push_back(OpenGLArgKind2String(kind));
}
writer->BeginObject();
writer->WriteObjectKeyValue("arg_names", arg_names);
writer->WriteObjectKeyValue("arg_kinds", arg_kind_strs);
writer->WriteObjectKeyValue("source", source);
writer->WriteObjectKeyValue("thread_extent_var", thread_extent_var);
writer->EndObject();
}
inline void OpenGLShader::Load(dmlc::JSONReader* reader) {
std::vector<std::string> arg_kind_strs;
dmlc::JSONObjectReadHelper helper;
helper.DeclareField("arg_names", &arg_names);
helper.DeclareField("arg_kinds", &arg_kind_strs);
helper.DeclareField("source", &source);
helper.DeclareField("thread_extent_var", &thread_extent_var);
helper.ReadAllFields(reader);
arg_kinds.clear();
for (auto& str : arg_kind_strs) {
arg_kinds.push_back(String2OpenGLArgKind(str));
}
}
inline std::string ToJSON(
const std::unordered_map<std::string, OpenGLShader>& shaders) {
std::ostringstream os;
dmlc::JSONWriter writer(&os);
writer.BeginObject();
writer.WriteObjectKeyValue("shaders", shaders);
writer.EndObject();
return os.str();
}
inline std::unordered_map<std::string, OpenGLShader> FromJSON(
const std::string& str) {
std::unordered_map<std::string, OpenGLShader> shaders;
std::istringstream is(str);
dmlc::JSONReader reader(&is);
dmlc::JSONObjectReadHelper helper;
helper.DeclareField("shaders", &shaders);
helper.ReadAllFields(&reader);
return shaders;
}
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_OPENGL_OPENGL_MODULE_H_
...@@ -52,12 +52,15 @@ class ROCMDeviceAPI final : public DeviceAPI { ...@@ -52,12 +52,15 @@ class ROCMDeviceAPI final : public DeviceAPI {
} }
*rv = value; *rv = value;
} }
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
ROCM_CALL(hipSetDevice(ctx.device_id)); ROCM_CALL(hipSetDevice(ctx.device_id));
CHECK_EQ(256 % alignment, 0U) CHECK_EQ(256 % alignment, 0U)
<< "ROCM space is aligned at 256 bytes"; << "ROCM space is aligned at 256 bytes";
void *ret; void *ret;
ROCM_CALL(hipMalloc(&ret, size)); ROCM_CALL(hipMalloc(&ret, nbytes));
return ret; return ret;
} }
......
...@@ -20,10 +20,13 @@ class RPCDeviceAPI final : public DeviceAPI { ...@@ -20,10 +20,13 @@ class RPCDeviceAPI final : public DeviceAPI {
*rv = GetSess(ctx)->CallRemote( *rv = GetSess(ctx)->CallRemote(
RPCCode::kDevGetAttr, ctx, static_cast<int>(kind)); RPCCode::kDevGetAttr, ctx, static_cast<int>(kind));
} }
void* AllocDataSpace(TVMContext ctx, size_t size, size_t alignment) final { void* AllocDataSpace(TVMContext ctx,
size_t nbytes,
size_t alignment,
TVMType type_hint) final {
auto sess = GetSess(ctx); auto sess = GetSess(ctx);
void *data = sess->CallRemote( void *data = sess->CallRemote(
RPCCode::kDevAllocData, ctx, size, alignment); RPCCode::kDevAllocData, ctx, nbytes, alignment, type_hint);
RemoteSpace* space = new RemoteSpace(); RemoteSpace* space = new RemoteSpace();
space->data = data; space->data = data;
space->sess = std::move(sess); space->sess = std::move(sess);
......
...@@ -887,9 +887,11 @@ void RPCDevGetAttr(TVMArgs args, TVMRetValue *rv) { ...@@ -887,9 +887,11 @@ void RPCDevGetAttr(TVMArgs args, TVMRetValue *rv) {
void RPCDevAllocData(TVMArgs args, TVMRetValue *rv) { void RPCDevAllocData(TVMArgs args, TVMRetValue *rv) {
TVMContext ctx = args[0]; TVMContext ctx = args[0];
uint64_t size = args[1]; uint64_t nbytes = args[1];
uint64_t alignment = args[2]; uint64_t alignment = args[2];
void* data = DeviceAPI::Get(ctx)->AllocDataSpace(ctx, size, alignment); TVMType type_hint = args[3];
void* data = DeviceAPI::Get(ctx)->AllocDataSpace(
ctx, nbytes, alignment, type_hint);
*rv = data; *rv = data;
} }
......
...@@ -23,28 +23,29 @@ class WorkspacePool::Pool { ...@@ -23,28 +23,29 @@ class WorkspacePool::Pool {
allocated_.push_back(e); allocated_.push_back(e);
} }
// allocate from pool // allocate from pool
void* Alloc(TVMContext ctx, DeviceAPI* device, size_t size) { void* Alloc(TVMContext ctx, DeviceAPI* device, size_t nbytes) {
// Allocate align to page. // Allocate align to page.
size = (size + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize; nbytes = (nbytes + (kWorkspacePageSize - 1)) / kWorkspacePageSize * kWorkspacePageSize;
if (size == 0) size = kWorkspacePageSize; if (nbytes == 0) nbytes = kWorkspacePageSize;
Entry e; Entry e;
TVMType type = {.code = kDLUInt, .bits = 8, .lanes = 1};
if (free_list_.size() == 2) { if (free_list_.size() == 2) {
e = free_list_.back(); e = free_list_.back();
free_list_.pop_back(); free_list_.pop_back();
if (e.size < size) { if (e.size < nbytes) {
// resize the page // resize the page
device->FreeDataSpace(ctx, e.data); device->FreeDataSpace(ctx, e.data);
e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment); e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
e.size = size; e.size = nbytes;
} }
} else if (free_list_.size() == 1) { } else if (free_list_.size() == 1) {
e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment); e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
e.size = size; e.size = nbytes;
} else { } else {
if (free_list_.back().size >= size) { if (free_list_.back().size >= nbytes) {
// find smallest fit // find smallest fit
auto it = free_list_.end() - 2; auto it = free_list_.end() - 2;
for (; it->size >= size; --it) {} for (; it->size >= nbytes; --it) {}
e = *(it + 1); e = *(it + 1);
free_list_.erase(it + 1); free_list_.erase(it + 1);
} else { } else {
...@@ -52,8 +53,8 @@ class WorkspacePool::Pool { ...@@ -52,8 +53,8 @@ class WorkspacePool::Pool {
e = free_list_.back(); e = free_list_.back();
free_list_.pop_back(); free_list_.pop_back();
device->FreeDataSpace(ctx, e.data); device->FreeDataSpace(ctx, e.data);
e.data = device->AllocDataSpace(ctx, size, kTempAllocaAlignment); e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
e.size = size; e.size = nbytes;
} }
} }
allocated_.push_back(e); allocated_.push_back(e);
......
...@@ -397,6 +397,45 @@ Stage& Stage::double_buffer() { ...@@ -397,6 +397,45 @@ Stage& Stage::double_buffer() {
return *this; return *this;
} }
Stage& Stage::opengl() {
CHECK(!is_scheduled()) << "Must be a fresh schedule";
StageNode *self = operator->();
auto all_iter_vars = self->all_iter_vars; // curr version of all_iter_vars
CHECK(!all_iter_vars.empty()) << "At least one iter var";
// Fuse all data parallel dimensions to 1.
IterVar fused = all_iter_vars[0];
for (size_t i = 1; i != all_iter_vars.size(); ++i) {
auto iter_var = all_iter_vars[i];
switch (iter_var->iter_type) {
case IterVarType::kDataPar: {
fuse(fused, all_iter_vars[i], &fused);
break;
}
case IterVarType::kThreadIndex: {
LOG(ERROR) << "A fresh schedule shouldn't have thread index iter var";
break;
}
case IterVarType::kCommReduce:
case IterVarType::kOrdered:
case IterVarType::kOpaque: {
break;
}
default: {
LOG(ERROR) << "Invalid iter var type "
<< IterVarType2String(iter_var->iter_type);
break;
}
}
}
// Bind the only dimension to threadIdx.x.
bind(fused, thread_axis(Range(nullptr), "threadIdx.x"));
return *this;
}
Stage CopyStage(const Stage& s) { Stage CopyStage(const Stage& s) {
std::shared_ptr<StageNode> n = std::shared_ptr<StageNode> n =
std::make_shared<StageNode>(*s.operator->()); std::make_shared<StageNode>(*s.operator->());
......
...@@ -20,3 +20,6 @@ RUN bash /install/ubuntu_install_java.sh ...@@ -20,3 +20,6 @@ RUN bash /install/ubuntu_install_java.sh
COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh COPY install/ubuntu_install_llvm.sh /install/ubuntu_install_llvm.sh
RUN bash /install/ubuntu_install_llvm.sh RUN bash /install/ubuntu_install_llvm.sh
COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh
RUN bash /install/ubuntu_install_opengl.sh
...@@ -37,6 +37,9 @@ RUN bash /install/ubuntu_install_nodejs.sh ...@@ -37,6 +37,9 @@ RUN bash /install/ubuntu_install_nodejs.sh
COPY install/ubuntu_install_rocm.sh /install/ubuntu_install_rocm.sh COPY install/ubuntu_install_rocm.sh /install/ubuntu_install_rocm.sh
RUN bash /install/ubuntu_install_rocm.sh RUN bash /install/ubuntu_install_rocm.sh
COPY install/ubuntu_install_opengl.sh /install/ubuntu_install_opengl.sh
RUN bash /install/ubuntu_install_opengl.sh
# Enable doxygen for c++ doc build # Enable doxygen for c++ doc build
RUN apt-get install -y doxygen graphviz RUN apt-get install -y doxygen graphviz
......
apt-get update --fix-missing
apt-get install -y --no-install-recommends --force-yes \
libgl1-mesa-dev libglfw3-dev
\ No newline at end of file
...@@ -7,7 +7,8 @@ def enabled_ctx_list(): ...@@ -7,7 +7,8 @@ def enabled_ctx_list():
('cl', tvm.opencl(0)), ('cl', tvm.opencl(0)),
('metal', tvm.metal(0)), ('metal', tvm.metal(0)),
('rocm', tvm.rocm(0)), ('rocm', tvm.rocm(0)),
('vpi', tvm.vpi(0))] ('vpi', tvm.vpi(0)),
('opengl', tvm.opengl(0))]
for k, v in ctx_list: for k, v in ctx_list:
assert tvm.context(k, 0) == v assert tvm.context(k, 0) == v
ctx_list = [x[1] for x in ctx_list if x[1].exist] ctx_list = [x[1] for x in ctx_list if x[1].exist]
...@@ -19,7 +20,8 @@ print("Testing using contexts:", ENABLED_CTX_LIST) ...@@ -19,7 +20,8 @@ print("Testing using contexts:", ENABLED_CTX_LIST)
def test_nd_create(): def test_nd_create():
for ctx in ENABLED_CTX_LIST: for ctx in ENABLED_CTX_LIST:
for dtype in ["float32", "int8", "uint16"]: for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32",
"float32"]:
x = np.random.randint(0, 10, size=(3, 4)) x = np.random.randint(0, 10, size=(3, 4))
x = np.array(x, dtype=dtype) x = np.array(x, dtype=dtype)
y = tvm.nd.array(x, ctx=ctx) y = tvm.nd.array(x, ctx=ctx)
......
...@@ -17,3 +17,5 @@ TVM_FFI=cython python -m nose -v tests/python/integration || exit -1 ...@@ -17,3 +17,5 @@ TVM_FFI=cython python -m nose -v tests/python/integration || exit -1
TVM_FFI=ctypes python3 -m nose -v tests/python/integration || exit -1 TVM_FFI=ctypes python3 -m nose -v tests/python/integration || exit -1
TVM_FFI=cython python -m nose -v tests/python/contrib || exit -1 TVM_FFI=cython python -m nose -v tests/python/contrib || exit -1
TVM_FFI=ctypes python3 -m nose -v tests/python/contrib || exit -1 TVM_FFI=ctypes python3 -m nose -v tests/python/contrib || exit -1
TVM_FFI=cython python -m nose -v tests/webgl || exit -1
TVM_FFI=ctypes python3 -m nose -v tests/webgl || exit -1
## Test cases for the WebGL backend
Any test case with name `test_local_...` tests the C++ OpenGL backend on the
local OS, which can be executed automatically.
Any test case with name `test_remote_...` tests the WebGL backend within the
browser, which must be run manually. See instruction within the test.
import tvm
import numpy as np
def test_local_gemm():
if not tvm.module.enabled("opengl"):
return
if not tvm.module.enabled("llvm"):
return
nn = 2
n = tvm.var('n')
n = tvm.convert(nn)
m = n
l = n
A = tvm.placeholder((n, l), name='A', dtype='int32')
B = tvm.placeholder((m, l), name='B', dtype='int32')
k = tvm.reduce_axis((0, l), name='k')
C = tvm.compute((n, m), lambda ii, jj: tvm.sum(A[ii, k] * B[jj, k], axis=k),
name='CC')
s = tvm.create_schedule(C.op)
s[C].opengl()
print(tvm.lower(s, [A, B, C], simple_mode=True))
f = tvm.build(s, [A, B, C], "opengl", name="gemm")
print("------opengl code------")
print(f.imported_modules[0].get_source(fmt="gl"))
ctx = tvm.opengl()
n, m, l = nn, nn, nn
a_np = np.random.uniform(low=0, high=10, size=(n, l)).astype(A.dtype)
b_np = np.random.uniform(low=0, high=10, size=(m, l)).astype(B.dtype)
a = tvm.nd.array(a_np, ctx)
b = tvm.nd.array(b_np, ctx)
c = tvm.nd.array(np.zeros((n, m), dtype=C.dtype), ctx)
f(a, b, c)
np.testing.assert_allclose(c.asnumpy(), np.dot(a_np, b_np.T))
if __name__ == "__main__":
test_local_gemm()
import numpy as np
import tvm
from tvm.contrib import rpc, util, emscripten
def test_local_save_load():
if not tvm.module.enabled("opengl"):
return
if not tvm.module.enabled("llvm"):
return
n = tvm.var("n")
A = tvm.placeholder((n,), name='A', dtype='int32')
B = tvm.placeholder((n,), name='B', dtype='int32')
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = tvm.create_schedule(C.op)
s[C].opengl()
f = tvm.build(s, [A, B, C], "opengl", target_host="llvm", name="myadd")
ctx = tvm.opengl(0)
n = 10
a = tvm.nd.array(np.random.uniform(high=10, size=(n)).astype(A.dtype), ctx)
b = tvm.nd.array(np.random.uniform(high=10, size=(n)).astype(B.dtype), ctx)
c = tvm.nd.array(np.zeros((n), dtype=C.dtype), ctx)
f(a, b, c)
temp = util.tempdir()
path_so = temp.relpath("myadd.so")
f.export_library(path_so)
f1 = tvm.module.load(path_so)
f1(a, b, c)
np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
if __name__ == "__main__":
test_local_save_load()
"""
The following instruction is based on web/README.md.
Setup an RPC server:
$ python -m tvm.exec.rpc_proxy --example-rpc=1
Go to http://localhost:9190 in browser.
Click "Connect To Proxy".
Run this test script:
$ python tests/webgl/test_remote_save_load.py
"""
import numpy as np
import tvm
from tvm.contrib import rpc, util, emscripten
proxy_host = "localhost"
proxy_port = 9090
def try_remote_save_load():
if not tvm.module.enabled("rpc"):
return
if not tvm.module.enabled("opengl"):
return
if not tvm.module.enabled("llvm"):
return
# Build the module.
n = tvm.var("n")
A = tvm.placeholder((n,), name='A')
B = tvm.placeholder((n,), name='B')
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
s = tvm.create_schedule(C.op)
s[C].opengl()
target_host = "llvm -target=asmjs-unknown-emscripten -system-lib"
f = tvm.build(s, [A, B, C], "opengl", target_host=target_host, name="myadd")
remote = rpc.connect(proxy_host, proxy_port, key="js")
temp = util.tempdir()
ctx = remote.opengl(0)
path_obj = temp.relpath("myadd.bc")
path_dso = temp.relpath("myadd.js")
path_gl = temp.relpath("myadd.gl")
path_json = temp.relpath("myadd.tvm_meta.json")
f.save(path_obj)
emscripten.create_js(path_dso, path_obj, side_module=True)
f.imported_modules[0].save(path_gl)
remote.upload(path_dso, "myadd.dso")
remote.upload(path_gl)
remote.upload(path_json)
remote.download("myadd.dso")
remote.download("myadd.gl")
remote.download("myadd.tvm_meta.json")
print('Loading myadd.dso')
fhost = remote.load_module("myadd.dso")
print('Loading myadd.gl')
fdev = remote.load_module("myadd.gl")
print('import_module')
fhost.import_module(fdev)
print('running...')
a = tvm.nd.array(np.random.uniform(size=16).astype(A.dtype), ctx)
b = tvm.nd.array(np.zeros(16, dtype=A.dtype), ctx)
c = tvm.nd.array(np.zeros(16, dtype=C.dtype), ctx)
fhost(a, b, c)
np.testing.assert_allclose(c.asnumpy(), a.asnumpy() + b.asnumpy())
if __name__ == "__main__":
try_remote_save_load()
...@@ -36,5 +36,9 @@ ...@@ -36,5 +36,9 @@
<button onclick="connect_rpc()">Connect To Proxy</button> <button onclick="connect_rpc()">Connect To Proxy</button>
<button onclick="clear_log()">Clear Log</button> <button onclick="clear_log()">Clear Log</button>
<div id="log"></div> <div id="log"></div>
<canvas id="canvas"></canvas>
<script>
Module["canvas"] = document.getElementById("canvas");
</script>
</body> </body>
</html> </html>
...@@ -686,7 +686,8 @@ var tvm_runtime = tvm_runtime || {}; ...@@ -686,7 +686,8 @@ var tvm_runtime = tvm_runtime || {};
2 : "gpu", 2 : "gpu",
4 : "opencl", 4 : "opencl",
8 : "metal", 8 : "metal",
9 : "vpi" 9 : "vpi",
11 : "opengl",
}; };
var CTX_STR2MASK = { var CTX_STR2MASK = {
"cpu": 1, "cpu": 1,
...@@ -695,7 +696,8 @@ var tvm_runtime = tvm_runtime || {}; ...@@ -695,7 +696,8 @@ var tvm_runtime = tvm_runtime || {};
"cl": 4, "cl": 4,
"opencl": 4, "opencl": 4,
"metal": 8, "metal": 8,
"vpi": 9 "vpi": 9,
"opengl": 11,
}; };
TVMContext.prototype = { TVMContext.prototype = {
toString : function() { toString : function() {
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
#include "../src/runtime/rpc/rpc_event_impl.cc" #include "../src/runtime/rpc/rpc_event_impl.cc"
#include "../src/runtime/rpc/rpc_server_env.cc" #include "../src/runtime/rpc/rpc_server_env.cc"
#include "../src/runtime/graph/graph_runtime.cc" #include "../src/runtime/graph/graph_runtime.cc"
#include "../src/runtime/opengl/opengl_device_api.cc"
#include "../src/runtime/opengl/opengl_module.cc"
namespace tvm { namespace tvm {
namespace contrib { namespace contrib {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment