Commit 80f9ca00 by Zhixun Tan Committed by Tianqi Chen

[OpenGL] Let OpenGL texture always be 1024 x nrows. (#817)

* OpenGL texture is always 1024 x nrows.

* Address review comments.
parent 56c4eeb3
...@@ -154,7 +154,8 @@ void CodeGenOpenGL::BindThreadIndex(const IterVar& iv) { ...@@ -154,7 +154,8 @@ void CodeGenOpenGL::BindThreadIndex(const IterVar& iv) {
// Declare threadIdx local variable. // Declare threadIdx local variable.
this->PrintIndent(); this->PrintIndent();
this->stream << "ivec2 threadIdx = ivec2(gl_FragCoord.xy);\n"; this->stream << "ivec2 threadIdx = ivec2(" << runtime::kTextureRowSize
<< " * int(gl_FragCoord.y) + int(gl_FragCoord.x), 0);\n";
// Return directly if threadIdx.x >= thread_extent. // Return directly if threadIdx.x >= thread_extent.
this->PrintIndent(); this->PrintIndent();
...@@ -192,12 +193,14 @@ void CodeGenOpenGL::VisitStmt_(const Store* op) { ...@@ -192,12 +193,14 @@ void CodeGenOpenGL::VisitStmt_(const Store* op) {
} }
} }
// texelFetch(tex, ivec2(idx, 0), 0).r // texelFetch(tex, ivec2(idx & kTextureRowMask, idx >> kTextureRowBits), 0).r
std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) { std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) {
std::ostringstream os; std::ostringstream os;
os << "texelFetch(" << GetVarID(buffer) << ", ivec2("; os << "texelFetch(" << GetVarID(buffer) << ", ivec2(int(";
PrintExpr(index, os); PrintExpr(index, os);
os << ", 0), 0).r"; os << ") & " << runtime::kTextureRowMask << ", int(";
PrintExpr(index, os);
os << ") >> " << runtime::kTextureRowBits << "), 0).r";
return os.str(); return os.str();
} }
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
* \file opengl_device_api.cc * \file opengl_device_api.cc
*/ */
#include "./opengl_common.h" #include "./opengl_common.h"
#include "./opengl_module.h"
#if TVM_OPENGL_RUNTIME #if TVM_OPENGL_RUNTIME
...@@ -347,8 +348,9 @@ Texture OpenGLWorkspace::CreateTexture(TVMType type, size_t nbytes) { ...@@ -347,8 +348,9 @@ Texture OpenGLWorkspace::CreateTexture(TVMType type, size_t nbytes) {
// Use glTexImage2D with nullptr data to specify GPU data storage. // Use glTexImage2D with nullptr data to specify GPU data storage.
auto texture_format = GetTextureFormat(type); auto texture_format = GetTextureFormat(type);
auto width = static_cast<GLsizei>(nbytes / (type.bits / 8)); auto nelems = static_cast<GLsizei>(nbytes / (type.bits / 8));
auto height = GLsizei(1); auto height = (nelems + kTextureRowSize - 1) / kTextureRowSize;
auto width = (height == 1) ? nelems : kTextureRowSize;
OPENGL_CALL(gl->TexImage2D(GL_TEXTURE_2D, /*level=*/0, OPENGL_CALL(gl->TexImage2D(GL_TEXTURE_2D, /*level=*/0,
texture_format.internal_format, texture_format.internal_format,
width, height, /*border=*/0, width, height, /*border=*/0,
...@@ -402,6 +404,51 @@ Program OpenGLWorkspace::CreateProgram(GLuint fragment_shader) { ...@@ -402,6 +404,51 @@ Program OpenGLWorkspace::CreateProgram(GLuint fragment_shader) {
return Program(this, program); return Program(this, program);
} }
/*!
* \brief Visit a 1D range of an OpenGL texture-backed TVM array.
* When getting/setting a sub image of a texture, we can only specify a 2D
* block (xbeg, ybeg, width, height).
* Since we are storing all TVM arrays using (kTextureRowSize x nrows) 2D
* textures (row-major), a range in an array does not necessarily map to a 2D
* block.
* This function split a 1D range into 3 2D blocks.
* \param beg The index of the first element in the 1D range.
* \param end The index of the last + 1 element in the 1D range.
* \param on_2d_block Callback for each 2D block. Must have interface
* void(GLint xbeg, GLint ybeg, GLsizei width, GLsizei height).
*/
template <typename F>
static void Visit1DRange(GLint beg, GLint end, F&& on_2d_block) {
CHECK_LE(beg, end) << "Invalid range.";
// xbeg kTextureRowSize
// ybeg ....************
// ****************
// ****************
// ylast *********.......
// xlast
GLint xbeg = beg % kTextureRowSize;
GLint ybeg = beg / kTextureRowSize;
GLint xlast = (end - 1) % kTextureRowSize;
GLint ylast = (end - 1) / kTextureRowSize;
if (ybeg == ylast) { // Only one row.
on_2d_block(xbeg, ybeg, end - beg, 1);
return;
}
// First row.
on_2d_block(xbeg, ybeg, kTextureRowSize - xbeg, 1);
// Middle block.
if (ylast - ybeg > 1) {
on_2d_block(0, ybeg + 1, kTextureRowSize, ylast - ybeg - 1);
}
// Last row.
on_2d_block(0, ylast, xlast + 1, 1);
}
void OpenGLWorkspace::PutTextureData(Texture *texture, void OpenGLWorkspace::PutTextureData(Texture *texture,
GLint begin, GLint begin,
GLsizei nelems, GLsizei nelems,
...@@ -409,12 +456,17 @@ void OpenGLWorkspace::PutTextureData(Texture *texture, ...@@ -409,12 +456,17 @@ void OpenGLWorkspace::PutTextureData(Texture *texture,
// Bind to temporary unit. // Bind to temporary unit.
BindTextureUnit(NumTextureUnits() - 1, texture->texture()); BindTextureUnit(NumTextureUnits() - 1, texture->texture());
// Similar to cudaMemcpy. Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg,
OPENGL_CALL(gl->TexSubImage2D(GL_TEXTURE_2D, /*level=*/0, GLsizei width, GLsizei height) {
/*xoffset=*/begin, /*yoffset=*/0, auto offset = (ybeg * kTextureRowSize + xbeg - begin) * texture->elemsz();
/*width=*/nelems, /*height=*/1, const GLvoid* ptr = static_cast<const char*>(data) + offset;
texture->format_.format, texture->format_.type,
data)); // Similar to cudaMemcpy.
OPENGL_CALL(gl->TexSubImage2D(GL_TEXTURE_2D, /*level=*/0,
xbeg, ybeg, width, height,
texture->format_.format,
texture->format_.type, ptr));
});
} }
void OpenGLWorkspace::GetTextureData(const Texture *texture, void OpenGLWorkspace::GetTextureData(const Texture *texture,
...@@ -453,18 +505,29 @@ void OpenGLWorkspace::GetTextureData(const Texture *texture, ...@@ -453,18 +505,29 @@ void OpenGLWorkspace::GetTextureData(const Texture *texture,
auto nchannels = 4; auto nchannels = 4;
auto padded_data_size = nchannels * nelems * elemsz; auto padded_data_size = nchannels * nelems * elemsz;
auto padded_data = std::unique_ptr<char[]>(new char[padded_data_size]); auto padded_data = std::unique_ptr<char[]>(new char[padded_data_size]);
OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems, Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg,
/*height=*/1, GL_RGBA, GL_FLOAT, GLsizei width, GLsizei height) {
padded_data.get())); auto data_offset = (ybeg * kTextureRowSize + xbeg - begin) * elemsz;
auto padded_data_offset = data_offset * nchannels;
OPENGL_CALL(gl->ReadPixels(xbeg, ybeg, width, height,
GL_RGBA, GL_FLOAT,
padded_data.get() + padded_data_offset));
});
for (GLsizei i = 0; i != nelems; ++i) { for (GLsizei i = 0; i != nelems; ++i) {
auto dst = reinterpret_cast<char *>(data) + i * elemsz; auto dst = reinterpret_cast<char *>(data) + i * elemsz;
auto src = padded_data.get() + nchannels * i * elemsz; auto src = padded_data.get() + nchannels * i * elemsz;
std::memcpy(dst, src, elemsz); std::memcpy(dst, src, elemsz);
} }
#else #else
OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems, Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg,
/*height=*/1, texture->format_.format, GLsizei width, GLsizei height) {
texture->format_.type, data)); auto offset = (ybeg * kTextureRowSize + xbeg - begin) * texture->elemsz();
GLvoid* ptr = static_cast<char*>(data) + offset;
OPENGL_CALL(gl->ReadPixels(xbeg, ybeg, width, height,
texture->format_.format, texture->format_.type,
ptr));
});
#endif #endif
OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer)); OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer));
......
...@@ -18,6 +18,19 @@ namespace tvm { ...@@ -18,6 +18,19 @@ namespace tvm {
namespace runtime { namespace runtime {
/*! /*!
* \brief The fixed row size of all OpenGL textures in TVM.
*
* OpenGL has texture size limit on each dimension. Suppose we have a limit of
* 1024, then we can have a 2D texture of size (2^10 x 2^10) but not (2^20 x 1).
* This means we don't want to just use (n x 1) 2D textures for all arrays,
* because that would limit our array size to be 1024. Here we use (1024 x m)
* 2D textures. Then we can have arrays of size up to 2^20.
*/
static constexpr int kTextureRowBits = 10;
static constexpr int kTextureRowSize = 1 << kTextureRowBits;
static constexpr int kTextureRowMask = kTextureRowSize - 1;
/*!
* \brief Determines how we supply arguments. * \brief Determines how we supply arguments.
*/ */
enum class OpenGLArgKind { enum class OpenGLArgKind {
......
...@@ -7,7 +7,7 @@ def test_local_gemm(): ...@@ -7,7 +7,7 @@ def test_local_gemm():
if not tvm.module.enabled("llvm"): if not tvm.module.enabled("llvm"):
return return
nn = 2 nn = 1024
n = tvm.var('n') n = tvm.var('n')
n = tvm.convert(nn) n = tvm.convert(nn)
m = n m = n
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment