Commit 80f9ca00 by Zhixun Tan Committed by Tianqi Chen

[OpenGL] Let OpenGL texture always be 1024 x nrows. (#817)

* OpenGL texture is always 1024 x nrows.

* Address review comments.
parent 56c4eeb3
......@@ -154,7 +154,8 @@ void CodeGenOpenGL::BindThreadIndex(const IterVar& iv) {
// Declare threadIdx local variable.
this->PrintIndent();
this->stream << "ivec2 threadIdx = ivec2(gl_FragCoord.xy);\n";
this->stream << "ivec2 threadIdx = ivec2(" << runtime::kTextureRowSize
<< " * int(gl_FragCoord.y) + int(gl_FragCoord.x), 0);\n";
// Return directly if threadIdx.x >= thread_extent.
this->PrintIndent();
......@@ -192,12 +193,14 @@ void CodeGenOpenGL::VisitStmt_(const Store* op) {
}
}
// texelFetch(tex, ivec2(idx, 0), 0).r
// texelFetch(tex, ivec2(idx & kTextureRowMask, idx >> kTextureRowBits), 0).r
std::string CodeGenOpenGL::TexelFetch(const Variable* buffer, Expr index) {
std::ostringstream os;
os << "texelFetch(" << GetVarID(buffer) << ", ivec2(";
os << "texelFetch(" << GetVarID(buffer) << ", ivec2(int(";
PrintExpr(index, os);
os << ", 0), 0).r";
os << ") & " << runtime::kTextureRowMask << ", int(";
PrintExpr(index, os);
os << ") >> " << runtime::kTextureRowBits << "), 0).r";
return os.str();
}
......
......@@ -3,6 +3,7 @@
* \file opengl_device_api.cc
*/
#include "./opengl_common.h"
#include "./opengl_module.h"
#if TVM_OPENGL_RUNTIME
......@@ -347,8 +348,9 @@ Texture OpenGLWorkspace::CreateTexture(TVMType type, size_t nbytes) {
// Use glTexImage2D with nullptr data to specify GPU data storage.
auto texture_format = GetTextureFormat(type);
auto width = static_cast<GLsizei>(nbytes / (type.bits / 8));
auto height = GLsizei(1);
auto nelems = static_cast<GLsizei>(nbytes / (type.bits / 8));
auto height = (nelems + kTextureRowSize - 1) / kTextureRowSize;
auto width = (height == 1) ? nelems : kTextureRowSize;
OPENGL_CALL(gl->TexImage2D(GL_TEXTURE_2D, /*level=*/0,
texture_format.internal_format,
width, height, /*border=*/0,
......@@ -402,6 +404,51 @@ Program OpenGLWorkspace::CreateProgram(GLuint fragment_shader) {
return Program(this, program);
}
/*!
* \brief Visit a 1D range of an OpenGL texture-backed TVM array.
* When getting/setting a sub image of a texture, we can only specify a 2D
* block (xbeg, ybeg, width, height).
* Since we are storing all TVM arrays using (kTextureRowSize x nrows) 2D
* textures (row-major), a range in an array does not necessarily map to a 2D
* block.
* This function split a 1D range into 3 2D blocks.
* \param beg The index of the first element in the 1D range.
* \param end The index of the last + 1 element in the 1D range.
* \param on_2d_block Callback for each 2D block. Must have interface
* void(GLint xbeg, GLint ybeg, GLsizei width, GLsizei height).
*/
template <typename F>
static void Visit1DRange(GLint beg, GLint end, F&& on_2d_block) {
CHECK_LE(beg, end) << "Invalid range.";
// xbeg kTextureRowSize
// ybeg ....************
// ****************
// ****************
// ylast *********.......
// xlast
GLint xbeg = beg % kTextureRowSize;
GLint ybeg = beg / kTextureRowSize;
GLint xlast = (end - 1) % kTextureRowSize;
GLint ylast = (end - 1) / kTextureRowSize;
if (ybeg == ylast) { // Only one row.
on_2d_block(xbeg, ybeg, end - beg, 1);
return;
}
// First row.
on_2d_block(xbeg, ybeg, kTextureRowSize - xbeg, 1);
// Middle block.
if (ylast - ybeg > 1) {
on_2d_block(0, ybeg + 1, kTextureRowSize, ylast - ybeg - 1);
}
// Last row.
on_2d_block(0, ylast, xlast + 1, 1);
}
void OpenGLWorkspace::PutTextureData(Texture *texture,
GLint begin,
GLsizei nelems,
......@@ -409,12 +456,17 @@ void OpenGLWorkspace::PutTextureData(Texture *texture,
// Bind to temporary unit.
BindTextureUnit(NumTextureUnits() - 1, texture->texture());
Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg,
GLsizei width, GLsizei height) {
auto offset = (ybeg * kTextureRowSize + xbeg - begin) * texture->elemsz();
const GLvoid* ptr = static_cast<const char*>(data) + offset;
// Similar to cudaMemcpy.
OPENGL_CALL(gl->TexSubImage2D(GL_TEXTURE_2D, /*level=*/0,
/*xoffset=*/begin, /*yoffset=*/0,
/*width=*/nelems, /*height=*/1,
texture->format_.format, texture->format_.type,
data));
xbeg, ybeg, width, height,
texture->format_.format,
texture->format_.type, ptr));
});
}
void OpenGLWorkspace::GetTextureData(const Texture *texture,
......@@ -453,18 +505,29 @@ void OpenGLWorkspace::GetTextureData(const Texture *texture,
auto nchannels = 4;
auto padded_data_size = nchannels * nelems * elemsz;
auto padded_data = std::unique_ptr<char[]>(new char[padded_data_size]);
OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems,
/*height=*/1, GL_RGBA, GL_FLOAT,
padded_data.get()));
Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg,
GLsizei width, GLsizei height) {
auto data_offset = (ybeg * kTextureRowSize + xbeg - begin) * elemsz;
auto padded_data_offset = data_offset * nchannels;
OPENGL_CALL(gl->ReadPixels(xbeg, ybeg, width, height,
GL_RGBA, GL_FLOAT,
padded_data.get() + padded_data_offset));
});
for (GLsizei i = 0; i != nelems; ++i) {
auto dst = reinterpret_cast<char *>(data) + i * elemsz;
auto src = padded_data.get() + nchannels * i * elemsz;
std::memcpy(dst, src, elemsz);
}
#else
OPENGL_CALL(gl->ReadPixels(/*x=*/begin, /*y=*/0, /*width=*/nelems,
/*height=*/1, texture->format_.format,
texture->format_.type, data));
Visit1DRange(begin, begin + nelems, [&](GLint xbeg, GLint ybeg,
GLsizei width, GLsizei height) {
auto offset = (ybeg * kTextureRowSize + xbeg - begin) * texture->elemsz();
GLvoid* ptr = static_cast<char*>(data) + offset;
OPENGL_CALL(gl->ReadPixels(xbeg, ybeg, width, height,
texture->format_.format, texture->format_.type,
ptr));
});
#endif
OPENGL_CALL(gl->DeleteFramebuffers(1, &frame_buffer));
......
......@@ -18,6 +18,19 @@ namespace tvm {
namespace runtime {
/*!
* \brief The fixed row size of all OpenGL textures in TVM.
*
* OpenGL has texture size limit on each dimension. Suppose we have a limit of
* 1024, then we can have a 2D texture of size (2^10 x 2^10) but not (2^20 x 1).
* This means we don't want to just use (n x 1) 2D textures for all arrays,
* because that would limit our array size to be 1024. Here we use (1024 x m)
* 2D textures. Then we can have arrays of size up to 2^20.
*/
static constexpr int kTextureRowBits = 10;
static constexpr int kTextureRowSize = 1 << kTextureRowBits;
static constexpr int kTextureRowMask = kTextureRowSize - 1;
/*!
* \brief Determines how we supply arguments.
*/
enum class OpenGLArgKind {
......
......@@ -7,7 +7,7 @@ def test_local_gemm():
if not tvm.module.enabled("llvm"):
return
nn = 2
nn = 1024
n = tvm.var('n')
n = tvm.convert(nn)
m = n
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment