Commit 396bd562 by Leyuan Wang, committed by Tianqi Chen

[TOPI] Conv2d Added and Optimized for Intel HD Graphics (#1290)

parent fb88b74e
@@ -157,8 +157,8 @@ EXPORT Target rasp(const std::vector<std::string>& options =
 EXPORT Target mali(const std::vector<std::string>& options =
                    std::vector<std::string>());
-/*! \return A target for Intel GPU */
-EXPORT Target intel_gpu(const std::vector<std::string>& options =
+/*! \return A target for Intel Graphics */
+EXPORT Target intel_graphics(const std::vector<std::string>& options =
                          std::vector<std::string>());
 /*! \return A target for stackvm */
......
@@ -76,7 +76,7 @@ class Target(NodeBase):
     - :any:`tvm.target.cuda` create CUDA target
     - :any:`tvm.target.rocm` create ROCM target
     - :any:`tvm.target.mali` create Mali target
-    - :any:`tvm.target.intel_gpu` create Intel GPU target
+    - :any:`tvm.target.intel_graphics` create Intel Graphics target
     """
     def __init__(self, handle):
         super(Target, self).__init__(handle)
@@ -402,15 +402,15 @@ def mali(options=None):
     return _api_internal._TargetCreate("opencl", *opts)


-def intel_gpu(options=None):
-    """Returns an Intel GPU target.
+def intel_graphics(options=None):
+    """Returns an Intel Graphics target.

     Parameters
     ----------
     options : str or list of str
         Additional options
     """
-    opts = ["-device=intel_gpu"]
+    opts = ["-device=intel_graphics"]
     opts = _merge_opts(opts, options)
     return _api_internal._TargetCreate("opencl", *opts)
......
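For reference, a minimal usage sketch of the renamed Python helper, mirroring the test changes later in this commit; it assumes a TVM build with this patch applied:

    import tvm

    # both construction paths yield the same OpenCL target
    target = tvm.target.intel_graphics()
    assert target.device_name == "intel_graphics"
    target = tvm.target.create("opencl -device=intel_graphics")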
@@ -76,7 +76,7 @@ Target CreateTarget(const std::string& target_name,
       t->keys_array.push_back(ir::StringImm::make("rocm"));
     t->keys_array.push_back(ir::StringImm::make("gpu"));
     t->max_num_threads = 256;
-    if (t->device_name == "intel_gpu") {
+    if (t->device_name == "intel_graphics") {
       t->thread_warp_size = 16;
     }
   } else if (target_name == "metal" || target_name == "vulkan") {
@@ -274,9 +274,9 @@ Target mali(const std::vector<std::string>& options) {
   }));
 }

-Target intel_gpu(const std::vector<std::string>& options) {
+Target intel_graphics(const std::vector<std::string>& options) {
   return CreateTarget("opencl", MergeOptions(options, {
-    "-device=intel_gpu"
+    "-device=intel_graphics"
   }));
 }
......
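A hedged sanity check of the defaulting above: thread_warp_size and max_num_threads are TargetNode fields, so reading them from Python as node attributes should work, though that access path is an assumption here rather than something this commit exercises:

    import tvm

    target = tvm.target.create("opencl -device=intel_graphics")
    assert target.thread_warp_size == 16  # set by the intel_graphics branch above
    assert target.max_num_threads == 256  # generic OpenCL GPU default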
@@ -159,7 +159,7 @@ void CodeGenOpenCL::PrintStorageSync(const Call* op) {
   const std::string& sync = op->args[0].as<StringImm>()->value;
   if (sync == "warp") {
     this->PrintIndent();
-    this->stream << "sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n";
+    this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n";
   } else if (sync == "shared") {
     this->PrintIndent();
     this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n";
......
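The barrier that PrintStorageSync emits can be observed in the generated kernel source. The sketch below is an assumption-laden illustration, not part of the commit: it needs an OpenCL-enabled TVM build, and it stages data through shared memory so the lowered code contains a "shared" storage sync:

    import tvm

    A = tvm.placeholder((1024,), name="A")
    B = tvm.compute((1024,), lambda i: A[i] + 1.0, name="B")
    s = tvm.create_schedule(B.op)
    bx, tx = s[B].split(B.op.axis[0], factor=64)
    s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
    # stage A through shared memory so a "shared" sync must be inserted
    AA = s.cache_read(A, "shared", [B])
    s[AA].compute_at(s[B], bx)
    fadd = tvm.build(s, [A, B], "opencl", name="add_one")
    # the printed kernel should contain barrier(CLK_LOCAL_MEM_FENCE)
    print(fadd.imported_modules[0].get_source())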
@@ -40,7 +40,7 @@ void OpenCLWorkspace::GetAttr(
     }
     case kWarpSize: {
       /* TODO: the warp size of an OpenCL device is not always 1,
-         e.g. Intel GPU has a sub group concept which contains 8 - 32 work items,
+         e.g. Intel Graphics has a sub group concept which contains 8 - 32 work items,
          corresponding to the number of SIMD entries the hardware configures.
          We need to figure out a way to query this information from the hardware.
       */
......
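Until such a query exists, the runtime keeps reporting 1 here. A quick way to observe the reported value from Python, assuming an OpenCL device is available:

    import tvm

    ctx = tvm.opencl(0)
    if ctx.exist:
        print(ctx.warp_size)  # currently 1 for OpenCL, per the TODO above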
@@ -34,7 +34,7 @@ def test_exp():
         np.testing.assert_allclose(
             b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)

-    check_device("opencl -device=intel_gpu")
+    check_device("opencl -device=intel_graphics")
     check_device("cuda", "llvm")
     check_device("vulkan")
......
@@ -47,7 +47,7 @@ def test_target_string_parse():
     assert str(target) == str(tvm.target.cuda("-libs=cublas,cudnn"))

-    assert tvm.target.intel_gpu().device_name == "intel_gpu"
+    assert tvm.target.intel_graphics().device_name == "intel_graphics"

 if __name__ == "__main__":
     test_target_dispatch()
......
@@ -26,6 +26,7 @@ from . import x86
 from . import cuda
 from . import rasp
 from . import mali
+from . import intel_graphics
 from . import opengl
 from . import util
 from . import rocm
......
@@ -33,9 +33,8 @@ def schedule_global_pool(outs):
         else:
             Out = outs[0].op.output(0)
             s[Pool].set_scope("local")
-        i, c, h, w = s[Out].op.axis
-        by, ty = s[Out].split(i, factor=num_thread)
-        bx, tx = s[Out].split(c, factor=num_thread)
+        by, ty = s[Out].split(s[Out].op.axis[0], factor=num_thread)
+        bx, tx = s[Out].split(s[Out].op.axis[1], factor=num_thread)
         s[Out].reorder(by, bx, ty, tx)
         s[Out].bind(ty, thread_y)
         s[Out].bind(tx, thread_x)
......
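The refactor above indexes s[Out].op.axis directly instead of unpacking all four axes, since only the first two are tiled. A standalone sketch of the same split-reorder-bind pattern, with made-up shapes and thread counts:

    import tvm

    num_thread = 8
    A = tvm.placeholder((16, 64, 7, 7), name="A")
    B = tvm.compute(A.shape, lambda n, c, h, w: A[n, c, h, w] * 2.0, name="B")
    s = tvm.create_schedule(B.op)
    # tile only the batch and channel axes, as in the schedule above
    by, ty = s[B].split(s[B].op.axis[0], factor=num_thread)
    bx, tx = s[B].split(s[B].op.axis[1], factor=num_thread)
    s[B].reorder(by, bx, ty, tx)
    s[B].bind(by, tvm.thread_axis("blockIdx.y"))
    s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
    s[B].bind(ty, tvm.thread_axis("threadIdx.y"))
    s[B].bind(tx, tvm.thread_axis("threadIdx.x"))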
+# pylint: disable=redefined-builtin, wildcard-import
+"""Intel Gen9 GPU specific declarations and schedules."""
+from __future__ import absolute_import as _abs
+from .conv2d import *
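A hedged end-to-end sketch of how the new package is meant to be consumed through topi's generic dispatch; the conv2d argument order and the shapes are assumptions based on topi conventions at the time, not something this listing shows:

    import tvm
    import topi

    data = tvm.placeholder((1, 64, 56, 56), name="data")
    kernel = tvm.placeholder((64, 64, 3, 3), name="kernel")

    # entering the target scope lets generic dispatch pick the
    # intel_graphics schedules registered by this commit
    with tvm.target.intel_graphics():
        conv = topi.nn.conv2d(data, kernel, 1, 1)  # strides=1, padding=1
        s = topi.generic.schedule_conv2d_nchw([conv])
    func = tvm.build(s, [data, kernel, conv], "opencl -device=intel_graphics")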