Commit 396bd562 by Leyuan Wang, committed by Tianqi Chen

[TOPI] Conv2d Added and Optimized for Intel HD Graphics (#1290)

parent fb88b74e
@@ -157,8 +157,8 @@ EXPORT Target rasp(const std::vector<std::string>& options =
 EXPORT Target mali(const std::vector<std::string>& options =
                    std::vector<std::string>());
-/*! \return A target for Intel GPU */
-EXPORT Target intel_gpu(const std::vector<std::string>& options =
-                        std::vector<std::string>());
+/*! \return A target for Intel Graphics */
+EXPORT Target intel_graphics(const std::vector<std::string>& options =
+                             std::vector<std::string>());
 /*! \return A target for stackvm */
......
@@ -76,7 +76,7 @@ class Target(NodeBase):
     - :any:`tvm.target.cuda` create CUDA target
     - :any:`tvm.target.rocm` create ROCM target
     - :any:`tvm.target.mali` create Mali target
-    - :any:`tvm.target.intel_gpu` create Intel GPU target
+    - :any:`tvm.target.intel_graphics` create Intel Graphics target
     """
     def __init__(self, handle):
         super(Target, self).__init__(handle)
@@ -402,15 +402,15 @@ def mali(options=None):
     return _api_internal._TargetCreate("opencl", *opts)


-def intel_gpu(options=None):
-    """Returns an Intel GPU target.
+def intel_graphics(options=None):
+    """Returns an Intel Graphics target.

     Parameters
     ----------
     options : str or list of str
         Additional options
     """
-    opts = ["-device=intel_gpu"]
+    opts = ["-device=intel_graphics"]
     opts = _merge_opts(opts, options)
     return _api_internal._TargetCreate("opencl", *opts)
......
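For readers following the rename, the new helper is used like the old one; a minimal sketch assuming the 2018-era tvm.target Python API:

import tvm

# After this commit, only intel_graphics() exists; it builds an OpenCL
# target with -device=intel_graphics, as the hunk above shows.
target = tvm.target.intel_graphics()
assert target.device_name == "intel_graphics"

# The equivalent option-string form, parsed by tvm.target.create:
parsed = tvm.target.create("opencl -device=intel_graphics")
assert str(parsed).startswith("opencl")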
@@ -76,7 +76,7 @@ Target CreateTarget(const std::string& target_name,
       t->keys_array.push_back(ir::StringImm::make("rocm"));
     t->keys_array.push_back(ir::StringImm::make("gpu"));
     t->max_num_threads = 256;
-    if (t->device_name == "intel_gpu") {
+    if (t->device_name == "intel_graphics") {
       t->thread_warp_size = 16;
     }
   } else if (target_name == "metal" || target_name == "vulkan") {

@@ -274,9 +274,9 @@ Target mali(const std::vector<std::string>& options) {
   }));
 }

-Target intel_gpu(const std::vector<std::string>& options) {
+Target intel_graphics(const std::vector<std::string>& options) {
   return CreateTarget("opencl", MergeOptions(options, {
-    "-device=intel_gpu"
+    "-device=intel_graphics"
   }));
 }
......
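The effect of the CreateTarget branch can be observed from Python; device_name is exercised by the test further down, while reading thread_warp_size back relies on node-field reflection and is an assumption here:

import tvm

target = tvm.target.create("opencl -device=intel_graphics")
print(target.device_name)       # "intel_graphics"
# CreateTarget sets thread_warp_size = 16 for this device name; the
# attribute read below assumes the field is exposed via reflection.
print(target.thread_warp_size)  # 16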
@@ -159,7 +159,7 @@ void CodeGenOpenCL::PrintStorageSync(const Call* op) {
   const std::string& sync = op->args[0].as<StringImm>()->value;
   if (sync == "warp") {
     this->PrintIndent();
-    this->stream << "sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n";
+    this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n";
   } else if (sync == "shared") {
     this->PrintIndent();
     this->stream << "barrier(CLK_LOCAL_MEM_FENCE);\n";
......
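To see which barrier the codegen emits, one can dump the OpenCL source of a kernel that stages data through shared memory; a sketch in the same era's API, assuming TVM was built with OpenCL enabled:

import tvm

n = 1024
A = tvm.placeholder((n,), name="A")
B = tvm.compute((n,), lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)

bx, tx = s[B].split(B.op.axis[0], factor=64)
s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
s[B].bind(tx, tvm.thread_axis("threadIdx.x"))

# Staging A in shared memory makes TVM insert "shared" storage syncs,
# which PrintStorageSync lowers to barrier(CLK_LOCAL_MEM_FENCE).
AA = s.cache_read(A, "shared", [B])
s[AA].compute_at(s[B], bx)
s[AA].bind(s[AA].op.axis[0], tvm.thread_axis("threadIdx.x"))

f = tvm.build(s, [A, B], "opencl")
print(f.imported_modules[0].get_source())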
@@ -40,7 +40,7 @@ void OpenCLWorkspace::GetAttr(
       }
       case kWarpSize: {
         /* TODO: the warp size of OpenCL device is not always 1
-                 e.g. Intel GPU has a sub group concept which contains 8 - 32 work items,
+                 e.g. Intel Graphics has a sub group concept which contains 8 - 32 work items,
                  corresponding to the number of SIMD entries the hardware configures.
                  We need to figure out a way to query this information from the hardware.
          */
......
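One way to query that width is the standard kernel work-group query; a sketch using pyopencl (outside TVM's runtime, and only an illustration of the TODO above):

import pyopencl as cl

ctx = cl.create_some_context()
dev = ctx.devices[0]
prg = cl.Program(ctx, """
__kernel void probe(__global float* x) { x[get_global_id(0)] += 1.0f; }
""").build()

# CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE usually reflects the SIMD
# width the driver compiled the kernel for (8, 16, or 32 on Intel Graphics).
simd = prg.probe.get_work_group_info(
    cl.kernel_work_group_info.PREFERRED_WORK_GROUP_SIZE_MULTIPLE, dev)
print("estimated sub-group width:", simd)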
@@ -34,7 +34,7 @@ def test_exp():
         np.testing.assert_allclose(
             b.asnumpy(), np.exp(a.asnumpy()), rtol=1e-5)

-    check_device("opencl -device=intel_gpu")
+    check_device("opencl -device=intel_graphics")
     check_device("cuda", "llvm")
     check_device("vulkan")
......
@@ -47,7 +47,7 @@ def test_target_string_parse():
     assert str(target) == str(tvm.target.cuda("-libs=cublas,cudnn"))

-    assert tvm.target.intel_gpu().device_name == "intel_gpu"
+    assert tvm.target.intel_graphics().device_name == "intel_graphics"


 if __name__ == "__main__":
     test_target_dispatch()
......
@@ -26,6 +26,7 @@ from . import x86
 from . import cuda
 from . import rasp
 from . import mali
+from . import intel_graphics
 from . import opengl
 from . import util
 from . import rocm
......
@@ -33,9 +33,8 @@ def schedule_global_pool(outs):
     else:
         Out = outs[0].op.output(0)
         s[Pool].set_scope("local")
-    i, c, h, w = s[Out].op.axis
-    by, ty = s[Out].split(i, factor=num_thread)
-    bx, tx = s[Out].split(c, factor=num_thread)
+    by, ty = s[Out].split(s[Out].op.axis[0], factor=num_thread)
+    bx, tx = s[Out].split(s[Out].op.axis[1], factor=num_thread)
     s[Out].reorder(by, bx, ty, tx)
     s[Out].bind(ty, thread_y)
     s[Out].bind(tx, thread_x)
......
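The indexed form avoids unpacking a fixed number of axes from the pool output. The tiling idiom itself, as a self-contained sketch in the same era's API:

import tvm

num_thread = 8
A = tvm.placeholder((64, 64), name="A")
B = tvm.compute((64, 64), lambda i, j: A[i, j] * 2, name="B")
s = tvm.create_schedule(B.op)

# Tile the first two axes and bind them to the 2-D block/thread grid,
# mirroring the schedule_global_pool change above.
by, ty = s[B].split(s[B].op.axis[0], factor=num_thread)
bx, tx = s[B].split(s[B].op.axis[1], factor=num_thread)
s[B].reorder(by, bx, ty, tx)
s[B].bind(by, tvm.thread_axis("blockIdx.y"))
s[B].bind(bx, tvm.thread_axis("blockIdx.x"))
s[B].bind(ty, tvm.thread_axis("threadIdx.y"))
s[B].bind(tx, tvm.thread_axis("threadIdx.x"))
print(tvm.lower(s, [A, B], simple_mode=True))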
@@ -0,0 +1,4 @@
+# pylint: disable=redefined-builtin, wildcard-import
+"""Intel Gen9 GPU specific declaration and schedules."""
+from __future__ import absolute_import as _abs
+from .conv2d import *
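With the package registered in topi's __init__ above, the new schedules are reached through topi's generic dispatch; a hedged sketch assuming the 2018-era topi conv2d API (the workload shape is illustrative, not from the commit):

import tvm
import topi

data = tvm.placeholder((1, 64, 56, 56), name="data")     # NCHW input
kernel = tvm.placeholder((64, 64, 3, 3), name="kernel")  # OIHW weights

# Entering the target context routes the generic conv2d/schedule calls
# to the intel_graphics implementations added by this commit.
with tvm.target.intel_graphics():
    conv = topi.nn.conv2d(data, kernel, 1, 1, "NCHW")
    s = topi.generic.schedule_conv2d_nchw([conv])

f = tvm.build(s, [data, kernel, conv], "opencl -device=intel_graphics")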