Commit cdd7f37f authored by Leyuan Wang, committed by Tianqi Chen

[TOPI] NCHWc added input shape 4 condition, intel graphics conv2d schedule debugged for inception_v3 workloads (#2265)
parent 43433fa1
-# pylint: disable=invalid-name, unused-argument
+# pylint: disable=invalid-name, unused-argument, missing-docstring, no-else-return
 """Definition of nn ops"""
 from __future__ import absolute_import
@@ -170,8 +170,11 @@ def compute_contrib_conv2d_NCHWc(attrs, inputs, _):
     out_layout = attrs.get_string("out_layout")
     out_dtype = attrs.get_string("out_dtype")
     out_dtype = inputs[0].dtype if out_dtype == "same" else out_dtype
-    _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape)
-    in_channel = in_channel_chunk * in_channel_block
+    if layout == "NCHW":
+        _, in_channel, _, _ = get_const_tuple(inputs[0].shape)
+    else:
+        _, in_channel_chunk, _, _, in_channel_block = get_const_tuple(inputs[0].shape)
+        in_channel = in_channel_chunk * in_channel_block
     assert dilation == (1, 1), "not support dilate now"
     if groups == 1:
         # pylint: disable=assignment-from-no-return
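For context, a minimal sketch (not part of the diff) of what the new layout check does: NCHW inputs are 4-D, while NCHWc inputs are 5-D with the channel axis split into a chunk and a block. The helper name `infer_in_channel` and the shapes below are illustrative only.

# Hypothetical helper mirroring the hunk above, for exposition.
def infer_in_channel(shape, layout):
    if layout == "NCHW":                  # 4-D: (N, C, H, W)
        _, in_channel, _, _ = shape
    else:                                 # 5-D NCHWc: (N, C//c, H, W, c)
        _, chunk, _, _, block = shape
        in_channel = chunk * block
    return in_channel

assert infer_in_channel((1, 64, 56, 56), "NCHW") == 64
assert infer_in_channel((1, 8, 56, 56, 8), "NCHW8c") == 64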
...
-# pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return, too-many-arguments, too-many-locals, too-many-statements, no-member, too-many-branches
+# pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return, too-many-arguments, too-many-locals, too-many-statements, no-member, too-many-branches, too-many-boolean-expressions
 """conv2d schedule on Intel Graphics"""
 from __future__ import absolute_import as _abs
@@ -61,7 +61,7 @@ def _alter_conv2d_layout(attrs, inputs, tinfos):
     return sym.contrib.conv2d_NCHWc(*copy_inputs, **new_attrs)

 @conv2d_NCHWc.register(["intel_graphics"])
-def _decl_conv2d(data, kernel, stride, padding, layout, out_layout, out_dtype='float32'):
+def _decl_conv2d(data, kernel, stride, padding, dilation, layout, out_layout, out_dtype='float32'):
     """Conv2D operator for Intel Graphics backend.

     Parameters
@@ -126,8 +126,7 @@ def schedule_conv2d_NCHWc(outs):
         for tensor in op.input_tensors:
             if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                 traverse(tensor.op)
-        if "4_5" in op.tag or "4_4" in op.tag or "2_7" in op.tag or "2_14" in op.tag \
-                or "1_16" in op.tag:
+        if 'conv2d' in op.tag:
             _schedule_cl_spatialpack_NCHWc(s, op)

         scheduled_ops.append(op)
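The dispatch change above matters because the old check enumerated exact block-size tags, so any conv2d compute carrying an unlisted block shape (such as the 3x4 blocking added below for inception_v3's 7x7 workloads) was silently left unscheduled. A small illustration, assuming a hypothetical op tag:

old_block_tags = ["4_5", "4_4", "2_7", "2_14", "1_16"]
tag = "conv2d"                                    # every conv compute now carries this tag
assert not any(t in tag for t in old_block_tags)  # old check: op falls through
assert "conv2d" in tag                            # new check: op gets scheduled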
@@ -156,31 +155,30 @@ def _decl_cl_spatialpack_NCHWc(data, kernel, stride, padding, out_dtype='float16'):
     ry = tvm.reduce_axis((0, kernel_h), name='ry')
     rx = tvm.reduce_axis((0, kernel_w), name='rx')

-    block_w = 0
-    block_h = 0
+    block_w = 1
+    block_h = 1
     if stride_h == 2:
         if num_filter + kernel_h == 515:
-            conv_tag = "4_4"
             block_h = 4
             block_w = 4
         else:
-            conv_tag = "4_5"
             block_h = 4
             block_w = 5
     elif kernel_h == 3:
         if num_filter == 512:
-            conv_tag = "2_7"
             block_h = 2
             block_w = 7
         else:
-            conv_tag = "2_14"
             block_h = 2
             block_w = 14
+    elif kernel_h == 7 and padding == 3 and stride == 1:
+        block_h = 3
+        block_w = 4
     else:
-        conv_tag = "1_16"
         block_h = 1
         block_w = 16

+    attrs = {'block_h': block_h, 'block_w' : block_w}
     c_h = out_height
     c_w = out_width
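Pulled out of the diff for clarity, the selection above amounts to a pure function from the workload to an output block shape; the `conv_tag` strings are gone, and the chosen block now travels in `attrs` instead. A sketch (hypothetical helper name, block values copied from the hunk):

def select_block(stride_h, stride, kernel_h, num_filter, padding):
    if stride_h == 2:
        return (4, 4) if num_filter + kernel_h == 515 else (4, 5)
    if kernel_h == 3:
        return (2, 7) if num_filter == 512 else (2, 14)
    if kernel_h == 7 and padding == 3 and stride == 1:
        return (3, 4)   # new case: inception_v3 7x7 workloads
    return (1, 16)

# e.g. a 7x7, stride-1, pad-3 conv now gets a 3x4 output block
assert select_block(1, 1, 7, 192, 3) == (3, 4)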
@@ -202,13 +200,13 @@ def _decl_cl_spatialpack_NCHWc(data, kernel, stride, padding, out_dtype='float16'):
         tvm.sum(
             temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) *
             kernel[ff, rc, ry, rx, vc].astype(out_dtype),
-            axis=[rc, ry, rx]), tag=conv_tag, name='conv')
+            axis=[rc, ry, rx]), name='conv', attrs=attrs)

     output = tvm.compute(
         oshape,
         lambda nn, ff, yy, xx:
         conv[nn][ff//nv][yy][xx][ff%nv],
-        name='output_unpack', tag=conv_tag)
+        name='output_unpack', tag='conv2d')

     return output
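A minimal, self-contained sketch (same TVM 0.x API the diff uses, toy shapes) of the attrs round trip introduced above: block sizes are attached to the compute op via the `attrs` argument of `tvm.compute` and read back from `op.attrs` at schedule time.

import tvm

A = tvm.placeholder((16,), name='A')
attrs = {'block_h': 3, 'block_w': 4}
B = tvm.compute((16,), lambda i: A[i] * 2,
                name='B', tag='conv2d', attrs=attrs)

s = tvm.create_schedule(B.op)
assert 'conv2d' in B.op.tag
block_h = s[B].op.attrs['block_h']   # an IntImm wrapping the value 3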
@@ -224,21 +222,10 @@ def _schedule_cl_spatialpack_NCHWc(s, op):
     kernel_L = s.cache_read(kernel, "local", [conv_L])

     _, in_channel, temp_h, temp_w = [util.get_const_int(x) for x in temp.shape]
-    if "1_16" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 1
-        OUTPUT_BLOCK_WIDTH = 16
-    elif "2_14" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 14
-    elif "2_7" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 7
-    elif "4_5" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 5
-    elif "4_4" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 4
+
+    attrs = s[conv].op.attrs
+    OUTPUT_BLOCK_HEIGHT = attrs['block_h']
+    OUTPUT_BLOCK_WIDTH = attrs['block_w']

     # schedule conv
     z_factor = 1
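A note on the design choice here (an observation, not part of the diff): with block sizes carried in op attrs, the schedule no longer duplicates the block-size table as a tag-parsing if/elif chain, so a new blocking such as 3x4 needs only a compute-side change. The values read from `op.attrs` come back as TVM expression nodes rather than Python ints; where a plain int is required, the `util.get_const_int` helper already used earlier in this function would convert them.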
@@ -308,7 +295,7 @@ def _schedule_cl_spatialpack_NCHWc(s, op):
 @conv2d.register(["intel_graphics"])
-def decl_conv2d(data, kernel, stride, padding, layout='NCHW', out_dtype='float32'):
+def decl_conv2d(data, kernel, stride, padding, dilation, layout='NCHW', out_dtype='float32'):
     """Conv2D operator for Intel Graphics backend.

     Parameters
@@ -368,8 +355,7 @@ def schedule_conv2d_nchw(outs):
         for tensor in op.input_tensors:
             if tensor.op.input_tensors and tensor.op not in scheduled_ops:
                 traverse(tensor.op)
-        if "4_5" in op.tag or "4_4" in op.tag or "2_7" in op.tag or "2_14" in op.tag \
-                or "1_16" in op.tag:
+        if 'conv2d' in op.tag:
             _schedule_cl_spatialpack(s, op)

         scheduled_ops.append(op)
@@ -396,31 +382,30 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, layout, out_dtype='float16'):
     ry = tvm.reduce_axis((0, kernel_h), name='ry')
     rx = tvm.reduce_axis((0, kernel_w), name='rx')

-    block_w = 0
-    block_h = 0
+    block_w = 1
+    block_h = 1
     if stride_h == 2:
         if num_filter + kernel_h == 515:
-            conv_tag = "4_4"
             block_h = 4
             block_w = 4
         else:
-            conv_tag = "4_5"
             block_h = 4
             block_w = 5
     elif kernel_h == 3:
         if num_filter == 512:
-            conv_tag = "2_7"
             block_h = 2
             block_w = 7
         else:
-            conv_tag = "2_14"
             block_h = 2
             block_w = 14
+    elif kernel_h == 7 and padding == 3 and stride == 1:
+        block_h = 3
+        block_w = 4
     else:
-        conv_tag = "1_16"
         block_h = 1
         block_w = 16

+    attrs = {'block_h': block_h, 'block_w' : block_w}
     c_h = out_height
     c_w = out_width
@@ -453,13 +438,13 @@ def _decl_cl_spatialpack(data, kernel, stride, padding, layout, out_dtype='float16'):
         tvm.sum(
             temp[nn, rc, yy * stride_h + ry, xx * stride_w + rx].astype(out_dtype) *
             kernel_vec[ff, rc, ry, rx, vc].astype(out_dtype),
-            axis=[rc, ry, rx]), tag=conv_tag, name='conv')
+            axis=[rc, ry, rx]), name='conv', attrs=attrs)

     output = tvm.compute(
         oshape,
         lambda nn, ff, yy, xx:
         conv[nn][ff//nv][yy][xx][ff%nv],
-        name='output_unpack', tag=conv_tag)
+        name='output_unpack', tag='conv2d')

     return output
@@ -477,21 +462,9 @@ def _schedule_cl_spatialpack(s, op):
     kernel_L = s.cache_read(kernel_vec, "local", [conv_L])

     _, in_channel, temp_h, temp_w = [util.get_const_int(x) for x in temp.shape]
-    if "1_16" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 1
-        OUTPUT_BLOCK_WIDTH = 16
-    elif "2_14" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 14
-    elif "2_7" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 2
-        OUTPUT_BLOCK_WIDTH = 7
-    elif "4_5" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 5
-    elif "4_4" in s[conv].op.tag:
-        OUTPUT_BLOCK_HEIGHT = 4
-        OUTPUT_BLOCK_WIDTH = 4
+    attrs = s[conv].op.attrs
+    OUTPUT_BLOCK_HEIGHT = attrs['block_h']
+    OUTPUT_BLOCK_WIDTH = attrs['block_w']

     # schedule conv
     z_factor = 1