Commit 8e03b21e, authored and committed by Prathamesh Kulkarni

re PR tree-optimization/89007 ([SVE] Implement generic vector average expansion)

2019-12-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	PR tree-optimization/89007
	* tree-vect-patterns.c (vect_recog_average_pattern): If there is no
	target support available, generate code to distribute rshift over plus
	and add a carry.

testsuite/
	* gcc.target/aarch64/sve/pr89007-1.c: New test.
	* gcc.target/aarch64/sve/pr89007-2.c: Likewise.

From-SVN: r279112
parent 1d214c3f
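The change keys off a simple identity: an unsigned average can be computed without widening by shifting each operand right by one and adding back the carry bit that the shifts discard, using AND for the truncating (floor) average and IOR for the rounding (ceiling) one. A minimal scalar sketch of both forms (the helper names are illustrative and not part of the patch):

#include <stdio.h>

/* Floor average: (a + b) >> 1 without the intermediate sum overflowing.
   The bit lost by shifting each operand is recovered as (a & b) & 1.  */
static unsigned char avg_floor (unsigned char a, unsigned char b)
{
  return (a >> 1) + (b >> 1) + ((a & b) & 1);
}

/* Ceiling average: (a + b + 1) >> 1; here the carry term is (a | b) & 1.  */
static unsigned char avg_ceil (unsigned char a, unsigned char b)
{
  return (a >> 1) + (b >> 1) + ((a | b) & 1);
}

int main (void)
{
  /* 3 and 4 average to 3 when truncating and to 4 when rounding up.  */
  printf ("%u %u\n", avg_floor (3, 4), avg_ceil (3, 4));
  return 0;
}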
gcc/ChangeLog

2019-12-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	PR tree-optimization/89007
	* tree-vect-patterns.c (vect_recog_average_pattern): If there is no
	target support available, generate code to distribute rshift over plus
	and add a carry.

2019-12-09  Martin Liska  <mliska@suse.cz>

	PR ipa/92737

gcc/testsuite/ChangeLog

2019-12-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	PR tree-optimization/89007
	* gcc.target/aarch64/sve/pr89007-1.c: New test.
	* gcc.target/aarch64/sve/pr89007-2.c: Likewise.

2019-12-09  Hongtao Liu  <hongtao@intel.com>

	* gcc.target/i386/pr92686.inc: New file.
gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c (new test)

/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */
#define N 1024
unsigned char dst[N];
unsigned char in1[N];
unsigned char in2[N];
/*
** foo:
** ...
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** add (z[0-9]+\.b), (\1, \2|\2, \1)
** orr (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
** and (z[0-9]+\.b), \5\.b, #0x1
** add z0\.b, (\3, \6|\6, \3)
** ...
*/
void
foo ()
{
for( int x = 0; x < N; x++ )
dst[x] = (in1[x] + in2[x] + 1) >> 1;
}
/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c (new test)

/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */
#define N 1024
unsigned char dst[N];
unsigned char in1[N];
unsigned char in2[N];
/*
** foo:
** ...
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** add (z[0-9]+\.b), (\1, \2|\2, \1)
** and (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
** and (z[0-9]+\.b), \5\.b, #0x1
** add z0\.b, (\3, \6|\6, \3)
** ...
*/
void
foo ()
{
for( int x = 0; x < N; x++ )
dst[x] = (in1[x] + in2[x]) >> 1;
}
/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
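The two tests cover the two roundings the pattern handles: pr89007-1.c adds 1 before the shift (the ceiling average, so the carry is formed with orr), while pr89007-2.c omits it (the floor average, carry formed with and). A standalone check that the carry-based forms match the original loops over the same N = 1024 buffers; the buffer names only mirror the tests, and this program is not part of the commit:

#include <stdio.h>

#define N 1024
static unsigned char dst[N], ref[N], in1[N], in2[N];

int main (void)
{
  /* Arbitrary inputs that exercise both values of the carry bit.  */
  for (int i = 0; i < N; i++)
    {
      in1[i] = (unsigned char) (7 * i);
      in2[i] = (unsigned char) (255 - 3 * i);
    }

  for (int x = 0; x < N; x++)
    {
      /* Original statements from the two tests (ceiling and floor).  */
      dst[x] = (in1[x] + in2[x] + 1) >> 1;
      ref[x] = (in1[x] + in2[x]) >> 1;

      /* The carry-based forms the fallback is expected to produce.  */
      unsigned char ceil2 = (in1[x] >> 1) + (in2[x] >> 1) + ((in1[x] | in2[x]) & 1);
      unsigned char floor2 = (in1[x] >> 1) + (in2[x] >> 1) + ((in1[x] & in2[x]) & 1);

      if (dst[x] != ceil2 || ref[x] != floor2)
	{
	  printf ("mismatch at %d\n", x);
	  return 1;
	}
    }
  printf ("rewritten forms match for both tests\n");
  return 0;
}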
gcc/tree-vect-patterns.c

@@ -1928,7 +1928,10 @@ vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
      TYPE avg = (TYPE) avg';
 
    where NTYPE is no wider than half of TYPE.  Since only the bottom half
-   of avg is used, all or part of the cast of avg' should become redundant.  */
+   of avg is used, all or part of the cast of avg' should become redundant.
+
+   If there is no target support available, generate code to distribute rshift
+   over plus and add a carry.  */
 
 static gimple *
 vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
@@ -2032,9 +2035,20 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
   /* Check for target support.  */
   tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
-  if (!new_vectype
-      || !direct_internal_fn_supported_p (ifn, new_vectype,
-                                          OPTIMIZE_FOR_SPEED))
+  if (!new_vectype)
     return NULL;
+
+  bool fallback_p = false;
+
+  if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
+    ;
+  else if (TYPE_UNSIGNED (new_type)
+           && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
+           && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
+           && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
+           && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
+    fallback_p = true;
+  else
+    return NULL;
 
   /* The IR requires a valid vector type for the cast result, even though
@@ -2043,11 +2057,53 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
   if (!*type_out)
     return NULL;
 
-  /* Generate the IFN_AVG* call.  */
   tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
   tree new_ops[2];
   vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
                        unprom, new_vectype);
+
+  if (fallback_p)
+    {
+      /* As a fallback, generate code for following sequence:
+
+         shifted_op0 = new_ops[0] >> 1;
+         shifted_op1 = new_ops[1] >> 1;
+         sum_of_shifted = shifted_op0 + shifted_op1;
+         unmasked_carry = new_ops[0] and/or new_ops[1];
+         carry = unmasked_carry & 1;
+         new_var = sum_of_shifted + carry;
+      */
+
+      tree one_cst = build_one_cst (new_type);
+      gassign *g;
+
+      tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
+                               shifted_op0, shifted_op1);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
+      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
+      g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      tree carry = vect_recog_temp_ssa_var (new_type, NULL);
+      g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
+      append_pattern_def_seq (last_stmt_info, g, new_vectype);
+
+      g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
+      return vect_convert_output (last_stmt_info, type, g, new_vectype);
+    }
+
+  /* Generate the IFN_AVG* call.  */
   gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
                                                     new_ops[1]);
   gimple_call_set_lhs (average_stmt, new_var);
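Reading the fallback branch off the diff above: when fallback_p is set, the single IFN_AVG_* call is replaced by six narrow vector statements, with BIT_IOR_EXPR chosen for IFN_AVG_CEIL and BIT_AND_EXPR otherwise. A scalar C rendering of that statement sequence, reusing the variable names from the comment in the patch (the wrapper function itself is only illustrative):

#include <stdio.h>

/* One lane of the sequence vect_recog_average_pattern emits when
   fallback_p is set; ceil_p stands in for the ifn == IFN_AVG_CEIL test.  */
static unsigned char
average_fallback (unsigned char op0, unsigned char op1, int ceil_p)
{
  unsigned char shifted_op0 = op0 >> 1;                      /* RSHIFT_EXPR */
  unsigned char shifted_op1 = op1 >> 1;                      /* RSHIFT_EXPR */
  unsigned char sum_of_shifted = shifted_op0 + shifted_op1;  /* PLUS_EXPR */
  unsigned char unmasked_carry = ceil_p ? (op0 | op1)        /* BIT_IOR_EXPR */
                                        : (op0 & op1);       /* BIT_AND_EXPR */
  unsigned char carry = unmasked_carry & 1;                  /* BIT_AND_EXPR */
  return sum_of_shifted + carry;                             /* final PLUS_EXPR */
}

int main (void)
{
  /* (3 + 4) >> 1 is 3; (3 + 4 + 1) >> 1 is 4.  */
  printf ("%u %u\n", average_fallback (3, 4, 0), average_fallback (3, 4, 1));
  return 0;
}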