Commit 8e03b21e by Prathamesh Kulkarni

re PR tree-optimization/89007 ([SVE] Implement generic vector average expansion)

2019-12-09  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	PR tree-optimization/89007
	* tree-vect-patterns.c (vect_recog_average_pattern): If there is no
	target support available, generate code to distribute rshift over plus
	and add a carry.

testsuite/
	* gcc.target/aarch64/sve/pr89007-1.c: New test.
	* gcc.target/aarch64/sve/pr89007-2.c: Likewise.

From-SVN: r279112
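
The transformation relies on two bit-level identities over unsigned values: (a + b) >> 1 == (a >> 1) + (b >> 1) + ((a & b) & 1) for the truncating average, and (a + b + 1) >> 1 == (a >> 1) + (b >> 1) + ((a | b) & 1) for the rounding one. Neither right-hand side ever needs the wider intermediate a + b. A minimal standalone C check of both identities (illustrative only, not part of the commit; the function names are invented):

#include <assert.h>

/* Floor average without widening: the carry out of the discarded low
   bits is 1 only when both low bits are set.  */
static unsigned char avg_floor (unsigned char a, unsigned char b)
{
  return (unsigned char) ((a >> 1) + (b >> 1) + ((a & b) & 1));
}

/* Rounding (ceil) average: the +1 turns the carry condition into
   "at least one low bit set".  */
static unsigned char avg_ceil (unsigned char a, unsigned char b)
{
  return (unsigned char) ((a >> 1) + (b >> 1) + ((a | b) & 1));
}

int main (void)
{
  /* Exhaustive check over all byte pairs.  */
  for (unsigned a = 0; a < 256; a++)
    for (unsigned b = 0; b < 256; b++)
      {
        assert (avg_floor (a, b) == ((a + b) >> 1));
        assert (avg_ceil (a, b) == ((a + b + 1) >> 1));
      }
  return 0;
}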
New file gcc/testsuite/gcc.target/aarch64/sve/pr89007-1.c (rounding average; the carry is the OR of the operands' low bits, matched by the orr below):

/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */
#define N 1024
unsigned char dst[N];
unsigned char in1[N];
unsigned char in2[N];
/*
** foo:
** ...
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** add (z[0-9]+\.b), (\1, \2|\2, \1)
** orr (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
** and (z[0-9]+\.b), \5\.b, #0x1
** add z0\.b, (\3, \6|\6, \3)
** ...
*/
void
foo ()
{
  for (int x = 0; x < N; x++)
    dst[x] = (in1[x] + in2[x] + 1) >> 1;
}
/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
New file gcc/testsuite/gcc.target/aarch64/sve/pr89007-2.c (truncating average; the carry is the AND of the operands' low bits, matched by the second and below):

/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -ftree-vectorize -march=armv8.2-a+sve --save-temps" } */
/* { dg-final { check-function-bodies "**" "" } } */
#define N 1024
unsigned char dst[N];
unsigned char in1[N];
unsigned char in2[N];
/*
** foo:
** ...
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** lsr (z[0-9]+\.b), z[0-9]+\.b, #1
** add (z[0-9]+\.b), (\1, \2|\2, \1)
** and (z[0-9]+)\.d, z[0-9]+\.d, z[0-9]+\.d
** and (z[0-9]+\.b), \5\.b, #0x1
** add z0\.b, (\3, \6|\6, \3)
** ...
*/
void
foo ()
{
  for (int x = 0; x < N; x++)
    dst[x] = (in1[x] + in2[x]) >> 1;
}
/* { dg-final { scan-assembler-not {\tuunpklo\t} } } */
/* { dg-final { scan-assembler-not {\tuunpkhi\t} } } */
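
The scan-assembler-not directives in both tests pin down the benefit: the whole computation stays in the byte elements, so the vectorizer no longer needs the widening unpacks uunpklo/uunpkhi. For reference, a hand-written C analogue of the element-wise computation the fallback produces for the two kernels (an illustrative sketch, not compiler output; the function names are invented):

#define N 1024
unsigned char dst[N], in1[N], in2[N];

/* pr89007-1.c kernel, (in1[x] + in2[x] + 1) >> 1, in distributed form:
   matches the lsr/lsr/add/orr/and/add sequence the first test expects.  */
void foo_ceil (void)
{
  for (int x = 0; x < N; x++)
    dst[x] = (unsigned char) ((in1[x] >> 1) + (in2[x] >> 1)
                              + ((in1[x] | in2[x]) & 1));
}

/* pr89007-2.c kernel, (in1[x] + in2[x]) >> 1: identical except the
   carry uses AND, matching the second and in that test.  */
void foo_floor (void)
{
  for (int x = 0; x < N; x++)
    dst[x] = (unsigned char) ((in1[x] >> 1) + (in2[x] >> 1)
                              + ((in1[x] & in2[x]) & 1));
}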
@@ -1928,7 +1928,10 @@ vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
     TYPE avg = (TYPE) avg';

   where NTYPE is no wider than half of TYPE.  Since only the bottom half
   of avg is used, all or part of the cast of avg' should become redundant.

   If there is no target support available, generate code to distribute rshift
   over plus and add a carry.  */
static gimple *
vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
@@ -2032,9 +2035,20 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
  /* Check for target support.  */
  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
  if (!new_vectype)
    return NULL;

  bool fallback_p = false;

  if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
    ;
  else if (TYPE_UNSIGNED (new_type)
           && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
           && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
           && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
           && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
    fallback_p = true;
  else
    return NULL;
/* The IR requires a valid vector type for the cast result, even though
@@ -2043,11 +2057,53 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
  if (!*type_out)
    return NULL;

  tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
  tree new_ops[2];
  vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
                       unprom, new_vectype);

  if (fallback_p)
    {
      /* As a fallback, generate code for the following sequence:

           shifted_op0 = new_ops[0] >> 1;
           shifted_op1 = new_ops[1] >> 1;
           sum_of_shifted = shifted_op0 + shifted_op1;
           unmasked_carry = new_ops[0] | new_ops[1] (for AVG_CEIL)
                            or new_ops[0] & new_ops[1] (for AVG_FLOOR);
           carry = unmasked_carry & 1;
           new_var = sum_of_shifted + carry;  */

      tree one_cst = build_one_cst (new_type);
      gassign *g;

      tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
      append_pattern_def_seq (last_stmt_info, g, new_vectype);

      tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
      append_pattern_def_seq (last_stmt_info, g, new_vectype);

      tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
                               shifted_op0, shifted_op1);
      append_pattern_def_seq (last_stmt_info, g, new_vectype);

      tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
      tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
      g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
      append_pattern_def_seq (last_stmt_info, g, new_vectype);

      tree carry = vect_recog_temp_ssa_var (new_type, NULL);
      g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
      append_pattern_def_seq (last_stmt_info, g, new_vectype);

      g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
      return vect_convert_output (last_stmt_info, type, g, new_vectype);
    }

  /* Generate the IFN_AVG* call.  */
  gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
                                                    new_ops[1]);
  gimple_call_set_lhs (average_stmt, new_var);