Commit d03431d0 authored by Richard Biener, committed by Richard Biener

re PR tree-optimization/92324 (ICE in expand_direct_optab_fn, at internal-fn.c:2890)

2019-11-15  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92324
	* tree-vect-loop.c (vect_create_epilog_for_reduction): Fix
	signedness of SLP reduction epilogue operations.  Also reduce
	the vector width for SLP reductions before doing elementwise
	operations if possible.

	* gcc.dg/vect/pr92324-4.c: New testcase.

From-SVN: r278289
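
For context, here is a minimal standalone sketch (not part of the commit) of the signedness problem the first part of the fix addresses.  The reduction in the new testcase below is effectively an unsigned MAX whose result is only converted to int at the very end, so the scalar epilogue has to combine the vector lanes in the unsigned element type and convert once afterwards; combining them in the signed scalar type computes a different maximum.  All names and values here are illustrative.

/* Sketch only: reducing unsigned MAX lanes in the signed result type
   versus in the unsigned element type.  */
#include <stdio.h>

int
main (void)
{
  unsigned lanes[4] = { 1u, 0x80000000u, 2u, 3u };

  /* Wrong: extract each lane as signed int and do a signed max.
     (int) 0x80000000u is INT_MIN on two's-complement targets, so the
     result is 3.  */
  int wrong = 0;
  for (int i = 0; i < 4; ++i)
    {
      int lane = (int) lanes[i];
      wrong = wrong > lane ? wrong : lane;
    }

  /* Right: do the max in the unsigned element type and convert the
     final value, matching what the scalar loop computes.  */
  unsigned acc = 0;
  for (int i = 0; i < 4; ++i)
    acc = acc > lanes[i] ? acc : lanes[i];
  int right = (int) acc;

  printf ("wrong=%d right=%d\n", wrong, right);  /* wrong=3 right=-2147483648 */
  return 0;
}
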
parent 8eea62d8

gcc/testsuite/gcc.dg/vect/pr92324-4.c (new file):
#include "tree-vect.h"
unsigned a[1024];
int gres1, gres2;
int __attribute__((noipa))
foo (int n)
{
int res1 = 0;
int res2 = 0;
for (int i = 0; i < n; ++i)
{
res1 = res1 > a[2*i] ? res1 : a[2*i];
res2 = res2 > a[2*i+1] ? res2 : a[2*i+1];
}
gres1 = res1;
gres2 = res2;
}
int main ()
{
check_vect ();
a[30] = (unsigned)__INT_MAX__ + 1;
a[31] = (unsigned)__INT_MAX__ + 1;
foo (16);
if (gres1 != -__INT_MAX__ - 1
|| gres2 != -__INT_MAX__ - 1)
__builtin_abort ();
return 0;
}
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4930,6 +4930,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
   bool reduce_with_shift;
   tree vec_temp;
 
+  gcc_assert (slp_reduc || new_phis.length () == 1);
+
   /* See if the target wants to do the final (shift) reduction
      in a vector mode of smaller size and first reduce upper/lower
      halves against each other.  */
@@ -4937,6 +4939,21 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
   tree stype = TREE_TYPE (vectype);
   unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
   unsigned nunits1 = nunits;
+  if ((mode1 = targetm.vectorize.split_reduction (mode)) != mode
+      && new_phis.length () == 1)
+    {
+      nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
+      /* For SLP reductions we have to make sure lanes match up, but
+         since we're doing individual element final reduction reducing
+         vector width here is even more important.
+         ??? We can also separate lanes with permutes, for the common
+         case of power-of-two group-size odd/even extracts would work.  */
+      if (slp_reduc && nunits != nunits1)
+        {
+          nunits1 = least_common_multiple (nunits1, group_size);
+          gcc_assert (exact_log2 (nunits1) != -1 && nunits1 <= nunits);
+        }
+    }
   if (!slp_reduc
       && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
     nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
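
The narrowing logic in the hunk above can be illustrated with a small standalone sketch.  The helper names (narrowed_lanes, gcd) are hypothetical and only mirror the computation for the SLP case: when the target's split_reduction hook suggests a narrower vector mode, an SLP reduction may only narrow to a lane count that is a multiple of the SLP group size (and still a power of two no larger than the current count), so that lanes belonging to the same reduction chain keep lining up.

/* Sketch only: mirrors the nunits1 computation added above with
   hypothetical helpers; GCC itself uses least_common_multiple and
   exact_log2.  */
#include <assert.h>
#include <stdio.h>

static unsigned
gcd (unsigned a, unsigned b)
{
  while (b != 0)
    {
      unsigned t = a % b;
      a = b;
      b = t;
    }
  return a;
}

/* NUNITS: current number of lanes.  SPLIT: lane count of the mode the
   target's split_reduction hook suggests.  GROUP_SIZE: number of SLP
   reduction chains.  */
static unsigned
narrowed_lanes (unsigned nunits, unsigned split, unsigned group_size)
{
  unsigned nunits1 = split;
  if (nunits != nunits1)
    {
      nunits1 = nunits1 / gcd (nunits1, group_size) * group_size;  /* lcm */
      assert ((nunits1 & (nunits1 - 1)) == 0 && nunits1 <= nunits);
    }
  return nunits1;
}

int
main (void)
{
  /* 8 lanes, target suggests 4, two reduction chains: narrowing to 4
     lanes is fine because 4 is a multiple of the group size 2.  */
  printf ("%u\n", narrowed_lanes (8, 4, 2));  /* prints 4 */
  /* Target suggests 2 lanes but there are 4 chains: round back up to 4.  */
  printf ("%u\n", narrowed_lanes (8, 2, 4));  /* prints 4 */
  return 0;
}
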
@@ -4958,7 +4975,6 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
       new_temp = new_phi_result;
       while (nunits > nunits1)
         {
-          gcc_assert (!slp_reduc);
           nunits /= 2;
           vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
                                                           stype, nunits);
@@ -5113,6 +5129,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
       int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
       int element_bitsize = tree_to_uhwi (bitsize);
 
+      tree compute_type = TREE_TYPE (vectype);
+      gimple_seq stmts = NULL;
       FOR_EACH_VEC_ELT (new_phis, i, new_phi)
         {
           int bit_offset;
@@ -5120,12 +5138,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
             vec_temp = PHI_RESULT (new_phi);
           else
             vec_temp = gimple_assign_lhs (new_phi);
-          tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
-                             bitsize_zero_node);
-          epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
-          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
-          gimple_assign_set_lhs (epilog_stmt, new_temp);
-          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+          new_temp = gimple_build (&stmts, BIT_FIELD_REF, compute_type,
+                                   vec_temp, bitsize, bitsize_zero_node);
 
           /* In SLP we don't need to apply reduction operation, so we just
              collect s' values in SCALAR_RESULTS.  */
@@ -5137,14 +5151,9 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
                bit_offset += element_bitsize)
             {
               tree bitpos = bitsize_int (bit_offset);
-              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
-                                 bitsize, bitpos);
-
-              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
-              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
-              gimple_assign_set_lhs (epilog_stmt, new_name);
-              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-
+              new_name = gimple_build (&stmts, BIT_FIELD_REF,
+                                       compute_type, vec_temp,
+                                       bitsize, bitpos);
               if (slp_reduc)
                 {
                   /* In SLP we don't need to apply reduction operation, so
@@ -5153,13 +5162,8 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
                   scalar_results.safe_push (new_name);
                 }
               else
-                {
-                  epilog_stmt = gimple_build_assign (new_scalar_dest, code,
-                                                     new_name, new_temp);
-                  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
-                  gimple_assign_set_lhs (epilog_stmt, new_temp);
-                  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-                }
+                new_temp = gimple_build (&stmts, code, compute_type,
+                                         new_name, new_temp);
             }
         }
@@ -5170,24 +5174,28 @@ vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
       if (slp_reduc)
         {
           tree res, first_res, new_res;
-          gimple *new_stmt;
 
           /* Reduce multiple scalar results in case of SLP unrolling.  */
           for (j = group_size; scalar_results.iterate (j, &res);
                j++)
             {
               first_res = scalar_results[j % group_size];
-              new_stmt = gimple_build_assign (new_scalar_dest, code,
-                                              first_res, res);
-              new_res = make_ssa_name (new_scalar_dest, new_stmt);
-              gimple_assign_set_lhs (new_stmt, new_res);
-              gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
+              new_res = gimple_build (&stmts, code, compute_type,
+                                      first_res, res);
               scalar_results[j % group_size] = new_res;
             }
+          for (k = 0; k < group_size; k++)
+            scalar_results[k] = gimple_convert (&stmts, scalar_type,
+                                                scalar_results[k]);
         }
       else
-        /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
-        scalar_results.safe_push (new_temp);
+        {
+          /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
+          new_temp = gimple_convert (&stmts, scalar_type, new_temp);
+          scalar_results.safe_push (new_temp);
+        }
+
+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
     }
 
   if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
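
Putting the two parts of the change together, here is a standalone sketch (not part of the commit; lane values, lane layout and names are illustrative) of the epilogue shape this enables for the testcase: with the two reduction chains interleaved across the lanes of an 8-lane unsigned accumulator, the vector is first narrowed to 4 lanes by a max of the upper half against the lower half, which keeps the chains separate because 4 is a multiple of the group size, and only then are the remaining lanes extracted and combined per chain in the unsigned element type, with a single conversion to int at the very end.

/* Sketch only: a scalar model of the narrowed SLP reduction epilogue.  */
#include <stdio.h>

int
main (void)
{
  /* Even lanes accumulate chain 1, odd lanes accumulate chain 2.  */
  unsigned acc[8] = { 1u, 2u, 0x80000000u, 4u, 5u, 0x80000000u, 7u, 8u };

  /* Step 1: narrow 8 lanes to 4 by reducing the upper half against the
     lower half; lane i and lane i + 4 belong to the same chain.  */
  unsigned half[4];
  for (int i = 0; i < 4; ++i)
    half[i] = acc[i] > acc[i + 4] ? acc[i] : acc[i + 4];

  /* Step 2: elementwise reduction of the remaining lanes, per chain,
     still in the unsigned element type.  */
  unsigned res[2] = { 0u, 0u };
  for (int i = 0; i < 4; ++i)
    if (half[i] > res[i % 2])
      res[i % 2] = half[i];

  /* Step 3: convert to the scalar type only once, at the very end.  */
  printf ("gres1=%d gres2=%d\n", (int) res[0], (int) res[1]);
  return 0;
}
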