Commit 05677565 by Richard Biener Committed by Richard Biener

re PR tree-optimization/88315 (SAD and DOT_PROD SLP reductions with initial value != 0 create wrong code)

re PR tree-optimization/88315 (SAD and DOT_PROD SLP reductions with initial value != 0 create wrong code)

2018-12-04  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/88315
	* tree-vect-loop.c (get_initial_defs_for_reduction): Simplify
	and fix initialization vector for SAD and DOT_PROD SLP reductions.

	* gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial
	initial value.

From-SVN: r266771
parent 4b8caa08
2018-12-04 Richard Biener <rguenther@suse.de>
PR tree-optimization/88315
* tree-vect-loop.c (get_initial_defs_for_reduction): Simplify
and fix initialization vector for SAD and DOT_PROD SLP reductions.
2018-12-03 Sandra Loosemore <sandra@codesourcery.com> 2018-12-03 Sandra Loosemore <sandra@codesourcery.com>
PR c/59039 PR c/59039
2018-12-04 Richard Biener <rguenther@suse.de>
PR tree-optimization/88315
* gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial
initial value.
2018-12-03 Jakub Jelinek <jakub@redhat.com> 2018-12-03 Jakub Jelinek <jakub@redhat.com>
PR middle-end/64242 PR middle-end/64242
......
...@@ -12,7 +12,7 @@ extern void abort (void); ...@@ -12,7 +12,7 @@ extern void abort (void);
int __attribute__((noinline,noclone)) int __attribute__((noinline,noclone))
foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2) foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
{ {
int i_sum = 0; int i_sum = 5;
for( int y = 0; y < 16; y++ ) for( int y = 0; y < 16; y++ )
{ {
i_sum += abs ( pix1[0] - pix2[0] ); i_sum += abs ( pix1[0] - pix2[0] );
...@@ -52,7 +52,7 @@ main () ...@@ -52,7 +52,7 @@ main ()
__asm__ volatile (""); __asm__ volatile ("");
} }
if (foo (X, Y, 16) != 32512) if (foo (X, Y, 16) != 32512 + 5)
abort (); abort ();
return 0; return 0;
......
...@@ -4100,12 +4100,8 @@ get_initial_defs_for_reduction (slp_tree slp_node, ...@@ -4100,12 +4100,8 @@ get_initial_defs_for_reduction (slp_tree slp_node,
unsigned HOST_WIDE_INT nunits; unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector; unsigned j, number_of_places_left_in_vector;
tree vector_type; tree vector_type;
tree vop; unsigned int group_size = stmts.length ();
int group_size = stmts.length (); unsigned int i;
unsigned int vec_num, i;
unsigned number_of_copies = 1;
vec<tree> voprnds;
voprnds.create (number_of_vectors);
struct loop *loop; struct loop *loop;
auto_vec<tree, 16> permute_results; auto_vec<tree, 16> permute_results;
...@@ -4138,115 +4134,78 @@ get_initial_defs_for_reduction (slp_tree slp_node, ...@@ -4138,115 +4134,78 @@ get_initial_defs_for_reduction (slp_tree slp_node,
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
nunits = group_size; nunits = group_size;
number_of_copies = nunits * number_of_vectors / group_size;
number_of_places_left_in_vector = nunits; number_of_places_left_in_vector = nunits;
bool constant_p = true; bool constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1); tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits); elts.quick_grow (nunits);
for (j = 0; j < number_of_copies; j++) for (j = 0; j < nunits * number_of_vectors; ++j)
{ {
for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--) tree op;
{ i = j % group_size;
tree op; stmt_vinfo = stmts[i];
/* Get the def before the loop. In reduction chain we have only
one initial value. */
if ((j != (number_of_copies - 1)
|| (reduc_chain && i != 0))
&& neutral_op)
op = neutral_op;
else
op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
/* Create 'vect_ = {op0,op1,...,opn}'. */ /* Get the def before the loop. In reduction chain we have only
number_of_places_left_in_vector--; one initial value. Else we have as many as PHIs in the group. */
elts[number_of_places_left_in_vector] = op; if (reduc_chain)
if (!CONSTANT_CLASS_P (op)) op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
constant_p = false; else if (((vec_oprnds->length () + 1) * nunits
- number_of_places_left_in_vector >= group_size)
&& neutral_op)
op = neutral_op;
else
op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
if (number_of_places_left_in_vector == 0) /* Create 'vect_ = {op0,op1,...,opn}'. */
{ number_of_places_left_in_vector--;
gimple_seq ctor_seq = NULL; elts[nunits - number_of_places_left_in_vector - 1] = op;
tree init; if (!CONSTANT_CLASS_P (op))
if (constant_p && !neutral_op constant_p = false;
? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
: known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) if (number_of_places_left_in_vector == 0)
/* Build the vector directly from ELTS. */ {
init = gimple_build_vector (&ctor_seq, &elts); gimple_seq ctor_seq = NULL;
else if (neutral_op) tree init;
{ if (constant_p && !neutral_op
/* Build a vector of the neutral value and shift the ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
other elements into place. */ : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
init = gimple_build_vector_from_val (&ctor_seq, vector_type, /* Build the vector directly from ELTS. */
neutral_op); init = gimple_build_vector (&ctor_seq, &elts);
int k = nunits; else if (neutral_op)
while (k > 0 && elts[k - 1] == neutral_op) {
k -= 1; /* Build a vector of the neutral value and shift the
while (k > 0) other elements into place. */
{ init = gimple_build_vector_from_val (&ctor_seq, vector_type,
k -= 1; neutral_op);
init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, int k = nunits;
vector_type, init, elts[k]); while (k > 0 && elts[k - 1] == neutral_op)
} k -= 1;
} while (k > 0)
else
{ {
/* First time round, duplicate ELTS to fill the k -= 1;
required number of vectors, then cherry pick the init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
appropriate result for each iteration. */ vector_type, init, elts[k]);
if (vec_oprnds->is_empty ())
duplicate_and_interleave (&ctor_seq, vector_type, elts,
number_of_vectors,
permute_results);
init = permute_results[number_of_vectors - j - 1];
} }
if (ctor_seq != NULL) }
gsi_insert_seq_on_edge_immediate (pe, ctor_seq); else
voprnds.quick_push (init);
number_of_places_left_in_vector = nunits;
elts.new_vector (vector_type, nunits, 1);
elts.quick_grow (nunits);
constant_p = true;
}
}
}
/* Since the vectors are created in the reverse order, we should invert
them. */
vec_num = voprnds.length ();
for (j = vec_num; j != 0; j--)
{
vop = voprnds[j - 1];
vec_oprnds->quick_push (vop);
}
voprnds.release ();
/* In case that VF is greater than the unrolling factor needed for the SLP
group of stmts, NUMBER_OF_VECTORS to be created is greater than
NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
to replicate the vectors. */
tree neutral_vec = NULL;
while (number_of_vectors > vec_oprnds->length ())
{
if (neutral_op)
{
if (!neutral_vec)
{ {
gimple_seq ctor_seq = NULL; /* First time round, duplicate ELTS to fill the
neutral_vec = gimple_build_vector_from_val required number of vectors, then cherry pick the
(&ctor_seq, vector_type, neutral_op); appropriate result for each iteration. */
if (ctor_seq != NULL) if (vec_oprnds->is_empty ())
gsi_insert_seq_on_edge_immediate (pe, ctor_seq); duplicate_and_interleave (&ctor_seq, vector_type, elts,
number_of_vectors,
permute_results);
init = permute_results[number_of_vectors - j - 1];
} }
vec_oprnds->quick_push (neutral_vec); if (ctor_seq != NULL)
} gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
else vec_oprnds->quick_push (init);
{
for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++) number_of_places_left_in_vector = nunits;
vec_oprnds->quick_push (vop); elts.new_vector (vector_type, nunits, 1);
} elts.quick_grow (nunits);
constant_p = true;
}
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment