Commit 05677565, authored and committed by Richard Biener

re PR tree-optimization/88315 (SAD and DOT_PROD SLP reductions with initial value != 0 create wrong code)

re PR tree-optimization/88315 (SAD and DOT_PROD SLP reductions with initial value != 0 create wrong code)

2018-12-04  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/88315
	* tree-vect-loop.c (get_initial_defs_for_reduction): Simplify
	and fix initialization vector for SAD and DOT_PROD SLP reductions.

	* gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial
	initial value.

From-SVN: r266771
parent 4b8caa08
2018-12-04 Richard Biener <rguenther@suse.de>
PR tree-optimization/88315
* tree-vect-loop.c (get_initial_defs_for_reduction): Simplify
and fix initialization vector for SAD and DOT_PROD SLP reductions.
2018-12-03 Sandra Loosemore <sandra@codesourcery.com>
PR c/59039
2018-12-04 Richard Biener <rguenther@suse.de>
PR tree-optimization/88315
* gcc.dg/vect/slp-reduc-sad.c: Adjust to provide non-trivial
initial value.
2018-12-03 Jakub Jelinek <jakub@redhat.com>
PR middle-end/64242
......
......@@ -12,7 +12,7 @@ extern void abort (void);
int __attribute__((noinline,noclone))
foo (uint8_t *pix1, uint8_t *pix2, int i_stride_pix2)
{
int i_sum = 0;
int i_sum = 5;
for( int y = 0; y < 16; y++ )
{
i_sum += abs ( pix1[0] - pix2[0] );
......@@ -52,7 +52,7 @@ main ()
__asm__ volatile ("");
}
if (foo (X, Y, 16) != 32512)
if (foo (X, Y, 16) != 32512 + 5)
abort ();
return 0;
......
......@@ -4100,12 +4100,8 @@ get_initial_defs_for_reduction (slp_tree slp_node,
unsigned HOST_WIDE_INT nunits;
unsigned j, number_of_places_left_in_vector;
tree vector_type;
tree vop;
int group_size = stmts.length ();
unsigned int vec_num, i;
unsigned number_of_copies = 1;
vec<tree> voprnds;
voprnds.create (number_of_vectors);
unsigned int group_size = stmts.length ();
unsigned int i;
struct loop *loop;
auto_vec<tree, 16> permute_results;
......@@ -4138,115 +4134,78 @@ get_initial_defs_for_reduction (slp_tree slp_node,
if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
nunits = group_size;
number_of_copies = nunits * number_of_vectors / group_size;
number_of_places_left_in_vector = nunits;
bool constant_p = true;
tree_vector_builder elts (vector_type, nunits, 1);
elts.quick_grow (nunits);
for (j = 0; j < number_of_copies; j++)
for (j = 0; j < nunits * number_of_vectors; ++j)
{
for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
{
tree op;
/* Get the def before the loop. In reduction chain we have only
one initial value. */
if ((j != (number_of_copies - 1)
|| (reduc_chain && i != 0))
&& neutral_op)
op = neutral_op;
else
op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
tree op;
i = j % group_size;
stmt_vinfo = stmts[i];
/* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--;
elts[number_of_places_left_in_vector] = op;
if (!CONSTANT_CLASS_P (op))
constant_p = false;
/* Get the def before the loop. In reduction chain we have only
one initial value. Else we have as many as PHIs in the group. */
if (reduc_chain)
op = j != 0 ? neutral_op : PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
else if (((vec_oprnds->length () + 1) * nunits
- number_of_places_left_in_vector >= group_size)
&& neutral_op)
op = neutral_op;
else
op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
if (number_of_places_left_in_vector == 0)
{
gimple_seq ctor_seq = NULL;
tree init;
if (constant_p && !neutral_op
? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
: known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
/* Build the vector directly from ELTS. */
init = gimple_build_vector (&ctor_seq, &elts);
else if (neutral_op)
{
/* Build a vector of the neutral value and shift the
other elements into place. */
init = gimple_build_vector_from_val (&ctor_seq, vector_type,
neutral_op);
int k = nunits;
while (k > 0 && elts[k - 1] == neutral_op)
k -= 1;
while (k > 0)
{
k -= 1;
init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
vector_type, init, elts[k]);
}
}
else
/* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--;
elts[nunits - number_of_places_left_in_vector - 1] = op;
if (!CONSTANT_CLASS_P (op))
constant_p = false;
if (number_of_places_left_in_vector == 0)
{
gimple_seq ctor_seq = NULL;
tree init;
if (constant_p && !neutral_op
? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
: known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
/* Build the vector directly from ELTS. */
init = gimple_build_vector (&ctor_seq, &elts);
else if (neutral_op)
{
/* Build a vector of the neutral value and shift the
other elements into place. */
init = gimple_build_vector_from_val (&ctor_seq, vector_type,
neutral_op);
int k = nunits;
while (k > 0 && elts[k - 1] == neutral_op)
k -= 1;
while (k > 0)
{
/* First time round, duplicate ELTS to fill the
required number of vectors, then cherry pick the
appropriate result for each iteration. */
if (vec_oprnds->is_empty ())
duplicate_and_interleave (&ctor_seq, vector_type, elts,
number_of_vectors,
permute_results);
init = permute_results[number_of_vectors - j - 1];
k -= 1;
init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
vector_type, init, elts[k]);
}
if (ctor_seq != NULL)
gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
voprnds.quick_push (init);
number_of_places_left_in_vector = nunits;
elts.new_vector (vector_type, nunits, 1);
elts.quick_grow (nunits);
constant_p = true;
}
}
}
/* Since the vectors are created in the reverse order, we should invert
them. */
vec_num = voprnds.length ();
for (j = vec_num; j != 0; j--)
{
vop = voprnds[j - 1];
vec_oprnds->quick_push (vop);
}
voprnds.release ();
/* In case that VF is greater than the unrolling factor needed for the SLP
group of stmts, NUMBER_OF_VECTORS to be created is greater than
NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
to replicate the vectors. */
tree neutral_vec = NULL;
while (number_of_vectors > vec_oprnds->length ())
{
if (neutral_op)
{
if (!neutral_vec)
}
else
{
gimple_seq ctor_seq = NULL;
neutral_vec = gimple_build_vector_from_val
(&ctor_seq, vector_type, neutral_op);
if (ctor_seq != NULL)
gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
/* First time round, duplicate ELTS to fill the
required number of vectors, then cherry pick the
appropriate result for each iteration. */
if (vec_oprnds->is_empty ())
duplicate_and_interleave (&ctor_seq, vector_type, elts,
number_of_vectors,
permute_results);
init = permute_results[number_of_vectors - j - 1];
}
vec_oprnds->quick_push (neutral_vec);
}
else
{
for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
vec_oprnds->quick_push (vop);
}
if (ctor_seq != NULL)
gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
vec_oprnds->quick_push (init);
number_of_places_left_in_vector = nunits;
elts.new_vector (vector_type, nunits, 1);
elts.quick_grow (nunits);
constant_p = true;
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment