Commit be43a887 by Richard Biener Committed by Richard Biener

re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision])

2018-10-24  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87105
	* tree-vect-data-refs.c (vect_analyze_group_access_1): Adjust
	dump classification.
	(vect_analyze_data_ref_accesses): Handle duplicate loads and
	stores by splitting the affected group after the fact.
	* tree-vect-slp.c (vect_build_slp_tree_2): Dump when we
	fail the SLP build because of size constraints.

	* gcc.dg/vect/bb-slp-39.c: New testcase.
	* gfortran.dg/vect/pr83232.f90: Un-XFAIL.

From-SVN: r265457
parent dc6b6330
2018-10-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/87105
* tree-vect-data-refs.c (vect_analyze_group_access_1): Adjust
dump classification.
(vect_analyze_data_ref_accesses): Handle duplicate loads and
stores by splitting the affected group after the fact.
* tree-vect-slp.c (vect_build_slp_tree_2): Dump when we
fail the SLP build because of size constraints.
2018-10-24 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* configure.ac (gcc_cv_ld_aligned_shf_merge): New test.
2018-10-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/87105
* gcc.dg/vect/bb-slp-39.c: New testcase.
* gfortran.dg/vect/pr83232.f90: Un-XFAIL.
2018-10-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/84013
* gcc.dg/tree-ssa/restrict-9.c: New testcase.
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_double } */
/* Global array written by foo; only elements 0 and 1 are accessed.  */
double x[1024];
/* Write the adjacent pair x[0]/x[1] three times, with a store through P
   between consecutive rounds.  The same two locations are therefore
   stored to repeatedly within one basic block; the dg-final scan
   following this function expects three SLP instances to be vectorized.  */
void foo (double *p)
{
/* Round 1: plain constant stores to the pair.  */
x[0] = 1.;
x[1] = 2.;
/* P is an arbitrary pointer, so this store is treated as possibly
   aliasing x (see the original note on the line).  */
*p = 7.; // aliasing store
/* Round 2: load, increment and store back each element, in index order.  */
x[0] = x[0] + 1;
x[1] = x[1] + 1;
*p = 8.; // aliasing store
/* Round 3: same update as round 2, but element 1 is written before
   element 0.  */
x[1] = x[1] + 1;
x[0] = x[0] + 1;
}
/* See that we vectorize three SLP instances. */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp2" } } */
......@@ -27,7 +27,5 @@
call foo (Einc)
END SUBROUTINE
! We should vectorize (1) and (2)
! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } }
! We fail to vectorize at (3), this can be fixed in the future
! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } }
! We should vectorize (1), (2) and (3)
! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" } }
......@@ -2472,7 +2472,7 @@ vect_analyze_group_access_1 (dr_vec_info *dr_info)
}
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
dump_printf_loc (MSG_NOTE, vect_location,
"Two or more load stmts share the same dr.\n");
/* For load use the same data-ref load. */
......@@ -2838,6 +2838,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
determining what dependencies are reversed. */
vec<data_reference_p> datarefs_copy = datarefs.copy ();
datarefs_copy.qsort (dr_group_sort_cmp);
hash_set<stmt_vec_info> to_fixup;
/* Build the interleaving chains. */
for (i = 0; i < datarefs_copy.length () - 1;)
......@@ -2920,36 +2921,32 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
{
gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
< gimple_uid (DR_STMT (drb)));
/* ??? For now we simply "drop" the later reference which is
otherwise the same rather than finishing off this group.
In the end we'd want to re-process duplicates forming
multiple groups from the refs, likely by just collecting
all candidates (including duplicates and split points
below) in a vector and then process them together. */
continue;
/* Simply link in duplicates and fix up the chain below. */
}
/* If init_b == init_a + the size of the type * k, we have an
interleaving, and DRA is accessed before DRB. */
HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
if (type_size_a == 0
|| (init_b - init_a) % type_size_a != 0)
break;
/* If we have a store, the accesses are adjacent. This splits
groups into chunks we support (we don't support vectorization
of stores with gaps). */
if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
break;
/* If the step (if not zero or non-constant) is greater than the
difference between data-refs' inits this splits groups into
suitable sizes. */
if (tree_fits_shwi_p (DR_STEP (dra)))
else
{
HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
if (step != 0 && step <= (init_b - init_a))
/* If init_b == init_a + the size of the type * k, we have an
interleaving, and DRA is accessed before DRB. */
HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
if (type_size_a == 0
|| (init_b - init_a) % type_size_a != 0)
break;
/* If we have a store, the accesses are adjacent. This splits
groups into chunks we support (we don't support vectorization
of stores with gaps). */
if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
break;
/* If the step (if not zero or non-constant) is greater than the
difference between data-refs' inits this splits groups into
suitable sizes. */
if (tree_fits_shwi_p (DR_STEP (dra)))
{
HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
if (step != 0 && step <= (init_b - init_a))
break;
}
}
if (dump_enabled_p ())
......@@ -2968,9 +2965,64 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
lastinfo = stmtinfo_b;
if (init_b == init_prev
&& !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
&& dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"Queuing group with duplicate access for fixup\n");
}
}
/* Fixup groups with duplicate entries by splitting them. */
while (1)
{
hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
if (!(it != to_fixup.end ()))
break;
stmt_vec_info grp = *it;
to_fixup.remove (grp);
/* Find the earliest duplicate group member. */
unsigned first_duplicate = -1u;
stmt_vec_info next, g = grp;
while ((next = DR_GROUP_NEXT_ELEMENT (g)))
{
if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr)
== DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
&& gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
g = next;
}
if (first_duplicate == -1U)
continue;
/* Then move all stmts after the first duplicate to a new group.
Note this is a heuristic but one with the property that *it
is fixed up completely. */
g = grp;
stmt_vec_info newgroup = NULL, ng;
while ((next = DR_GROUP_NEXT_ELEMENT (g)))
{
if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
{
DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
if (!newgroup)
newgroup = next;
else
DR_GROUP_NEXT_ELEMENT (ng) = next;
ng = next;
DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
}
else
g = DR_GROUP_NEXT_ELEMENT (g);
}
DR_GROUP_NEXT_ELEMENT (ng) = NULL;
/* Fixup the new group which still may contain duplicates. */
to_fixup.add (newgroup);
}
FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
{
dr_vec_info *dr_info = vinfo->lookup_dr (dr);
......
......@@ -1191,6 +1191,10 @@ vect_build_slp_tree_2 (vec_info *vinfo,
if (++this_tree_size > max_tree_size)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
vect_location,
"Build SLP failed: SLP tree too large\n");
FOR_EACH_VEC_ELT (children, j, child)
vect_free_slp_tree (child, false);
vect_free_oprnd_info (oprnds_info);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment