Commit a1f072e2 by Richard Biener Committed by Richard Biener

re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision])

2018-10-26  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87105
	* tree-vectorizer.h (_slp_tree::refcnt): New member.
	* tree-vect-slp.c (vect_free_slp_tree): Decrement and honor
	refcnt.
	(vect_create_new_slp_node): Initialize refcnt to one.
	(bst_traits): Move.
	(scalar_stmts_set_t, bst_fail): Remove.
	(vect_build_slp_tree_2): Add bst_map argument and adjust calls.
	(vect_build_slp_tree): Add bst_map argument and lookup
	already created SLP nodes.
	(vect_print_slp_tree): Handle a SLP graph, print SLP node
	addresses.
	(vect_slp_rearrange_stmts): Handle a SLP graph.
	(vect_analyze_slp_instance): Adjust and free SLP nodes from
	the CSE map.  Fix indenting.
	(vect_schedule_slp_instance): Add short-cut.

	* g++.dg/vect/slp-pr87105.cc: Adjust.
	* gcc.dg/torture/20181024-1.c: New testcase.
	* g++.dg/opt/20181025-1.C: Likewise.

From-SVN: r265522
parent 74ca1c01
2018-10-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/87105
* tree-vectorizer.h (_slp_tree::refcnt): New member.
* tree-vect-slp.c (vect_free_slp_tree): Decrement and honor
refcnt.
(vect_create_new_slp_node): Initialize refcnt to one.
(bst_traits): Move.
(scalar_stmts_set_t, bst_fail): Remove.
(vect_build_slp_tree_2): Add bst_map argument and adjust calls.
(vect_build_slp_tree): Add bst_map argument and lookup
already created SLP nodes.
(vect_print_slp_tree): Handle a SLP graph, print SLP node
addresses.
(vect_slp_rearrange_stmts): Handle a SLP graph.
(vect_analyze_slp_instance): Adjust and free SLP nodes from
the CSE map. Fix indenting.
(vect_schedule_slp_instance): Add short-cut.
2018-10-26 Martin Liska <mliska@suse.cz> 2018-10-26 Martin Liska <mliska@suse.cz>
PR testsuite/86158 PR testsuite/86158
2018-10-26 Richard Biener <rguenther@suse.de> 2018-10-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/87105
* g++.dg/vect/slp-pr87105.cc: Adjust.
* gcc.dg/torture/20181024-1.c: New testcase.
* g++.dg/opt/20181025-1.C: Likewise.
2018-10-26 Richard Biener <rguenther@suse.de>
PR testsuite/87754 PR testsuite/87754
* g++.dg/lto/odr-1_0.C: Fix pattern. * g++.dg/lto/odr-1_0.C: Fix pattern.
......
// { dg-do compile }
// { dg-options "-Ofast" }
// Minimal vector-like class template for this testcase.  It carries only the
// declarations norm_sqr needs; begin () is declared but no definition of it
// is visible here.
template <typename Number>
class Vector {
typedef Number value_type;
typedef const value_type *const_iterator;
// Sum-of-squares reduction over the elements.
Number norm_sqr () const;
// Start of the element range that norm_sqr walks.
const_iterator begin () const;
// Used by norm_sqr to compute the end pointer as begin () + (dim/4)*4.
unsigned int dim;
};
// Return the square of X.
template <typename Number>
static inline Number
local_sqr (const Number x)
{
return x*x;
}
// Accumulate squared elements from begin () up to begin () + (dim/4)*4.
// Four accumulators are declared but the loop body only updates sum0 and
// sum1, consuming two elements per iteration; sum2 and sum3 stay zero and
// merely take part in the final addition.
// NOTE(review): this looks like a reduced compiler testcase, so the odd
// shape is presumably intentional -- confirm before "cleaning it up".
template <typename Number>
Number
Vector<Number>::norm_sqr () const
{
Number sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
// eptr lies a multiple of 4 elements past begin (), i.e. an even distance,
// so stepping two elements per iteration lands exactly on eptr.
const_iterator ptr = begin(), eptr = ptr + (dim/4)*4;
while (ptr!=eptr)
{
sum0 += ::local_sqr(*ptr++);
sum1 += ::local_sqr(*ptr++);
}
return sum0+sum1+sum2+sum3;
}
// Explicit instantiation so the member function bodies are compiled for double.
template class Vector<double>;
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// { dg-require-effective-target c++11 } // { dg-require-effective-target c++11 }
// { dg-require-effective-target vect_double } // { dg-require-effective-target vect_double }
// For MIN/MAX recognition // For MIN/MAX recognition
// { dg-additional-options "-ffast-math -fvect-cost-model" } // { dg-additional-options "-ffast-math" }
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
...@@ -99,6 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept { ...@@ -99,6 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
// We should have if-converted everything down to straight-line code // We should have if-converted everything down to straight-line code
// { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } } // { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } }
// We fail to elide an earlier store which makes us not handle a later // { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" } }
// duplicate one for vectorization. // It's a bit awkward to detect that all stores were vectorized but the
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail *-*-* } } } // following more or less does the trick
// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" } }
/* { dg-do compile } */
/* { dg-additional-options "-march=core-avx2" { target { x86_64-*-* i?86-*-* } } } */
/* Indices of the per-cell entries in the grid.  The enumerators count up from
   C = 0, so N_CELL_ENTRIES (20) is the number of entries per cell; foo uses
   it as the stride between consecutive cells.  */
typedef enum {
C = 0, N, S, E, W, T, B, NE, NW, SE, SW, NT, NB, ST, SB, ET, EB, WT, WB, FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
/* Flat array of doubles sized for 130*100*100 cells of N_CELL_ENTRIES each.  */
typedef double LBM_Grid[(130)*100*100*N_CELL_ENTRIES];
/* Walk one 100*100 slab of cells in SRCGRID with stride N_CELL_ENTRIES.  For
   each cell, sum the 19 entries C..WB of the cell located one slab
   (N_CELL_ENTRIES*100*100 elements) further on, then store new values into
   the C and N entries of the current cell.
   NOTE(review): rho2, ux, uy and py are read without ever being assigned, and
   ux1, uy1, uz1, ux2, uy2, uz2 are never used.  This is presumably a reduced
   compiler testcase, so confirm before changing any of it.  */
void foo( LBM_Grid srcGrid )
{
double ux , uy , uz , rho , ux1, uy1, uz1, rho1, ux2, uy2, uz2, rho2, u2, px, py;
int i;
for( i = 0;
i < (N_CELL_ENTRIES*( 100*100));
i += N_CELL_ENTRIES )
{
/* Sum of all 19 entries (C through WB) of the cell one slab ahead.  */
rho1 = + ((srcGrid)[((C)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((N)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((S)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((E)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((W)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((T)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((B)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NE)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NW)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((SE)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((SW)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NT)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((ST)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((SB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((ET)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((EB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((WT)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((WB)+N_CELL_ENTRIES*( 100*100))+(i)]);
/* rho2 has no assignment anywhere in this function.  */
rho = 2.0*rho1 - rho2;
/* Maps the cell index modulo 100 (0..99) onto the range -1.0..1.0.  */
px = (((i / N_CELL_ENTRIES) % 100) / (0.5*(100-1))) - 1.0;
/* py, ux and uy below are likewise read without a prior assignment.  */
uz = 0.01 * (1.0-px*px) * (1.0-py*py);
u2 = 1.5 * (ux*ux + uy*uy + uz*uz);
/* Store the results into the C and N entries of the current cell.  */
(((srcGrid)[((C))+(i)])) = (1.0/ 3.0)*rho*(1.0 - u2);
(((srcGrid)[((N))+(i)])) = (1.0/18.0)*rho*(1.0 + uy*(4.5*uy + 3.0) - u2);
}
}
...@@ -57,6 +57,9 @@ vect_free_slp_tree (slp_tree node, bool final_p) ...@@ -57,6 +57,9 @@ vect_free_slp_tree (slp_tree node, bool final_p)
int i; int i;
slp_tree child; slp_tree child;
if (--node->refcnt != 0)
return;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_free_slp_tree (child, final_p); vect_free_slp_tree (child, final_p);
...@@ -82,7 +85,6 @@ vect_free_slp_tree (slp_tree node, bool final_p) ...@@ -82,7 +85,6 @@ vect_free_slp_tree (slp_tree node, bool final_p)
free (node); free (node);
} }
/* Free the memory allocated for the SLP instance. FINAL_P is true if we /* Free the memory allocated for the SLP instance. FINAL_P is true if we
have vectorized the instance or if we have made a final decision not have vectorized the instance or if we have made a final decision not
to vectorize the statements in any way. */ to vectorize the statements in any way. */
...@@ -126,6 +128,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts) ...@@ -126,6 +128,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
SLP_TREE_LOAD_PERMUTATION (node) = vNULL; SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false; SLP_TREE_TWO_OPERATORS (node) = false;
SLP_TREE_DEF_TYPE (node) = vect_internal_def; SLP_TREE_DEF_TYPE (node) = vect_internal_def;
node->refcnt = 1;
unsigned i; unsigned i;
FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info) FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
...@@ -1021,9 +1024,6 @@ bst_traits::equal (value_type existing, value_type candidate) ...@@ -1021,9 +1024,6 @@ bst_traits::equal (value_type existing, value_type candidate)
return true; return true;
} }
typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;
typedef hash_map <vec <gimple *>, slp_tree, typedef hash_map <vec <gimple *>, slp_tree,
simple_hashmap_traits <bst_traits, slp_tree> > simple_hashmap_traits <bst_traits, slp_tree> >
scalar_stmts_to_slp_tree_map_t; scalar_stmts_to_slp_tree_map_t;
...@@ -1034,30 +1034,33 @@ vect_build_slp_tree_2 (vec_info *vinfo, ...@@ -1034,30 +1034,33 @@ vect_build_slp_tree_2 (vec_info *vinfo,
poly_uint64 *max_nunits, poly_uint64 *max_nunits,
vec<slp_tree> *loads, vec<slp_tree> *loads,
bool *matches, unsigned *npermutes, unsigned *tree_size, bool *matches, unsigned *npermutes, unsigned *tree_size,
unsigned max_tree_size); unsigned max_tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map);
static slp_tree static slp_tree
vect_build_slp_tree (vec_info *vinfo, vect_build_slp_tree (vec_info *vinfo,
vec<stmt_vec_info> stmts, unsigned int group_size, vec<stmt_vec_info> stmts, unsigned int group_size,
poly_uint64 *max_nunits, vec<slp_tree> *loads, poly_uint64 *max_nunits, vec<slp_tree> *loads,
bool *matches, unsigned *npermutes, unsigned *tree_size, bool *matches, unsigned *npermutes, unsigned *tree_size,
unsigned max_tree_size) unsigned max_tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map)
{ {
if (bst_fail->contains (stmts)) if (slp_tree *leader = bst_map->get (stmts))
return NULL;
slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
loads, matches, npermutes, tree_size,
max_tree_size);
/* When SLP build fails for stmts record this, otherwise SLP build
can be exponential in time when we allow to construct parts from
scalars, see PR81723. */
if (! res)
{ {
vec <stmt_vec_info> x; if (dump_enabled_p ())
x.create (stmts.length ()); dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
x.splice (stmts); *leader ? "" : "failed ", *leader);
bst_fail->add (x); if (*leader)
(*leader)->refcnt++;
return *leader;
} }
slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
loads, matches, npermutes, tree_size,
max_tree_size, bst_map);
/* Keep a reference for the bst_map use. */
if (res)
res->refcnt++;
bst_map->put (stmts.copy (), res);
return res; return res;
} }
...@@ -1074,7 +1077,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, ...@@ -1074,7 +1077,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
poly_uint64 *max_nunits, poly_uint64 *max_nunits,
vec<slp_tree> *loads, vec<slp_tree> *loads,
bool *matches, unsigned *npermutes, unsigned *tree_size, bool *matches, unsigned *npermutes, unsigned *tree_size,
unsigned max_tree_size) unsigned max_tree_size,
scalar_stmts_to_slp_tree_map_t *bst_map)
{ {
unsigned nops, i, this_tree_size = 0; unsigned nops, i, this_tree_size = 0;
poly_uint64 this_max_nunits = *max_nunits; poly_uint64 this_max_nunits = *max_nunits;
...@@ -1205,7 +1209,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, ...@@ -1205,7 +1209,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
group_size, &this_max_nunits, group_size, &this_max_nunits,
&this_loads, matches, npermutes, &this_loads, matches, npermutes,
&this_tree_size, &this_tree_size,
max_tree_size)) != NULL) max_tree_size, bst_map)) != NULL)
{ {
/* If we have all children of child built up from scalars then just /* If we have all children of child built up from scalars then just
throw that away and build it up this node from scalars. */ throw that away and build it up this node from scalars. */
...@@ -1348,7 +1352,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, ...@@ -1348,7 +1352,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
group_size, &this_max_nunits, group_size, &this_max_nunits,
&this_loads, tem, npermutes, &this_loads, tem, npermutes,
&this_tree_size, &this_tree_size,
max_tree_size)) != NULL) max_tree_size, bst_map)) != NULL)
{ {
/* ... so if successful we can apply the operand swapping /* ... so if successful we can apply the operand swapping
to the GIMPLE IL. This is necessary because for example to the GIMPLE IL. This is necessary because for example
...@@ -1441,21 +1445,37 @@ fail: ...@@ -1441,21 +1445,37 @@ fail:
static void static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
slp_tree node) slp_tree node, hash_set<slp_tree> &visited)
{ {
int i; int i;
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
slp_tree child; slp_tree child;
dump_printf_loc (dump_kind, loc, "node%s\n", if (visited.add (node))
return;
dump_printf_loc (dump_kind, loc, "node%s %p\n",
SLP_TREE_DEF_TYPE (node) != vect_internal_def SLP_TREE_DEF_TYPE (node) != vect_internal_def
? " (external)" : ""); ? " (external)" : "", node);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt); dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
if (SLP_TREE_CHILDREN (node).is_empty ())
return;
dump_printf_loc (dump_kind, loc, "\tchildren");
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_print_slp_tree (dump_kind, loc, child); dump_printf (dump_kind, " %p", (void *)child);
dump_printf (dump_kind, "\n");
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_print_slp_tree (dump_kind, loc, child, visited);
} }
static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
slp_tree node)
{
hash_set<slp_tree> visited;
vect_print_slp_tree (dump_kind, loc, node, visited);
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID). /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
...@@ -1509,15 +1529,19 @@ vect_mark_slp_stmts_relevant (slp_tree node) ...@@ -1509,15 +1529,19 @@ vect_mark_slp_stmts_relevant (slp_tree node)
static void static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
vec<unsigned> permutation) vec<unsigned> permutation,
hash_set<slp_tree> &visited)
{ {
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
vec<stmt_vec_info> tmp_stmts; vec<stmt_vec_info> tmp_stmts;
unsigned int i; unsigned int i;
slp_tree child; slp_tree child;
if (visited.add (node))
return;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_slp_rearrange_stmts (child, group_size, permutation); vect_slp_rearrange_stmts (child, group_size, permutation, visited);
gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
tmp_stmts.create (group_size); tmp_stmts.create (group_size);
...@@ -1578,8 +1602,9 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn) ...@@ -1578,8 +1602,9 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
statements in the nodes is not important unless they are memory statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */ according to the order of the loads. */
hash_set<slp_tree> visited;
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
node->load_permutation); node->load_permutation, visited);
/* We are done, no actual permutations need to be generated. */ /* We are done, no actual permutations need to be generated. */
poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
...@@ -1889,12 +1914,18 @@ vect_analyze_slp_instance (vec_info *vinfo, ...@@ -1889,12 +1914,18 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */ /* Build the tree for the SLP instance. */
bool *matches = XALLOCAVEC (bool, group_size); bool *matches = XALLOCAVEC (bool, group_size);
unsigned npermutes = 0; unsigned npermutes = 0;
bst_fail = new scalar_stmts_set_t (); scalar_stmts_to_slp_tree_map_t *bst_map
= new scalar_stmts_to_slp_tree_map_t ();
poly_uint64 max_nunits = nunits; poly_uint64 max_nunits = nunits;
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, &loads, matches, &npermutes, &max_nunits, &loads, matches, &npermutes,
NULL, max_tree_size); NULL, max_tree_size, bst_map);
delete bst_fail; /* The map keeps a reference on SLP nodes built, release that. */
for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
it != bst_map->end (); ++it)
if ((*it).second)
vect_free_slp_tree ((*it).second, false);
delete bst_map;
if (node != NULL) if (node != NULL)
{ {
/* Calculate the unrolling factor based on the smallest type. */ /* Calculate the unrolling factor based on the smallest type. */
...@@ -1924,109 +1955,109 @@ vect_analyze_slp_instance (vec_info *vinfo, ...@@ -1924,109 +1955,109 @@ vect_analyze_slp_instance (vec_info *vinfo,
} }
else else
{ {
/* Create a new SLP instance. */ /* Create a new SLP instance. */
new_instance = XNEW (struct _slp_instance); new_instance = XNEW (struct _slp_instance);
SLP_INSTANCE_TREE (new_instance) = node; SLP_INSTANCE_TREE (new_instance) = node;
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_LOADS (new_instance) = loads; SLP_INSTANCE_LOADS (new_instance) = loads;
/* Compute the load permutation. */ /* Compute the load permutation. */
slp_tree load_node; slp_tree load_node;
bool loads_permuted = false; bool loads_permuted = false;
FOR_EACH_VEC_ELT (loads, i, load_node) FOR_EACH_VEC_ELT (loads, i, load_node)
{
vec<unsigned> load_permutation;
int j;
stmt_vec_info load_info;
bool this_load_permuted = false;
load_permutation.create (group_size);
stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
(SLP_TREE_SCALAR_STMTS (load_node)[0]);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
{ {
int load_place = vect_get_place_in_interleaving_chain vec<unsigned> load_permutation;
(load_info, first_stmt_info); int j;
gcc_assert (load_place != -1); stmt_vec_info load_info;
if (load_place != j) bool this_load_permuted = false;
this_load_permuted = true; load_permutation.create (group_size);
load_permutation.safe_push (load_place); stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
(SLP_TREE_SCALAR_STMTS (load_node)[0]);
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
{
int load_place = vect_get_place_in_interleaving_chain
(load_info, first_stmt_info);
gcc_assert (load_place != -1);
if (load_place != j)
this_load_permuted = true;
load_permutation.safe_push (load_place);
}
if (!this_load_permuted
/* The load requires permutation when unrolling exposes
a gap either because the group is larger than the SLP
group-size or because there is a gap between the groups. */
&& (known_eq (unrolling_factor, 1U)
|| (group_size == DR_GROUP_SIZE (first_stmt_info)
&& DR_GROUP_GAP (first_stmt_info) == 0)))
{
load_permutation.release ();
continue;
}
SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
loads_permuted = true;
} }
if (!this_load_permuted
/* The load requires permutation when unrolling exposes if (loads_permuted)
a gap either because the group is larger than the SLP
group-size or because there is a gap between the groups. */
&& (known_eq (unrolling_factor, 1U)
|| (group_size == DR_GROUP_SIZE (first_stmt_info)
&& DR_GROUP_GAP (first_stmt_info) == 0)))
{ {
load_permutation.release (); if (!vect_supported_load_permutation_p (new_instance))
continue; {
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: unsupported load "
"permutation %G", stmt_info->stmt);
vect_free_slp_instance (new_instance, false);
return false;
}
} }
SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
loads_permuted = true;
}
if (loads_permuted)
{
if (!vect_supported_load_permutation_p (new_instance))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: unsupported load "
"permutation %G", stmt_info->stmt);
vect_free_slp_instance (new_instance, false);
return false;
}
}
/* If the loads and stores can be handled with load/store-lan /* If the loads and stores can be handled with load/store-lan
instructions do not generate this SLP instance. */ instructions do not generate this SLP instance. */
if (is_a <loop_vec_info> (vinfo) if (is_a <loop_vec_info> (vinfo)
&& loads_permuted && loads_permuted
&& dr && vect_store_lanes_supported (vectype, group_size, false)) && dr && vect_store_lanes_supported (vectype, group_size, false))
{
slp_tree load_node;
FOR_EACH_VEC_ELT (loads, i, load_node)
{ {
stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT slp_tree load_node;
(SLP_TREE_SCALAR_STMTS (load_node)[0]); FOR_EACH_VEC_ELT (loads, i, load_node)
/* Use SLP for strided accesses (or if we can't load-lanes). */ {
if (STMT_VINFO_STRIDED_P (stmt_vinfo) stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
|| ! vect_load_lanes_supported (SLP_TREE_SCALAR_STMTS (load_node)[0]);
(STMT_VINFO_VECTYPE (stmt_vinfo), /* Use SLP for strided accesses (or if we can't load-lanes). */
DR_GROUP_SIZE (stmt_vinfo), false)) if (STMT_VINFO_STRIDED_P (stmt_vinfo)
break; || ! vect_load_lanes_supported
(STMT_VINFO_VECTYPE (stmt_vinfo),
DR_GROUP_SIZE (stmt_vinfo), false))
break;
}
if (i == loads.length ())
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Built SLP cancelled: can use "
"load/store-lanes\n");
vect_free_slp_instance (new_instance, false);
return false;
}
} }
if (i == loads.length ())
vinfo->slp_instances.safe_push (new_instance);
if (dump_enabled_p ())
{ {
if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location,
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "Final SLP tree for instance:\n");
"Built SLP cancelled: can use " vect_print_slp_tree (MSG_NOTE, vect_location, node);
"load/store-lanes\n");
vect_free_slp_instance (new_instance, false);
return false;
} }
}
vinfo->slp_instances.safe_push (new_instance); return true;
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"Final SLP tree for instance:\n");
vect_print_slp_tree (MSG_NOTE, vect_location, node);
} }
return true;
}
} }
else else
{ {
/* Failed to SLP. */ /* Failed to SLP. */
/* Free the allocated memory. */ /* Free the allocated memory. */
scalar_stmts.release (); scalar_stmts.release ();
loads.release (); loads.release ();
} }
/* For basic block SLP, try to break the group up into multiples of the /* For basic block SLP, try to break the group up into multiples of the
...@@ -3749,8 +3780,13 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, ...@@ -3749,8 +3780,13 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return; return;
/* See if we have already vectorized the node in the graph of the
SLP instance. */
if (SLP_TREE_VEC_STMTS (node).exists ())
return;
/* See if we have already vectorized the same set of stmts and reuse their /* See if we have already vectorized the same set of stmts and reuse their
vectorized stmts. */ vectorized stmts across instances. */
if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node))) if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
{ {
SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader)); SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
...@@ -3778,8 +3814,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, ...@@ -3778,8 +3814,7 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
group_size = SLP_INSTANCE_GROUP_SIZE (instance); group_size = SLP_INSTANCE_GROUP_SIZE (instance);
gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0); gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
if (!SLP_TREE_VEC_STMTS (node).exists ()) SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, dump_printf_loc (MSG_NOTE, vect_location,
......
...@@ -130,6 +130,8 @@ struct _slp_tree { ...@@ -130,6 +130,8 @@ struct _slp_tree {
scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
divided by vector size. */ divided by vector size. */
unsigned int vec_stmts_size; unsigned int vec_stmts_size;
/* Reference count in the SLP graph. */
unsigned int refcnt;
/* Whether the scalar computations use two different operators. */ /* Whether the scalar computations use two different operators. */
bool two_operators; bool two_operators;
/* The DEF type of this node. */ /* The DEF type of this node. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment