Commit f7300fff by Richard Biener Committed by Richard Biener

re PR tree-optimization/83202 (Try joining operations on consecutive array…

re PR tree-optimization/83202 (Try joining operations on consecutive array elements during tree vectorization)

2017-11-29  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83202
	* tree-vect-slp.c (scalar_stmts_set_t): New typedef.
	(bst_fail): Use it.
	(vect_analyze_slp_cost_1): Add visited set, do not account SLP
	nodes vectorized to the same stmts multiple times.
	(vect_analyze_slp_cost): Allocate a visited set and pass it down.
	(vect_analyze_slp_instance): Adjust.
	(scalar_stmts_to_slp_tree_map_t): New typedef.
	(vect_schedule_slp_instance): Add a map recording the SLP node
	representing the vectorized stmts for a set of scalar stmts.
	Avoid code-generating redundancies.
	(vect_schedule_slp): Allocate map and pass it down.

	* gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase.

From-SVN: r255233
parent d5ed6a87
2017-11-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/83202
* tree-vect-slp.c (scalar_stmts_set_t): New typedef.
(bst_fail): Use it.
(vect_analyze_slp_cost_1): Add visited set, do not account SLP
nodes vectorized to the same stmts multiple times.
(vect_analyze_slp_cost): Allocate a visited set and pass it down.
(vect_analyze_slp_instance): Adjust.
(scalar_stmts_to_slp_tree_map_t): New typedef.
(vect_schedule_slp_instance): Add a map recording the SLP node
representing the vectorized stmts for a set of scalar stmts.
Avoid code-generating redundancies.
(vect_schedule_slp): Allocate map and pass it down.
2017-11-29 Nathan Sidwell <nathan@acm.org> 2017-11-29 Nathan Sidwell <nathan@acm.org>
PR c++/83187 PR c++/83187
2017-11-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/83202
* gcc.dg/vect/costmodel/x86_64/costmodel-pr83202.c: New testcase.
2017-11-29 Nathan Sidwell <nathan@acm.org> 2017-11-29 Nathan Sidwell <nathan@acm.org>
PR c++/83187 PR c++/83187
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_double } */
void test(double data[4][2])
{
for (int i = 0; i < 4; i++)
{
data[i][0] = data[i][0] * data[i][0];
data[i][1] = data[i][1] * data[i][1];
}
}
/* We should vectorize this with SLP and V2DF vectors. */
/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
...@@ -961,7 +961,8 @@ bst_traits::equal (value_type existing, value_type candidate) ...@@ -961,7 +961,8 @@ bst_traits::equal (value_type existing, value_type candidate)
return true; return true;
} }
static hash_set <vec <gimple *>, bst_traits> *bst_fail; typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;
static slp_tree static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo, vect_build_slp_tree_2 (vec_info *vinfo,
...@@ -1674,7 +1675,8 @@ static void ...@@ -1674,7 +1675,8 @@ static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
stmt_vector_for_cost *prologue_cost_vec, stmt_vector_for_cost *prologue_cost_vec,
stmt_vector_for_cost *body_cost_vec, stmt_vector_for_cost *body_cost_vec,
unsigned ncopies_for_cost) unsigned ncopies_for_cost,
scalar_stmts_set_t* visited)
{ {
unsigned i, j; unsigned i, j;
slp_tree child; slp_tree child;
...@@ -1682,11 +1684,18 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, ...@@ -1682,11 +1684,18 @@ vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
tree lhs; tree lhs;
/* If we already costed the exact same set of scalar stmts we're done.
We share the generated vector stmts for those. */
if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
return;
visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
/* Recurse down the SLP tree. */ /* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec, vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
body_cost_vec, ncopies_for_cost); body_cost_vec, ncopies_for_cost, visited);
/* Look at the first scalar stmt to determine the cost. */ /* Look at the first scalar stmt to determine the cost. */
stmt = SLP_TREE_SCALAR_STMTS (node)[0]; stmt = SLP_TREE_SCALAR_STMTS (node)[0];
...@@ -1871,9 +1880,11 @@ vect_analyze_slp_cost (slp_instance instance, void *data) ...@@ -1871,9 +1880,11 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
prologue_cost_vec.create (10); prologue_cost_vec.create (10);
body_cost_vec.create (10); body_cost_vec.create (10);
scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
&prologue_cost_vec, &body_cost_vec, &prologue_cost_vec, &body_cost_vec,
ncopies_for_cost); ncopies_for_cost, visited);
delete visited;
/* Record the prologue costs, which were delayed until we were /* Record the prologue costs, which were delayed until we were
sure that SLP was successful. */ sure that SLP was successful. */
...@@ -2037,7 +2048,7 @@ vect_analyze_slp_instance (vec_info *vinfo, ...@@ -2037,7 +2048,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
/* Build the tree for the SLP instance. */ /* Build the tree for the SLP instance. */
bool *matches = XALLOCAVEC (bool, group_size); bool *matches = XALLOCAVEC (bool, group_size);
unsigned npermutes = 0; unsigned npermutes = 0;
bst_fail = new hash_set <vec <gimple *>, bst_traits> (); bst_fail = new scalar_stmts_set_t ();
node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
&max_nunits, &loads, matches, &npermutes, &max_nunits, &loads, matches, &npermutes,
NULL, max_tree_size); NULL, max_tree_size);
...@@ -3702,12 +3713,15 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, ...@@ -3702,12 +3713,15 @@ vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
return true; return true;
} }
typedef hash_map <vec <gimple *>, slp_tree,
simple_hashmap_traits <bst_traits, slp_tree> >
scalar_stmts_to_slp_tree_map_t;
/* Vectorize SLP instance tree in postorder. */ /* Vectorize SLP instance tree in postorder. */
static bool static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance) vect_schedule_slp_instance (slp_tree node, slp_instance instance,
scalar_stmts_to_slp_tree_map_t *bst_map)
{ {
gimple *stmt; gimple *stmt;
bool grouped_store, is_store; bool grouped_store, is_store;
...@@ -3721,8 +3735,19 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance) ...@@ -3721,8 +3735,19 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance)
if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return false; return false;
/* See if we have already vectorized the same set of stmts and reuse their
vectorized stmts. */
slp_tree &leader
= bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
if (leader)
{
SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
return false;
}
leader = node;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_schedule_slp_instance (child, instance); vect_schedule_slp_instance (child, instance, bst_map);
/* Push SLP node def-type to stmts. */ /* Push SLP node def-type to stmts. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
...@@ -3894,8 +3919,11 @@ vect_schedule_slp (vec_info *vinfo) ...@@ -3894,8 +3919,11 @@ vect_schedule_slp (vec_info *vinfo)
FOR_EACH_VEC_ELT (slp_instances, i, instance) FOR_EACH_VEC_ELT (slp_instances, i, instance)
{ {
/* Schedule the tree of INSTANCE. */ /* Schedule the tree of INSTANCE. */
scalar_stmts_to_slp_tree_map_t *bst_map
= new scalar_stmts_to_slp_tree_map_t ();
is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
instance); instance, bst_map);
delete bst_map;
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, dump_printf_loc (MSG_NOTE, vect_location,
"vectorizing stmts using SLP.\n"); "vectorizing stmts using SLP.\n");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment