Commit b266b968 by Richard Biener Committed by Richard Biener

tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split out from ...

2015-06-10  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
	out from ...
	(vect_supported_load_permutation_p): ... here.  Handle
	supportable permutations in reductions.
	* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
	for vectorizing strided group loads.

From-SVN: r224324
parent 8ffd51d2
2015-06-10 Richard Biener <rguenther@suse.de>
* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
out from ...
(vect_supported_load_permutation_p): ... here. Handle
supportable permutations in reductions.
* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
for vectorizing strided group loads.
2015-06-10 Jakub Jelinek <jakub@redhat.com> 2015-06-10 Jakub Jelinek <jakub@redhat.com>
PR target/66470 PR target/66470
......
...@@ -1299,6 +1299,67 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, ...@@ -1299,6 +1299,67 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
} }
/* Attempt to reorder stmts in a reduction chain so that we don't
require any load permutation. Return true if that was possible,
otherwise return false. */
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, idx;
  slp_tree node, other;

  /* All load nodes must carry the very same permutation, and the
     first one must actually be permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &other); ++i)
    {
      if (!other->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (other->load_permutation, j, idx)
	if (idx != node->load_permutation[j])
	  return false;
    }

  /* The shared permutation must be a bijection on [0, group_size):
     no load may appear twice and no slot may be left uncovered.  */
  sbitmap seen = sbitmap_alloc (group_size);
  bitmap_clear (seen);
  FOR_EACH_VEC_ELT (node->load_permutation, i, idx)
    {
      if (bitmap_bit_p (seen, idx))
	{
	  /* Duplicate load index.  */
	  sbitmap_free (seen);
	  return false;
	}
      bitmap_set_bit (seen, idx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (seen, i))
      {
	/* Gap in the load sequence.  */
	sbitmap_free (seen);
	return false;
      }
  sbitmap_free (seen);

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    SLP_TREE_LOAD_PERMUTATION (node).release ();
  return true;
}
/* Check if the required load permutations in the SLP instance /* Check if the required load permutations in the SLP instance
SLP_INSTN are supported. */ SLP_INSTN are supported. */
...@@ -1307,7 +1368,6 @@ vect_supported_load_permutation_p (slp_instance slp_instn) ...@@ -1307,7 +1368,6 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
{ {
unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
unsigned int i, j, k, next; unsigned int i, j, k, next;
sbitmap load_index;
slp_tree node; slp_tree node;
gimple stmt, load, next_load, first_load; gimple stmt, load, next_load, first_load;
struct data_reference *dr; struct data_reference *dr;
...@@ -1342,59 +1402,14 @@ vect_supported_load_permutation_p (slp_instance slp_instn) ...@@ -1342,59 +1402,14 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
stmt = SLP_TREE_SCALAR_STMTS (node)[0]; stmt = SLP_TREE_SCALAR_STMTS (node)[0];
/* Reduction (there are no data-refs in the root). /* Reduction (there are no data-refs in the root).
In reduction chain the order of the loads is important. */ In reduction chain the order of the loads is not important. */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)) if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{ {
slp_tree load; if (vect_attempt_slp_rearrange_stmts (slp_instn))
unsigned int lidx; return true;
/* Compare all the permutation sequences to the first one. We know /* Fallthru to general load permutation handling. */
that at least one load is permuted. */
node = SLP_INSTANCE_LOADS (slp_instn)[0];
if (!node->load_permutation.exists ())
return false;
for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
{
if (!load->load_permutation.exists ())
return false;
FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
if (lidx != node->load_permutation[j])
return false;
}
/* Check that the loads in the first sequence are different and there
are no gaps between them. */
load_index = sbitmap_alloc (group_size);
bitmap_clear (load_index);
FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
{
if (bitmap_bit_p (load_index, lidx))
{
sbitmap_free (load_index);
return false;
}
bitmap_set_bit (load_index, lidx);
}
for (i = 0; i < group_size; i++)
if (!bitmap_bit_p (load_index, i))
{
sbitmap_free (load_index);
return false;
}
sbitmap_free (load_index);
/* This permutation is valid for reduction. Since the order of the
statements in the nodes is not important unless they are memory
accesses, we can rearrange the statements in all the nodes
according to the order of the loads. */
vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
node->load_permutation);
/* We are done, no actual permutations need to be generated. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
SLP_TREE_LOAD_PERMUTATION (node).release ();
return true;
} }
/* In basic block vectorization we allow any subchain of an interleaving /* In basic block vectorization we allow any subchain of an interleaving
......
...@@ -5995,9 +5995,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -5995,9 +5995,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if ((grouped_load if ((grouped_load
&& (slp || PURE_SLP_STMT (stmt_info))) && (slp || PURE_SLP_STMT (stmt_info)))
&& (group_size > nunits && (group_size > nunits
|| nunits % group_size != 0 || nunits % group_size != 0))
/* We don't support load permutations. */
|| slp_perm))
{ {
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unhandled strided group load\n"); "unhandled strided group load\n");
...@@ -6294,6 +6292,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -6294,6 +6292,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0); alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
int nloads = nunits; int nloads = nunits;
tree ltype = TREE_TYPE (vectype); tree ltype = TREE_TYPE (vectype);
auto_vec<tree> dr_chain;
if (slp) if (slp)
{ {
nloads = nunits / group_size; nloads = nunits / group_size;
...@@ -6303,7 +6302,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -6303,7 +6302,8 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
ltype = vectype; ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
gcc_assert (!slp_perm); if (slp_perm)
dr_chain.create (ncopies);
} }
for (j = 0; j < ncopies; j++) for (j = 0; j < ncopies; j++)
{ {
...@@ -6350,13 +6350,20 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -6350,13 +6350,20 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
} }
if (slp) if (slp)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); {
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
if (slp_perm)
dr_chain.quick_push (gimple_assign_lhs (new_stmt));
}
if (j == 0) if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt); prev_stmt_info = vinfo_for_stmt (new_stmt);
} }
if (slp_perm)
vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
slp_node_instance, false);
return true; return true;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment