Commit 6983e6b5 by Richard Biener Committed by Richard Biener

tree-vect-slp.c (vect_build_slp_tree_1): Split out from ...

2013-04-17  Richard Biener  <rguenther@suse.de>

	* tree-vect-slp.c (vect_build_slp_tree_1): Split out from ...
	(vect_build_slp_tree): ... here.
	(vect_build_slp_tree_1): Compute which stmts of the SLP group
	match.  Remove special-casing of mismatched complex loads.
	(vect_build_slp_tree): Based on the result from vect_build_slp_tree_1
	re-try the match with swapped commutative operands.
	(vect_supported_load_permutation_p): Remove special-casing of
	mismatched complex loads.
	(vect_analyze_slp_instance): Adjust.

From-SVN: r198026
parent ef23e6a2
2013-04-17 Richard Biener <rguenther@suse.de> 2013-04-17 Richard Biener <rguenther@suse.de>
* tree-vect-slp.c (vect_build_slp_tree_1): Split out from ...
(vect_build_slp_tree): ... here.
(vect_build_slp_tree_1): Compute which stmts of the SLP group
match. Remove special-casing of mismatched complex loads.
(vect_build_slp_tree): Based on the result from vect_build_slp_tree_1
re-try the match with swapped commutative operands.
(vect_supported_load_permutation_p): Remove special-casing of
mismatched complex loads.
(vect_analyze_slp_instance): Adjust.
2013-04-17 Richard Biener <rguenther@suse.de>
PR rtl-optimization/56921 PR rtl-optimization/56921
* cfgloop.h (struct loop): Add simple_loop_desc member. * cfgloop.h (struct loop): Add simple_loop_desc member.
(struct niter_desc): Mark with GTY(()). (struct niter_desc): Mark with GTY(()).
......
...@@ -376,25 +376,25 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -376,25 +376,25 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
} }
/* Recursively build an SLP tree starting from NODE. /* Verify if the scalar stmts STMTS are isomorphic, require data
Fail (and return FALSE) if def-stmts are not isomorphic, require data permutation or are of unsupported types of operation. Return
permutation or are of unsupported types of operation. Otherwise, return true if they are, otherwise return false and indicate in *MATCHES
TRUE. */ which stmts are not isomorphic to the first one. If MATCHES[0]
is false then this indicates the comparison could not be
carried out or the stmts will never be vectorized by SLP. */
static bool static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
slp_tree *node, unsigned int group_size, vec<gimple> stmts, unsigned int group_size,
unsigned int *max_nunits, unsigned nops, unsigned int *max_nunits,
vec<slp_tree> *loads, unsigned int vectorization_factor, bool *matches)
unsigned int vectorization_factor)
{ {
unsigned int i; unsigned int i;
vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
gimple stmt = stmts[0]; gimple stmt = stmts[0];
enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK; enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
enum tree_code first_cond_code = ERROR_MARK; enum tree_code first_cond_code = ERROR_MARK;
tree lhs; tree lhs;
bool stop_recursion = false, need_same_oprnds = false; bool need_same_oprnds = false;
tree vectype, scalar_type, first_op1 = NULL_TREE; tree vectype, scalar_type, first_op1 = NULL_TREE;
optab optab; optab optab;
int icode; int icode;
...@@ -403,27 +403,13 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -403,27 +403,13 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
struct data_reference *first_dr; struct data_reference *first_dr;
HOST_WIDE_INT dummy; HOST_WIDE_INT dummy;
gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL; gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
vec<slp_oprnd_info> oprnds_info;
unsigned int nops;
slp_oprnd_info oprnd_info;
tree cond; tree cond;
if (is_gimple_call (stmt))
nops = gimple_call_num_args (stmt);
else if (is_gimple_assign (stmt))
{
nops = gimple_num_ops (stmt) - 1;
if (gimple_assign_rhs_code (stmt) == COND_EXPR)
nops++;
}
else
return false;
oprnds_info = vect_create_oprnd_info (nops, group_size);
/* For every stmt in NODE find its def stmt/s. */ /* For every stmt in NODE find its def stmt/s. */
FOR_EACH_VEC_ELT (stmts, i, stmt) FOR_EACH_VEC_ELT (stmts, i, stmt)
{ {
matches[i] = false;
if (dump_enabled_p ()) if (dump_enabled_p ())
{ {
dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for "); dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
...@@ -439,8 +425,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -439,8 +425,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
"Build SLP failed: unvectorizable statement "); "Build SLP failed: unvectorizable statement ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
...@@ -454,8 +440,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -454,8 +440,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
"GIMPLE_CALL "); "GIMPLE_CALL ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
...@@ -471,8 +457,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -471,8 +457,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
"comparison "); "comparison ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
...@@ -487,8 +473,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -487,8 +473,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
scalar_type); scalar_type);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
...@@ -515,8 +501,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -515,8 +501,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
"Build SLP failed: unsupported call type "); "Build SLP failed: unsupported call type ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
} }
...@@ -552,7 +538,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -552,7 +538,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: no optab."); "Build SLP failed: no optab.");
vect_free_oprnd_info (oprnds_info); /* Fatal mismatch. */
matches[0] = false;
return false; return false;
} }
icode = (int) optab_handler (optab, vec_mode); icode = (int) optab_handler (optab, vec_mode);
...@@ -562,7 +549,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -562,7 +549,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: " "Build SLP failed: "
"op not supported by target."); "op not supported by target.");
vect_free_oprnd_info (oprnds_info); /* Fatal mismatch. */
matches[0] = false;
return false; return false;
} }
optab_op2_mode = insn_data[icode].operand[2].mode; optab_op2_mode = insn_data[icode].operand[2].mode;
...@@ -600,9 +588,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -600,9 +588,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
"in stmt "); "in stmt ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
} }
/* Mismatch. */
vect_free_oprnd_info (oprnds_info); continue;
return false;
} }
if (need_same_oprnds if (need_same_oprnds
...@@ -615,9 +602,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -615,9 +602,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
"arguments in "); "arguments in ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
} }
/* Mismatch. */
vect_free_oprnd_info (oprnds_info); continue;
return false;
} }
if (rhs_code == CALL_EXPR) if (rhs_code == CALL_EXPR)
...@@ -636,9 +622,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -636,9 +622,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0); stmt, 0);
} }
/* Mismatch. */
vect_free_oprnd_info (oprnds_info); continue;
return false;
} }
} }
} }
...@@ -649,12 +634,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -649,12 +634,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
if (REFERENCE_CLASS_P (lhs)) if (REFERENCE_CLASS_P (lhs))
{ {
/* Store. */ /* Store. */
if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, ;
stmt, (i == 0), &oprnds_info))
{
vect_free_oprnd_info (oprnds_info);
return false;
}
} }
else else
{ {
...@@ -681,8 +661,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -681,8 +661,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0); stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
...@@ -705,8 +685,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -705,8 +685,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0); stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
...@@ -715,11 +695,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -715,11 +695,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
if (prev_first_load) if (prev_first_load)
{ {
/* Check that there are no loads from different interleaving /* Check that there are no loads from different interleaving
chains in the same node. The only exception is complex chains in the same node. */
numbers. */ if (prev_first_load != first_load)
if (prev_first_load != first_load
&& rhs_code != REALPART_EXPR
&& rhs_code != IMAGPART_EXPR)
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
{ {
...@@ -730,9 +707,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -730,9 +707,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0); stmt, 0);
} }
/* Mismatch. */
vect_free_oprnd_info (oprnds_info); continue;
return false;
} }
} }
else else
...@@ -755,15 +731,11 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -755,15 +731,11 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0); stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
} }
/* We stop the tree when we reach a group of loads. */
stop_recursion = true;
continue;
} }
} /* Grouped access. */ } /* Grouped access. */
else else
...@@ -779,7 +751,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -779,7 +751,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
} }
/* FORNOW: Not grouped loads are not supported. */ /* FORNOW: Not grouped loads are not supported. */
vect_free_oprnd_info (oprnds_info); /* Fatal mismatch. */
matches[0] = false;
return false; return false;
} }
...@@ -796,8 +769,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -796,8 +769,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported "); dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
} }
/* Fatal mismatch. */
vect_free_oprnd_info (oprnds_info); matches[0] = false;
return false; return false;
} }
...@@ -817,53 +790,161 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -817,53 +790,161 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
stmt, 0); stmt, 0);
} }
/* Mismatch. */
vect_free_oprnd_info (oprnds_info); continue;
return false;
} }
} }
/* Find the def-stmts. */
if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, stmt,
(i == 0), &oprnds_info))
{
vect_free_oprnd_info (oprnds_info);
return false;
}
} }
matches[i] = true;
}
for (i = 0; i < group_size; ++i)
if (!matches[i])
return false;
return true;
}
/* Recursively build an SLP tree starting from NODE.
Fail (and return a value not equal to zero) if def-stmts are not
isomorphic, require data permutation or are of unsupported types of
operation. Otherwise, return 0.
The value returned is the depth in the SLP tree where a mismatch
was found. */
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
slp_tree *node, unsigned int group_size,
unsigned int *max_nunits,
vec<slp_tree> *loads,
unsigned int vectorization_factor,
bool *matches, unsigned *npermutes)
{
unsigned nops, i, this_npermutes = 0;
gimple stmt;
if (!matches)
matches = XALLOCAVEC (bool, group_size);
if (!npermutes)
npermutes = &this_npermutes;
matches[0] = false;
stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
if (is_gimple_call (stmt))
nops = gimple_call_num_args (stmt);
else if (is_gimple_assign (stmt))
{
nops = gimple_num_ops (stmt) - 1;
if (gimple_assign_rhs_code (stmt) == COND_EXPR)
nops++;
} }
else
return false;
/* Grouped loads were reached - stop the recursion. */ if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
if (stop_recursion) SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
max_nunits, vectorization_factor, matches))
return false;
/* If the SLP node is a load, terminate the recursion. */
if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
&& DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
{ {
loads->safe_push (*node); loads->safe_push (*node);
vect_free_oprnd_info (oprnds_info);
return true; return true;
} }
/* Get at the operands, verifying they are compatible. */
vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
slp_oprnd_info oprnd_info;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
{
if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
stmt, (i == 0), &oprnds_info))
{
vect_free_oprnd_info (oprnds_info);
return false;
}
}
stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
/* Create SLP_TREE nodes for the definition node/s. */ /* Create SLP_TREE nodes for the definition node/s. */
FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
{ {
slp_tree child; slp_tree child;
unsigned old_nloads = loads->length ();
unsigned old_max_nunits = *max_nunits;
if (oprnd_info->first_dt != vect_internal_def) if (oprnd_info->first_dt != vect_internal_def)
continue; continue;
child = vect_create_new_slp_node (oprnd_info->def_stmts); child = vect_create_new_slp_node (oprnd_info->def_stmts);
if (!child if (!child)
|| !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size, {
max_nunits, loads,
vectorization_factor))
{
if (child)
oprnd_info->def_stmts = vNULL;
vect_free_slp_tree (child);
vect_free_oprnd_info (oprnds_info); vect_free_oprnd_info (oprnds_info);
return false; return false;
}
bool *matches = XALLOCAVEC (bool, group_size);
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
group_size, max_nunits, loads,
vectorization_factor, matches, npermutes))
{
oprnd_info->def_stmts = vNULL;
SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
}
/* If the SLP build for operand zero failed and operand zero
and one can be commutated try that for the scalar stmts
that failed the match. */
if (i == 0
/* A first scalar stmt mismatch signals a fatal mismatch. */
&& matches[0]
/* ??? For COND_EXPRs we can swap the comparison operands
as well as the arms under some constraints. */
&& nops == 2
&& oprnds_info[1]->first_dt == vect_internal_def
&& is_gimple_assign (stmt)
&& commutative_tree_code (gimple_assign_rhs_code (stmt))
/* Do so only if the number of not successful permutes was nor more
than a cut-ff as re-trying the recursive match on
possibly each level of the tree would expose exponential
behavior. */
&& *npermutes < 4)
{
/* Roll back. */
*max_nunits = old_max_nunits;
loads->truncate (old_nloads);
/* Swap mismatched definition stmts. */
for (unsigned j = 0; j < group_size; ++j)
if (!matches[j])
{
gimple tem = oprnds_info[0]->def_stmts[j];
oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
oprnds_info[1]->def_stmts[j] = tem;
}
/* And try again ... */
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
group_size, max_nunits, loads,
vectorization_factor,
matches, npermutes))
{
oprnd_info->def_stmts = vNULL;
SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
}
++*npermutes;
} }
oprnd_info->def_stmts.create (0); oprnd_info->def_stmts = vNULL;
SLP_TREE_CHILDREN (*node).quick_push (child); vect_free_slp_tree (child);
vect_free_oprnd_info (oprnds_info);
return false;
} }
vect_free_oprnd_info (oprnds_info); vect_free_oprnd_info (oprnds_info);
...@@ -1045,9 +1126,8 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, ...@@ -1045,9 +1126,8 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
int i = 0, j, prev = -1, next, k, number_of_groups; int i = 0, j, prev = -1, next, k, number_of_groups;
bool supported, bad_permutation = false; bool supported, bad_permutation = false;
sbitmap load_index; sbitmap load_index;
slp_tree node, other_complex_node; slp_tree node;
gimple stmt, first = NULL, other_node_first, load, next_load, first_load; gimple stmt, load, next_load, first_load;
unsigned complex_numbers = 0;
struct data_reference *dr; struct data_reference *dr;
bb_vec_info bb_vinfo; bb_vec_info bb_vinfo;
...@@ -1071,66 +1151,9 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size, ...@@ -1071,66 +1151,9 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
/* Check that all the load nodes are of the same size. */ /* Check that all the load nodes are of the same size. */
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{ if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size) return false;
return false;
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
if (is_gimple_assign (stmt)
&& (gimple_assign_rhs_code (stmt) == REALPART_EXPR
|| gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
complex_numbers++;
}
/* Complex operands can be swapped as following:
real_c = real_b + real_a;
imag_c = imag_a + imag_b;
i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
{real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
chains are mixed, they match the above pattern. */
if (complex_numbers)
{
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
{
if (j == 0)
first = stmt;
else
{
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
{
if (complex_numbers != 2)
return false;
if (i == 0)
k = 1;
else
k = 0;
other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
other_node_first =
SLP_TREE_SCALAR_STMTS (other_complex_node)[0];
if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
!= other_node_first)
return false;
}
}
}
}
}
/* We checked that this case ok, so there is no need to proceed with
permutation tests. */
if (complex_numbers == 2
&& SLP_INSTANCE_LOADS (slp_instn).length () == 2)
{
SLP_INSTANCE_LOADS (slp_instn).release ();
SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
return true;
}
node = SLP_INSTANCE_TREE (slp_instn); node = SLP_INSTANCE_TREE (slp_instn);
stmt = SLP_TREE_SCALAR_STMTS (node)[0]; stmt = SLP_TREE_SCALAR_STMTS (node)[0];
/* LOAD_PERMUTATION is a list of indices of all the loads of the SLP /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
...@@ -1593,7 +1616,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -1593,7 +1616,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Build the tree for the SLP instance. */ /* Build the tree for the SLP instance. */
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size, if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
&max_nunits, &loads, &max_nunits, &loads,
vectorization_factor)) vectorization_factor, NULL, NULL))
{ {
/* Calculate the unrolling factor based on the smallest type. */ /* Calculate the unrolling factor based on the smallest type. */
if (max_nunits > nunits) if (max_nunits > nunits)
...@@ -1629,19 +1652,15 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -1629,19 +1652,15 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
FOR_EACH_VEC_ELT (loads, i, load_node) FOR_EACH_VEC_ELT (loads, i, load_node)
{ {
int j; int j;
gimple load; gimple load, first_stmt;
first_stmt = GROUP_FIRST_ELEMENT
(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load) FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
{ {
int load_place; int load_place
load_place = vect_get_place_in_interleaving_chain = vect_get_place_in_interleaving_chain (load, first_stmt);
(load, GROUP_FIRST_ELEMENT (vinfo_for_stmt (load))); gcc_assert (load_place != -1);
if (load_place != j if (load_place != j)
/* ??? We allow loads from different groups to
get to here for a special case handled in
the permutation code. Make sure we get to that. */
|| (GROUP_FIRST_ELEMENT
(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]))
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (load))))
loads_permuted = true; loads_permuted = true;
load_permutation.safe_push (load_place); load_permutation.safe_push (load_place);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment