Commit 90dd6e3d by Richard Biener Committed by Richard Biener

re PR fortran/62283 (basic-block vectorization fails)

2015-04-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/62283
	* tree-vect-slp.c (vect_build_slp_tree): When the SLP build
	fails fatally and we are vectorizing a basic-block simply
	cause the child to be constructed piecewise.
	(vect_analyze_slp_cost_1): Adjust.
	(vect_detect_hybrid_slp_stmts): Likewise.
	(vect_bb_slp_scalar_cost): Likewise.
	(vect_get_constant_vectors): For piecewise constructed
	constants place them after the last def.
	(vect_get_slp_defs): Adjust.
	* tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
	externals for basic-block vectorization.

	* gfortran.dg/vect/pr62283-2.f: New testcase.
	* gcc.dg/vect/bb-slp-14.c: Adjust.

From-SVN: r222514
parent fde9b31b
2015-04-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/62283
* tree-vect-slp.c (vect_build_slp_tree): When the SLP build
fails fatally and we are vectorizing a basic-block simply
cause the child to be constructed piecewise.
(vect_analyze_slp_cost_1): Adjust.
(vect_detect_hybrid_slp_stmts): Likewise.
(vect_bb_slp_scalar_cost): Likewise.
(vect_get_constant_vectors): For piecewise constructed
constants place them after the last def.
(vect_get_slp_defs): Adjust.
* tree-vect-stmts.c (vect_is_simple_use): Detect in-BB
externals for basic-block vectorization.
2015-04-28 Thomas Preud'homme <thomas.preudhomme@arm.com> 2015-04-28 Thomas Preud'homme <thomas.preudhomme@arm.com>
PR target/63503 PR target/63503
......
2015-04-28 Richard Biener <rguenther@suse.de> 2015-04-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/62283
* gfortran.dg/vect/pr62283-2.f: New testcase.
* gcc.dg/vect/bb-slp-14.c: Adjust.
2015-04-28 Richard Biener <rguenther@suse.de>
PR tree-optimization/65851 PR tree-optimization/65851
* g++.dg/torture/pr65851.C: New testcase. * g++.dg/torture/pr65851.C: New testcase.
......
...@@ -14,7 +14,8 @@ main1 (unsigned int x, unsigned int y) ...@@ -14,7 +14,8 @@ main1 (unsigned int x, unsigned int y)
int i; int i;
unsigned int a0, a1, a2, a3; unsigned int a0, a1, a2, a3;
/* Not consecutive load with permutation - not supported. */ /* Not consecutive load with permutation - supported with building up
the vector from scalars. */
a0 = in[0] + 23; a0 = in[0] + 23;
a1 = in[1] + 142; a1 = in[1] + 142;
a2 = in[1] + 2; a2 = in[1] + 2;
...@@ -47,6 +48,6 @@ int main (void) ...@@ -47,6 +48,6 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */ /* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
/* { dg-final { cleanup-tree-dump "slp2" } } */ /* { dg-final { cleanup-tree-dump "slp2" } } */
! { dg-do compile }
! { dg-require-effective-target vect_float }
! { dg-additional-options "-fdump-tree-slp2-details" }
! Testcase for PR tree-optimization/62283 (basic-block vectorization).
! saxpy updates a four-element array as y = y + alpha*x, written as four
! separate straight-line statements with no loop.
subroutine saxpy(alpha,x,y)
real x(4),y(4),alpha
! One statement per element; alpha is the same scalar in all four.
y(1)=y(1)+alpha*x(1)
y(2)=y(2)+alpha*x(2)
y(3)=y(3)+alpha*x(3)
y(4)=y(4)+alpha*x(4)
end
! Pass criterion: the slp2 dump must contain "basic block vectorized".
! { dg-final { scan-tree-dump "basic block vectorized" "slp2" } }
! { dg-final { cleanup-tree-dump "slp2" } }
! { dg-final { cleanup-tree-dump "vect" } }
...@@ -1017,6 +1017,29 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -1017,6 +1017,29 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
continue; continue;
} }
/* If the SLP build failed fatally and we analyze a basic-block
simply treat nodes we fail to build as externally defined
(and thus build vectors from the scalar defs).
The cost model will reject outright expensive cases.
??? This doesn't treat cases where permutation ultimately
fails (or we don't try permutation below). Ideally we'd
even compute a permutation that will end up with the maximum
SLP tree size... */
if (bb_vinfo
&& !matches[0]
/* ??? Rejecting patterns this way doesn't work. We'd have to
do extra work to cancel the pattern so the uses see the
scalar version. */
&& !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
{
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands from scalars\n");
oprnd_info->def_stmts = vNULL;
vect_free_slp_tree (child);
SLP_TREE_CHILDREN (*node).quick_push (NULL);
continue;
}
/* If the SLP build for operand zero failed and operand zero /* If the SLP build for operand zero failed and operand zero
and one can be commutated try that for the scalar stmts and one can be commutated try that for the scalar stmts
that failed the match. */ that failed the match. */
...@@ -1417,9 +1440,10 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo, ...@@ -1417,9 +1440,10 @@ vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
/* Recurse down the SLP tree. */ /* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo, if (child)
instance, child, prologue_cost_vec, vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
ncopies_for_cost); instance, child, prologue_cost_vec,
ncopies_for_cost);
/* Look at the first scalar stmt to determine the cost. */ /* Look at the first scalar stmt to determine the cost. */
stmt = SLP_TREE_SCALAR_STMTS (node)[0]; stmt = SLP_TREE_SCALAR_STMTS (node)[0];
...@@ -1885,7 +1909,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) ...@@ -1885,7 +1909,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
STMT_SLP_TYPE (stmt_vinfo) = hybrid; STMT_SLP_TYPE (stmt_vinfo) = hybrid;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
vect_detect_hybrid_slp_stmts (child, i, stype); if (child)
vect_detect_hybrid_slp_stmts (child, i, stype);
} }
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */ /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
...@@ -2162,7 +2187,8 @@ vect_bb_slp_scalar_cost (basic_block bb, ...@@ -2162,7 +2187,8 @@ vect_bb_slp_scalar_cost (basic_block bb,
} }
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
scalar_cost += vect_bb_slp_scalar_cost (bb, child, life); if (child)
scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
return scalar_cost; return scalar_cost;
} }
...@@ -2612,6 +2638,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, ...@@ -2612,6 +2638,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
number_of_places_left_in_vector = nunits; number_of_places_left_in_vector = nunits;
elts = XALLOCAVEC (tree, nunits); elts = XALLOCAVEC (tree, nunits);
bool place_after_defs = false;
for (j = 0; j < number_of_copies; j++) for (j = 0; j < number_of_copies; j++)
{ {
for (i = group_size - 1; stmts.iterate (i, &stmt); i--) for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
...@@ -2682,6 +2709,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, ...@@ -2682,6 +2709,7 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
/* Create 'vect_ = {op0,op1,...,opn}'. */ /* Create 'vect_ = {op0,op1,...,opn}'. */
number_of_places_left_in_vector--; number_of_places_left_in_vector--;
tree orig_op = op;
if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op))) if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
{ {
if (CONSTANT_CLASS_P (op)) if (CONSTANT_CLASS_P (op))
...@@ -2704,6 +2732,12 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, ...@@ -2704,6 +2732,12 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
elts[number_of_places_left_in_vector] = op; elts[number_of_places_left_in_vector] = op;
if (!CONSTANT_CLASS_P (op)) if (!CONSTANT_CLASS_P (op))
constant_p = false; constant_p = false;
if (TREE_CODE (orig_op) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (orig_op)
&& STMT_VINFO_BB_VINFO (stmt_vinfo)
&& (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
== gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
place_after_defs = true;
if (number_of_places_left_in_vector == 0) if (number_of_places_left_in_vector == 0)
{ {
...@@ -2720,16 +2754,25 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, ...@@ -2720,16 +2754,25 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]); CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
vec_cst = build_constructor (vector_type, v); vec_cst = build_constructor (vector_type, v);
} }
voprnds.quick_push (vect_init_vector (stmt, vec_cst, tree init;
vector_type, NULL)); gimple_stmt_iterator gsi;
if (place_after_defs)
{
gsi = gsi_for_stmt
(vect_find_last_scalar_stmt_in_slp (slp_node));
init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
}
else
init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
if (ctor_seq != NULL) if (ctor_seq != NULL)
{ {
gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ()); gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
gsi_insert_seq_before_without_update (&gsi, ctor_seq, gsi_insert_seq_before_without_update (&gsi, ctor_seq,
GSI_SAME_STMT); GSI_SAME_STMT);
ctor_seq = NULL; ctor_seq = NULL;
} }
voprnds.quick_push (init);
place_after_defs = false;
} }
} }
} }
...@@ -2825,20 +2868,26 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, ...@@ -2825,20 +2868,26 @@ vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
child = SLP_TREE_CHILDREN (slp_node)[child_index]; child = SLP_TREE_CHILDREN (slp_node)[child_index];
/* We have to check both pattern and original def, if available. */ /* We have to check both pattern and original def, if available. */
gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0]; if (child)
gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
|| (related
&& operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
{ {
/* The number of vector defs is determined by the number of gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
vector statements in the node from which we get those gimple related
statements. */ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
vectorized_defs = true; if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
child_index++; || (related
&& operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
{
/* The number of vector defs is determined by the number of
vector statements in the node from which we get those
statements. */
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
vectorized_defs = true;
child_index++;
}
} }
else
child_index++;
} }
if (!vectorized_defs) if (!vectorized_defs)
......
...@@ -7752,7 +7752,10 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo, ...@@ -7752,7 +7752,10 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
else else
{ {
stmt_vinfo = vinfo_for_stmt (*def_stmt); stmt_vinfo = vinfo_for_stmt (*def_stmt);
*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo); if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
*dt = vect_external_def;
else
*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
} }
if (dump_enabled_p ()) if (dump_enabled_p ())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment