Commit 30c5a937 authored and committed by Richard Biener

re PR tree-optimization/64909 (Missed vectorization with bdver1)

2015-04-04  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/64909
	PR tree-optimization/65660
	* tree-vectorizer.h (vect_get_known_peeling_cost): Adjust
	to take a cost vector for scalar iteration cost.
	(vect_get_single_scalar_iteration_cost): Likewise.
	* tree-vect-loop.c (vect_get_single_scalar_iteration_cost):
	Compute the scalar iteration cost into a cost vector.
	(vect_get_known_peeling_cost): Use the scalar cost vector to
	account for the cost of the peeled iterations.
	(vect_estimate_min_profitable_iters): Likewise.
	* tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost):
	Likewise.

From-SVN: r221866
parent 0da0c47d
2015-04-04 Richard Biener <rguenther@suse.de>
PR tree-optimization/64909
PR tree-optimization/65660
* tree-vectorizer.h (vect_get_known_peeling_cost): Adjust
to take a cost vector for scalar iteration cost.
(vect_get_single_scalar_iteration_cost): Likewise.
* tree-vect-loop.c (vect_get_single_scalar_iteration_cost):
Compute the scalar iteration cost into a cost vector.
(vect_get_known_peeling_cost): Use the scalar cost vector to
account for the cost of the peeled iterations.
(vect_estimate_min_profitable_iters): Likewise.
* tree-vect-data-refs.c (vect_peeling_hash_get_lowest_cost):
Likewise.
2015-04-04 Alan Modra <amodra@gmail.com>
PR target/65576
......
...@@ -1152,7 +1152,6 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot, ...@@ -1152,7 +1152,6 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
struct data_reference *dr; struct data_reference *dr;
stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec; stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
int single_iter_cost;
prologue_cost_vec.create (2); prologue_cost_vec.create (2);
body_cost_vec.create (2); body_cost_vec.create (2);
...@@ -1175,14 +1174,11 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot, ...@@ -1175,14 +1174,11 @@ vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
SET_DR_MISALIGNMENT (dr, save_misalignment); SET_DR_MISALIGNMENT (dr, save_misalignment);
} }
single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo); auto_vec<stmt_info_for_cost> scalar_cost_vec;
vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec);
outside_cost += vect_get_known_peeling_cost outside_cost += vect_get_known_peeling_cost
(loop_vinfo, elem->npeel, &dummy, (loop_vinfo, elem->npeel, &dummy,
/* ??? We use this cost as number of stmts with scalar_stmt cost, &scalar_cost_vec, &prologue_cost_vec, &epilogue_cost_vec);
thus divide by that. This introduces rounding errors, thus better
introduce a new cost kind (raw_cost? scalar_iter_cost?). */
single_iter_cost / vect_get_stmt_cost (scalar_stmt),
&prologue_cost_vec, &epilogue_cost_vec);
/* Prologue and epilogue costs are added to the target model later. /* Prologue and epilogue costs are added to the target model later.
These costs depend only on the scalar iteration cost, the These costs depend only on the scalar iteration cost, the
......
...@@ -2653,12 +2653,13 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi, ...@@ -2653,12 +2653,13 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
/* Calculate the cost of one scalar iteration of the loop. */ /* Calculate the cost of one scalar iteration of the loop. */
int int
vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo) vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo,
stmt_vector_for_cost *scalar_cost_vec)
{ {
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0; int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
int innerloop_iters, i, stmt_cost; int innerloop_iters, i;
/* Count statements in scalar loop. Using this as scalar cost for a single /* Count statements in scalar loop. Using this as scalar cost for a single
iteration for now. iteration for now.
...@@ -2699,17 +2700,20 @@ vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo) ...@@ -2699,17 +2700,20 @@ vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
&& !STMT_VINFO_IN_PATTERN_P (stmt_info)) && !STMT_VINFO_IN_PATTERN_P (stmt_info))
continue; continue;
vect_cost_for_stmt kind;
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))) if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
{ {
if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
stmt_cost = vect_get_stmt_cost (scalar_load); kind = scalar_load;
else else
stmt_cost = vect_get_stmt_cost (scalar_store); kind = scalar_store;
} }
else else
stmt_cost = vect_get_stmt_cost (scalar_stmt); kind = scalar_stmt;
scalar_single_iter_cost += stmt_cost * factor; scalar_single_iter_cost
+= record_stmt_cost (scalar_cost_vec, factor, kind,
NULL, 0, vect_prologue);
} }
} }
return scalar_single_iter_cost; return scalar_single_iter_cost;
...@@ -2719,7 +2723,7 @@ vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo) ...@@ -2719,7 +2723,7 @@ vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
int int
vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
int *peel_iters_epilogue, int *peel_iters_epilogue,
int scalar_single_iter_cost, stmt_vector_for_cost *scalar_cost_vec,
stmt_vector_for_cost *prologue_cost_vec, stmt_vector_for_cost *prologue_cost_vec,
stmt_vector_for_cost *epilogue_cost_vec) stmt_vector_for_cost *epilogue_cost_vec)
{ {
...@@ -2736,8 +2740,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, ...@@ -2736,8 +2740,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
/* If peeled iterations are known but number of scalar loop /* If peeled iterations are known but number of scalar loop
iterations are unknown, count a taken branch per peeled loop. */ iterations are unknown, count a taken branch per peeled loop. */
retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken, retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
NULL, 0, vect_prologue); NULL, 0, vect_prologue);
retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
NULL, 0, vect_epilogue);
} }
else else
{ {
...@@ -2751,14 +2757,21 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, ...@@ -2751,14 +2757,21 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
*peel_iters_epilogue = vf; *peel_iters_epilogue = vf;
} }
stmt_info_for_cost *si;
int j;
if (peel_iters_prologue) if (peel_iters_prologue)
FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
retval += record_stmt_cost (prologue_cost_vec, retval += record_stmt_cost (prologue_cost_vec,
peel_iters_prologue * scalar_single_iter_cost, si->count * peel_iters_prologue,
scalar_stmt, NULL, 0, vect_prologue); si->kind, NULL, si->misalign,
vect_prologue);
if (*peel_iters_epilogue) if (*peel_iters_epilogue)
FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
retval += record_stmt_cost (epilogue_cost_vec, retval += record_stmt_cost (epilogue_cost_vec,
*peel_iters_epilogue * scalar_single_iter_cost, si->count * *peel_iters_epilogue,
scalar_stmt, NULL, 0, vect_epilogue); si->kind, NULL, si->misalign,
vect_epilogue);
return retval; return retval;
} }
...@@ -2833,12 +2846,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, ...@@ -2833,12 +2846,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
TODO: Consider assigning different costs to different scalar TODO: Consider assigning different costs to different scalar
statements. */ statements. */
scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo); auto_vec<stmt_info_for_cost> scalar_cost_vec;
/* ??? Below we use this cost as number of stmts with scalar_stmt cost, scalar_single_iter_cost
thus divide by that. This introduces rounding errors, thus better = vect_get_single_scalar_iteration_cost (loop_vinfo, &scalar_cost_vec);
introduce a new cost kind (raw_cost? scalar_iter_cost?). */
int scalar_single_iter_stmts
= scalar_single_iter_cost / vect_get_stmt_cost (scalar_stmt);
/* Add additional cost for the peeled instructions in prologue and epilogue /* Add additional cost for the peeled instructions in prologue and epilogue
loop. loop.
...@@ -2866,18 +2876,29 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, ...@@ -2866,18 +2876,29 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
branch per peeled loop. Even if scalar loop iterations are known, branch per peeled loop. Even if scalar loop iterations are known,
vector iterations are not known since peeled prologue iterations are vector iterations are not known since peeled prologue iterations are
not known. Hence guards remain the same. */ not known. Hence guards remain the same. */
(void) add_stmt_cost (target_cost_data, 2, cond_branch_taken, (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
NULL, 0, vect_prologue); NULL, 0, vect_prologue);
(void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken, (void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
NULL, 0, vect_prologue); NULL, 0, vect_prologue);
/* FORNOW: Don't attempt to pass individual scalar instructions to (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
the model; just assume linear cost for scalar iterations. */ NULL, 0, vect_epilogue);
(void) add_stmt_cost (target_cost_data, 1, cond_branch_not_taken,
NULL, 0, vect_epilogue);
stmt_info_for_cost *si;
int j;
FOR_EACH_VEC_ELT (scalar_cost_vec, j, si)
{
struct _stmt_vec_info *stmt_info
= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
(void) add_stmt_cost (target_cost_data, (void) add_stmt_cost (target_cost_data,
peel_iters_prologue * scalar_single_iter_stmts, si->count * peel_iters_prologue,
scalar_stmt, NULL, 0, vect_prologue); si->kind, stmt_info, si->misalign,
vect_prologue);
(void) add_stmt_cost (target_cost_data, (void) add_stmt_cost (target_cost_data,
peel_iters_epilogue * scalar_single_iter_stmts, si->count * peel_iters_epilogue,
scalar_stmt, NULL, 0, vect_epilogue); si->kind, stmt_info, si->misalign,
vect_epilogue);
}
} }
else else
{ {
...@@ -2892,7 +2913,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, ...@@ -2892,7 +2913,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
(void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue, (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
&peel_iters_epilogue, &peel_iters_epilogue,
scalar_single_iter_stmts, &scalar_cost_vec,
&prologue_cost_vec, &prologue_cost_vec,
&epilogue_cost_vec); &epilogue_cost_vec);
......
...@@ -1101,10 +1101,12 @@ extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *, ...@@ -1101,10 +1101,12 @@ extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *,
extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *); extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *);
extern tree get_initial_def_for_reduction (gimple, tree, tree *); extern tree get_initial_def_for_reduction (gimple, tree, tree *);
extern int vect_min_worthwhile_factor (enum tree_code); extern int vect_min_worthwhile_factor (enum tree_code);
extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, int, extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
stmt_vector_for_cost *, stmt_vector_for_cost *,
stmt_vector_for_cost *,
stmt_vector_for_cost *);
extern int vect_get_single_scalar_iteration_cost (loop_vec_info,
stmt_vector_for_cost *); stmt_vector_for_cost *);
extern int vect_get_single_scalar_iteration_cost (loop_vec_info);
/* In tree-vect-slp.c. */ /* In tree-vect-slp.c. */
extern void vect_free_slp_instance (slp_instance); extern void vect_free_slp_instance (slp_instance);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment