Commit 98bdbb39 by Jan Hubicka Committed by Jan Hubicka

predict.h (force_edge_cold): Declare.

	* predict.h (force_edge_cold): Declare.
	* predict.c (force_edge_cold): New function.
	* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile
	updating.
	(canonicalize_loop_induction_variables): Fix formating.

	* gcc.dg/tree-ssa/cunroll-12.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-13.c: New testcase.
	* gcc.dg/tree-ssa/cunroll-14.c: New testcase.

From-SVN: r236874
parent d5cf3d8d
2016-05-30 Jan Hubicka <hubicka@ucw.cz>
* predict.h (force_edge_cold): Declare.
* predict.c (force_edge_cold): New function.
* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Fix profile
updating.
(canonicalize_loop_induction_variables): Fix formating.
2016-05-30 Eric Botcazou <ebotcazou@adacore.com> 2016-05-30 Eric Botcazou <ebotcazou@adacore.com>
* config/visium/visium.c (visium_split_double_add): Minor tweaks. * config/visium/visium.c (visium_split_double_add): Minor tweaks.
......
...@@ -3249,3 +3249,99 @@ report_predictor_hitrates (void) ...@@ -3249,3 +3249,99 @@ report_predictor_hitrates (void)
loop_optimizer_finalize (); loop_optimizer_finalize ();
} }
/* Force edge E to be cold.
If IMPOSSIBLE is true, for edge to have count and probability 0 otherwise
keep low probability to represent possible error in a guess. This is used
i.e. in case we predict loop to likely iterate given number of times but
we are not 100% sure.
This function locally updates profile without attempt to keep global
consistency which can not be reached in full generality without full profile
rebuild from probabilities alone. Doing so is not necessarily a good idea
because frequencies and counts may be more realistic then probabilities.
In some cases (such as for elimination of early exits during full loop
unrolling) the caller can ensure that profile will get consistent
afterwards. */
void
force_edge_cold (edge e, bool impossible)
{
gcov_type count_sum = 0;
int prob_sum = 0;
edge_iterator ei;
edge e2;
gcov_type old_count = e->count;
int old_probability = e->probability;
gcov_type gcov_scale = REG_BR_PROB_BASE;
int prob_scale = REG_BR_PROB_BASE;
/* If edge is already improbably or cold, just return. */
if (e->probability <= impossible ? PROB_VERY_UNLIKELY : 0
&& (!impossible || !e->count))
return;
FOR_EACH_EDGE (e2, ei, e->src->succs)
if (e2 != e)
{
count_sum += e2->count;
prob_sum += e2->probability;
}
/* If there are other edges out of e->src, redistribute probabilitity
there. */
if (prob_sum)
{
e->probability
= MIN (e->probability, impossible ? 0 : PROB_VERY_UNLIKELY);
if (old_probability)
e->count = RDIV (e->count * e->probability, old_probability);
else
e->count = MIN (e->count, impossible ? 0 : 1);
if (count_sum)
gcov_scale = RDIV ((count_sum + old_count - e->count) * REG_BR_PROB_BASE,
count_sum);
prob_scale = RDIV ((REG_BR_PROB_BASE - e->probability) * REG_BR_PROB_BASE,
prob_sum);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Making edge %i->%i %s by redistributing "
"probability to other edges.\n",
e->src->index, e->dest->index,
impossible ? "imposisble" : "cold");
FOR_EACH_EDGE (e2, ei, e->src->succs)
if (e2 != e)
{
e2->count = RDIV (e2->count * gcov_scale, REG_BR_PROB_BASE);
e2->probability = RDIV (e2->probability * prob_scale,
REG_BR_PROB_BASE);
}
}
/* If all edges out of e->src are unlikely, the basic block itself
is unlikely. */
else
{
e->probability = REG_BR_PROB_BASE;
/* If we did not adjusting, the source basic block has no likely edeges
leaving other direction. In that case force that bb cold, too.
This in general is difficult task to do, but handle special case when
BB has only one predecestor. This is common case when we are updating
after loop transforms. */
if (!prob_sum && !count_sum && single_pred_p (e->src)
&& e->src->frequency > (impossible ? 0 : 1))
{
int old_frequency = e->src->frequency;
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Making bb %i %s.\n", e->src->index,
impossible ? "imposisble" : "cold");
e->src->frequency = MIN (e->src->frequency, impossible ? 0 : 1);
e->src->count = e->count = RDIV (e->src->count * e->src->frequency,
old_frequency);
force_edge_cold (single_pred_edge (e->src), impossible);
}
else if (dump_file && (dump_flags & TDF_DETAILS)
&& maybe_hot_bb_p (cfun, e->src))
fprintf (dump_file, "Giving up on making bb %i %s.\n", e->src->index,
impossible ? "imposisble" : "cold");
}
}
...@@ -91,5 +91,6 @@ extern tree build_predict_expr (enum br_predictor, enum prediction); ...@@ -91,5 +91,6 @@ extern tree build_predict_expr (enum br_predictor, enum prediction);
extern const char *predictor_name (enum br_predictor); extern const char *predictor_name (enum br_predictor);
extern void rebuild_frequencies (void); extern void rebuild_frequencies (void);
extern void report_predictor_hitrates (void); extern void report_predictor_hitrates (void);
extern void force_edge_cold (edge, bool);
#endif /* GCC_PREDICT_H */ #endif /* GCC_PREDICT_H */
2016-05-30 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/tree-ssa/cunroll-12.c: New testcase.
* gcc.dg/tree-ssa/cunroll-13.c: New testcase.
* gcc.dg/tree-ssa/cunroll-14.c: New testcase.
2016-05-30 Tom de Vries <tom@codesourcery.com> 2016-05-30 Tom de Vries <tom@codesourcery.com>
PR tree-optimization/69067 PR tree-optimization/69067
......
/* { dg-do compile } */
/* { dg-options "-O3 -Warray-bounds -fdump-tree-cunroll-blocks-details" } */
struct a {int a[8];int b;};
void
t(struct a *a)
{
for (int i=0;a->a[i];i++)
a->a[i]++;
}
/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
/* { dg-do compile } */
/* { dg-options "-O3 -fdisable-tree-cunrolli -fdisable-tree-vrp1 -fdump-tree-cunroll-blocks-details" } */
struct a {int a[8];int b;};
void
t(struct a *a)
{
for (int i=0;i<123456 && a->a[i];i++)
a->a[i]++;
}
/* This pass relies on the fact that we do not eliminate the redundant test for i early.
It is necessary to disable all passes that do so. At the moment it is vrp1 and cunrolli. */
/* { dg-final { scan-tree-dump-times "Loop 1 iterates 123454 times" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-cunroll-blocks-details" } */
struct a {int a[100];};
void
t(struct a *a)
{
for (int i=0;i<5 && a->a[i];i++)
a->a[i]++;
}
/* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Loop 1 iterates 4 times" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */
...@@ -730,8 +730,14 @@ try_unroll_loop_completely (struct loop *loop, ...@@ -730,8 +730,14 @@ try_unroll_loop_completely (struct loop *loop,
if (ul == UL_SINGLE_ITER) if (ul == UL_SINGLE_ITER)
return false; return false;
/* EXIT can be removed only if we are sure it passes first N_UNROLL
iterations. */
bool remove_exit = (exit && niter
&& TREE_CODE (niter) == INTEGER_CST
&& wi::leu_p (n_unroll, wi::to_widest (niter)));
large = tree_estimate_loop_size large = tree_estimate_loop_size
(loop, exit, edge_to_cancel, &size, (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
ninsns = size.overall; ninsns = size.overall;
if (large) if (large)
...@@ -837,8 +843,20 @@ try_unroll_loop_completely (struct loop *loop, ...@@ -837,8 +843,20 @@ try_unroll_loop_completely (struct loop *loop,
initialize_original_copy_tables (); initialize_original_copy_tables ();
wont_exit = sbitmap_alloc (n_unroll + 1); wont_exit = sbitmap_alloc (n_unroll + 1);
bitmap_ones (wont_exit); if (exit && niter
bitmap_clear_bit (wont_exit, 0); && TREE_CODE (niter) == INTEGER_CST
&& wi::leu_p (n_unroll, wi::to_widest (niter)))
{
bitmap_ones (wont_exit);
if (wi::eq_p (wi::to_widest (niter), n_unroll)
|| edge_to_cancel)
bitmap_clear_bit (wont_exit, 0);
}
else
{
exit = NULL;
bitmap_clear (wont_exit);
}
if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
n_unroll, wont_exit, n_unroll, wont_exit,
...@@ -869,6 +887,7 @@ try_unroll_loop_completely (struct loop *loop, ...@@ -869,6 +887,7 @@ try_unroll_loop_completely (struct loop *loop,
if (edge_to_cancel) if (edge_to_cancel)
{ {
gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src)); gcond *cond = as_a <gcond *> (last_stmt (edge_to_cancel->src));
force_edge_cold (edge_to_cancel, true);
if (edge_to_cancel->flags & EDGE_TRUE_VALUE) if (edge_to_cancel->flags & EDGE_TRUE_VALUE)
gimple_cond_make_false (cond); gimple_cond_make_false (cond);
else else
...@@ -1112,8 +1131,8 @@ canonicalize_loop_induction_variables (struct loop *loop, ...@@ -1112,8 +1131,8 @@ canonicalize_loop_induction_variables (struct loop *loop,
if (dump_file && (dump_flags & TDF_DETAILS) if (dump_file && (dump_flags & TDF_DETAILS)
&& likely_max_loop_iterations_int (loop) >= 0) && likely_max_loop_iterations_int (loop) >= 0)
{ {
fprintf (dump_file, "Loop likely %d iterates at most %i times.\n", loop->num, fprintf (dump_file, "Loop %d likely iterates at most %i times.\n",
(int)likely_max_loop_iterations_int (loop)); loop->num, (int)likely_max_loop_iterations_int (loop));
} }
/* Remove exits that are known to be never taken based on loop bound. /* Remove exits that are known to be never taken based on loop bound.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment