Commit f5b25e15 by Jan Hubicka

Make profile estimation more precise

While analyzing code size regression in SPEC2k GCC binary I noticed that we
perform some inline decisions because we think that number of executions are
very high.
In particular there was inline decision inlining gen_rtx_fmt_ee to find_reloads
believing that it is called 4 billion times.  This turned out to be cummulation
of roundoff errors in propagate_freq which was bit mechanically updated from
original sreals to C++ sreals and later to new probabilities.

This led us to estimate that a loopback edge is reached with probability 2.3
which was capped to 1-1/10000 and since this happened in nested loop it quickly
escalated to large values.

Originally capping to REG_BR_PROB_BASE avoided such problems but now we have
much higher range.

This patch avoids going from probabilites to REG_BR_PROB_BASE so precision is
kept.  In addition it makes the propagation to not estimate more than
param-max-predicted-loop-iterations.  The first change makes the cap to not
be triggered on the gcc build, but it is still better to be safe than sorry.

	* ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
	dump.
	* params.opt: (max-predicted-iterations): Set bounds.
	* predict.c (real_almost_one, real_br_prob_base,
	real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
	(propagate_freq): Add max_cyclic_prob parameter; cap cyclic
	probabilities; do not truncate to reg_br_prob_bases.
	(estimate_loops_at_level): Pass max_cyclic_prob.
	(estimate_loops): Compute max_cyclic_prob.
	(estimate_bb_frequencies): Do not initialize real_*; update calculation
	of back edge prob.
	* profile-count.c (profile_probability::to_sreal): New.
	* profile-count.h (class sreal): Move up in file.
	(profile_probability::to_sreal): Declare.
parent 801f5b96
2020-01-16 Jan Hubicka <hubicka@ucw.cz>
* ipa-fnsummary.c (estimate_calls_size_and_time): Fix formating of
dump.
* params.opt: (max-predicted-iterations): Set bounds.
* predict.c (real_almost_one, real_br_prob_base,
real_inv_br_prob_base, real_one_half, real_bb_freq_max): Remove.
(propagate_freq): Add max_cyclic_prob parameter; cap cyclic
probabilities; do not truncate to reg_br_prob_bases.
(estimate_loops_at_level): Pass max_cyclic_prob.
(estimate_loops): Compute max_cyclic_prob.
(estimate_bb_frequencies): Do not initialize real_*; update calculation
of back edge prob.
* profile-count.c (profile_probability::to_sreal): New.
* profile-count.h (class sreal): Move up in file.
(profile_probability::to_sreal): Declare.
2020-01-16 Stam Markianos-Wright <stam.markianos-wright@arm.com> 2020-01-16 Stam Markianos-Wright <stam.markianos-wright@arm.com>
* config/arm/arm.c * config/arm/arm.c
......
...@@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size, ...@@ -3258,7 +3258,7 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
gcc_assert (*size == old_size); gcc_assert (*size == old_size);
if (time && (*time - old_time > 1 || *time - old_time < -1) if (time && (*time - old_time > 1 || *time - old_time < -1)
&& dump_file) && dump_file)
fprintf (dump_file, "Time mismatch in call summary %f!=%f", fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
old_time.to_double (), old_time.to_double (),
time->to_double ()); time->to_double ());
} }
......
...@@ -555,7 +555,7 @@ Common Joined UInteger Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32) ...@@ -555,7 +555,7 @@ Common Joined UInteger Var(param_max_pow_sqrt_depth) Init(5) IntegerRange(1, 32)
Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant. Maximum depth of sqrt chains to use when synthesizing exponentiation by a real constant.
-param=max-predicted-iterations= -param=max-predicted-iterations=
Common Joined UInteger Var(param_max_predicted_iterations) Init(100) Param Optimization Common Joined UInteger Var(param_max_predicted_iterations) Init(100) IntegerRange(1, 65536) Param Optimization
The maximum number of loop iterations we predict statically. The maximum number of loop iterations we predict statically.
-param=max-reload-search-insns= -param=max-reload-search-insns=
......
...@@ -76,10 +76,6 @@ enum predictor_reason ...@@ -76,10 +76,6 @@ enum predictor_reason
static const char *reason_messages[] = {"", " (ignored)", static const char *reason_messages[] = {"", " (ignored)",
" (single edge duplicate)", " (edge pair duplicate)"}; " (single edge duplicate)", " (edge pair duplicate)"};
/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX. */
static sreal real_almost_one, real_br_prob_base,
real_inv_br_prob_base, real_one_half, real_bb_freq_max;
static void combine_predictions_for_insn (rtx_insn *, basic_block); static void combine_predictions_for_insn (rtx_insn *, basic_block);
static void dump_prediction (FILE *, enum br_predictor, int, basic_block, static void dump_prediction (FILE *, enum br_predictor, int, basic_block,
...@@ -3266,7 +3262,8 @@ public: ...@@ -3266,7 +3262,8 @@ public:
TOVISIT, starting in HEAD. */ TOVISIT, starting in HEAD. */
static void static void
propagate_freq (basic_block head, bitmap tovisit) propagate_freq (basic_block head, bitmap tovisit,
sreal max_cyclic_prob)
{ {
basic_block bb; basic_block bb;
basic_block last; basic_block last;
...@@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit) ...@@ -3322,22 +3319,14 @@ propagate_freq (basic_block head, bitmap tovisit)
FOR_EACH_EDGE (e, ei, bb->preds) FOR_EACH_EDGE (e, ei, bb->preds)
if (EDGE_INFO (e)->back_edge) if (EDGE_INFO (e)->back_edge)
{ cyclic_probability += EDGE_INFO (e)->back_edge_prob;
cyclic_probability += EDGE_INFO (e)->back_edge_prob;
}
else if (!(e->flags & EDGE_DFS_BACK)) else if (!(e->flags & EDGE_DFS_BACK))
{ {
/* frequency += (e->probability
* BLOCK_INFO (e->src)->frequency /
REG_BR_PROB_BASE); */
/* FIXME: Graphite is producing edges with no profile. Once /* FIXME: Graphite is producing edges with no profile. Once
this is fixed, drop this. */ this is fixed, drop this. */
sreal tmp = e->probability.initialized_p () ? sreal tmp = e->probability.initialized_p () ?
e->probability.to_reg_br_prob_base () : 0; e->probability.to_sreal () : 0;
tmp *= BLOCK_INFO (e->src)->frequency; frequency += tmp * BLOCK_INFO (e->src)->frequency;
tmp *= real_inv_br_prob_base;
frequency += tmp;
} }
if (cyclic_probability == 0) if (cyclic_probability == 0)
...@@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit) ...@@ -3346,14 +3335,29 @@ propagate_freq (basic_block head, bitmap tovisit)
} }
else else
{ {
if (cyclic_probability > real_almost_one) if (cyclic_probability > max_cyclic_prob)
cyclic_probability = real_almost_one; {
if (dump_file)
/* BLOCK_INFO (bb)->frequency = frequency fprintf (dump_file,
/ (1 - cyclic_probability) */ "cyclic probability of bb %i is %f (capped to %f)"
"; turning freq %f",
cyclic_probability = sreal (1) - cyclic_probability; bb->index, cyclic_probability.to_double (),
BLOCK_INFO (bb)->frequency = frequency / cyclic_probability; max_cyclic_prob.to_double (),
frequency.to_double ());
cyclic_probability = max_cyclic_prob;
}
else if (dump_file)
fprintf (dump_file,
"cyclic probability of bb %i is %f; turning freq %f",
bb->index, cyclic_probability.to_double (),
frequency.to_double ());
BLOCK_INFO (bb)->frequency = frequency
/ (sreal (1) - cyclic_probability);
if (dump_file)
fprintf (dump_file, " to %f\n",
BLOCK_INFO (bb)->frequency.to_double ());
} }
} }
...@@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit) ...@@ -3362,16 +3366,11 @@ propagate_freq (basic_block head, bitmap tovisit)
e = find_edge (bb, head); e = find_edge (bb, head);
if (e) if (e)
{ {
/* EDGE_INFO (e)->back_edge_prob
= ((e->probability * BLOCK_INFO (bb)->frequency)
/ REG_BR_PROB_BASE); */
/* FIXME: Graphite is producing edges with no profile. Once /* FIXME: Graphite is producing edges with no profile. Once
this is fixed, drop this. */ this is fixed, drop this. */
sreal tmp = e->probability.initialized_p () ? sreal tmp = e->probability.initialized_p () ?
e->probability.to_reg_br_prob_base () : 0; e->probability.to_sreal () : 0;
tmp *= BLOCK_INFO (bb)->frequency; EDGE_INFO (e)->back_edge_prob = tmp * BLOCK_INFO (bb)->frequency;
EDGE_INFO (e)->back_edge_prob = tmp * real_inv_br_prob_base;
} }
/* Propagate to successor blocks. */ /* Propagate to successor blocks. */
...@@ -3396,7 +3395,7 @@ propagate_freq (basic_block head, bitmap tovisit) ...@@ -3396,7 +3395,7 @@ propagate_freq (basic_block head, bitmap tovisit)
/* Estimate frequencies in loops at same nest level. */ /* Estimate frequencies in loops at same nest level. */
static void static void
estimate_loops_at_level (class loop *first_loop) estimate_loops_at_level (class loop *first_loop, sreal max_cyclic_prob)
{ {
class loop *loop; class loop *loop;
...@@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop) ...@@ -3407,7 +3406,7 @@ estimate_loops_at_level (class loop *first_loop)
unsigned i; unsigned i;
auto_bitmap tovisit; auto_bitmap tovisit;
estimate_loops_at_level (loop->inner); estimate_loops_at_level (loop->inner, max_cyclic_prob);
/* Find current loop back edge and mark it. */ /* Find current loop back edge and mark it. */
e = loop_latch_edge (loop); e = loop_latch_edge (loop);
...@@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop) ...@@ -3417,7 +3416,7 @@ estimate_loops_at_level (class loop *first_loop)
for (i = 0; i < loop->num_nodes; i++) for (i = 0; i < loop->num_nodes; i++)
bitmap_set_bit (tovisit, bbs[i]->index); bitmap_set_bit (tovisit, bbs[i]->index);
free (bbs); free (bbs);
propagate_freq (loop->header, tovisit); propagate_freq (loop->header, tovisit, max_cyclic_prob);
} }
} }
...@@ -3428,17 +3427,18 @@ estimate_loops (void) ...@@ -3428,17 +3427,18 @@ estimate_loops (void)
{ {
auto_bitmap tovisit; auto_bitmap tovisit;
basic_block bb; basic_block bb;
sreal max_cyclic_prob = (sreal)1 - (sreal)1 / param_max_predicted_iterations;
/* Start by estimating the frequencies in the loops. */ /* Start by estimating the frequencies in the loops. */
if (number_of_loops (cfun) > 1) if (number_of_loops (cfun) > 1)
estimate_loops_at_level (current_loops->tree_root->inner); estimate_loops_at_level (current_loops->tree_root->inner, max_cyclic_prob);
/* Now propagate the frequencies through all the blocks. */ /* Now propagate the frequencies through all the blocks. */
FOR_ALL_BB_FN (bb, cfun) FOR_ALL_BB_FN (bb, cfun)
{ {
bitmap_set_bit (tovisit, bb->index); bitmap_set_bit (tovisit, bb->index);
} }
propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit); propagate_freq (ENTRY_BLOCK_PTR_FOR_FN (cfun), tovisit, max_cyclic_prob);
} }
/* Drop the profile for NODE to guessed, and update its frequency based on /* Drop the profile for NODE to guessed, and update its frequency based on
...@@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force) ...@@ -3844,21 +3844,6 @@ estimate_bb_frequencies (bool force)
if (force || profile_status_for_fn (cfun) != PROFILE_READ if (force || profile_status_for_fn (cfun) != PROFILE_READ
|| !update_max_bb_count ()) || !update_max_bb_count ())
{ {
static int real_values_initialized = 0;
if (!real_values_initialized)
{
real_values_initialized = 1;
real_br_prob_base = REG_BR_PROB_BASE;
/* Scaling frequencies up to maximal profile count may result in
frequent overflows especially when inlining loops.
Small scalling results in unnecesary precision loss. Stay in
the half of the (exponential) range. */
real_bb_freq_max = (uint64_t)1 << (profile_count::n_bits / 2);
real_one_half = sreal (1, -1);
real_inv_br_prob_base = sreal (1) / real_br_prob_base;
real_almost_one = sreal (1) - real_inv_br_prob_base;
}
mark_dfs_back_edges (); mark_dfs_back_edges ();
...@@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force) ...@@ -3879,10 +3864,10 @@ estimate_bb_frequencies (bool force)
this is fixed, drop this. */ this is fixed, drop this. */
if (e->probability.initialized_p ()) if (e->probability.initialized_p ())
EDGE_INFO (e)->back_edge_prob EDGE_INFO (e)->back_edge_prob
= e->probability.to_reg_br_prob_base (); = e->probability.to_sreal ();
else else
EDGE_INFO (e)->back_edge_prob = REG_BR_PROB_BASE / 2; /* back_edge_prob = 0.5 */
EDGE_INFO (e)->back_edge_prob *= real_inv_br_prob_base; EDGE_INFO (e)->back_edge_prob = sreal (1, -1);
} }
} }
...@@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force) ...@@ -3895,14 +3880,18 @@ estimate_bb_frequencies (bool force)
if (freq_max < BLOCK_INFO (bb)->frequency) if (freq_max < BLOCK_INFO (bb)->frequency)
freq_max = BLOCK_INFO (bb)->frequency; freq_max = BLOCK_INFO (bb)->frequency;
freq_max = real_bb_freq_max / freq_max; /* Scaling frequencies up to maximal profile count may result in
frequent overflows especially when inlining loops.
Small scalling results in unnecesary precision loss. Stay in
the half of the (exponential) range. */
freq_max = (sreal (1) << (profile_count::n_bits / 2)) / freq_max;
if (freq_max < 16) if (freq_max < 16)
freq_max = 16; freq_max = 16;
profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa (); profile_count ipa_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.ipa ();
cfun->cfg->count_max = profile_count::uninitialized (); cfun->cfg->count_max = profile_count::uninitialized ();
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
{ {
sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + real_one_half; sreal tmp = BLOCK_INFO (bb)->frequency * freq_max + sreal (1, -1);
profile_count count = profile_count::from_gcov_type (tmp.to_int ()); profile_count count = profile_count::from_gcov_type (tmp.to_int ());
/* If we have profile feedback in which this function was never /* If we have profile feedback in which this function was never
......
...@@ -446,3 +446,12 @@ profile_probability::combine_with_count (profile_count count1, ...@@ -446,3 +446,12 @@ profile_probability::combine_with_count (profile_count count1,
else else
return *this * even () + other * even (); return *this * even () + other * even ();
} }
/* Return probability as sreal in range [0, 1]. */
sreal
profile_probability::to_sreal () const
{
gcc_checking_assert (initialized_p ());
return ((sreal)m_val) >> (n_bits - 2);
}
...@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
struct function; struct function;
struct profile_count; struct profile_count;
class sreal;
/* Quality of the profile count. Because gengtype does not support enums /* Quality of the profile count. Because gengtype does not support enums
inside of classes, this is in global namespace. */ inside of classes, this is in global namespace. */
...@@ -614,6 +615,8 @@ public: ...@@ -614,6 +615,8 @@ public:
profile_probability other, profile_probability other,
profile_count count2) const; profile_count count2) const;
/* Return probability as sreal. */
sreal to_sreal () const;
/* LTO streaming support. */ /* LTO streaming support. */
static profile_probability stream_in (class lto_input_block *); static profile_probability stream_in (class lto_input_block *);
void stream_out (struct output_block *); void stream_out (struct output_block *);
...@@ -674,8 +677,6 @@ public: ...@@ -674,8 +677,6 @@ public:
*/ */
class sreal;
struct GTY(()) profile_count struct GTY(()) profile_count
{ {
public: public:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment