Commit 208e5afa by Jan Hubicka Committed by Jan Hubicka

re PR ipa/63967 (r217633 caused internal compiler error: in…

re PR ipa/63967 (r217633 caused internal compiler error: in estimate_edge_growth, at ipa-inline.h:299)


	PR ipa/63967
	PR ipa/64425
	* ipa-inline.c (compute_uninlined_call_time,
	compute_inlined_call_time): Use counts for extra precision when
	needed possible.
	(big_speedup_p): Fix formating.
	(RELATIVE_TIME_BENEFIT_RANGE): Remove.
	(relative_time_benefit): Remove.
	(edge_badness): Turn DECL_DISREGARD_INLINE_LIMITS into hint;
	merge guessed and read profile paths.
	(inline_small_functions): Count only !optimize_size functions into
	initial size; be more lax about sanity check when profile is used;
	be sure to update inlined function profile when profile is read.

From-SVN: r219452
parent d8d5aef1
2015-01-12 Jan Hubicka <hubicka@ucw.cz> 2015-01-12 Jan Hubicka <hubicka@ucw.cz>
PR ipa/63967
PR ipa/64425
* ipa-inline.c (compute_uninlined_call_time,
compute_inlined_call_time): Use counts for extra precision when
needed possible.
(big_speedup_p): Fix formating.
(RELATIVE_TIME_BENEFIT_RANGE): Remove.
(relative_time_benefit): Remove.
(edge_badness): Turn DECL_DISREGARD_INLINE_LIMITS into hint;
merge guessed and read profile paths.
(inline_small_functions): Count only !optimize_size functions into
initial size; be more lax about sanity check when profile is used;
be sure to update inlined function profile when profile is read.
2015-01-12 Jan Hubicka <hubicka@ucw.cz>
PR ipa/63470 PR ipa/63470
* ipa-inline-analysis.c (inline_edge_duplication_hook): Adjust * ipa-inline-analysis.c (inline_edge_duplication_hook): Adjust
cost when edge becomes direct. cost when edge becomes direct.
......
...@@ -530,12 +530,19 @@ inline sreal ...@@ -530,12 +530,19 @@ inline sreal
compute_uninlined_call_time (struct inline_summary *callee_info, compute_uninlined_call_time (struct inline_summary *callee_info,
struct cgraph_edge *edge) struct cgraph_edge *edge)
{ {
sreal uninlined_call_time = (sreal)callee_info->time sreal uninlined_call_time = (sreal)callee_info->time;
* MAX (edge->frequency, 1) cgraph_node *caller = (edge->caller->global.inlined_to
* cgraph_freq_base_rec; ? edge->caller->global.inlined_to
int caller_time = inline_summaries->get (edge->caller->global.inlined_to : edge->caller);
? edge->caller->global.inlined_to
: edge->caller)->time; if (edge->count && caller->count)
uninlined_call_time *= (sreal)edge->count / caller->count;
if (edge->frequency)
uninlined_call_time *= cgraph_freq_base_rec * edge->frequency;
else
uninlined_call_time = uninlined_call_time >> 11;
int caller_time = inline_summaries->get (caller)->time;
return uninlined_call_time + caller_time; return uninlined_call_time + caller_time;
} }
...@@ -546,13 +553,28 @@ inline sreal ...@@ -546,13 +553,28 @@ inline sreal
compute_inlined_call_time (struct cgraph_edge *edge, compute_inlined_call_time (struct cgraph_edge *edge,
int edge_time) int edge_time)
{ {
int caller_time = inline_summaries->get (edge->caller->global.inlined_to cgraph_node *caller = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to ? edge->caller->global.inlined_to
: edge->caller)->time; : edge->caller);
sreal time = (sreal)caller_time int caller_time = inline_summaries->get (caller)->time;
+ ((sreal) (edge_time - inline_edge_summary (edge)->call_stmt_time) sreal time = edge_time;
* MAX (edge->frequency, 1)
* cgraph_freq_base_rec); if (edge->count && caller->count)
time *= (sreal)edge->count / caller->count;
if (edge->frequency)
time *= cgraph_freq_base_rec * edge->frequency;
else
time = time >> 11;
/* This calculation should match one in ipa-inline-analysis.
FIXME: Once ipa-inline-analysis is converted to sreal this can be
simplified. */
time -= (sreal) ((gcov_type) edge->frequency
* inline_edge_summary (edge)->call_stmt_time
* (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
time += caller_time;
if (time <= 0)
time = ((sreal) 1) >> 8;
gcc_checking_assert (time >= 0); gcc_checking_assert (time >= 0);
return time; return time;
} }
...@@ -563,8 +585,10 @@ compute_inlined_call_time (struct cgraph_edge *edge, ...@@ -563,8 +585,10 @@ compute_inlined_call_time (struct cgraph_edge *edge,
static bool static bool
big_speedup_p (struct cgraph_edge *e) big_speedup_p (struct cgraph_edge *e)
{ {
sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee), e); sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
e);
sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e)); sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
if (time - inlined_time if (time - inlined_time
> (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP) > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
* percent_rec) * percent_rec)
...@@ -862,49 +886,6 @@ want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold) ...@@ -862,49 +886,6 @@ want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
return true; return true;
} }
#define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
/* Return relative time improvement for inlining EDGE in range
as value NUMERATOR/DENOMINATOR. */
static inline void
relative_time_benefit (struct inline_summary *callee_info,
struct cgraph_edge *edge,
int edge_time,
sreal *numerator,
sreal *denominator)
{
/* Inlining into extern inline function is not a win. */
if (DECL_EXTERNAL (edge->caller->global.inlined_to
? edge->caller->global.inlined_to->decl
: edge->caller->decl))
{
*numerator = (sreal) 1;
*denominator = (sreal) 1024;
return;
}
sreal uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
sreal inlined_call_time = compute_inlined_call_time (edge, edge_time);
/* Compute relative time benefit, i.e. how much the call becomes faster.
??? perhaps computing how much the caller+calle together become faster
would lead to more realistic results. */
if (uninlined_call_time == (sreal) 0)
uninlined_call_time = 1;
/* Avoid zeros, these are not useful later in calculations. */
if (uninlined_call_time == inlined_call_time)
*numerator = ((sreal) 1)>>8;
else
*numerator = uninlined_call_time - inlined_call_time;
*denominator = uninlined_call_time;
#ifdef ENABLE_CHECKING
gcc_checking_assert (*numerator >= 0);
gcc_checking_assert (*denominator >= 0);
#endif
}
/* A cost model driving the inlining heuristics in a way so the edges with /* A cost model driving the inlining heuristics in a way so the edges with
smallest badness are inlined first. After each inlining is performed smallest badness are inlined first. After each inlining is performed
the costs of all caller edges of nodes affected are recomputed so the the costs of all caller edges of nodes affected are recomputed so the
...@@ -919,9 +900,9 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -919,9 +900,9 @@ edge_badness (struct cgraph_edge *edge, bool dump)
struct cgraph_node *callee = edge->callee->ultimate_alias_target (); struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
struct inline_summary *callee_info = inline_summaries->get (callee); struct inline_summary *callee_info = inline_summaries->get (callee);
inline_hints hints; inline_hints hints;
cgraph_node *caller = (edge->caller->global.inlined_to
if (DECL_DISREGARD_INLINE_LIMITS (callee->decl)) ? edge->caller->global.inlined_to
return sreal::min (); : edge->caller);
growth = estimate_edge_growth (edge); growth = estimate_edge_growth (edge);
edge_time = estimate_edge_time (edge); edge_time = estimate_edge_time (edge);
...@@ -954,59 +935,38 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -954,59 +935,38 @@ edge_badness (struct cgraph_edge *edge, bool dump)
fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (), fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
growth); growth);
} }
/* Inlining into EXTERNAL functions is not going to change anything unless
/* When profiling is available, compute badness as: they are themselves inlined. */
else if (DECL_EXTERNAL (caller->decl))
edge_count * relative_time_benefit
goodness = -------------------------------------------
growth_of_caller
badness = - goodness
The fraction is upside down, because on edge counts and time beneits
the bounds are known. Edge growth is essentially unlimited. */
else if (max_count)
{ {
sreal numerator, denominator;
relative_time_benefit (callee_info, edge, edge_time, &numerator,
&denominator);
if (edge->count)
numerator *= edge->count;
denominator *= growth;
badness = - numerator / denominator;
if (dump) if (dump)
{ fprintf (dump_file, " max: function is external\n");
sreal num,den; return sreal::max ();
relative_time_benefit (callee_info, edge, edge_time, &num, &den);
fprintf (dump_file,
" %f: profile info. count %"PRId64
" * Relative benefit %f / growth %i\n",
badness.to_double (), (int64_t)edge->count,
(num / den * 100).to_double (), growth);
}
} }
/* When profile is available. Compute badness as:
/* When function local profile is available. Compute badness as:
relative_time_benefit time_saved * caller_count
goodness = --------------------------------- goodness = ---------------------------------
growth_of_caller * overall_growth growth_of_caller * overall_growth
badness = - goodness badness = - goodness
compensated by the inline hints. Again use negative value to make calls with profile appear hotter
then calls without.
*/ */
/* TODO: We ought suport mixing units where some functions are profiled else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count)
and some not. */
else if (flag_guess_branch_prob)
{ {
sreal numerator, denominator; sreal numerator, denominator;
relative_time_benefit (callee_info, edge, edge_time, &numerator,
&denominator); numerator = (compute_uninlined_call_time (callee_info, edge)
denominator *= growth; - compute_inlined_call_time (edge, edge_time));
if (numerator == 0)
numerator = ((sreal) 1 >> 8);
if (caller->count)
numerator *= caller->count;
else if (opt_for_fn (caller->decl, flag_branch_probabilities))
numerator = numerator >> 11;
denominator = growth;
if (callee_info->growth > 0) if (callee_info->growth > 0)
denominator *= callee_info->growth; denominator *= callee_info->growth;
...@@ -1014,14 +974,13 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -1014,14 +974,13 @@ edge_badness (struct cgraph_edge *edge, bool dump)
if (dump) if (dump)
{ {
sreal num,den;
relative_time_benefit (callee_info, edge, edge_time, &num, &den);
fprintf (dump_file, fprintf (dump_file,
" %f: guessed profile. frequency %f," " %f: guessed profile. frequency %f, count %"PRId64
" benefit %f%%, time w/o inlining %f, time w inlining %f" " caller count %"PRId64
" time w/o inlining %f, time w inlining %f"
" overall growth %i (current) %i (original)\n", " overall growth %i (current) %i (original)\n",
badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE, badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE,
(num/den).to_double () * 100, edge->count, caller->count,
compute_uninlined_call_time (callee_info, edge).to_double (), compute_uninlined_call_time (callee_info, edge).to_double (),
compute_inlined_call_time (edge, edge_time).to_double (), compute_inlined_call_time (edge, edge_time).to_double (),
estimate_growth (callee), estimate_growth (callee),
...@@ -1062,7 +1021,9 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -1062,7 +1021,9 @@ edge_badness (struct cgraph_edge *edge, bool dump)
badness = badness.shift (badness > 0 ? 2 : -2); badness = badness.shift (badness > 0 ? 2 : -2);
else if (hints & (INLINE_HINT_cross_module)) else if (hints & (INLINE_HINT_cross_module))
badness = badness.shift (badness > 0 ? 1 : -1); badness = badness.shift (badness > 0 ? 1 : -1);
if ((hints & INLINE_HINT_declared_inline)) if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
badness = badness.shift (badness > 0 ? -4 : 4);
else if ((hints & INLINE_HINT_declared_inline))
badness = badness.shift (badness > 0 ? -3 : 3); badness = badness.shift (badness > 0 ? -3 : 3);
if (dump) if (dump)
fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ()); fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
...@@ -1592,6 +1553,7 @@ inline_small_functions (void) ...@@ -1592,6 +1553,7 @@ inline_small_functions (void)
/* Do not account external functions, they will be optimized out /* Do not account external functions, they will be optimized out
if not inlined. Also only count the non-cold portion of program. */ if not inlined. Also only count the non-cold portion of program. */
if (!DECL_EXTERNAL (node->decl) if (!DECL_EXTERNAL (node->decl)
&& !opt_for_fn (node->decl, optimize_size)
&& node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED) && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
initial_size += info->size; initial_size += info->size;
info->growth = estimate_growth (node); info->growth = estimate_growth (node);
...@@ -1696,8 +1658,10 @@ inline_small_functions (void) ...@@ -1696,8 +1658,10 @@ inline_small_functions (void)
Increases of badness are handled lazilly; when we see key with out Increases of badness are handled lazilly; when we see key with out
of date value on it, we re-insert it now. */ of date value on it, we re-insert it now. */
current_badness = edge_badness (edge, false); current_badness = edge_badness (edge, false);
gcc_assert (cached_badness == current_badness); /* Disable checking for profile because roundoff errors may cause slight
gcc_assert (current_badness >= badness); deviations in the order. */
gcc_assert (max_count || cached_badness == current_badness);
gcc_assert (max_count || current_badness >= badness);
#else #else
current_badness = edge_badness (edge, false); current_badness = edge_badness (edge, false);
#endif #endif
...@@ -1835,6 +1799,14 @@ inline_small_functions (void) ...@@ -1835,6 +1799,14 @@ inline_small_functions (void)
called by function we inlined (since number of it inlinable callers called by function we inlined (since number of it inlinable callers
might change). */ might change). */
update_caller_keys (&edge_heap, where, updated_nodes, NULL); update_caller_keys (&edge_heap, where, updated_nodes, NULL);
/* Offline copy count has possibly changed, recompute if profile is
available. */
if (max_count)
{
struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
if (n != edge->callee && n->analyzed)
update_callee_keys (&edge_heap, n, updated_nodes);
}
bitmap_clear (updated_nodes); bitmap_clear (updated_nodes);
if (dump_file) if (dump_file)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment