Commit 4adaad64 by Jan Hubicka Committed by Jan Hubicka

re PR tree-optimization/79224 (Large C-Ray slowdown)


	PR ipa/79224
	* ipa-inline-analysis.c (dump_predicate): Add optional parameter NL.
	(account_size_time): Use two predicates - exec_pred and
	nonconst_pred_ptr.
	(evaluate_conditions_for_known_args): Compute both clause and
	nonspec_clause.
	(evaluate_properties_for_edge): Evaluate both clause and nonspec_clause.
	(inline_summary_t::duplicate): Update.
	(estimate_function_body_sizes): Calculate exec and nonconst predicates
	separately.
	(compute_inline_parameters): Likewise.
	(estimate_edge_size_and_time): Update calculation of time.
	(estimate_node_size_and_time): Compute both time and nonspecialized
	time.
	(estimate_ipcp_clone_size_and_time): Update.
	(inline_merge_summary): Update.
	(do_estimate_edge_time): Update.
	(do_estimate_edge_size): Update.
	(do_estimate_edge_hints): Update.
	(inline_read_section, inline_write_summary): Stream both new predicates.
	* ipa-inline.c (compute_uninlined_call_time): Take uninlined_call_time
	as argument.
	(compute_inlined_call_time): Cleanup.
	(big_speedup_p): Update.
	(edge_badness): Update.
	* ipa-inline.h (INLINE_TIME_SCALE): Remove.
	(size_time_entry): Replace predicate by exec_predicate and
	nonconst_predicate.
	(edge_growth_cache_entry): Cache both time and nonspecialized time.
	(estimate_edge_time): Return also nonspec_time.
	(reset_edge_growth_cache): Update.

From-SVN: r247417
parent 8cbe7981
2017-04-29 Jan Hubicka <hubicka@ucw.cz>
PR ipa/79224
* ipa-inline-analysis.c (dump_predicate): Add optional parameter NL.
(account_size_time): Use two predicates - exec_pred and
nonconst_pred_ptr.
(evaluate_conditions_for_known_args): Compute both clause and
nonspec_clause.
(evaluate_properties_for_edge): Evaluate both clause and nonspec_clause.
(inline_summary_t::duplicate): Update.
(estimate_function_body_sizes): Calculate exec and nonconst predicates
separately.
(compute_inline_parameters): Likewise.
(estimate_edge_size_and_time): Update calculation of time.
(estimate_node_size_and_time): Compute both time and nonspecialized
time.
(estimate_ipcp_clone_size_and_time): Update.
(inline_merge_summary): Update.
(do_estimate_edge_time): Update.
(do_estimate_edge_size): Update.
(do_estimate_edge_hints): Update.
(inline_read_section, inline_write_summary): Stream both new predicates.
* ipa-inline.c (compute_uninlined_call_time): Take uninlined_call_time
as argument.
(compute_inlined_call_time): Cleanup.
(big_speedup_p): Update.
(edge_badness): Update.
* ipa-inline.h (INLINE_TIME_SCALE): Remove.
(size_time_entry): Replace predicate by exec_predicate and
nonconst_predicate.
(edge_growth_cache_entry): Cache both time and nonspecialized time.
(estimate_edge_time): Return also nonspec_time.
(reset_edge_growth_cache): Update.
2017-04-29 Jakub Jelinek <jakub@redhat.com> 2017-04-29 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/80491 PR rtl-optimization/80491
......
...@@ -639,10 +639,9 @@ want_early_inline_function_p (struct cgraph_edge *e) ...@@ -639,10 +639,9 @@ want_early_inline_function_p (struct cgraph_edge *e)
does not happen. */ does not happen. */
inline sreal inline sreal
compute_uninlined_call_time (struct inline_summary *callee_info, compute_uninlined_call_time (struct cgraph_edge *edge,
struct cgraph_edge *edge) sreal uninlined_call_time)
{ {
sreal uninlined_call_time = (sreal)callee_info->time;
cgraph_node *caller = (edge->caller->global.inlined_to cgraph_node *caller = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to ? edge->caller->global.inlined_to
: edge->caller); : edge->caller);
...@@ -677,12 +676,10 @@ compute_inlined_call_time (struct cgraph_edge *edge, ...@@ -677,12 +676,10 @@ compute_inlined_call_time (struct cgraph_edge *edge,
else else
time = time >> 11; time = time >> 11;
/* This calculation should match one in ipa-inline-analysis. /* This calculation should match one in ipa-inline-analysis.c
FIXME: Once ipa-inline-analysis is converted to sreal this can be (estimate_edge_size_and_time). */
simplified. */ time -= (sreal) edge->frequency
time -= (sreal) ((gcov_type) edge->frequency * inline_edge_summary (edge)->call_stmt_time / CGRAPH_FREQ_BASE;
* inline_edge_summary (edge)->call_stmt_time
* (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
time += caller_time; time += caller_time;
if (time <= 0) if (time <= 0)
time = ((sreal) 1) >> 8; time = ((sreal) 1) >> 8;
...@@ -696,12 +693,13 @@ compute_inlined_call_time (struct cgraph_edge *edge, ...@@ -696,12 +693,13 @@ compute_inlined_call_time (struct cgraph_edge *edge,
static bool static bool
big_speedup_p (struct cgraph_edge *e) big_speedup_p (struct cgraph_edge *e)
{ {
sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee), sreal unspec_time;
e); sreal spec_time = estimate_edge_time (e, &unspec_time);
sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e)); sreal time = compute_uninlined_call_time (e, unspec_time);
sreal inlined_time = compute_inlined_call_time (e, spec_time);
if (time - inlined_time if (time - inlined_time
> (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP) > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))
* percent_rec) * percent_rec)
return true; return true;
return false; return false;
...@@ -1011,7 +1009,7 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -1011,7 +1009,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
{ {
sreal badness; sreal badness;
int growth; int growth;
sreal edge_time; sreal edge_time, unspec_edge_time;
struct cgraph_node *callee = edge->callee->ultimate_alias_target (); struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
struct inline_summary *callee_info = inline_summaries->get (callee); struct inline_summary *callee_info = inline_summaries->get (callee);
inline_hints hints; inline_hints hints;
...@@ -1020,12 +1018,11 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -1020,12 +1018,11 @@ edge_badness (struct cgraph_edge *edge, bool dump)
: edge->caller); : edge->caller);
growth = estimate_edge_growth (edge); growth = estimate_edge_growth (edge);
edge_time = estimate_edge_time (edge); edge_time = estimate_edge_time (edge, &unspec_edge_time);
hints = estimate_edge_hints (edge); hints = estimate_edge_hints (edge);
gcc_checking_assert (edge_time >= 0); gcc_checking_assert (edge_time >= 0);
/* FIXME: -1 to care of rounding issues should go away once cache is migrated. /* Check that inlined time is better, but tolerate some roundoff issues. */
to sreals. */ gcc_checking_assert ((edge_time - callee_info->time).to_int () <= 0);
gcc_checking_assert (edge_time <= callee_info->time);
gcc_checking_assert (growth <= callee_info->size); gcc_checking_assert (growth <= callee_info->size);
if (dump) if (dump)
...@@ -1035,9 +1032,10 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -1035,9 +1032,10 @@ edge_badness (struct cgraph_edge *edge, bool dump)
edge->caller->order, edge->caller->order,
xstrdup_for_dump (callee->name ()), xstrdup_for_dump (callee->name ()),
edge->callee->order); edge->callee->order);
fprintf (dump_file, " size growth %i, time %f ", fprintf (dump_file, " size growth %i, time %f unspec %f ",
growth, growth,
edge_time.to_double ()); edge_time.to_double (),
unspec_edge_time.to_double ());
dump_inline_hints (dump_file, hints); dump_inline_hints (dump_file, hints);
if (big_speedup_p (edge)) if (big_speedup_p (edge))
fprintf (dump_file, " big_speedup"); fprintf (dump_file, " big_speedup");
...@@ -1076,7 +1074,7 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -1076,7 +1074,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
sreal numerator, denominator; sreal numerator, denominator;
int overall_growth; int overall_growth;
numerator = (compute_uninlined_call_time (callee_info, edge) numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
- compute_inlined_call_time (edge, edge_time)); - compute_inlined_call_time (edge, edge_time));
if (numerator == 0) if (numerator == 0)
numerator = ((sreal) 1 >> 8); numerator = ((sreal) 1 >> 8);
...@@ -1162,13 +1160,14 @@ edge_badness (struct cgraph_edge *edge, bool dump) ...@@ -1162,13 +1160,14 @@ edge_badness (struct cgraph_edge *edge, bool dump)
fprintf (dump_file, fprintf (dump_file,
" %f: guessed profile. frequency %f, count %" PRId64 " %f: guessed profile. frequency %f, count %" PRId64
" caller count %" PRId64 " caller count %" PRId64
" time w/o inlining %f, time w/ inlining %f" " time w/o inlining %f, time with inlining %f"
" overall growth %i (current) %i (original)" " overall growth %i (current) %i (original)"
" %i (compensated)\n", " %i (compensated)\n",
badness.to_double (), badness.to_double (),
(double)edge->frequency / CGRAPH_FREQ_BASE, (double)edge->frequency / CGRAPH_FREQ_BASE,
edge->count, caller->count, edge->count, caller->count,
compute_uninlined_call_time (callee_info, edge).to_double (), compute_uninlined_call_time (edge,
unspec_edge_time).to_double (),
compute_inlined_call_time (edge, edge_time).to_double (), compute_inlined_call_time (edge, edge_time).to_double (),
estimate_growth (callee), estimate_growth (callee),
callee_info->growth, overall_growth); callee_info->growth, overall_growth);
...@@ -2056,8 +2055,9 @@ inline_small_functions (void) ...@@ -2056,8 +2055,9 @@ inline_small_functions (void)
if (dump_file) if (dump_file)
{ {
fprintf (dump_file, fprintf (dump_file,
" Inlined into %s which now has time %f and size %i, " " Inlined %s into %s which now has time %f and size %i, "
"net change of %+i.\n", "net change of %+i.\n",
edge->callee->name (),
edge->caller->name (), edge->caller->name (),
inline_summaries->get (edge->caller)->time.to_double (), inline_summaries->get (edge->caller)->time.to_double (),
inline_summaries->get (edge->caller)->size, inline_summaries->get (edge->caller)->size,
......
...@@ -103,13 +103,16 @@ struct GTY(()) predicate ...@@ -103,13 +103,16 @@ struct GTY(()) predicate
context. We keep simple array of record, every containing of predicate context. We keep simple array of record, every containing of predicate
and time/size to account. and time/size to account.
We keep values scaled up, so fractional sizes and times can be We keep values scaled up, so fractional sizes can be accounted. */
accounted. */
#define INLINE_SIZE_SCALE 2 #define INLINE_SIZE_SCALE 2
#define INLINE_TIME_SCALE (CGRAPH_FREQ_BASE * 2)
struct GTY(()) size_time_entry struct GTY(()) size_time_entry
{ {
struct predicate predicate; /* Predicate for code to be executed. */
struct predicate exec_predicate;
/* Predicate for value to be constant and optimized out in a specialized copy.
When deciding on specialization this makes it possible to see how much
the executed code paths will simplify. */
struct predicate nonconst_predicate;
int size; int size;
sreal GTY((skip)) time; sreal GTY((skip)) time;
}; };
...@@ -230,9 +233,11 @@ struct inline_edge_summary ...@@ -230,9 +233,11 @@ struct inline_edge_summary
typedef struct inline_edge_summary inline_edge_summary_t; typedef struct inline_edge_summary inline_edge_summary_t;
extern vec<inline_edge_summary_t> inline_edge_summary_vec; extern vec<inline_edge_summary_t> inline_edge_summary_vec;
/* Data we cache about callgraph edges during inlining to avoid expensive
re-computations during the greedy algorithm. */
struct edge_growth_cache_entry struct edge_growth_cache_entry
{ {
sreal time; sreal time, nonspec_time;
int size; int size;
inline_hints hints; inline_hints hints;
}; };
...@@ -315,12 +320,14 @@ estimate_edge_growth (struct cgraph_edge *edge) ...@@ -315,12 +320,14 @@ estimate_edge_growth (struct cgraph_edge *edge)
EDGE. */ EDGE. */
static inline sreal static inline sreal
estimate_edge_time (struct cgraph_edge *edge) estimate_edge_time (struct cgraph_edge *edge, sreal *nonspec_time = NULL)
{ {
sreal ret; sreal ret;
if ((int)edge_growth_cache.length () <= edge->uid if ((int)edge_growth_cache.length () <= edge->uid
|| !edge_growth_cache[edge->uid].size) || !edge_growth_cache[edge->uid].size)
return do_estimate_edge_time (edge); return do_estimate_edge_time (edge);
if (nonspec_time)
*nonspec_time = edge_growth_cache[edge->uid].nonspec_time;
return edge_growth_cache[edge->uid].time; return edge_growth_cache[edge->uid].time;
} }
...@@ -345,7 +352,7 @@ reset_edge_growth_cache (struct cgraph_edge *edge) ...@@ -345,7 +352,7 @@ reset_edge_growth_cache (struct cgraph_edge *edge)
{ {
if ((int)edge_growth_cache.length () > edge->uid) if ((int)edge_growth_cache.length () > edge->uid)
{ {
struct edge_growth_cache_entry zero = {0, 0, 0}; struct edge_growth_cache_entry zero = {0, 0, 0, 0};
edge_growth_cache[edge->uid] = zero; edge_growth_cache[edge->uid] = zero;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment