Commit 4adaad64 by Jan Hubicka Committed by Jan Hubicka

re PR tree-optimization/79224 (Large C-Ray slowdown)


	PR ipa/79224
	* ipa-inline-analysis.c (dump_predicate): Add optional parameter NL.
	(account_size_time): Use two predicates - exec_pred and
	nonconst_pred_ptr.
	(evaluate_conditions_for_known_args): Compute both clause and
	nonspec_clause.
	(evaluate_properties_for_edge): Evaluate both clause and nonspec_clause.
	(inline_summary_t::duplicate): Update.
	(estimate_function_body_sizes): Calculate exec and nonconst predicates
	separately.
	(compute_inline_parameters): Likewise.
	(estimate_edge_size_and_time): Update calculation of time.
	(estimate_node_size_and_time): Compute both time and nonspecialized
	time.
	(estimate_ipcp_clone_size_and_time): Update.
	(inline_merge_summary): Update.
	(do_estimate_edge_time): Update.
	(do_estimate_edge_size): Update.
	(do_estimate_edge_hints): Update.
	(inline_read_section, inline_write_summary): Stream both new predicates.
	* ipa-inline.c (compute_uninlined_call_time): Take uninlined_call_time
	as argument.
	(compute_inlined_call_time): Cleanup.
	(big_speedup_p): Update.
	(edge_badness): Update.
	* ipa-inline.h (INLINE_TIME_SCALE): Remove.
	(size_time_entry): Replace predicate by exec_predicate and
	nonconst_predicate.
	(edge_growth_cache_entry): Cache both time and nonspecialized time.
	(estimate_edge_time): Return also nonspec_time.
	(reset_edge_growth_cache): Update.

From-SVN: r247417
parent 8cbe7981
2017-04-29 Jan Hubicka <hubicka@ucw.cz>
PR ipa/79224
* ipa-inline-analysis.c (dump_predicate): Add optional parameter NL.
(account_size_time): Use two predicates - exec_pred and
nonconst_pred_ptr.
(evaluate_conditions_for_known_args): Compute both clause and
nonspec_clause.
(evaluate_properties_for_edge): Evaluate both clause and nonspec_clause.
(inline_summary_t::duplicate): Update.
(estimate_function_body_sizes): Calculate exec and nonconst predicates
separately.
(compute_inline_parameters): Likewise.
(estimate_edge_size_and_time): Update calculation of time.
(estimate_node_size_and_time): Compute both time and nonspecialized
time.
(estimate_ipcp_clone_size_and_time): Update.
(inline_merge_summary): Update.
(do_estimate_edge_time): Update.
(do_estimate_edge_size): Update.
(do_estimate_edge_hints): Update.
(inline_read_section, inline_write_summary): Stream both new predicates.
* ipa-inline.c (compute_uninlined_call_time): Take uninlined_call_time
as argument.
(compute_inlined_call_time): Cleanup.
(big_speedup_p): Update.
(edge_badness): Update.
* ipa-inline.h (INLINE_TIME_SCALE): Remove.
(size_time_entry): Replace predicate by exec_predicate and
nonconst_predicate.
(edge_growth_cache_entry): Cache both time and nonspecialized time.
(estimate_edge_time): Return also nonspec_time.
(reset_edge_growth_cache): Update.
2017-04-29 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/80491
......
......@@ -639,10 +639,9 @@ want_early_inline_function_p (struct cgraph_edge *e)
does not happen. */
inline sreal
compute_uninlined_call_time (struct inline_summary *callee_info,
struct cgraph_edge *edge)
compute_uninlined_call_time (struct cgraph_edge *edge,
sreal uninlined_call_time)
{
sreal uninlined_call_time = (sreal)callee_info->time;
cgraph_node *caller = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller);
......@@ -677,12 +676,10 @@ compute_inlined_call_time (struct cgraph_edge *edge,
else
time = time >> 11;
/* This calculation should match one in ipa-inline-analysis.
FIXME: Once ipa-inline-analysis is converted to sreal this can be
simplified. */
time -= (sreal) ((gcov_type) edge->frequency
* inline_edge_summary (edge)->call_stmt_time
* (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
/* This calculation should match one in ipa-inline-analysis.c
(estimate_edge_size_and_time). */
time -= (sreal) edge->frequency
* inline_edge_summary (edge)->call_stmt_time / CGRAPH_FREQ_BASE;
time += caller_time;
if (time <= 0)
time = ((sreal) 1) >> 8;
......@@ -696,12 +693,13 @@ compute_inlined_call_time (struct cgraph_edge *edge,
static bool
big_speedup_p (struct cgraph_edge *e)
{
sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
e);
sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
sreal unspec_time;
sreal spec_time = estimate_edge_time (e, &unspec_time);
sreal time = compute_uninlined_call_time (e, unspec_time);
sreal inlined_time = compute_inlined_call_time (e, spec_time);
if (time - inlined_time
> (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
> (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))
* percent_rec)
return true;
return false;
......@@ -1011,7 +1009,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
{
sreal badness;
int growth;
sreal edge_time;
sreal edge_time, unspec_edge_time;
struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
struct inline_summary *callee_info = inline_summaries->get (callee);
inline_hints hints;
......@@ -1020,12 +1018,11 @@ edge_badness (struct cgraph_edge *edge, bool dump)
: edge->caller);
growth = estimate_edge_growth (edge);
edge_time = estimate_edge_time (edge);
edge_time = estimate_edge_time (edge, &unspec_edge_time);
hints = estimate_edge_hints (edge);
gcc_checking_assert (edge_time >= 0);
/* FIXME: -1 to care of rounding issues should go away once cache is migrated.
to sreals. */
gcc_checking_assert (edge_time <= callee_info->time);
/* Check that inlined time is better, but tolerate some roundoff issues. */
gcc_checking_assert ((edge_time - callee_info->time).to_int () <= 0);
gcc_checking_assert (growth <= callee_info->size);
if (dump)
......@@ -1035,9 +1032,10 @@ edge_badness (struct cgraph_edge *edge, bool dump)
edge->caller->order,
xstrdup_for_dump (callee->name ()),
edge->callee->order);
fprintf (dump_file, " size growth %i, time %f ",
fprintf (dump_file, " size growth %i, time %f unspec %f ",
growth,
edge_time.to_double ());
edge_time.to_double (),
unspec_edge_time.to_double ());
dump_inline_hints (dump_file, hints);
if (big_speedup_p (edge))
fprintf (dump_file, " big_speedup");
......@@ -1076,7 +1074,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
sreal numerator, denominator;
int overall_growth;
numerator = (compute_uninlined_call_time (callee_info, edge)
numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
- compute_inlined_call_time (edge, edge_time));
if (numerator == 0)
numerator = ((sreal) 1 >> 8);
......@@ -1162,13 +1160,14 @@ edge_badness (struct cgraph_edge *edge, bool dump)
fprintf (dump_file,
" %f: guessed profile. frequency %f, count %" PRId64
" caller count %" PRId64
" time w/o inlining %f, time w/ inlining %f"
" time w/o inlining %f, time with inlining %f"
" overall growth %i (current) %i (original)"
" %i (compensated)\n",
badness.to_double (),
(double)edge->frequency / CGRAPH_FREQ_BASE,
edge->count, caller->count,
compute_uninlined_call_time (callee_info, edge).to_double (),
compute_uninlined_call_time (edge,
unspec_edge_time).to_double (),
compute_inlined_call_time (edge, edge_time).to_double (),
estimate_growth (callee),
callee_info->growth, overall_growth);
......@@ -2056,8 +2055,9 @@ inline_small_functions (void)
if (dump_file)
{
fprintf (dump_file,
" Inlined into %s which now has time %f and size %i, "
" Inlined %s into %s which now has time %f and size %i, "
"net change of %+i.\n",
edge->callee->name (),
edge->caller->name (),
inline_summaries->get (edge->caller)->time.to_double (),
inline_summaries->get (edge->caller)->size,
......
......@@ -103,13 +103,16 @@ struct GTY(()) predicate
context. We keep simple array of record, every containing of predicate
and time/size to account.
We keep values scaled up, so fractional sizes and times can be
accounted. */
We keep values scaled up, so fractional sizes can be accounted. */
#define INLINE_SIZE_SCALE 2
#define INLINE_TIME_SCALE (CGRAPH_FREQ_BASE * 2)
struct GTY(()) size_time_entry
{
struct predicate predicate;
/* Predicate for code to be executed. */
struct predicate exec_predicate;
/* Predicate for value to be constant and optimized out in a specialized copy.
When deciding on specialization this makes it possible to see how much
the executed code paths will simplify. */
struct predicate nonconst_predicate;
int size;
sreal GTY((skip)) time;
};
......@@ -230,9 +233,11 @@ struct inline_edge_summary
typedef struct inline_edge_summary inline_edge_summary_t;
extern vec<inline_edge_summary_t> inline_edge_summary_vec;
/* Data we cache about callgraph edges during inlining to avoid expensive
re-computations during the greedy algorithm. */
struct edge_growth_cache_entry
{
sreal time;
sreal time, nonspec_time;
int size;
inline_hints hints;
};
......@@ -315,12 +320,14 @@ estimate_edge_growth (struct cgraph_edge *edge)
EDGE. */
static inline sreal
estimate_edge_time (struct cgraph_edge *edge)
estimate_edge_time (struct cgraph_edge *edge, sreal *nonspec_time = NULL)
{
sreal ret;
if ((int)edge_growth_cache.length () <= edge->uid
|| !edge_growth_cache[edge->uid].size)
return do_estimate_edge_time (edge);
if (nonspec_time)
*nonspec_time = edge_growth_cache[edge->uid].nonspec_time;
return edge_growth_cache[edge->uid].time;
}
......@@ -345,7 +352,7 @@ reset_edge_growth_cache (struct cgraph_edge *edge)
{
if ((int)edge_growth_cache.length () > edge->uid)
{
struct edge_growth_cache_entry zero = {0, 0, 0};
struct edge_growth_cache_entry zero = {0, 0, 0, 0};
edge_growth_cache[edge->uid] = zero;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment