Commit 4adaad64 by Jan Hubicka Committed by Jan Hubicka

re PR tree-optimization/79224 (Large C-Ray slowdown)


	PR ipa/79224
	* ipa-inline-analysis.c (dump_predicate): Add optional parameter NL.
	(account_size_time): Use two predicates - exec_pred and
	nonconst_pred_ptr.
	(evaluate_conditions_for_known_args): Compute both clause and
	nonspec_clause.
	(evaluate_properties_for_edge): Evaulate both clause and nonspec_clause.
	(inline_summary_t::duplicate): Update.
	(estimate_function_body_sizes): Caluculate exec and nonconst predicates
	separately.
	(compute_inline_parameters): Likewise.
	(estimate_edge_size_and_time): Update caluclation of time.
	(estimate_node_size_and_time): Compute both time and nonspecialized
	time.
	(estimate_ipcp_clone_size_and_time): Update.
	(inline_merge_summary): Update.
	(do_estimate_edge_time): Update.
	(do_estimate_edge_size): Update.
	(do_estimate_edge_hints): Update.
	(inline_read_section, inline_write_summary): Stream both new predicates.
	* ipa-inline.c (compute_uninlined_call_time): Take uninlined_call_time
	as argument.
	(compute_inlined_call_time): Cleanup.
	(big_speedup_p): Update.
	(edge_badness): Update.
	* ipa-inline.h (INLINE_TIME_SCALE): Remove.
	(size_time_entry): Replace predicate by exec_predicate and
	nonconst_predicate.
	(edge_growth_cache_entry): Cache both time nad nonspecialized time.
	(estimate_edge_time): Return also nonspec_time.
	(reset_edge_growth_cache): Update.

From-SVN: r247417
parent 8cbe7981
2017-04-29 Jan Hubicka <hubicka@ucw.cz>
PR ipa/79224
* ipa-inline-analysis.c (dump_predicate): Add optional parameter NL.
(account_size_time): Use two predicates - exec_pred and
nonconst_pred_ptr.
(evaluate_conditions_for_known_args): Compute both clause and
nonspec_clause.
(evaluate_properties_for_edge): Evaulate both clause and nonspec_clause.
(inline_summary_t::duplicate): Update.
(estimate_function_body_sizes): Caluculate exec and nonconst predicates
separately.
(compute_inline_parameters): Likewise.
(estimate_edge_size_and_time): Update caluclation of time.
(estimate_node_size_and_time): Compute both time and nonspecialized
time.
(estimate_ipcp_clone_size_and_time): Update.
(inline_merge_summary): Update.
(do_estimate_edge_time): Update.
(do_estimate_edge_size): Update.
(do_estimate_edge_hints): Update.
(inline_read_section, inline_write_summary): Stream both new predicates.
* ipa-inline.c (compute_uninlined_call_time): Take uninlined_call_time
as argument.
(compute_inlined_call_time): Cleanup.
(big_speedup_p): Update.
(edge_badness): Update.
* ipa-inline.h (INLINE_TIME_SCALE): Remove.
(size_time_entry): Replace predicate by exec_predicate and
nonconst_predicate.
(edge_growth_cache_entry): Cache both time nad nonspecialized time.
(estimate_edge_time): Return also nonspec_time.
(reset_edge_growth_cache): Update.
2017-04-29 Jakub Jelinek <jakub@redhat.com>
PR rtl-optimization/80491
......
......@@ -585,10 +585,12 @@ dump_clause (FILE *f, conditions conds, clause_t clause)
}
/* Dump predicate PREDICATE. */
/* Dump PREDICATE to F. CONDS a vector of conditions used when evauating
predicats. When NL is true new line is output at the end of dump. */
static void
dump_predicate (FILE *f, conditions conds, struct predicate *pred)
dump_predicate (FILE *f, conditions conds, struct predicate *pred,
bool nl = true)
{
int i;
if (true_predicate_p (pred))
......@@ -600,7 +602,8 @@ dump_predicate (FILE *f, conditions conds, struct predicate *pred)
fprintf (f, " && ");
dump_clause (f, conds, pred->clause[i]);
}
fprintf (f, "\n");
if (nl)
fprintf (f, "\n");
}
......@@ -660,17 +663,27 @@ dump_inline_hints (FILE *f, inline_hints hints)
}
/* Record SIZE and TIME under condition PRED into the inline summary. */
/* Record SIZE and TIME to SUMMARY.
The accounted code will be executed when EXEC_PRED is true.
When NONCONST_PRED is false the code will evaulate to constant and
will get optimized out in specialized clones of the function. */
static void
account_size_time (struct inline_summary *summary, int size, sreal time,
struct predicate *pred)
struct predicate *exec_pred,
struct predicate *nonconst_pred_ptr)
{
size_time_entry *e;
bool found = false;
int i;
struct predicate nonconst_pred;
if (false_predicate_p (pred))
if (false_predicate_p (exec_pred))
return;
nonconst_pred = and_predicates (summary->conds, nonconst_pred_ptr, exec_pred);
if (false_predicate_p (&nonconst_pred))
return;
/* We need to create initial empty unconitional clause, but otherwie
......@@ -681,7 +694,8 @@ account_size_time (struct inline_summary *summary, int size, sreal time,
gcc_assert (time >= 0);
for (i = 0; vec_safe_iterate (summary->entry, i, &e); i++)
if (predicates_equal_p (&e->predicate, pred))
if (predicates_equal_p (&e->exec_predicate, exec_pred)
&& predicates_equal_p (&e->nonconst_predicate, &nonconst_pred))
{
found = true;
break;
......@@ -691,7 +705,7 @@ account_size_time (struct inline_summary *summary, int size, sreal time,
i = 0;
found = true;
e = &(*summary->entry)[0];
gcc_assert (!e->predicate.clause[0]);
gcc_assert (!e->exec_predicate.clause[0]);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"\t\tReached limit on number of entries, "
......@@ -700,17 +714,25 @@ account_size_time (struct inline_summary *summary, int size, sreal time,
if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
{
fprintf (dump_file,
"\t\tAccounting size:%3.2f, time:%3.2f on %spredicate:",
"\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
((double) size) / INLINE_SIZE_SCALE,
(time.to_double ()) / INLINE_TIME_SCALE, found ? "" : "new ");
dump_predicate (dump_file, summary->conds, pred);
(time.to_double ()), found ? "" : "new ");
dump_predicate (dump_file, summary->conds, exec_pred, 0);
if (!predicates_equal_p (exec_pred, &nonconst_pred))
{
fprintf (dump_file, " nonconst:");
dump_predicate (dump_file, summary->conds, &nonconst_pred);
}
else
fprintf (dump_file, "\n");
}
if (!found)
{
struct size_time_entry new_entry;
new_entry.size = size;
new_entry.time = time;
new_entry.predicate = *pred;
new_entry.exec_predicate = *exec_pred;
new_entry.nonconst_predicate = nonconst_pred;
vec_safe_push (summary->entry, new_entry);
}
else
......@@ -795,21 +817,33 @@ set_hint_predicate (struct predicate **p, struct predicate new_predicate)
}
/* KNOWN_VALS is partial mapping of parameters of NODE to constant values.
/* Compute what conditions may or may not hold given invormation about
parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
copy when called in a given context. It is a bitmask of conditions. Bit
0 means that condition is known to be false, while bit 1 means that condition
may or may not be true. These differs - for example NOT_INLINED condition
is always false in the second and also builtin_constant_p tests can not use
the fact that parameter is indeed a constant.
KNOWN_VALS is partial mapping of parameters of NODE to constant values.
KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
Return clause of possible truths. When INLINE_P is true, assume that we are
inlining.
ERROR_MARK means compile time invariant. */
static clause_t
static void
evaluate_conditions_for_known_args (struct cgraph_node *node,
bool inline_p,
vec<tree> known_vals,
vec<ipa_agg_jump_function_p>
known_aggs)
known_aggs,
clause_t *ret_clause,
clause_t *ret_nonspec_clause)
{
clause_t clause = inline_p ? 0 : 1 << predicate_not_inlined_condition;
clause_t nonspec_clause = 1 << predicate_not_inlined_condition;
struct inline_summary *info = inline_summaries->get (node);
int i;
struct condition *c;
......@@ -828,6 +862,7 @@ evaluate_conditions_for_known_args (struct cgraph_node *node,
if (c->operand_num >= (int) known_vals.length ())
{
clause |= 1 << (i + predicate_first_dynamic_condition);
nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
......@@ -859,18 +894,26 @@ evaluate_conditions_for_known_args (struct cgraph_node *node,
if (!val)
{
clause |= 1 << (i + predicate_first_dynamic_condition);
nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
if (c->code == CHANGED)
continue;
{
nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
{
clause |= 1 << (i + predicate_first_dynamic_condition);
nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
if (c->code == IS_NOT_CONSTANT)
continue;
{
nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
continue;
}
val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
res = val
......@@ -881,8 +924,11 @@ evaluate_conditions_for_known_args (struct cgraph_node *node,
continue;
clause |= 1 << (i + predicate_first_dynamic_condition);
nonspec_clause |= 1 << (i + predicate_first_dynamic_condition);
}
return clause;
*ret_clause = clause;
if (ret_nonspec_clause)
*ret_nonspec_clause = nonspec_clause;
}
......@@ -890,7 +936,7 @@ evaluate_conditions_for_known_args (struct cgraph_node *node,
static void
evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
clause_t *clause_ptr,
clause_t *clause_ptr, clause_t *nonspec_clause_ptr,
vec<tree> *known_vals_ptr,
vec<ipa_polymorphic_call_context>
*known_contexts_ptr,
......@@ -976,9 +1022,9 @@ evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
}
}
if (clause_ptr)
*clause_ptr = evaluate_conditions_for_known_args (callee, inline_p,
known_vals, known_aggs);
evaluate_conditions_for_known_args (callee, inline_p,
known_vals, known_aggs, clause_ptr,
nonspec_clause_ptr);
if (known_vals_ptr)
*known_vals_ptr = known_vals;
......@@ -1172,12 +1218,16 @@ inline_summary_t::duplicate (cgraph_node *src,
}
}
}
possible_truths = evaluate_conditions_for_known_args (dst, false,
known_vals,
vNULL);
evaluate_conditions_for_known_args (dst, false,
known_vals,
vNULL,
&possible_truths,
/* We are going to specialize,
so ignore nonspec truths. */
NULL);
known_vals.release ();
account_size_time (info, 0, 0, &true_pred);
account_size_time (info, 0, 0, &true_pred, &true_pred);
/* Remap size_time vectors.
Simplify the predicate by prunning out alternatives that are known
......@@ -1186,14 +1236,21 @@ inline_summary_t::duplicate (cgraph_node *src,
to be true. */
for (i = 0; vec_safe_iterate (entry, i, &e); i++)
{
struct predicate new_predicate;
new_predicate = remap_predicate_after_duplication (&e->predicate,
struct predicate new_exec_pred;
struct predicate new_nonconst_pred;
new_exec_pred = remap_predicate_after_duplication (&e->exec_predicate,
possible_truths,
info);
if (false_predicate_p (&new_predicate))
new_nonconst_pred
= remap_predicate_after_duplication (&e->nonconst_predicate,
possible_truths,
info);
if (false_predicate_p (&new_exec_pred)
|| false_predicate_p (&new_nonconst_pred))
optimized_out_size += e->size;
else
account_size_time (info, e->size, e->time, &new_predicate);
account_size_time (info, e->size, e->time, &new_exec_pred,
&new_nonconst_pred);
}
/* Remap edge predicates with the same simplification as above.
......@@ -1439,10 +1496,21 @@ dump_inline_summary (FILE *f, struct cgraph_node *node)
fprintf (f, " In SCC: %i\n", (int) s->scc_no);
for (i = 0; vec_safe_iterate (s->entry, i, &e); i++)
{
fprintf (f, " size:%f, time:%f, predicate:",
fprintf (f, " size:%f, time:%f",
(double) e->size / INLINE_SIZE_SCALE,
e->time.to_double () / INLINE_TIME_SCALE);
dump_predicate (f, s->conds, &e->predicate);
e->time.to_double ());
if (!true_predicate_p (&e->exec_predicate))
{
fprintf (f, ", executed if:");
dump_predicate (f, s->conds, &e->exec_predicate, 0);
}
if (!predicates_equal_p (&e->exec_predicate,
&e->nonconst_predicate))
{
fprintf (f, ", nonconst if:");
dump_predicate (f, s->conds, &e->nonconst_predicate, 0);
}
fprintf (f, "\n");
}
if (s->loop_iterations)
{
......@@ -2585,10 +2653,11 @@ estimate_function_body_sizes (struct cgraph_node *node, bool early)
/* When we run into maximal number of entries, we assign everything to the
constant truth case. Be sure to have it in list. */
bb_predicate = true_predicate ();
account_size_time (info, 0, 0, &bb_predicate);
account_size_time (info, 0, 0, &bb_predicate, &bb_predicate);
bb_predicate = not_inlined_predicate ();
account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate);
account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &bb_predicate,
&bb_predicate);
if (fbi.info)
compute_bb_predicates (&fbi, node, info);
......@@ -2746,10 +2815,10 @@ estimate_function_body_sizes (struct cgraph_node *node, bool early)
will_be_nonconstant
= will_be_nonconstant_predicate (&fbi, info,
stmt, nonconstant_names);
else
will_be_nonconstant = true_predicate ();
if (this_time || this_size)
{
struct predicate p;
this_time *= freq;
prob = eliminated_by_inlining_prob (stmt);
......@@ -2759,15 +2828,15 @@ estimate_function_body_sizes (struct cgraph_node *node, bool early)
if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
if (fbi.info)
p = and_predicates (info->conds, &bb_predicate,
&will_be_nonconstant);
else
p = true_predicate ();
struct predicate p = and_predicates (info->conds, &bb_predicate,
&will_be_nonconstant);
/* We can ignore statement when we proved it is never going
to happen, but we can not do that for call statements
because edges are accounted specially. */
if (!false_predicate_p (&p)
|| (is_gimple_call (stmt)
&& !false_predicate_p (&bb_predicate)))
if (!false_predicate_p (is_gimple_call (stmt)
? &bb_predicate : &p))
{
time += this_time;
size += this_size;
......@@ -2781,13 +2850,18 @@ estimate_function_body_sizes (struct cgraph_node *node, bool early)
if (prob)
{
struct predicate ip = not_inlined_predicate ();
ip = and_predicates (info->conds, &ip, &p);
ip = and_predicates (info->conds, &ip, &bb_predicate);
account_size_time (info, this_size * prob,
this_time * prob, &ip);
(sreal)(this_time * prob)
/ (CGRAPH_FREQ_BASE * 2), &ip,
&p);
}
if (prob != 2)
account_size_time (info, this_size * (2 - prob),
this_time * (2 - prob), &p);
(sreal)(this_time * (2 - prob))
/ (CGRAPH_FREQ_BASE * 2),
&bb_predicate,
&p);
}
if (!info->fp_expressions && fp_expression_p (stmt))
......@@ -2969,9 +3043,9 @@ compute_inline_parameters (struct cgraph_node *node, bool early)
es->call_stmt_size = eni_size_weights.call_cost;
es->call_stmt_time = eni_time_weights.call_cost;
account_size_time (info, INLINE_SIZE_SCALE * 2,
INLINE_TIME_SCALE * 2, &t);
2, &t, &t);
t = not_inlined_predicate ();
account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t);
account_size_time (info, 2 * INLINE_SIZE_SCALE, 0, &t, &t);
inline_update_overall_summary (node);
info->self_size = info->size;
info->self_time = info->time;
......@@ -3048,7 +3122,7 @@ compute_inline_parameters (struct cgraph_node *node, bool early)
if (flag_checking)
{
inline_update_overall_summary (node);
gcc_assert (info->time == info->self_time
gcc_assert (!(info->time - info->self_time).to_int ()
&& info->size == info->self_size);
}
}
......@@ -3174,8 +3248,11 @@ estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
*size += cur_size;
if (min_size)
*min_size += cur_size;
*time += call_time * prob / REG_BR_PROB_BASE
* e->frequency * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE);
if (prob == REG_BR_PROB_BASE)
*time += ((sreal)(call_time * e->frequency)) / CGRAPH_FREQ_BASE;
else
*time += ((sreal)call_time) * (prob * e->frequency)
/ (CGRAPH_FREQ_BASE * REG_BR_PROB_BASE);
}
......@@ -3257,10 +3334,13 @@ estimate_calls_size_and_time (struct cgraph_node *node, int *size,
static void
estimate_node_size_and_time (struct cgraph_node *node,
clause_t possible_truths,
clause_t nonspec_possible_truths,
vec<tree> known_vals,
vec<ipa_polymorphic_call_context> known_contexts,
vec<ipa_agg_jump_function_p> known_aggs,
int *ret_size, int *ret_min_size, sreal *ret_time,
int *ret_size, int *ret_min_size,
sreal *ret_time,
sreal *ret_nonspecialized_time,
inline_hints *ret_hints,
vec<inline_param_summary>
inline_param_summary)
......@@ -3292,31 +3372,57 @@ estimate_node_size_and_time (struct cgraph_node *node,
}
}
for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
if (evaluate_predicate (&e->predicate, possible_truths))
{
size += e->size;
gcc_checking_assert (e->time >= 0);
gcc_checking_assert (time >= 0);
if (!inline_param_summary.exists ())
time += e->time;
else
{
int prob = predicate_probability (info->conds,
&e->predicate,
possible_truths,
inline_param_summary);
gcc_checking_assert (prob >= 0);
gcc_checking_assert (prob <= REG_BR_PROB_BASE);
time += e->time * prob / REG_BR_PROB_BASE;
}
gcc_checking_assert (time >= 0);
estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
known_vals, known_contexts, known_aggs);
sreal nonspecialized_time = time;
}
gcc_checking_assert (true_predicate_p (&(*info->entry)[0].predicate));
for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
{
bool nonconst = evaluate_predicate (&e->nonconst_predicate,
possible_truths);
bool exec = evaluate_predicate (&e->exec_predicate,
nonspec_possible_truths);
gcc_assert (!nonconst || exec);
if (exec)
{
gcc_checking_assert (e->time >= 0);
gcc_checking_assert (time >= 0);
/* We compute specialized size only because size of nonspecialized
copy is context independent.
The difference between nonspecialized execution and specialized is
that nonspecialized is not going to have optimized out computations
known to be constant in a specialized setting. */
if (nonconst)
size += e->size;
nonspecialized_time += e->time;
if (!nonconst)
;
else if (!inline_param_summary.exists ())
{
if (nonconst)
time += e->time;
}
else
{
int prob = predicate_probability (info->conds,
&e->nonconst_predicate,
possible_truths,
inline_param_summary);
gcc_checking_assert (prob >= 0);
gcc_checking_assert (prob <= REG_BR_PROB_BASE);
time += e->time * prob / REG_BR_PROB_BASE;
}
gcc_checking_assert (time >= 0);
}
}
gcc_checking_assert (true_predicate_p (&(*info->entry)[0].exec_predicate));
gcc_checking_assert (true_predicate_p (&(*info->entry)[0].nonconst_predicate));
min_size = (*info->entry)[0].size;
gcc_checking_assert (size >= 0);
gcc_checking_assert (time >= 0);
gcc_checking_assert (nonspecialized_time >= time);
if (info->loop_iterations
&& !evaluate_predicate (info->loop_iterations, possible_truths))
......@@ -3332,18 +3438,16 @@ estimate_node_size_and_time (struct cgraph_node *node,
if (DECL_DECLARED_INLINE_P (node->decl))
hints |= INLINE_HINT_declared_inline;
estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
known_vals, known_contexts, known_aggs);
gcc_checking_assert (size >= 0);
gcc_checking_assert (time >= 0);
time = time / INLINE_TIME_SCALE;
size = RDIV (size, INLINE_SIZE_SCALE);
min_size = RDIV (min_size, INLINE_SIZE_SCALE);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "\n size:%i time:%f\n", (int) size, time.to_double ());
fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
time.to_double (), nonspecialized_time.to_double ());
if (ret_time)
*ret_time = time;
if (ret_nonspecialized_time)
*ret_nonspecialized_time = nonspecialized_time;
if (ret_size)
*ret_size = size;
if (ret_min_size)
......@@ -3368,12 +3472,15 @@ estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
int *ret_size, sreal *ret_time,
inline_hints *hints)
{
clause_t clause;
clause = evaluate_conditions_for_known_args (node, false, known_vals,
known_aggs);
estimate_node_size_and_time (node, clause, known_vals, known_contexts,
known_aggs, ret_size, NULL, ret_time, hints, vNULL);
clause_t clause, nonspec_clause;
sreal nonspec_time;
evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
&clause, &nonspec_clause);
estimate_node_size_and_time (node, clause, nonspec_clause,
known_vals, known_contexts,
known_aggs, ret_size, NULL, ret_time,
&nonspec_time, hints, vNULL);
}
/* Translate all conditions from callee representation into caller
......@@ -3645,7 +3752,7 @@ inline_merge_summary (struct cgraph_edge *edge)
struct cgraph_node *to = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to : edge->caller);
struct inline_summary *info = inline_summaries->get (to);
clause_t clause = 0; /* not_inline is known to be false. */
clause_t clause = 0; /* not_inline is known to be false. */
size_time_entry *e;
vec<int> operand_map = vNULL;
vec<int> offset_map = vNULL;
......@@ -3662,7 +3769,7 @@ inline_merge_summary (struct cgraph_edge *edge)
info->fp_expressions |= callee_info->fp_expressions;
if (callee_info->conds)
evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL);
evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
if (ipa_node_params_sum && callee_info->conds)
{
struct ipa_edge_args *args = IPA_EDGE_REF (edge);
......@@ -3705,14 +3812,19 @@ inline_merge_summary (struct cgraph_edge *edge)
for (i = 0; vec_safe_iterate (callee_info->entry, i, &e); i++)
{
struct predicate p = remap_predicate (info, callee_info,
&e->predicate, operand_map,
&e->exec_predicate, operand_map,
offset_map, clause,
&toplev_predicate);
if (!false_predicate_p (&p))
struct predicate nonconstp
= remap_predicate (info, callee_info,
&e->nonconst_predicate, operand_map,
offset_map, clause,
&toplev_predicate);
if (!false_predicate_p (&p) && !false_predicate_p (&nonconstp))
{
sreal add_time = e->time * edge->frequency / CGRAPH_FREQ_BASE;
sreal add_time = ((sreal)e->time * edge->frequency) / CGRAPH_FREQ_BASE;
int prob = predicate_probability (callee_info->conds,
&e->predicate,
&e->nonconst_predicate,
clause, es->param);
add_time = add_time * prob / REG_BR_PROB_BASE;
if (prob != REG_BR_PROB_BASE
......@@ -3721,7 +3833,7 @@ inline_merge_summary (struct cgraph_edge *edge)
fprintf (dump_file, "\t\tScaling time by probability:%f\n",
(double) prob / REG_BR_PROB_BASE);
}
account_size_time (info, e->size, add_time, &p);
account_size_time (info, e->size, add_time, &p, &nonconstp);
}
}
remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
......@@ -3761,13 +3873,13 @@ inline_update_overall_summary (struct cgraph_node *node)
info->time = 0;
for (i = 0; vec_safe_iterate (info->entry, i, &e); i++)
{
info->size += e->size, info->time += e->time;
info->size += e->size;
info->time += e->time;
}
estimate_calls_size_and_time (node, &info->size, &info->min_size,
&info->time, NULL,
~(clause_t) (1 << predicate_false_condition),
vNULL, vNULL, vNULL);
info->time = info->time / INLINE_TIME_SCALE;
info->size = (info->size + INLINE_SIZE_SCALE / 2) / INLINE_SIZE_SCALE;
}
......@@ -3803,11 +3915,11 @@ simple_edge_hints (struct cgraph_edge *edge)
sreal
do_estimate_edge_time (struct cgraph_edge *edge)
{
sreal time;
sreal time, nonspec_time;
int size;
inline_hints hints;
struct cgraph_node *callee;
clause_t clause;
clause_t clause, nonspec_clause;
vec<tree> known_vals;
vec<ipa_polymorphic_call_context> known_contexts;
vec<ipa_agg_jump_function_p> known_aggs;
......@@ -3818,10 +3930,11 @@ do_estimate_edge_time (struct cgraph_edge *edge)
gcc_checking_assert (edge->inline_failed);
evaluate_properties_for_edge (edge, true,
&clause, &known_vals, &known_contexts,
&known_aggs);
estimate_node_size_and_time (callee, clause, known_vals, known_contexts,
known_aggs, &size, &min_size, &time, &hints, es->param);
&clause, &nonspec_clause, &known_vals,
&known_contexts, &known_aggs);
estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
known_contexts, known_aggs, &size, &min_size,
&time, &nonspec_time, &hints, es->param);
/* When we have profile feedback, we can quite safely identify hot
edges and for those we disable size limits. Don't do that when
......@@ -3846,6 +3959,7 @@ do_estimate_edge_time (struct cgraph_edge *edge)
if ((int) edge_growth_cache.length () <= edge->uid)
edge_growth_cache.safe_grow_cleared (symtab->edges_max_uid);
edge_growth_cache[edge->uid].time = time;
edge_growth_cache[edge->uid].nonspec_time = nonspec_time;
edge_growth_cache[edge->uid].size = size + (size >= 0);
hints |= simple_edge_hints (edge);
......@@ -3863,7 +3977,7 @@ do_estimate_edge_size (struct cgraph_edge *edge)
{
int size;
struct cgraph_node *callee;
clause_t clause;
clause_t clause, nonspec_clause;
vec<tree> known_vals;
vec<ipa_polymorphic_call_context> known_contexts;
vec<ipa_agg_jump_function_p> known_aggs;
......@@ -3883,10 +3997,12 @@ do_estimate_edge_size (struct cgraph_edge *edge)
/* Early inliner runs without caching, go ahead and do the dirty work. */
gcc_checking_assert (edge->inline_failed);
evaluate_properties_for_edge (edge, true,
&clause, &known_vals, &known_contexts,
&clause, &nonspec_clause,
&known_vals, &known_contexts,
&known_aggs);
estimate_node_size_and_time (callee, clause, known_vals, known_contexts,
known_aggs, &size, NULL, NULL, NULL, vNULL);
estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
known_contexts, known_aggs, &size, NULL, NULL,
NULL, NULL, vNULL);
known_vals.release ();
known_contexts.release ();
known_aggs.release ();
......@@ -3902,7 +4018,7 @@ do_estimate_edge_hints (struct cgraph_edge *edge)
{
inline_hints hints;
struct cgraph_node *callee;
clause_t clause;
clause_t clause, nonspec_clause;
vec<tree> known_vals;
vec<ipa_polymorphic_call_context> known_contexts;
vec<ipa_agg_jump_function_p> known_aggs;
......@@ -3922,10 +4038,12 @@ do_estimate_edge_hints (struct cgraph_edge *edge)
/* Early inliner runs without caching, go ahead and do the dirty work. */
gcc_checking_assert (edge->inline_failed);
evaluate_properties_for_edge (edge, true,
&clause, &known_vals, &known_contexts,
&clause, &nonspec_clause,
&known_vals, &known_contexts,
&known_aggs);
estimate_node_size_and_time (callee, clause, known_vals, known_contexts,
known_aggs, NULL, NULL, NULL, &hints, vNULL);
estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals,
known_contexts, known_aggs, NULL, NULL,
NULL, NULL, &hints, vNULL);
known_vals.release ();
known_contexts.release ();
known_aggs.release ();
......@@ -4304,7 +4422,8 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data,
e.size = streamer_read_uhwi (&ib);
e.time = sreal::stream_in (&ib);
e.predicate = read_predicate (&ib);
e.exec_predicate = read_predicate (&ib);
e.nonconst_predicate = read_predicate (&ib);
vec_safe_push (info->entry, e);
}
......@@ -4463,7 +4582,8 @@ inline_write_summary (void)
{
streamer_write_uhwi (ob, e->size);
e->time.stream_out (ob);
write_predicate (ob, &e->predicate);
write_predicate (ob, &e->exec_predicate);
write_predicate (ob, &e->nonconst_predicate);
}
write_predicate (ob, info->loop_iterations);
write_predicate (ob, info->loop_stride);
......
......@@ -639,10 +639,9 @@ want_early_inline_function_p (struct cgraph_edge *e)
does not happen. */
inline sreal
compute_uninlined_call_time (struct inline_summary *callee_info,
struct cgraph_edge *edge)
compute_uninlined_call_time (struct cgraph_edge *edge,
sreal uninlined_call_time)
{
sreal uninlined_call_time = (sreal)callee_info->time;
cgraph_node *caller = (edge->caller->global.inlined_to
? edge->caller->global.inlined_to
: edge->caller);
......@@ -677,12 +676,10 @@ compute_inlined_call_time (struct cgraph_edge *edge,
else
time = time >> 11;
/* This calculation should match one in ipa-inline-analysis.
FIXME: Once ipa-inline-analysis is converted to sreal this can be
simplified. */
time -= (sreal) ((gcov_type) edge->frequency
* inline_edge_summary (edge)->call_stmt_time
* (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
/* This calculation should match one in ipa-inline-analysis.c
(estimate_edge_size_and_time). */
time -= (sreal) edge->frequency
* inline_edge_summary (edge)->call_stmt_time / CGRAPH_FREQ_BASE;
time += caller_time;
if (time <= 0)
time = ((sreal) 1) >> 8;
......@@ -696,12 +693,13 @@ compute_inlined_call_time (struct cgraph_edge *edge,
static bool
big_speedup_p (struct cgraph_edge *e)
{
sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
e);
sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
sreal unspec_time;
sreal spec_time = estimate_edge_time (e, &unspec_time);
sreal time = compute_uninlined_call_time (e, unspec_time);
sreal inlined_time = compute_inlined_call_time (e, spec_time);
if (time - inlined_time
> (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
> (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))
* percent_rec)
return true;
return false;
......@@ -1011,7 +1009,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
{
sreal badness;
int growth;
sreal edge_time;
sreal edge_time, unspec_edge_time;
struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
struct inline_summary *callee_info = inline_summaries->get (callee);
inline_hints hints;
......@@ -1020,12 +1018,11 @@ edge_badness (struct cgraph_edge *edge, bool dump)
: edge->caller);
growth = estimate_edge_growth (edge);
edge_time = estimate_edge_time (edge);
edge_time = estimate_edge_time (edge, &unspec_edge_time);
hints = estimate_edge_hints (edge);
gcc_checking_assert (edge_time >= 0);
/* FIXME: -1 to care of rounding issues should go away once cache is migrated.
to sreals. */
gcc_checking_assert (edge_time <= callee_info->time);
/* Check that inlined time is better, but tolerate some roundoff issues. */
gcc_checking_assert ((edge_time - callee_info->time).to_int () <= 0);
gcc_checking_assert (growth <= callee_info->size);
if (dump)
......@@ -1035,9 +1032,10 @@ edge_badness (struct cgraph_edge *edge, bool dump)
edge->caller->order,
xstrdup_for_dump (callee->name ()),
edge->callee->order);
fprintf (dump_file, " size growth %i, time %f ",
fprintf (dump_file, " size growth %i, time %f unspec %f ",
growth,
edge_time.to_double ());
edge_time.to_double (),
unspec_edge_time.to_double ());
dump_inline_hints (dump_file, hints);
if (big_speedup_p (edge))
fprintf (dump_file, " big_speedup");
......@@ -1076,7 +1074,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
sreal numerator, denominator;
int overall_growth;
numerator = (compute_uninlined_call_time (callee_info, edge)
numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
- compute_inlined_call_time (edge, edge_time));
if (numerator == 0)
numerator = ((sreal) 1 >> 8);
......@@ -1162,13 +1160,14 @@ edge_badness (struct cgraph_edge *edge, bool dump)
fprintf (dump_file,
" %f: guessed profile. frequency %f, count %" PRId64
" caller count %" PRId64
" time w/o inlining %f, time w/ inlining %f"
" time w/o inlining %f, time with inlining %f"
" overall growth %i (current) %i (original)"
" %i (compensated)\n",
badness.to_double (),
(double)edge->frequency / CGRAPH_FREQ_BASE,
edge->count, caller->count,
compute_uninlined_call_time (callee_info, edge).to_double (),
compute_uninlined_call_time (edge,
unspec_edge_time).to_double (),
compute_inlined_call_time (edge, edge_time).to_double (),
estimate_growth (callee),
callee_info->growth, overall_growth);
......@@ -2056,8 +2055,9 @@ inline_small_functions (void)
if (dump_file)
{
fprintf (dump_file,
" Inlined into %s which now has time %f and size %i, "
" Inlined %s into %s which now has time %f and size %i, "
"net change of %+i.\n",
edge->callee->name (),
edge->caller->name (),
inline_summaries->get (edge->caller)->time.to_double (),
inline_summaries->get (edge->caller)->size,
......
......@@ -103,13 +103,16 @@ struct GTY(()) predicate
context. We keep simple array of record, every containing of predicate
and time/size to account.
We keep values scaled up, so fractional sizes and times can be
accounted. */
We keep values scaled up, so fractional sizes can be accounted. */
#define INLINE_SIZE_SCALE 2
#define INLINE_TIME_SCALE (CGRAPH_FREQ_BASE * 2)
struct GTY(()) size_time_entry
{
struct predicate predicate;
/* Predicate for code to be executed. */
struct predicate exec_predicate;
/* Predicate for value to be constant and optimized out in a specialized copy.
When deciding on specialization this makes it possible to see how much
the executed code paths will simplify. */
struct predicate nonconst_predicate;
int size;
sreal GTY((skip)) time;
};
......@@ -230,9 +233,11 @@ struct inline_edge_summary
typedef struct inline_edge_summary inline_edge_summary_t;
extern vec<inline_edge_summary_t> inline_edge_summary_vec;
/* Data we cache about callgraph edges during inlining to avoid expensive
re-computations during the greedy algorithm. */
struct edge_growth_cache_entry
{
sreal time;
sreal time, nonspec_time;
int size;
inline_hints hints;
};
......@@ -315,12 +320,14 @@ estimate_edge_growth (struct cgraph_edge *edge)
EDGE. */
static inline sreal
estimate_edge_time (struct cgraph_edge *edge)
estimate_edge_time (struct cgraph_edge *edge, sreal *nonspec_time = NULL)
{
sreal ret;
if ((int)edge_growth_cache.length () <= edge->uid
|| !edge_growth_cache[edge->uid].size)
return do_estimate_edge_time (edge);
if (nonspec_time)
*nonspec_time = edge_growth_cache[edge->uid].nonspec_time;
return edge_growth_cache[edge->uid].time;
}
......@@ -345,7 +352,7 @@ reset_edge_growth_cache (struct cgraph_edge *edge)
{
if ((int)edge_growth_cache.length () > edge->uid)
{
struct edge_growth_cache_entry zero = {0, 0, 0};
struct edge_growth_cache_entry zero = {0, 0, 0, 0};
edge_growth_cache[edge->uid] = zero;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment