Commit 41f669d8 by Jan Hubicka Committed by Jan Hubicka

lto-cgraph.c (lto_output_node, [...]): Stream split_part.


	* lto-cgraph.c (lto_output_node, input_overwrite_node): Stream
	split_part.
	* ipa-inline.c (edge_badness): Add wrapper penalty.
	(sum_callers): Move up.
	(inline_small_functions): Set single_caller.
	* ipa-inline.h (inline_summary): Add single_caller.
	* ipa-split.c (split_function): Set split_part.
	(cgraph_node::create_clone): Do not shadow decl; copy split_part.
	* cgraph.h (cgraph_node): Add split_part.

	* gcc.dg/ipa/inlinehint-4.c: New testcase.

From-SVN: r221806
parent 9fd542c6
2015-03-31 Jan Hubicka <hubicka@ucw.cz>
* lto-cgraph.c (lto_output_node, input_overwrite_node): Stream
split_part.
* ipa-inline.c (edge_badness): Add wrapper penalty.
(sum_callers): Move up.
(inline_small_functions): Set single_caller.
* ipa-inline.h (inline_summary): Add single_caller.
* ipa-split.c (split_function): Set split_part.
(cgraph_node::create_clone): Do not shadow decl; copy split_part.
* cgraph.h (cgraph_node): Add split_part.
2015-03-31 Uros Bizjak <ubizjak@gmail.com>
PR target/58945
......
......@@ -1319,6 +1319,8 @@ public:
unsigned merged : 1;
/* True if function was created to be executed in parallel. */
unsigned parallelized_function : 1;
/* True if function is part split out by ipa-split. */
unsigned split_part : 1;
private:
/* Worker for call_for_symbol_and_aliases. */
......
......@@ -1088,6 +1088,7 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count)
{
sreal numerator, denominator;
int overall_growth;
numerator = (compute_uninlined_call_time (callee_info, edge)
- compute_inlined_call_time (edge, edge_time));
......@@ -1098,8 +1099,74 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else if (opt_for_fn (caller->decl, flag_branch_probabilities))
numerator = numerator >> 11;
denominator = growth;
if (callee_info->growth > 0)
denominator *= callee_info->growth * callee_info->growth;
overall_growth = callee_info->growth;
/* Look for inliner wrappers of the form:
inline_caller ()
{
do_fast_job...
if (need_more_work)
noninline_callee ();
}
Without penalizing this case, we usually inline noninline_callee
into the inline_caller because overall_growth is small preventing
further inlining of inline_caller.
Penalize only callgraph edges to functions with small overall
growth ...
*/
if (growth > overall_growth
/* ... and having only one caller which is not inlined ... */
&& callee_info->single_caller
&& !edge->caller->global.inlined_to
/* ... and edges executed only conditionally ... */
&& edge->frequency < CGRAPH_FREQ_BASE
/* ... consider case where callee is not inline but caller is ... */
&& ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
&& DECL_DECLARED_INLINE_P (caller->decl))
/* ... or when early optimizers decided to split and edge
frequency still indicates splitting is a win ... */
|| (callee->split_part && !caller->split_part
&& edge->frequency
< CGRAPH_FREQ_BASE
* PARAM_VALUE
(PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100
/* ... and do not overwrite user specified hints. */
&& (!DECL_DECLARED_INLINE_P (edge->callee->decl)
|| DECL_DECLARED_INLINE_P (caller->decl)))))
{
struct inline_summary *caller_info = inline_summaries->get (caller);
int caller_growth = caller_info->growth;
/* Only apply the penalty when caller looks like inline candidate,
and it is not called once.  */
if (!caller_info->single_caller && overall_growth < caller_growth
&& caller_info->inlinable
&& caller_info->size
< (DECL_DECLARED_INLINE_P (caller->decl)
? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
{
if (dump)
fprintf (dump_file,
" Wrapper penalty. Increasing growth %i to %i\n",
overall_growth, caller_growth);
overall_growth = caller_growth;
}
}
if (overall_growth > 0)
{
/* Strongly prefer functions with few callers that can be inlined
fully. The square root here leads to smaller binaries at average.
Watch however for extreme cases and return to linear function
when growth is large. */
if (overall_growth < 256)
overall_growth *= overall_growth;
else
overall_growth += 256 * 256 - 256;
denominator *= overall_growth;
}
badness = - numerator / denominator;
......@@ -1109,13 +1176,15 @@ edge_badness (struct cgraph_edge *edge, bool dump)
" %f: guessed profile. frequency %f, count %"PRId64
" caller count %"PRId64
" time w/o inlining %f, time w inlining %f"
" overall growth %i (current) %i (original)\n",
badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE,
" overall growth %i (current) %i (original)"
" %i (compensated)\n",
badness.to_double (),
(double)edge->frequency / CGRAPH_FREQ_BASE,
edge->count, caller->count,
compute_uninlined_call_time (callee_info, edge).to_double (),
compute_inlined_call_time (edge, edge_time).to_double (),
estimate_growth (callee),
callee_info->growth);
callee_info->growth, overall_growth);
}
}
/* When function local profile is not available or it does not give
......@@ -1133,8 +1202,8 @@ edge_badness (struct cgraph_edge *edge, bool dump)
else
badness = badness << nest;
if (dump)
fprintf (dump_file, " %f: no profile. nest %i\n", badness.to_double (),
nest);
fprintf (dump_file, " %f: no profile. nest %i\n",
badness.to_double (), nest);
}
gcc_checking_assert (badness != 0);
......@@ -1649,6 +1718,20 @@ inline_account_function_p (struct cgraph_node *node)
&& node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
}
/* Count number of callers of NODE and store it into DATA (that
points to int. Worker for cgraph_for_node_and_aliases. */
static bool
sum_callers (struct cgraph_node *node, void *data)
{
struct cgraph_edge *e;
int *num_calls = (int *)data;
for (e = node->callers; e; e = e->next_caller)
(*num_calls)++;
return false;
}
/* We use greedy algorithm for inlining of small functions:
All inline candidates are put into prioritized heap ordered in
increasing badness.
......@@ -1693,6 +1776,12 @@ inline_small_functions (void)
if (inline_account_function_p (node))
initial_size += info->size;
info->growth = estimate_growth (node);
int num_calls = 0;
node->call_for_symbol_and_aliases (sum_callers, &num_calls,
true);
if (num_calls == 1)
info->single_caller = true;
if (dfs && dfs->next_cycle)
{
struct cgraph_node *n2;
......@@ -2085,20 +2174,6 @@ flatten_function (struct cgraph_node *node, bool early)
inline_update_overall_summary (node);
}
/* Count number of callers of NODE and store it into DATA (that
   points to an int).  Worker for cgraph_for_node_and_aliases;
   returns false so that every alias of NODE is visited.  */
static bool
sum_callers (struct cgraph_node *node, void *data)
{
struct cgraph_edge *e;
/* DATA is the running total supplied by the caller.  */
int *num_calls = (int *)data;
for (e = node->callers; e; e = e->next_caller)
(*num_calls)++;
return false;
}
/* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases.
DATA points to number of calls originally found so we avoid infinite
recursion. */
......
......@@ -129,6 +129,9 @@ struct GTY(()) inline_summary
/* True when function contains cilk spawn (and thus we can not inline
into it). */
unsigned contains_cilk_spawn : 1;
/* True when there is only one caller of the function before small function
inlining. */
unsigned int single_caller : 1;
/* Information about function that will result after applying all the
inline decisions present in the callgraph. Generally kept up to
......
......@@ -1402,6 +1402,8 @@ split_function (basic_block return_bb, struct split_point *split_point,
(vNULL, NULL, args_to_skip, !split_part_return_p, split_point->split_bbs,
split_point->entry_bb, "part");
node->split_part = true;
/* Let's take a time profile for splitted function. */
node->tp_first_run = cur_node->tp_first_run + 1;
......
......@@ -578,6 +578,7 @@ lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
bp_pack_enum (&bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN, node->resolution);
bp_pack_value (&bp, node->instrumentation_clone, 1);
bp_pack_value (&bp, node->split_part, 1);
streamer_write_bitpack (&bp);
streamer_write_data_stream (ob->main_stream, section, strlen (section) + 1);
......@@ -1214,6 +1215,7 @@ input_overwrite_node (struct lto_file_decl_data *file_data,
node->resolution = bp_unpack_enum (bp, ld_plugin_symbol_resolution,
LDPR_NUM_KNOWN);
node->instrumentation_clone = bp_unpack_value (bp, 1);
node->split_part = bp_unpack_value (bp, 1);
gcc_assert (flag_ltrans
|| (!node->in_other_partition
&& !node->used_from_other_partition));
......
2015-03-31 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/ipa/inlinehint-4.c: New testcase.
2015-03-31 Alex Velenko <Alex.Velenko@arm.com>
* gcc.target/arm/pr45701-1.c (history_expand_line_internal): Add an
......
/* { dg-options "-O3 -fdump-ipa-inline-details -fno-early-inlining --param large-unit-insns=1" } */
/* { dg-add-options bind_pic_locally } */
/* Open-addressed hash table used by the testcase: HASHVAL holds the
   stored keys, HASH the corresponding values, HASH_SIZE the table
   capacity.  Left external so the optimizer cannot fold the lookups
   away.  */
int *hashval;
int *hash;
int hash_size;
/* Linear-probing hash lookup: the slow path of LOOKUP below.  The
   dump scan expects this NOT to be inlined into lookup (the wrapper
   penalty should keep it out of line).  */
static int
lookup_slow (int val)
{
int i = val % hash_size;
/* Probe forward until an empty slot or the matching key is found.  */
while (hashval[i] && hashval[i] != val)
i++;
return hash[i];
}
/* Fast-path wrapper around lookup_slow with a one-entry cache.  This
   is exactly the "inline_caller / conditional noninline_callee" shape
   that the new wrapper penalty in edge_badness is meant to detect:
   the dump scans expect lookup itself to be inlined into its callers
   while lookup_slow stays out of line.  */
static inline int
lookup (int val)
{
static int cache, cache_val;
/* Serve a repeated query for the same key from the cache.  */
if (val == cache_val)
return cache;
else
{
/* Cache miss: refill the one-entry cache via the slow path.  */
cache_val = val;
cache = lookup_slow (val);
return cache;
}
}
/* Nine distinct call sites of LOOKUP: enough growth that inlining all
   of them is only attractive once the wrapper penalty stops
   lookup_slow from being pulled into lookup first.  Kept as separate
   call expressions on purpose — collapsing them into a loop would
   change what the inliner sees.  */
int
test (int i)
{
return lookup (i) + lookup (2 * i) + lookup (3 * i) + lookup (4 * i) +
lookup (5 * i) + lookup (6 * i) + lookup (7 * i) + lookup (8 * i) +
lookup (9 * i);
}
/* { dg-final { scan-ipa-dump "Wrapper penalty" "inline" } } */
/* { dg-final { scan-ipa-dump-not "Inlining lookup_slow to lookup" "inline" } } */
/* { dg-final { scan-ipa-dump "Inlining lookup to test" "inline" } } */
/* { dg-final { cleanup-ipa-dump "inline" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment