Commit b58b1157 by Jan Hubicka Committed by Jan Hubicka

cgraph.c (cgraph_max_uid): New global variable.


	* cgraph.c (cgraph_max_uid): New global variable.
	(cgraph_node): Set uid field.
	(create_edge): Keep inline flags consistent.
	(dump_cgraph): Dump more info.
	* cgraph.h (struct cgraph_local_info): Remove inline_many and
	can_inline_once; add inlinable, disgread_inline_limits, and self_insn
	(struct cgraph_global_info): Add insns, calls, cloned_times,
	will_be_output.
	(struct cgraph_node): Add uid.
	(struct cgraph_edge): Add inline_call.
	(cgraph_max_uid, cgraph_inline_p): Declare.
	* cgraph.c: Include params.h and fibheap.h
	(cgraph_mark_functions_to_inline_once): Kill.
	(INSNS_PER_CALL): New constant.
	(ncalls_inlined, nfunctions_inlined, initial_insns, overall_insns): New
	static variables.
	(cgraph_finalize_function): Do not analyze inlining.
	(cgraph_finalize_compilation_unit): Set inlining attributes.
	(cgraph_mark_functions_to_output): More consistency checks.
	(cgraph_optimize_function): Set current_function_decl to NULL.
	(cgraph_expand_function): Use new inline flags.
	(cgraph_postorder): Expand from cgraph_expand_functions.
	(INLINED_TIMES, SET_INLINED_TIMES): New macros.
	(cgraph_inlined_into, cgraph_inlined_callees,
	cgraph_estimate_size_after_inlining, cgraph_estimate_growth,
	cgraph_mark_inline, cgraph_check_inline_limits,
	cgraph_default_inline_p, cgraph_decide_inling_of_small_functions,
	cgraph_decide_inlining, cgraph_inline_p): New functions.
	* params.def (PARAM_LARGE_FUNCTION_INSNS, PARAM_LARGE_FUNCTION_GROWTH,
	PARAM_INLINE_UNIT_GROWTH): New parameters.
	* tree-inline.c (struct inline_data): New field current_decl.
	(expand_call_inline): Avoid forward declarations; use
	inlinable_function_p.
	(optimize_inline_calls): Set id.current_decl.

Co-Authored-By: Gerald Pfeifer <pfeifer@dbai.tuwien.ac.at>

From-SVN: r69262
parent 27b8e366
Sat Jul 12 03:06:01 CEST 2003 Jan Hubicka <jh@suse.cz>
Gerald Pfeifer <pfeifer@dbai.tuwien.ac.at>
* cgraph.c (cgraph_max_uid): New global variable.
(cgraph_node): Set uid field.
(create_edge): Keep inline flags consistent.
(dump_cgraph): Dump more info.
* cgraph.h (struct cgraph_local_info): Remove inline_many and
can_inline_once; add inlinable, disgread_inline_limits, and self_insn
(struct cgraph_global_info): Add insns, calls, cloned_times,
will_be_output.
(struct cgraph_node): Add uid.
(struct cgraph_edge): Add inline_call.
(cgraph_max_uid, cgraph_inline_p): Declare.
* cgraph.c: Include params.h and fibheap.h
(cgraph_mark_functions_to_inline_once): Kill.
(INSNS_PER_CALL): New constant.
(ncalls_inlined, nfunctions_inlined, initial_insns, overall_insns): New
static variables.
(cgraph_finalize_function): Do not analyze inlining.
(cgraph_finalize_compilation_unit): Set inlining attributes.
(cgraph_mark_functions_to_output): More consistency checks.
(cgraph_optimize_function): Set current_function_decl to NULL.
(cgraph_expand_function): Use new inline flags.
(cgraph_postorder): Expand from cgraph_expand_functions.
(INLINED_TIMES, SET_INLINED_TIMES): New macros.
(cgraph_inlined_into, cgraph_inlined_callees,
cgraph_estimate_size_after_inlining, cgraph_estimate_growth,
cgraph_mark_inline, cgraph_check_inline_limits,
cgraph_default_inline_p, cgraph_decide_inling_of_small_functions,
cgraph_decide_inlining, cgraph_inline_p): New functions.
* params.def (PARAM_LARGE_FUNCTION_INSNS, PARAM_LARGE_FUNCTION_GROWTH,
PARAM_INLINE_UNIT_GROWTH): New parameters.
* tree-inline.c (struct inline_data): New field current_decl.
(expand_call_inline): Avoid forward declarations; use
inlinable_function_p.
(optimize_inline_calls): Set id.current_decl.
2003-07-11 Andrew Pinski <pinskia@physics.uc.edu>
* configure.in: Remove wrongly added definition of
......
......@@ -48,6 +48,9 @@ struct cgraph_node *cgraph_nodes_queue;
/* Number of nodes in existence. */
int cgraph_n_nodes;
/* Maximal uid used in cgraph nodes. */
int cgraph_max_uid;
/* Set when whole unit has been analyzed so we can access global info. */
bool cgraph_global_info_ready = false;
......@@ -114,6 +117,7 @@ cgraph_node (decl)
node = ggc_alloc_cleared (sizeof (*node));
node->decl = decl;
node->next = cgraph_nodes;
node->uid = cgraph_max_uid++;
if (cgraph_nodes)
cgraph_nodes->previous = node;
node->previous = NULL;
......@@ -157,6 +161,19 @@ create_edge (caller, callee)
struct cgraph_node *caller, *callee;
{
struct cgraph_edge *edge = ggc_alloc (sizeof (struct cgraph_edge));
struct cgraph_edge *edge2;
edge->inline_call = false;
/* At the moment we don't associate calls with specific CALL_EXPRs
as we probably ought to, so we must preserve inline_call flags to
be the same in all copies of the same edge. */
if (cgraph_global_info_ready)
for (edge2 = caller->callees; edge2; edge2 = edge2->next_caller)
if (edge2->callee == callee)
{
edge->inline_call = edge2->inline_call;
break;
}
edge->caller = caller;
edge->callee = callee;
......@@ -337,6 +354,8 @@ dump_cgraph (f)
{
struct cgraph_edge *edge;
fprintf (f, "%s", cgraph_node_name (node));
if (node->local.self_insns)
fprintf (f, " %i insns", node->local.self_insns);
if (node->origin)
fprintf (f, " nested in: %s", cgraph_node_name (node->origin));
if (node->needed)
......@@ -346,13 +365,32 @@ dump_cgraph (f)
if (DECL_SAVED_TREE (node->decl))
fprintf (f, " tree");
if (node->local.disgread_inline_limits)
fprintf (f, " always_inline");
else if (node->local.inlinable)
fprintf (f, " inlinable");
if (node->global.insns && node->global.insns != node->local.self_insns)
fprintf (f, " %i insns after inlining", node->global.insns);
if (node->global.cloned_times > 1)
fprintf (f, " cloned %ix", node->global.cloned_times);
if (node->global.calls)
fprintf (f, " %i calls", node->global.calls);
fprintf (f, "\n called by :");
for (edge = node->callers; edge; edge = edge->next_caller)
{
fprintf (f, "%s ", cgraph_node_name (edge->caller));
if (edge->inline_call)
fprintf(f, "(inlined) ");
}
fprintf (f, "\n calls: ");
for (edge = node->callees; edge; edge = edge->next_callee)
{
fprintf (f, "%s ", cgraph_node_name (edge->callee));
if (edge->inline_call)
fprintf(f, "(inlined) ");
}
fprintf (f, "\n");
}
}
......
......@@ -30,13 +30,15 @@ struct cgraph_local_info GTY(())
/* Set when function function is visible in current compilation unit only
and it's address is never taken. */
bool local;
/* Set when function is small enough to be inlinable many times. */
bool inline_many;
/* Set when function can be inlined once (false only for functions calling
alloca, using varargs and so on). */
bool can_inline_once;
/* Set once it has been finalized so we consider it to be output. */
bool finalized;
/* False when there is something making inlining impossible (such as va_arg) */
bool inlinable;
/* True when function should be inlined independently on it's size. */
bool disgread_inline_limits;
/* Size of the function before inlining. */
int self_insns;
};
/* Information about the function that needs to be computed globally
......@@ -46,6 +48,20 @@ struct cgraph_global_info GTY(())
{
/* Set when the function will be inlined exactly once. */
bool inline_once;
/* Estimated size of the function after inlining. */
int insns;
/* Number of direct calls not inlined into the function body. */
int calls;
/* Number of times given function will be cloned during output. */
int cloned_times;
/* Set to true for all reachable functions before inlining is decided.
Once we inline all calls to the function and the function is local,
it is set to false. */
bool will_be_output;
};
/* Information about the function that is propagated by the RTL backend.
......@@ -77,6 +93,8 @@ struct cgraph_node GTY((chain_next ("%h.next"), chain_prev ("%h.previous")))
struct cgraph_node *next_nested;
/* Pointer to the next function in cgraph_nodes_queue. */
struct cgraph_node *next_needed;
/* Unique id of the node. */
int uid;
PTR GTY ((skip (""))) aux;
/* Set when function must be output - it is externally visible
......@@ -102,6 +120,7 @@ struct cgraph_edge GTY(())
struct cgraph_node *callee;
struct cgraph_edge *next_caller;
struct cgraph_edge *next_callee;
bool inline_call;
};
/* The cgraph_varpool data strutcture.
......@@ -124,6 +143,7 @@ struct cgraph_varpool_node GTY(())
extern GTY(()) struct cgraph_node *cgraph_nodes;
extern GTY(()) int cgraph_n_nodes;
extern GTY(()) int cgraph_max_uid;
extern bool cgraph_global_info_ready;
extern GTY(()) struct cgraph_node *cgraph_nodes_queue;
extern FILE *cgraph_dump_file;
......@@ -157,5 +177,6 @@ void cgraph_finalize_compilation_unit PARAMS ((void));
void cgraph_create_edges PARAMS ((tree, tree));
void cgraph_optimize PARAMS ((void));
void cgraph_mark_needed_node PARAMS ((struct cgraph_node *, int));
bool cgraph_inline_p PARAMS ((tree, tree));
#endif /* GCC_CGRAPH_H */
/* Callgraph handling code.
/* Callgraph based intraprocedural optimizations.
Copyright (C) 2003 Free Software Foundation, Inc.
Contributed by Jan Hubicka
......@@ -35,15 +35,25 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "cgraph.h"
#include "diagnostic.h"
#include "timevar.h"
#include "params.h"
#include "fibheap.h"
#include "c-common.h"
#define INSNS_PER_CALL 10
static void cgraph_expand_functions PARAMS ((void));
static void cgraph_mark_functions_to_output PARAMS ((void));
static void cgraph_expand_function PARAMS ((struct cgraph_node *));
static tree record_call_1 PARAMS ((tree *, int *, void *));
static void cgraph_mark_local_functions PARAMS ((void));
static void cgraph_mark_functions_to_inline_once PARAMS ((void));
static void cgraph_optimize_function PARAMS ((struct cgraph_node *));
/* Statistics we collect about inlining algorithm. */
static int ncalls_inlined;
static int nfunctions_inlined;
static int initial_insns;
static int overall_insns;
/* Analyze function once it is parsed. Set up the local information
available - create cgraph edges for function calls via BODY. */
......@@ -141,6 +151,8 @@ cgraph_finalize_compilation_unit ()
struct cgraph_edge *edge;
cgraph_varpool_assemble_pending_decls ();
if (!quiet_flag)
fprintf (stderr, "\nAnalyzing compilation unit\n");
timevar_push (TV_CGRAPH);
if (cgraph_dump_file)
......@@ -170,14 +182,6 @@ cgraph_finalize_compilation_unit ()
(*lang_hooks.callgraph.lower_function) (decl);
current_function_decl = node->decl;
if (!node->needed && !DECL_COMDAT (node->decl))
node->local.can_inline_once = tree_inlinable_function_p (decl, 1);
else
node->local.can_inline_once = 0;
if (flag_inline_trees)
node->local.inline_many = tree_inlinable_function_p (decl, 0);
else
node->local.inline_many = 0;
/* At the moment frontend automatically emits all nested functions. */
if (node->nested)
......@@ -189,9 +193,18 @@ cgraph_finalize_compilation_unit ()
cgraph_mark_needed_node (node2, 0);
}
/* First kill forward declaration so reverse inling works properly. */
/* First kill forward declaration so reverse inlining works properly. */
cgraph_create_edges (decl, DECL_SAVED_TREE (decl));
node->local.inlinable = tree_inlinable_function_p (decl, 1);
if (!DECL_ESTIMATED_INSNS (decl))
DECL_ESTIMATED_INSNS (decl)
= (*lang_hooks.tree_inlining.estimate_num_insns) (decl);
node->local.self_insns = DECL_ESTIMATED_INSNS (decl);
if (node->local.inlinable)
node->local.disgread_inline_limits
= (*lang_hooks.tree_inlining.disregard_inline_limits) (decl);
for (edge = node->callees; edge; edge = edge->next_callee)
{
if (!edge->callee->reachable)
......@@ -241,16 +254,20 @@ cgraph_mark_functions_to_output ()
for (node = cgraph_nodes; node; node = node->next)
{
tree decl = node->decl;
struct cgraph_edge *e;
if (node->output)
abort ();
for (e = node->callers; e; e = e->next_caller)
if (!e->inline_call)
break;
/* We need to output all local functions that are used and not
always inlined, as well as those that are reachable from
outside the current compilation unit. */
if (DECL_SAVED_TREE (decl)
&& (node->needed
|| (!node->local.inline_many && !node->global.inline_once
&& node->reachable)
|| (DECL_ASSEMBLER_NAME_SET_P (decl)
&& TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl))))
|| (e && node->reachable))
&& !TREE_ASM_WRITTEN (decl) && !node->origin
&& !DECL_EXTERNAL (decl))
node->output = 1;
......@@ -266,6 +283,8 @@ cgraph_optimize_function (node)
tree decl = node->decl;
timevar_push (TV_INTEGRATION);
/* optimize_inline_calls avoids inlining of current_function_decl. */
current_function_decl = 0;
if (flag_inline_trees)
optimize_inline_calls (decl);
if (node->nested)
......@@ -283,6 +302,7 @@ cgraph_expand_function (node)
struct cgraph_node *node;
{
tree decl = node->decl;
struct cgraph_edge *e;
announce_function (decl);
......@@ -292,41 +312,26 @@ cgraph_expand_function (node)
via lang_expand_decl_stmt. */
(*lang_hooks.callgraph.expand_function) (decl);
/* When we decided to inline the function once, we never ever should
need to output it separately. */
if (node->global.inline_once)
abort ();
if (!node->local.inline_many
|| !node->callers)
for (e = node->callers; e; e = e->next_caller)
if (e->inline_call)
break;
if (!e)
DECL_SAVED_TREE (decl) = NULL;
current_function_decl = NULL;
}
/* Expand all functions that must be output.
Attempt to topologically sort the nodes so function is output when
all called functions are already assembled to allow data to be
propagated across the callgraph. Use a stack to get smaller distance
between a function and it's callees (later we may choose to use a more
sophisticated algorithm for function reordering; we will likely want
to use subsections to make the output functions appear in top-down
order. */
static void
cgraph_expand_functions ()
/* Fill array order with all nodes with output flag set in the reverse
topological order. */
static int
cgraph_postorder (struct cgraph_node **order)
{
struct cgraph_node *node, *node2;
struct cgraph_node **stack =
xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes);
struct cgraph_node **order =
xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes);
int stack_size = 0;
int order_pos = 0;
struct cgraph_edge *edge, last;
int i;
cgraph_mark_functions_to_output ();
struct cgraph_node **stack =
xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes);
/* We have to deal with cycles nicely, so use a depth first traversal
output algorithm. Ignore the fact that some functions won't need
......@@ -372,97 +377,739 @@ cgraph_expand_functions ()
}
}
}
for (i = order_pos - 1; i >= 0; i--)
free (stack);
return order_pos;
}
#define INLINED_TIMES(node) ((size_t)(node)->aux)
#define SET_INLINED_TIMES(node,times) ((node)->aux = (void *)(times))
/* Return list of nodes we decided to inline NODE into, set their output
flag and compute INLINED_TIMES.
We do simple backtracing to get INLINED_TIMES right. This should not be
expensive as we limit the amount of inlining. Alternatively we may first
discover set of nodes, topologically sort these and propagate
INLINED_TIMES */
static int
cgraph_inlined_into (struct cgraph_node *node, struct cgraph_node **array)
{
int nfound = 0;
struct cgraph_edge **stack;
struct cgraph_edge *e, *e1;
int sp;
int i;
/* Fast path: since we traverse in mostly topological order, we will likely
find no edges. */
for (e = node->callers; e; e = e->next_caller)
if (e->inline_call)
break;
if (!e)
return 0;
/* Allocate stack for back-tracking up callgraph. */
stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));
sp = 0;
/* Push the first edge on to the stack. */
stack[sp++] = e;
while (sp)
{
node = order[i];
if (node->output)
struct cgraph_node *caller;
/* Look at the edge on the top of the stack. */
e = stack[sp - 1];
caller = e->caller;
/* Check if the caller destination has been visited yet. */
if (!caller->output)
{
if (!node->reachable)
abort ();
node->output = 0;
cgraph_expand_function (node);
array[nfound++] = e->caller;
/* Mark that we have visited the destination. */
caller->output = true;
SET_INLINED_TIMES (caller, 0);
}
SET_INLINED_TIMES (caller, INLINED_TIMES (caller) + 1);
for (e1 = caller->callers; e1; e1 = e1->next_caller)
if (e1->inline_call)
break;
if (e1)
stack[sp++] = e1;
else
{
while (true)
{
for (e1 = e->next_caller; e1; e1 = e1->next_caller)
if (e1->inline_call)
break;
if (e1)
{
stack[sp - 1] = e1;
break;
}
else
{
sp--;
if (!sp)
break;
e = stack[sp - 1];
}
}
}
}
free (stack);
free (order);
if (cgraph_dump_file)
{
fprintf (cgraph_dump_file, "Found inline predecesors of %s:",
cgraph_node_name (node));
for (i = 0; i < nfound; i++)
{
fprintf (cgraph_dump_file, " %s", cgraph_node_name (array[i]));
if (INLINED_TIMES (array[i]) != 1)
fprintf (cgraph_dump_file, " (%i times)",
INLINED_TIMES (array[i]));
}
fprintf (cgraph_dump_file, "\n");
}
return nfound;
}
/* Mark all local functions.
We can not use node->needed directly as it is modified during
execution of cgraph_optimize. */
/* Return list of nodes we decided to inline into NODE, set their output
flag and compute INLINED_TIMES.
This function is identical to cgraph_inlined_into with callers and callees
nodes swapped. */
static int
cgraph_inlined_callees (struct cgraph_node *node, struct cgraph_node **array)
{
int nfound = 0;
struct cgraph_edge **stack;
struct cgraph_edge *e, *e1;
int sp;
int i;
/* Fast path: since we traverse in mostly topological order, we will likely
find no edges. */
for (e = node->callees; e; e = e->next_callee)
if (e->inline_call)
break;
if (!e)
return 0;
/* Allocate stack for back-tracking up callgraph. */
stack = xmalloc ((cgraph_n_nodes + 1) * sizeof (struct cgraph_edge));
sp = 0;
/* Push the first edge on to the stack. */
stack[sp++] = e;
while (sp)
{
struct cgraph_node *callee;
/* Look at the edge on the top of the stack. */
e = stack[sp - 1];
callee = e->callee;
/* Check if the callee destination has been visited yet. */
if (!callee->output)
{
array[nfound++] = e->callee;
/* Mark that we have visited the destination. */
callee->output = true;
SET_INLINED_TIMES (callee, 0);
}
SET_INLINED_TIMES (callee, INLINED_TIMES (callee) + 1);
for (e1 = callee->callees; e1; e1 = e1->next_callee)
if (e1->inline_call)
break;
if (e1)
stack[sp++] = e1;
else
{
while (true)
{
for (e1 = e->next_callee; e1; e1 = e1->next_callee)
if (e1->inline_call)
break;
if (e1)
{
stack[sp - 1] = e1;
break;
}
else
{
sp--;
if (!sp)
break;
e = stack[sp - 1];
}
}
}
}
free (stack);
if (cgraph_dump_file)
{
fprintf (cgraph_dump_file, "Found inline successors of %s:",
cgraph_node_name (node));
for (i = 0; i < nfound; i++)
{
fprintf (cgraph_dump_file, " %s", cgraph_node_name (array[i]));
if (INLINED_TIMES (array[i]) != 1)
fprintf (cgraph_dump_file, " (%i times)",
INLINED_TIMES (array[i]));
}
fprintf (cgraph_dump_file, "\n");
}
return nfound;
}
/* Estimate size of the function after inlining WHAT into TO. */
static int
cgraph_estimate_size_after_inlining (int times,
struct cgraph_node *to,
struct cgraph_node *what)
{
return (what->global.insns - INSNS_PER_CALL) *times + to->global.insns;
}
/* Estimate the growth caused by inlining NODE into all callees. */
static int
cgraph_estimate_growth (struct cgraph_node *node)
{
int growth = 0;
int calls_saved = 0;
int clones_added = 0;
struct cgraph_edge *e;
for (e = node->callers; e; e = e->next_caller)
if (!e->inline_call)
{
growth += ((cgraph_estimate_size_after_inlining (1, e->caller, node)
-
e->caller->global.insns) *e->caller->global.cloned_times);
calls_saved += e->caller->global.cloned_times;
clones_added += e->caller->global.cloned_times;
}
/* ??? Wrong for self recursive functions or cases where we decide to not
inline for different reasons, but it is not big deal as in that case
we will keep the body around, but we will also avoid some inlining. */
if (!node->needed && !node->origin && !DECL_EXTERNAL (node->decl))
growth -= node->global.insns, clones_added--;
if (!calls_saved)
calls_saved = 1;
return growth;
}
/* Update insn sizes after inlining WHAT into TO that is already inlined into
all nodes in INLINED array. */
static void
cgraph_mark_local_functions ()
cgraph_mark_inline (struct cgraph_node *to,
struct cgraph_node *what,
struct cgraph_node **inlined, int ninlined,
struct cgraph_node **inlined_callees,
int ninlined_callees)
{
int i;
int times = 0;
int clones = 0;
struct cgraph_edge *e;
bool called = false;
int new_insns;
for (e = what->callers; e; e = e->next_caller)
{
if (e->caller == to)
{
if (e->inline_call)
abort ();
e->inline_call = true;
times++;
clones += e->caller->global.cloned_times;
}
else if (!e->inline_call)
called = true;
}
if (!times)
abort ();
ncalls_inlined += times;
new_insns = cgraph_estimate_size_after_inlining (times, to, what);
if (to->global.will_be_output)
overall_insns += new_insns - to->global.insns;
to->global.insns = new_insns;
to->global.calls += (what->global.calls - 1) *times;
if (!called && !what->needed && !what->origin
&& !DECL_EXTERNAL (what->decl))
{
if (!what->global.will_be_output)
abort ();
clones--;
nfunctions_inlined++;
what->global.will_be_output = 0;
overall_insns -= what->global.insns;
}
what->global.cloned_times += clones;
if (to->global.calls < 0)
abort ();
for (i = 0; i < ninlined; i++)
{
new_insns =
cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) *
times, inlined[i], what);
if (inlined[i]->global.will_be_output)
overall_insns += new_insns - inlined[i]->global.insns;
inlined[i]->global.insns = new_insns;
inlined[i]->global.calls +=
(what->global.calls - 1) *INLINED_TIMES (inlined[i]) * times;
if (inlined[i]->global.calls < 0)
abort ();
}
for (i = 0; i < ninlined_callees; i++)
{
inlined_callees[i]->global.cloned_times +=
INLINED_TIMES (inlined_callees[i]) * clones;
}
}
/* Return false when inlining WHAT into TO is not good idea as it would cause
too large growth of function bodies. */
static bool
cgraph_check_inline_limits (struct cgraph_node *to,
struct cgraph_node *what,
struct cgraph_node **inlined, int ninlined)
{
int i;
int times = 0;
struct cgraph_edge *e;
int newsize;
int limit;
for (e = to->callees; e; e = e->next_callee)
if (e->callee == what)
times++;
/* When inlining large function body called once into small function,
take the inlined function as base for limiting the growth. */
if (to->local.self_insns > what->local.self_insns)
limit = to->local.self_insns;
else
limit = what->local.self_insns;
limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
newsize = cgraph_estimate_size_after_inlining (times, to, what);
if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
&& newsize > limit)
return false;
for (i = 0; i < ninlined; i++)
{
newsize =
cgraph_estimate_size_after_inlining (INLINED_TIMES (inlined[i]) *
times, inlined[i], what);
if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
&& newsize >
inlined[i]->local.self_insns *
(100 + PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH)) / 100)
return false;
}
return true;
}
/* Return true when function N is small enought to be inlined. */
static bool
cgraph_default_inline_p (struct cgraph_node *n)
{
if (!DECL_INLINE (n->decl) || !DECL_SAVED_TREE (n->decl))
return false;
if (DID_INLINE_FUNC (n->decl))
return n->global.insns < MAX_INLINE_INSNS_AUTO;
else
return n->global.insns < MAX_INLINE_INSNS_SINGLE;
}
/* We use greedy algorithm for inlining of small functions:
All inline candidates are put into prioritized heap based on estimated
growth of the overall number of instructions and then update the estimates.
INLINED and INLINED_CALEES are just pointers to arrays large enought
to be passed to cgraph_inlined_into and cgraph_inlined_callees. */
static void
cgraph_decide_inlining_of_small_functions (struct cgraph_node **inlined,
struct cgraph_node
**inlined_callees)
{
int i;
struct cgraph_node *node;
fibheap_t heap = fibheap_new ();
struct fibnode **heap_node =
xcalloc (sizeof (struct fibnode *), cgraph_max_uid);
int ninlined, ninlined_callees;
int max_insns = ((HOST_WIDEST_INT) initial_insns
* (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "Marking local functions:");
/* Put all inline candidates into the heap. */
/* Figure out functions we want to assemble. */
for (node = cgraph_nodes; node; node = node->next)
{
node->local.local = (!node->needed
&& DECL_SAVED_TREE (node->decl)
&& !DECL_COMDAT (node->decl)
&& !TREE_PUBLIC (node->decl));
if (cgraph_dump_file && node->local.local)
fprintf (cgraph_dump_file, " %s", cgraph_node_name (node));
struct cgraph_edge *e;
if (!node->local.inlinable || !node->callers
|| !cgraph_default_inline_p (node))
continue;
/* Rule out always_inline functions we dealt with earler. */
for (e = node->callers; e; e = e->next_caller)
if (e->inline_call)
break;
if (e)
continue;
heap_node[node->uid] =
fibheap_insert (heap, cgraph_estimate_growth (node), node);
}
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "\n");
fprintf (cgraph_dump_file, "\n\nDeciding on inlining: ");
while ((node = fibheap_extract_min (heap)) && overall_insns <= max_insns)
{
struct cgraph_edge *e;
int old_insns = overall_insns;
heap_node[node->uid] = NULL;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "Considering %s %i insns, growth %i.\n",
cgraph_node_name (node), node->global.insns,
cgraph_estimate_growth (node));
if (!cgraph_default_inline_p (node))
{
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "Function too large.\n");
continue;
}
ninlined_callees = cgraph_inlined_callees (node, inlined_callees);
for (e = node->callers; e; e = e->next_caller)
if (!e->inline_call && e->caller != node)
{
ninlined = cgraph_inlined_into (e->caller, inlined);
if (e->callee->output
|| !cgraph_check_inline_limits (e->caller, node, inlined,
ninlined))
{
for (i = 0; i < ninlined; i++)
inlined[i]->output = 0, node->aux = 0;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "Not inlining into %s\n",
cgraph_node_name (e->caller));
continue;
}
cgraph_mark_inline (e->caller, node, inlined, ninlined,
inlined_callees, ninlined_callees);
if (heap_node[e->caller->uid])
fibheap_replace_key (heap, heap_node[e->caller->uid],
cgraph_estimate_growth (e->caller));
/* Size of the functions we updated into has changed, so update
the keys. */
for (i = 0; i < ninlined; i++)
{
inlined[i]->output = 0, node->aux = 0;
if (heap_node[inlined[i]->uid])
fibheap_replace_key (heap, heap_node[inlined[i]->uid],
cgraph_estimate_growth (inlined[i]));
}
}
/* Similarly all functions called by function we just inlined
are now called more times; update keys. */
for (e = node->callees; e; e = e->next_callee)
if (!e->inline_call && heap_node[e->callee->uid])
fibheap_replace_key (heap, heap_node[e->callee->uid],
cgraph_estimate_growth (e->callee));
for (i = 0; i < ninlined_callees; i++)
{
struct cgraph_edge *e;
for (e = inlined_callees[i]->callees; e; e = e->next_callee)
if (!e->inline_call && heap_node[e->callee->uid])
fibheap_replace_key (heap, heap_node[e->callee->uid],
cgraph_estimate_growth (e->callee));
inlined_callees[i]->output = 0, node->aux = 0;
}
if (cgraph_dump_file)
fprintf (cgraph_dump_file,
"Created %i clones, Num insns:%i (%+i), %.2f%%.\n\n",
node->global.cloned_times - 1,
overall_insns, overall_insns - old_insns,
overall_insns * 100.0 / initial_insns);
}
if (cgraph_dump_file && !fibheap_empty (heap))
fprintf (cgraph_dump_file, "inline-unit-growth limit reached.\n");
fibheap_delete (heap);
free (heap_node);
}
/* Decide what function should be inlined because they are invoked once
(so inlining won't result in duplication of the code). */
/* Decide on the inlining. We do so in the topological order to avoid
expenses on updating datastructures. */
static void
cgraph_mark_functions_to_inline_once ()
cgraph_decide_inlining (void)
{
struct cgraph_node *node, *node1;
struct cgraph_node *node;
int nnodes;
struct cgraph_node **order =
xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes);
struct cgraph_node **inlined =
xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes);
struct cgraph_node **inlined_callees =
xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes);
int ninlined;
int ninlined_callees;
int i, y;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "\n\nMarking functions to inline once:");
for (node = cgraph_nodes; node; node = node->next)
{
int ncalls = 0;
struct cgraph_edge *e;
node->global.insns = node->local.self_insns;
for (e = node->callees; e; e = e->next_callee)
ncalls++;
node->global.calls = ncalls;
if (!DECL_EXTERNAL (node->decl))
{
node->global.cloned_times = 1;
initial_insns += node->local.self_insns;
node->global.will_be_output = true;
}
}
overall_insns = initial_insns;
nnodes = cgraph_postorder (order);
/* Now look for function called only once and mark them to inline.
From this point number of calls to given function won't grow. */
for (node = cgraph_nodes; node; node = node->next)
node->aux = 0;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "\n\nDeciding on always_inline functions:\n");
/* In the first pass mark all always_inline edges. Do this with a priority
so no our decisions makes this impossible. */
for (i = nnodes - 1; i >= 0; i--)
{
struct cgraph_edge *e;
node = order[i];
for (e = node->callees; e; e = e->next_callee)
if (e->callee->local.disgread_inline_limits)
break;
if (!e)
continue;
if (cgraph_dump_file)
fprintf (cgraph_dump_file,
"Considering %s %i insns (always inline)\n",
cgraph_node_name (node), node->global.insns);
ninlined = cgraph_inlined_into (order[i], inlined);
for (; e; e = e->next_callee)
{
if (e->inline_call || !e->callee->local.disgread_inline_limits)
continue;
if (e->callee->output || e->callee == node)
continue;
ninlined_callees =
cgraph_inlined_callees (e->callee, inlined_callees);
cgraph_mark_inline (node, e->callee, inlined, ninlined,
inlined_callees, ninlined_callees);
for (y = 0; y < ninlined_callees; y++)
inlined_callees[y]->output = 0, node->aux = 0;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "Inlined %i times. Now %i insns\n\n",
node->global.cloned_times, overall_insns);
}
for (y = 0; y < ninlined; y++)
inlined[y]->output = 0, node->aux = 0;
}
cgraph_decide_inlining_of_small_functions (inlined, inlined_callees);
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "\n\nFunctions to inline once:\n");
/* And finally decide what functions are called once. */
for (i = nnodes - 1; i >= 0; i--)
{
node = order[i];
if (node->callers && !node->callers->next_caller && !node->needed
&& node->local.can_inline_once)
&& node->local.inlinable && !node->callers->inline_call
&& !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
{
bool ok = true;
struct cgraph_node *node1;
/* Verify that we won't duplicate the caller. */
for (node1 = node->callers->caller;
node1->local.inline_many
&& node1->callers
&& ok;
node1 = node1->callers->caller)
node1->callers && node1->callers->inline_call
&& ok; node1 = node1->callers->caller)
if (node1->callers->next_caller || node1->needed)
ok = false;
if (ok)
{
node->global.inline_once = true;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, " %s", cgraph_node_name (node));
fprintf (cgraph_dump_file,
"Considering %s %i insns (called once)\n",
cgraph_node_name (node), node->global.insns);
ninlined = cgraph_inlined_into (node->callers->caller, inlined);
if (cgraph_check_inline_limits
(node->callers->caller, node, inlined, ninlined))
{
ninlined_callees =
cgraph_inlined_callees (node, inlined_callees);
cgraph_mark_inline (node->callers->caller, node, inlined,
ninlined, inlined_callees,
ninlined_callees);
for (y = 0; y < ninlined_callees; y++)
inlined_callees[y]->output = 0, node->aux = 0;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "Inlined. Now %i insns\n\n", overall_insns);
}
for (y = 0; y < ninlined; y++)
inlined[y]->output = 0, node->aux = 0;
}
}
}
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "\n");
fprintf (cgraph_dump_file,
"\nInlined %i calls, elliminated %i functions, %i insns turned to %i insns.\n",
ncalls_inlined, nfunctions_inlined, initial_insns,
overall_insns);
free (order);
free (inlined);
free (inlined_callees);
}
/* Return true when CALLER_DECL should be inlined into CALLEE_DECL. */
bool
cgraph_inline_p (tree caller_decl, tree callee_decl)
{
struct cgraph_node *caller = cgraph_node (caller_decl);
struct cgraph_node *callee = cgraph_node (callee_decl);
struct cgraph_edge *e;
for (e = caller->callees; e; e = e->next_callee)
if (e->callee == callee)
return e->inline_call;
/* We do not record builtins in the callgraph. Perhaps it would make more
sense to do so and then prune out those not overwriten by explicit
function body. */
return false;
}
/* Expand all functions that must be output.
Attempt to topologically sort the nodes so function is output when
all called functions are already assembled to allow data to be
propagated accross the callgraph. Use a stack to get smaller distance
between a function and it's callees (later we may choose to use a more
sophisticated algorithm for function reordering; we will likely want
to use subsections to make the output functions appear in top-down
order). */
static void
cgraph_expand_functions ()
{
struct cgraph_node *node;
struct cgraph_node **order =
xcalloc (sizeof (struct cgraph_node *), cgraph_n_nodes);
int order_pos = 0;
int i;
cgraph_mark_functions_to_output ();
order_pos = cgraph_postorder (order);
for (i = order_pos - 1; i >= 0; i--)
{
node = order[i];
if (node->output)
{
if (!node->reachable)
abort ();
node->output = 0;
cgraph_expand_function (node);
}
}
free (order);
}
/* Mark all local functions.
Local function is function whose calls can occur only in the
current compilation unit so we change it's calling convetion.
We simply mark all static functions whose address is not taken
as local. */
static void
cgraph_mark_local_functions ()
{
struct cgraph_node *node;
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "Marking local functions:");
/* Figure out functions we want to assemble. */
for (node = cgraph_nodes; node; node = node->next)
{
node->local.local = (!node->needed
&& DECL_SAVED_TREE (node->decl)
&& !TREE_PUBLIC (node->decl));
if (cgraph_dump_file && node->local.local)
fprintf (cgraph_dump_file, " %s", cgraph_node_name (node));
}
if (cgraph_dump_file)
fprintf (cgraph_dump_file, "\n");
}
/* Perform simple optimizations based on callgraph. */
void
cgraph_optimize ()
{
struct cgraph_node *node;
bool changed = true;
timevar_push (TV_CGRAPHOPT);
if (!quiet_flag)
fprintf (stderr, "Performing intraprocedural optimizations\n");
if (cgraph_dump_file)
{
fprintf (cgraph_dump_file, "Initial callgraph:");
......@@ -470,7 +1117,7 @@ cgraph_optimize ()
}
cgraph_mark_local_functions ();
cgraph_mark_functions_to_inline_once ();
cgraph_decide_inlining ();
cgraph_global_info_ready = true;
if (cgraph_dump_file)
......@@ -480,39 +1127,10 @@ cgraph_optimize ()
}
timevar_pop (TV_CGRAPHOPT);
if (!quiet_flag)
fprintf (stderr, "\n\nAssembling functions:");
fprintf (stderr, "Assembling functions:");
/* Output everything.
??? Our inline heuristic may decide to not inline functions previously
marked as inlinable thus adding new function bodies that must be output.
Later we should move all inlining decisions to callgraph code to make
this impossible. */
/* Output everything. */
cgraph_expand_functions ();
if (!quiet_flag)
fprintf (stderr, "\n\nAssembling functions that failed to inline:");
while (changed && !errorcount && !sorrycount)
{
changed = false;
for (node = cgraph_nodes; node; node = node->next)
{
tree decl = node->decl;
if (!node->origin
&& !TREE_ASM_WRITTEN (decl)
&& DECL_SAVED_TREE (decl)
&& !DECL_EXTERNAL (decl))
{
struct cgraph_edge *edge;
for (edge = node->callers; edge; edge = edge->next_caller)
if (TREE_ASM_WRITTEN (edge->caller->decl))
{
changed = true;
cgraph_expand_function (node);
break;
}
}
}
}
if (cgraph_dump_file)
{
fprintf (cgraph_dump_file, "Final callgraph:");
......
......@@ -4572,6 +4572,7 @@ After exceeding the maximum number of inlined instructions by repeated
inlining, a linear function is used to decrease the allowable size
for single functions. The slope of that function is the negative
reciprocal of the number specified here.
This parameter is ignored when @option{-funit-at-a-time} is used.
The default value is 32.
@item min-inline-insns
......@@ -4579,8 +4580,27 @@ The repeated inlining is throttled more and more by the linear function
after exceeding the limit. To avoid too much throttling, a minimum for
this function is specified here to allow repeated inlining for very small
functions even when a lot of repeated inlining already has been done.
This parameter is ignored when @option{-funit-at-a-time} is used.
The default value is 10.
@item large-function-insns
The limit specifying really large functions. For functions greater than this
limit inlining is constrained by @option{--param large-function-growth}.
This parameter is usefull primarily to avoid extreme compilation time caused by non-linear
algorithms used by the backend.
This parameter is ignored when @option{-funit-at-a-time} is not used.
The default value is 30000.
@item large-function-growth
Specifies maximal growth of large functtion caused by inlining in percents.
This parameter is ignored when @option{-funit-at-a-time} is not used.
The default value is 200.
@item inline-unit-growth
Specifies maximal overall growth of the compilation unit caused by inlining.
This parameter is ignored when @option{-funit-at-a-time} is not used.
The default value is 150.
@item max-inline-insns-rtl
For languages that use the RTL inliner (this happens at a later stage
than tree inlining), you can set the maximum allowable size (counted
......
......@@ -152,6 +152,19 @@ DEFPARAM(PARAM_MAX_PENDING_LIST_LENGTH,
"The maximum length of scheduling's pending operations list",
32)
DEFPARAM(PARAM_LARGE_FUNCTION_INSNS,
"large-function-insns",
"The size of function body to be considered large",
10000)
DEFPARAM(PARAM_LARGE_FUNCTION_GROWTH,
"large-function-growth",
"Maximal growth due to inlining of large function (in percents)",
100)
DEFPARAM(PARAM_INLINE_UNIT_GROWTH,
"inline-unit-growth",
"how much can given compilation unit grow because of the inlining (in percents)",
50)
/* The GCSE optimization will be disabled if it would require
significantly more memory than this value. */
DEFPARAM(PARAM_MAX_GCSE_MEMORY,
......
// { dg-options "-Winline -O2" }
// { dg-options "-Winline -O2 -fno-unit-at-a-time" }
static inline int foo(int x); // { dg-warning "" }
......
......@@ -106,6 +106,7 @@ typedef struct inline_data
htab_t tree_pruner;
/* Decl of function we are inlining into. */
tree decl;
tree current_decl;
} inline_data;
/* Prototypes. */
......@@ -1145,6 +1146,10 @@ expand_call_inline (tree *tp, int *walk_subtrees, void *data)
if (!fn)
return NULL_TREE;
/* Turn forward declarations into real ones. */
if (flag_unit_at_a_time)
fn = cgraph_node (fn)->decl;
/* If fn is a declaration of a function in a nested scope that was
globally declared inline, we don't set its DECL_INITIAL.
However, we can't blindly follow DECL_ABSTRACT_ORIGIN because the
......@@ -1159,9 +1164,9 @@ expand_call_inline (tree *tp, int *walk_subtrees, void *data)
/* Don't try to inline functions that are not well-suited to
inlining. */
if ((!flag_unit_at_a_time || !DECL_SAVED_TREE (fn)
|| !cgraph_global_info (fn)->inline_once)
&& !inlinable_function_p (fn, id, 0))
if (!DECL_SAVED_TREE (fn)
|| (flag_unit_at_a_time && !cgraph_inline_p (id->current_decl, fn))
|| (!flag_unit_at_a_time && !inlinable_function_p (fn, id, 0)))
{
if (warn_inline && DECL_INLINE (fn) && !DID_INLINE_FUNC (fn)
&& !DECL_IN_SYSTEM_HEADER (fn))
......@@ -1403,7 +1408,12 @@ expand_call_inline (tree *tp, int *walk_subtrees, void *data)
}
/* Recurse into the body of the just inlined function. */
{
tree old_decl = id->current_decl;
id->current_decl = fn;
expand_calls_inline (inlined_body, id);
id->current_decl = old_decl;
}
VARRAY_POP (id->fns);
/* If we've returned to the top level, clear out the record of how
......@@ -1446,6 +1456,7 @@ optimize_inline_calls (tree fn)
memset (&id, 0, sizeof (id));
id.decl = fn;
id.current_decl = fn;
/* Don't allow recursion into FN. */
VARRAY_TREE_INIT (id.fns, 32, "fns");
VARRAY_PUSH_TREE (id.fns, fn);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment