Commit ff28a94d, authored and committed by Jan Hubicka

invoke.texi (large-stack-frame, [...]): New params.

	* invoke.texi (large-stack-frame, large-stack-frame-growth): New params.
	* cgraph.c (dump_cgraph_node): Dump stack usage.
	* cgraph.h (cgraph_local_info): Add estimated_self_stack_size.
	(cgraph_global_info): Add estimated_stack_size and stack_frame_offset.
	* cgraphunit.c (cgraph_analyze_function): Analyze stack sizes.
	* ipa-inline.c (cgraph_clone_inlined_nodes): Propagate stack usage.
	(cgraph_check_inline_limits): Limit stack growth.
	* cfgexpand.c: Include tree-inline.h.
	(account_stack_vars): New function.
	(expand_one_var): New param to just account the stack; return estimated
	size.
	(expand_used_vars_for_block): Update call of expand_one_var.
	(account_used_vars_for_block): New function.
	(estimated_stack_frame_size): Likewise.
	(init_vars_expansion, fini_vars_expansion): Break out from ...
	(expand_used_vars): ... here.
	* tree-inline.h (estimated_stack_frame_size): Declare.
	* params.def (PARAM_LARGE_STACK_FRAME, PARAM_STACK_FRAME_GROWTH): New.

From-SVN: r119281
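In short: every function gets a per-body stack estimate (local.estimated_self_stack_size), inlining lays the callee's frame out after the caller's variables and records the peak in global.estimated_stack_size, and cgraph_check_inline_limits rejects candidates that would exceed both new limits. A minimal standalone sketch of that check, with made-up constants and a hypothetical helper name (not the GCC sources):

#include <stdio.h>

/* Stand-ins for the two new --param values (defaults from params.def).  */
#define LARGE_STACK_FRAME 256      /* bytes */
#define STACK_FRAME_GROWTH 1000    /* percent */

/* Hypothetical mirror of the check added to cgraph_check_inline_limits:
   inlining is rejected only when the combined frame exceeds both the
   caller's allowed growth and the large-frame cutoff.  */
static int
inline_stack_ok (long to_self_size, long to_frame_offset, long what_size)
{
  long stack_size_limit = to_self_size;
  long inlined_stack;

  stack_size_limit += stack_size_limit * STACK_FRAME_GROWTH / 100;
  inlined_stack = to_frame_offset + to_self_size + what_size;
  return !(inlined_stack > stack_size_limit
           && inlined_stack > LARGE_STACK_FRAME);
}

int
main (void)
{
  /* 100-byte caller, 400-byte callee: 500 <= 1100, inline is allowed.  */
  printf ("%d\n", inline_stack_ok (100, 0, 400));
  /* 8-byte caller, 300-byte callee: 308 > 88 and 308 > 256, rejected.  */
  printf ("%d\n", inline_stack_ok (8, 0, 300));
  return 0;
}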
cfgexpand.c
@@ -39,6 +39,7 @@ Boston, MA 02110-1301, USA. */
#include "toplev.h"
#include "debug.h"
#include "params.h"
#include "tree-inline.h"
/* Verify that there is exactly single jump instruction since last and attach
   REG_BR_PROB note specifying probability.
@@ -583,6 +584,28 @@ expand_stack_vars (bool (*pred) (tree))
    }
}
/* Take into account all sizes of partitions and reset DECL_RTLs.  */
static HOST_WIDE_INT
account_stack_vars (void)
{
  size_t si, j, i, n = stack_vars_num;
  HOST_WIDE_INT size = 0;

  for (si = 0; si < n; ++si)
    {
      i = stack_vars_sorted[si];

      /* Skip variables that aren't partition representatives, for now.  */
      if (stack_vars[i].representative != i)
        continue;

      size += stack_vars[i].size;
      for (j = i; j != EOC; j = stack_vars[j].next)
        SET_DECL_RTL (stack_vars[j].decl, NULL);
    }
  return size;
}
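Only partition representatives contribute to the sum above, so a stack slot shared by several partitioned variables is counted once.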
/* A subroutine of expand_one_var.  Called to immediately assign rtl
   to a variable to be allocated in the stack frame.  */
@@ -721,31 +744,53 @@ defer_stack_allocation (tree var, bool toplevel)
/* A subroutine of expand_used_vars.  Expand one variable according to
   its flavor.  Variables to be placed on the stack are not actually
   expanded yet, merely recorded.
   When REALLY_EXPAND is false, only add stack values to be allocated.
   Return stack usage this variable is supposed to take.  */

static HOST_WIDE_INT
expand_one_var (tree var, bool toplevel, bool really_expand)
{
  if (TREE_CODE (var) != VAR_DECL)
    {
      if (really_expand)
        lang_hooks.expand_decl (var);
    }
  else if (DECL_EXTERNAL (var))
    ;
  else if (DECL_HAS_VALUE_EXPR_P (var))
    ;
  else if (TREE_STATIC (var))
    {
      if (really_expand)
        expand_one_static_var (var);
    }
  else if (DECL_RTL_SET_P (var))
    ;
  else if (TREE_TYPE (var) == error_mark_node)
    {
      if (really_expand)
        expand_one_error_var (var);
    }
  else if (DECL_HARD_REGISTER (var))
    {
      if (really_expand)
        expand_one_hard_reg_var (var);
    }
  else if (use_register_for_decl (var))
    {
      if (really_expand)
        expand_one_register_var (var);
    }
  else if (defer_stack_allocation (var, toplevel))
    add_stack_var (var);
  else
    {
      if (really_expand)
        expand_one_stack_var (var);
      return tree_low_cst (DECL_SIZE_UNIT (var), 1);
    }
  return 0;
}
/* A subroutine of expand_used_vars. Walk down through the BLOCK tree
@@ -770,7 +815,7 @@ expand_used_vars_for_block (tree block, bool toplevel)
           care of this.  */
        || (!flag_unit_at_a_time && TREE_STATIC (t)
            && DECL_PRESERVE_P (t)))
      expand_one_var (t, toplevel, true);

  this_sv_num = stack_vars_num;
@@ -947,30 +992,137 @@ create_stack_guard (void)
  cfun->stack_protect_guard = guard;
}
/* A subroutine of expand_used_vars.  Walk down through the BLOCK tree
   expanding variables.  Those variables that can be put into registers
   are allocated pseudos; those that can't are put on the stack.

   TOPLEVEL is true if this is the outermost BLOCK.  */

static HOST_WIDE_INT
account_used_vars_for_block (tree block, bool toplevel)
{
  size_t i, j, old_sv_num, this_sv_num, new_sv_num;
  tree t;
  HOST_WIDE_INT size = 0;

  old_sv_num = toplevel ? 0 : stack_vars_num;

  /* Expand all variables at this level.  */
  for (t = BLOCK_VARS (block); t ; t = TREE_CHAIN (t))
    if (TREE_USED (t))
      size += expand_one_var (t, toplevel, false);

  this_sv_num = stack_vars_num;

  /* Expand all variables at containing levels.  */
  for (t = BLOCK_SUBBLOCKS (block); t ; t = BLOCK_CHAIN (t))
    size += account_used_vars_for_block (t, false);

  /* Since we do not track exact variable lifetimes (which is not even
     possible for variables whose address escapes), we mirror the block
     tree in the interference graph.  Here we cause all variables at this
     level, and all sublevels, to conflict.  Do make certain that a
     variable conflicts with itself.  */
  if (old_sv_num < this_sv_num)
    {
      new_sv_num = stack_vars_num;
      resize_stack_vars_conflict (new_sv_num);

      for (i = old_sv_num; i < new_sv_num; ++i)
        for (j = i < this_sv_num ? i+1 : this_sv_num; j-- > old_sv_num ;)
          add_stack_var_conflict (i, j);
    }

  return size;
}
/* Prepare for expanding variables.  */
static void
init_vars_expansion (void)
{
  tree t;

  /* Set TREE_USED on all variables in the unexpanded_var_list.  */
  for (t = cfun->unexpanded_var_list; t; t = TREE_CHAIN (t))
    TREE_USED (TREE_VALUE (t)) = 1;

  /* Clear TREE_USED on all variables associated with a block scope.  */
  clear_tree_used (DECL_INITIAL (current_function_decl));

  /* Initialize local stack smashing state.  */
  has_protected_decls = false;
  has_short_buffer = false;
}
/* Free up stack variable graph data.  */
static void
fini_vars_expansion (void)
{
  XDELETEVEC (stack_vars);
  XDELETEVEC (stack_vars_sorted);
  XDELETEVEC (stack_vars_conflict);
  stack_vars = NULL;
  stack_vars_alloc = stack_vars_num = 0;
  stack_vars_conflict = NULL;
  stack_vars_conflict_alloc = 0;
}
HOST_WIDE_INT
estimated_stack_frame_size (void)
{
  HOST_WIDE_INT size = 0;
  tree t, outer_block = DECL_INITIAL (current_function_decl);

  init_vars_expansion ();

  /* At this point all variables on the unexpanded_var_list with TREE_USED
     set are not associated with any block scope.  Lay them out.  */
  for (t = cfun->unexpanded_var_list; t; t = TREE_CHAIN (t))
    {
      tree var = TREE_VALUE (t);

      if (TREE_USED (var))
        size += expand_one_var (var, true, false);
      TREE_USED (var) = 1;
    }
  size += account_used_vars_for_block (outer_block, true);
  if (stack_vars_num > 0)
    {
      /* Due to the way alias sets work, no variables with non-conflicting
         alias sets may be assigned the same address.  Add conflicts to
         reflect this.  */
      add_alias_set_conflicts ();

      /* If stack protection is enabled, we don't share space between
         vulnerable data and non-vulnerable data.  */
      if (flag_stack_protect)
        add_stack_protection_conflicts ();

      /* Now that we have collected all stack variables, and have computed a
         minimal interference graph, attempt to save some stack space.  */
      partition_stack_vars ();
      if (dump_file)
        dump_stack_var_partition ();

      size += account_stack_vars ();
      fini_vars_expansion ();
    }
  return size;
}
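Note the estimate runs the same conflict construction and partitioning as real expansion, just with really_expand false, so it should track what expand_used_vars below would actually allocate; fini_vars_expansion then discards the scratch data.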
/* Expand all variables used in the function.  */

static void
expand_used_vars (void)
{
  tree t, outer_block = DECL_INITIAL (current_function_decl);

  /* Compute the phase of the stack frame for this function.  */
  {
    int align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
    int off = STARTING_FRAME_OFFSET % align;
    frame_phase = off ? align - off : 0;
  }

  init_vars_expansion ();

  /* At this point all variables on the unexpanded_var_list with TREE_USED
     set are not associated with any block scope.
@@ -1005,7 +1157,7 @@ expand_used_vars (void)
      TREE_USED (var) = 1;

      if (expand_now)
        expand_one_var (var, true, true);
    }

  cfun->unexpanded_var_list = NULL_TREE;
@@ -1059,14 +1211,7 @@ expand_used_vars (void)
      expand_stack_vars (NULL);

      fini_vars_expansion ();
    }

  /* If the target requires that FRAME_OFFSET be aligned, do it.  */
cgraph.c
@@ -716,6 +716,10 @@ dump_cgraph_node (FILE *f, struct cgraph_node *node)
    fprintf (f, " %i insns", node->local.self_insns);
  if (node->global.insns && node->global.insns != node->local.self_insns)
    fprintf (f, " (%i after inlining)", node->global.insns);
  if (node->local.estimated_self_stack_size)
    fprintf (f, " %i bytes stack usage", (int) node->local.estimated_self_stack_size);
  if (node->global.estimated_stack_size != node->local.estimated_self_stack_size)
    fprintf (f, " %i bytes after inlining", (int) node->global.estimated_stack_size);
  if (node->origin)
    fprintf (f, " nested in: %s", cgraph_node_name (node->origin));
  if (node->needed)
cgraph.h
@@ -51,6 +51,9 @@ enum availability
struct cgraph_local_info GTY(())
{
  /* Estimated stack frame consumption by the function.  */
  HOST_WIDE_INT estimated_self_stack_size;

  /* Size of the function before inlining.  */
  int self_insns;
@@ -88,6 +91,11 @@ struct cgraph_local_info GTY(())
struct cgraph_global_info GTY(())
{
  /* Estimated stack frame consumption by the function.  */
  HOST_WIDE_INT estimated_stack_size;
  /* Expected offset of the stack frame of an inlined function.  */
  HOST_WIDE_INT stack_frame_offset;

  /* For inline clones this points to the function they will be inlined into.  */
  struct cgraph_node *inlined_to;
cgraphunit.c
@@ -950,6 +950,9 @@ cgraph_analyze_function (struct cgraph_node *node)
  /* First kill forward declaration so reverse inlining works properly.  */
  cgraph_create_edges (node, decl);

  node->local.estimated_self_stack_size = estimated_stack_frame_size ();
  node->global.estimated_stack_size = node->local.estimated_self_stack_size;
  node->global.stack_frame_offset = 0;
  node->local.inlinable = tree_inlinable_function_p (decl);
  if (!flag_unit_at_a_time)
    node->local.self_insns = estimate_num_insns (decl);
doc/invoke.texi
@@ -6033,6 +6033,15 @@ This parameter is ignored when @option{-funit-at-a-time} is not used.
The default value is 50, which limits unit growth to 1.5 times the original
size.

@item large-stack-frame
The limit specifying large stack frames.  During inlining the algorithm
tries not to grow past this limit too much.  The default value is 256 bytes.

@item large-stack-frame-growth
Specifies the maximal growth of large stack frames caused by inlining, in
percent.  The default value is 1000, which limits large stack frame growth
to 11 times the original size.
@item max-inline-insns-recursive
@itemx max-inline-insns-recursive-auto
Specifies maximum number of instructions out-of-line copy of self recursive inline
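Both knobs are ordinary --param values, so a build wanting tighter inliner stack behavior than the defaults could use, say, gcc -O2 -Winline --param large-stack-frame=128 --param large-stack-frame-growth=300 (illustrative values, not a recommendation).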
ipa-inline.c
@@ -115,6 +115,7 @@ cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
void
cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
{
  HOST_WIDE_INT peak;

  if (duplicate)
    {
      /* We may eliminate the need for out-of-line copy to be output.
@@ -141,6 +142,11 @@ cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original)
    e->callee->global.inlined_to = e->caller->global.inlined_to;
  else
    e->callee->global.inlined_to = e->caller;
  e->callee->global.stack_frame_offset
    = e->caller->global.stack_frame_offset + e->caller->local.estimated_self_stack_size;
  peak = e->callee->global.stack_frame_offset + e->callee->local.estimated_self_stack_size;
  if (e->callee->global.inlined_to->global.estimated_stack_size < peak)
    e->callee->global.inlined_to->global.estimated_stack_size = peak;

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = e->next_callee)
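The bookkeeping assumes the inlined callee's variables sit after the caller's own frame: the callee's stack_frame_offset is the caller's offset plus the caller's self size, peak adds the callee's self size on top, and the largest peak seen across all inline clones becomes the outermost function's estimated_stack_size.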
@@ -257,6 +263,7 @@ cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
  struct cgraph_edge *e;
  int newsize;
  int limit;
  HOST_WIDE_INT stack_size_limit, inlined_stack;

  if (one_only)
    times = 1;
@@ -288,6 +295,21 @@ cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
      *reason = N_("--param large-function-growth limit reached");
      return false;
    }

  stack_size_limit = to->local.estimated_self_stack_size;
  stack_size_limit += stack_size_limit * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100;

  inlined_stack = (to->global.stack_frame_offset
                   + to->local.estimated_self_stack_size
                   + what->global.estimated_stack_size);
  if (inlined_stack > stack_size_limit
      && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
    {
      if (reason)
        *reason = N_("--param large-stack-frame-growth limit reached");
      return false;
    }
  return true;
}
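For example, with the default parameters a caller whose own frame is estimated at 256 bytes gets stack_size_limit = 256 + 256 * 1000 / 100 = 2816 bytes, and a candidate is rejected only when the combined frame exceeds both that limit and the 256-byte large-stack-frame cutoff, so small frames keep combining freely.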
params.def
@@ -198,6 +198,14 @@ DEFPARAM(PARAM_INLINE_CALL_COST,
	 "inline-call-cost",
	 "expense of call operation relative to ordinary arithmetic operations",
	 16, 0, 0)

DEFPARAM(PARAM_LARGE_STACK_FRAME,
	 "large-stack-frame",
	 "The size of stack frame to be considered large",
	 256, 0, 0)

DEFPARAM(PARAM_STACK_FRAME_GROWTH,
	 "large-stack-frame-growth",
	 "Maximal stack frame growth due to inlining (in percent)",
	 1000, 0, 0)

/* The GCSE optimization will be disabled if it would require
   significantly more memory than this value.  */
testsuite/ChangeLog
2006-11-28  Jan Hubicka  <jh@suse.cz>

	* gcc.dg/winline-1.c: New test.
testsuite/gcc.dg/winline-1.c (new file)

/* { dg-do compile } */
/* { dg-options "-O2 -Winline --param large-stack-frame=10 --param large-stack-frame-growth=2" } */

int a,b;
void test(char *);

static inline
int aa (void)
{
  char t[10];
  test(t);
}

static inline
int bb (void)
{ /* { dg-warning "large-stack-frame" "" } */
  char t[100];
  test(t);
}

t()
{
  if (a)
    aa();
  if (b)
    bb(); /* { dg-warning "called from here" "" } */
}
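With the caller t() contributing an essentially empty frame of its own, inlining aa costs 10 bytes, which is not strictly greater than the large-stack-frame cutoff of 10, so it is allowed; bb's 100 bytes exceeds both the cutoff and the 2% growth allowance, hence the warning on its call site.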
tree-inline.h
@@ -110,6 +110,8 @@ void tree_function_versioning (tree, tree, varray_type, bool);
extern tree remap_decl (tree decl, copy_body_data *id);
extern tree remap_type (tree type, copy_body_data *id);

extern HOST_WIDE_INT estimated_stack_frame_size (void);

/* 0 if we should not perform inlining.
   1 if we should expand function calls inline at the tree level.
   2 if we should consider *all* functions to be inline