Commit 7072df0a by Dominik Vogt, committed by Andreas Krebbel

Allocate constant size dynamic stack space in the prologue

The attached patch fixes a warning seen during Linux kernel compilation
on S/390 with -mwarn-dynamicstack: runtime alignment of stack variables
with constant size caused cfun->calls_alloca to be set even though
alloca is not used at all.  The patched code places constant size
runtime aligned variables in the "virtual stack vars" area instead of
creating a "virtual stack dynamic" area.

This behaviour is activated by defining

  #define ALLOCATE_DYNAMIC_STACK_SPACE_IN_PROLOGUE 1

in the backend; otherwise the old logic is used.
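
In terms of the new interface added to explow.c, the prologue allocation
performed by expand_stack_vars boils down to the following sequence (a
commented paraphrase of the cfgexpand.c hunk below, not additional code):

  /* Sketch: when the combined size of the "large"-aligned variables is a
     compile-time constant, reserve it in the ordinary frame instead of
     emitting a dynamic allocation.  */
  if (large_size > 0 && !large_allocation_done)
    {
      rtx large_allocsize = GEN_INT (large_size);

      /* Round the constant size up exactly as a dynamic allocation would.  */
      get_dynamic_stack_size (&large_allocsize, 0, large_align, NULL);
      /* Carve the space out of the "virtual stack vars" area, i.e. the
         statically allocated frame set up in the prologue.  */
      HOST_WIDE_INT loffset
        = alloc_stack_frame_space (INTVAL (large_allocsize),
                                   PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
      /* Compute the runtime-aligned base address without touching the
         stack pointer again.  */
      large_base = get_dynamic_stack_base (loffset, large_align);
      large_allocation_done = true;
    }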

The kernel uses runtime alignment for the page structure (aligned
to 16 bytes), and apart from triggering the alloca warning
(-mwarn-dynamicstack), current GCC also generates inefficient
code like

  aghi %r15,-160  # prologue: create stack frame
  lgr  %r11,%r15  # prologue: generate frame pointer
  aghi %r15,-32   # space for dynamic stack

which could be simplified to

  aghi %r15,-192

(if later optimization passes are able to get rid of the frame
pointer).  Is there a specific reason why the patched behaviour
shouldn't be used for all platforms?
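
For illustration, a reduced stand-alone version of the kernel pattern
described above (hypothetical code, not taken from the kernel sources):

  /* A constant-size local whose requested alignment (16 bytes) exceeds the
     alignment the s390 ABI guarantees for the stack pointer, so GCC has to
     realign it at run time.  Before the patch this sets cfun->calls_alloca,
     triggers -mwarn-dynamicstack and emits the two stack decrements shown
     above; with the patch the space is carved out of the normal frame.  */
  struct page { unsigned char data[64]; };

  extern void use_page (struct page *);

  void handle_page (void)
  {
    struct page p __attribute__ ((aligned (16)));
    use_page (&p);
  }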

--

As the placement of runtime aligned stack variables with constant
size is done completely in the middle end, I don't see a way to fix
this in the backend.

gcc/ChangeLog:

2016-07-18  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	* cfgexpand.c (expand_stack_vars): Implement dynamic stack space
	allocation in the prologue.
	* explow.c (get_dynamic_stack_base): New function to return an address
	expression for the dynamic stack base.
	(get_dynamic_stack_size): New function to do the required dynamic stack
	space size calculations.
	(allocate_dynamic_stack_space): Use new functions.
	(align_dynamic_address): Move some code from
	allocate_dynamic_stack_space to new function.
	* explow.h (get_dynamic_stack_base, get_dynamic_stack_size): Export.

gcc/testsuite/ChangeLog:

2016-07-18  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	* gcc.target/s390/warn-dynamicstack-1.c: New test.
	* gcc.dg/stack-usage-2.c (foo3): Adapt expected warning.
	* gcc.dg/stack-layout-dynamic-1.c: New test.

From-SVN: r238432
parent 54158a1a

gcc/ChangeLog:

+2016-07-18  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	* cfgexpand.c (expand_stack_vars): Implement dynamic stack space
+	allocation in the prologue.
+	* explow.c (get_dynamic_stack_base): New function to return an address
+	expression for the dynamic stack base.
+	(get_dynamic_stack_size): New function to do the required dynamic stack
+	space size calculations.
+	(allocate_dynamic_stack_space): Use new functions.
+	(align_dynamic_address): Move some code from
+	allocate_dynamic_stack_space to new function.
+	* explow.h (get_dynamic_stack_base, get_dynamic_stack_size): Export.
+
 2016-07-18  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
 	* config/s390/s390.c (s390_encode_section_info): Always set

gcc/cfgexpand.c:

@@ -1053,6 +1053,7 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
   HOST_WIDE_INT large_size = 0, large_alloc = 0;
   rtx large_base = NULL;
   unsigned large_align = 0;
+  bool large_allocation_done = false;
   tree decl;
 
   /* Determine if there are any variables requiring "large" alignment.
@@ -1096,11 +1097,6 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	  large_size &= -(HOST_WIDE_INT)alignb;
 	  large_size += stack_vars[i].size;
 	}
-
-      /* If there were any, allocate space.  */
-      if (large_size > 0)
-	large_base = allocate_dynamic_stack_space (GEN_INT (large_size), 0,
-						   large_align, true);
     }
 
   for (si = 0; si < n; ++si)
@@ -1186,6 +1182,22 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
 	  /* Large alignment is only processed in the last pass.  */
 	  if (pred)
 	    continue;
 
+	  /* If there were any variables requiring "large" alignment, allocate
+	     space.  */
+	  if (large_size > 0 && ! large_allocation_done)
+	    {
+	      HOST_WIDE_INT loffset;
+	      rtx large_allocsize;
+
+	      large_allocsize = GEN_INT (large_size);
+	      get_dynamic_stack_size (&large_allocsize, 0, large_align, NULL);
+	      loffset = alloc_stack_frame_space
+		(INTVAL (large_allocsize),
+		 PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
+	      large_base = get_dynamic_stack_base (loffset, large_align);
+	      large_allocation_done = true;
+	    }
+
 	  gcc_assert (large_base != NULL);
 
 	  large_alloc += alignb - 1;

gcc/explow.c:

@@ -1146,82 +1146,55 @@ record_new_stack_level (void)
   update_sjlj_context ();
 }
 
-/* Return an rtx representing the address of an area of memory dynamically
-   pushed on the stack.
-
-   Any required stack pointer alignment is preserved.
-
-   SIZE is an rtx representing the size of the area.
-
-   SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
-   parameter may be zero.  If so, a proper value will be extracted
-   from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
-
-   REQUIRED_ALIGN is the alignment (in bits) required for the region
-   of memory.
-
-   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
-   stack space allocated by the generated code cannot be added with itself
-   in the course of the execution of the function.  It is always safe to
-   pass FALSE here and the following criterion is sufficient in order to
-   pass TRUE: every path in the CFG that starts at the allocation point and
-   loops to it executes the associated deallocation code.  */
-
-rtx
-allocate_dynamic_stack_space (rtx size, unsigned size_align,
-			      unsigned required_align, bool cannot_accumulate)
-{
-  HOST_WIDE_INT stack_usage_size = -1;
-  rtx_code_label *final_label;
-  rtx final_target, target;
-  unsigned extra;
-
-  /* If we're asking for zero bytes, it doesn't matter what we point
-     to since we can't dereference it.  But return a reasonable
-     address anyway.  */
-  if (size == const0_rtx)
-    return virtual_stack_dynamic_rtx;
-
-  /* Otherwise, show we're calling alloca or equivalent.  */
-  cfun->calls_alloca = 1;
-
-  /* If stack usage info is requested, look into the size we are passed.
-     We need to do so this early to avoid the obfuscation that may be
-     introduced later by the various alignment operations.  */
-  if (flag_stack_usage_info)
-    {
-      if (CONST_INT_P (size))
-	stack_usage_size = INTVAL (size);
-      else if (REG_P (size))
-	{
-	  /* Look into the last emitted insn and see if we can deduce
-	     something for the register.  */
-	  rtx_insn *insn;
-	  rtx set, note;
-	  insn = get_last_insn ();
-	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
-	    {
-	      if (CONST_INT_P (SET_SRC (set)))
-		stack_usage_size = INTVAL (SET_SRC (set));
-	      else if ((note = find_reg_equal_equiv_note (insn))
-		       && CONST_INT_P (XEXP (note, 0)))
-		stack_usage_size = INTVAL (XEXP (note, 0));
-	    }
-	}
-
-      /* If the size is not constant, we can't say anything.  */
-      if (stack_usage_size == -1)
-	{
-	  current_function_has_unbounded_dynamic_stack_size = 1;
-	  stack_usage_size = 0;
-	}
-    }
+/* Return an rtx doing runtime alignment to REQUIRED_ALIGN on TARGET.  */
+
+static rtx
+align_dynamic_address (rtx target, unsigned required_align)
+{
+  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
+     but we know it can't.  So add ourselves and then do
+     TRUNC_DIV_EXPR.  */
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
+				       Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
+			  gen_int_mode (required_align / BITS_PER_UNIT,
+					Pmode),
+			  NULL_RTX, 1);
+  target = expand_mult (Pmode, target,
+			gen_int_mode (required_align / BITS_PER_UNIT,
+				      Pmode),
+			NULL_RTX, 1);
+
+  return target;
+}
+
+/* Return an rtx through *PSIZE, representing the size of an area of memory to
+   be dynamically pushed on the stack.
+
+   *PSIZE is an rtx representing the size of the area.
+
+   SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
+   parameter may be zero.  If so, a proper value will be extracted
+   from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.
+
+   If PSTACK_USAGE_SIZE is not NULL it points to a value that is increased for
+   the additional size returned.  */
+
+void
+get_dynamic_stack_size (rtx *psize, unsigned size_align,
+			unsigned required_align,
+			HOST_WIDE_INT *pstack_usage_size)
+{
+  unsigned extra = 0;
+  rtx size = *psize;
 
   /* Ensure the size is in the proper mode.  */
   if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
     size = convert_to_mode (Pmode, size, 1);
 
+  /* Adjust SIZE_ALIGN, if needed.  */
   if (CONST_INT_P (size))
     {
       unsigned HOST_WIDE_INT lsb;
@@ -1255,8 +1228,8 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
       size = plus_constant (Pmode, size, extra);
       size = force_operand (size, NULL_RTX);
 
-      if (flag_stack_usage_info)
-	stack_usage_size += extra;
+      if (flag_stack_usage_info && pstack_usage_size)
+	*pstack_usage_size += extra;
 
       if (extra && size_align > BITS_PER_UNIT)
 	size_align = BITS_PER_UNIT;
@@ -1278,13 +1251,89 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
     {
       size = round_push (size);
 
-      if (flag_stack_usage_info)
+      if (flag_stack_usage_info && pstack_usage_size)
	{
	  int align = crtl->preferred_stack_boundary / BITS_PER_UNIT;
-	  stack_usage_size = (stack_usage_size + align - 1) / align * align;
+	  *pstack_usage_size =
+	    (*pstack_usage_size + align - 1) / align * align;
	}
     }
 
+  *psize = size;
+}
+
+/* Return an rtx representing the address of an area of memory dynamically
+   pushed on the stack.
+
+   Any required stack pointer alignment is preserved.
+
+   SIZE is an rtx representing the size of the area.
+
+   SIZE_ALIGN is the alignment (in bits) that we know SIZE has.  This
+   parameter may be zero.  If so, a proper value will be extracted
+   from SIZE if it is constant, otherwise BITS_PER_UNIT will be assumed.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.
+
+   If CANNOT_ACCUMULATE is set to TRUE, the caller guarantees that the
+   stack space allocated by the generated code cannot be added with itself
+   in the course of the execution of the function.  It is always safe to
+   pass FALSE here and the following criterion is sufficient in order to
+   pass TRUE: every path in the CFG that starts at the allocation point and
+   loops to it executes the associated deallocation code.  */
+
+rtx
+allocate_dynamic_stack_space (rtx size, unsigned size_align,
+			      unsigned required_align, bool cannot_accumulate)
+{
+  HOST_WIDE_INT stack_usage_size = -1;
+  rtx_code_label *final_label;
+  rtx final_target, target;
+
+  /* If we're asking for zero bytes, it doesn't matter what we point
+     to since we can't dereference it.  But return a reasonable
+     address anyway.  */
+  if (size == const0_rtx)
+    return virtual_stack_dynamic_rtx;
+
+  /* Otherwise, show we're calling alloca or equivalent.  */
+  cfun->calls_alloca = 1;
+
+  /* If stack usage info is requested, look into the size we are passed.
+     We need to do so this early to avoid the obfuscation that may be
+     introduced later by the various alignment operations.  */
+  if (flag_stack_usage_info)
+    {
+      if (CONST_INT_P (size))
+	stack_usage_size = INTVAL (size);
+      else if (REG_P (size))
+	{
+	  /* Look into the last emitted insn and see if we can deduce
+	     something for the register.  */
+	  rtx_insn *insn;
+	  rtx set, note;
+	  insn = get_last_insn ();
+	  if ((set = single_set (insn)) && rtx_equal_p (SET_DEST (set), size))
+	    {
+	      if (CONST_INT_P (SET_SRC (set)))
+		stack_usage_size = INTVAL (SET_SRC (set));
+	      else if ((note = find_reg_equal_equiv_note (insn))
+		       && CONST_INT_P (XEXP (note, 0)))
+		stack_usage_size = INTVAL (XEXP (note, 0));
+	    }
+	}
+
+      /* If the size is not constant, we can't say anything.  */
+      if (stack_usage_size == -1)
+	{
+	  current_function_has_unbounded_dynamic_stack_size = 1;
+	  stack_usage_size = 0;
+	}
+    }
+
+  get_dynamic_stack_size (&size, size_align, required_align, &stack_usage_size);
+
   target = gen_reg_rtx (Pmode);
 
   /* The size is supposed to be fully adjusted at this point so record it
@@ -1447,19 +1496,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
 
       target = final_target;
     }
 
-  /* CEIL_DIV_EXPR needs to worry about the addition overflowing,
-     but we know it can't.  So add ourselves and then do
-     TRUNC_DIV_EXPR.  */
-  target = expand_binop (Pmode, add_optab, target,
-			 gen_int_mode (required_align / BITS_PER_UNIT - 1,
-				       Pmode),
-			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
-  target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target,
-			  gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
-			  NULL_RTX, 1);
-  target = expand_mult (Pmode, target,
-			gen_int_mode (required_align / BITS_PER_UNIT, Pmode),
-			NULL_RTX, 1);
+  target = align_dynamic_address (target, required_align);
 
   /* Now that we've committed to a return value, mark its alignment.  */
   mark_reg_pointer (target, required_align);
@@ -1469,6 +1506,38 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align,
   return target;
 }
 
+/* Return an rtx representing the address of an area of memory already
+   statically pushed onto the stack in the virtual stack vars area.  (It is
+   assumed that the area is allocated in the function prologue.)
+
+   Any required stack pointer alignment is preserved.
+
+   OFFSET is the offset of the area into the virtual stack vars area.
+
+   REQUIRED_ALIGN is the alignment (in bits) required for the region
+   of memory.  */
+
+rtx
+get_dynamic_stack_base (HOST_WIDE_INT offset, unsigned required_align)
+{
+  rtx target;
+
+  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
+    crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+
+  target = gen_reg_rtx (Pmode);
+  emit_move_insn (target, virtual_stack_vars_rtx);
+  target = expand_binop (Pmode, add_optab, target,
+			 gen_int_mode (offset, Pmode),
+			 NULL_RTX, 1, OPTAB_LIB_WIDEN);
+  target = align_dynamic_address (target, required_align);
+
+  /* Now that we've committed to a return value, mark its alignment.  */
+  mark_reg_pointer (target, required_align);
+
+  return target;
+}
+
 /* A front end may want to override GCC's stack checking by providing a
    run-time routine to call to check the stack, so provide a mechanism for

gcc/explow.h:

@@ -87,6 +87,14 @@ extern void record_new_stack_level (void);
 /* Allocate some space on the stack dynamically and return its address.  */
 extern rtx allocate_dynamic_stack_space (rtx, unsigned, unsigned, bool);
 
+/* Calculate the necessary size of a constant dynamic stack allocation from the
+   size of the variable area.  */
+extern void get_dynamic_stack_size (rtx *, unsigned, unsigned, HOST_WIDE_INT *);
+
+/* Returns the address of the dynamic stack space without allocating it.  */
+extern rtx get_dynamic_stack_base (HOST_WIDE_INT offset,
+				   unsigned required_align);
+
 /* Emit one stack probe at ADDRESS, an address within the stack.  */
 extern void emit_stack_probe (rtx);

gcc/testsuite/ChangeLog:

+2016-07-18  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	* gcc.target/s390/warn-dynamicstack-1.c: New test.
+	* gcc.dg/stack-usage-2.c (foo3): Adapt expected warning.
+	* gcc.dg/stack-layout-dynamic-1.c: New test.
+
 2016-07-18  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
 	* gcc.target/s390/nolrl-1.c: New test.

gcc/testsuite/gcc.dg/stack-layout-dynamic-1.c (new file):

/* Verify that run time aligned local variables are allocated in the prologue
   in one pass together with normal local variables.  */
/* { dg-do compile } */
/* { dg-options "-O0 -fomit-frame-pointer" } */

extern void bar (void *, void *, void *);

void foo (void)
{
  int i;
  __attribute__ ((aligned(65536))) char runtime_aligned_1[512];
  __attribute__ ((aligned(32768))) char runtime_aligned_2[1024];
  bar (&i, &runtime_aligned_1, &runtime_aligned_2);
}

/* { dg-final { scan-assembler-not "cfi_def_cfa_register" } } */

gcc/testsuite/gcc.dg/stack-usage-2.c:

@@ -16,7 +16,9 @@ int foo2 (void)  /* { dg-warning "stack usage is \[0-9\]* bytes" } */
   return 0;
 }
 
-int foo3 (void) /* { dg-warning "stack usage might be \[0-9\]* bytes" } */
+/* The actual warning depends on whether stack space is allocated dynamically
+   or statically.  */
+int foo3 (void) /* { dg-warning "stack usage (might be)|(is) \[0-9\]* bytes" } */
 {
   char arr[1024] __attribute__((aligned (512)));
   arr[0] = 1;

gcc/testsuite/gcc.target/s390/warn-dynamicstack-1.c (new file):

/* Check that the stack pointer is decreased only once in a function with
   runtime aligned stack variables and -mwarn-dynamicstack does not generate a
   warning.  */
/* { dg-do compile { target { s390*-*-* } } } */
/* { dg-options "-O2 -mwarn-dynamicstack" } */

extern int bar (char *pl);

int foo (long size)
{
  char __attribute__ ((aligned(16))) l = size;
  return bar (&l);
}

/* { dg-final { scan-assembler-times "%r15,-" 1 } } */