Commit 8c6e3b23 by Tamar Christina Committed by Tamar Christina

Ensure that the outgoing argument size is at least 8 bytes when using alloca with stack-clash protection.

This patch adds a requirement that the size of the outgoing arguments area for a
function is at least 8 bytes when using stack-clash protection and alloca.

By using this condition we can avoid a check in the alloca code and so have
smaller and simpler code there.

A simplified version of the AArch64 stack frames is:

   +-----------------------+                                              
   |                       |                                                 
   |                       |                                              
   |                       |                                              
   +-----------------------+                                              
   |LR                     |                                              
   +-----------------------+                                              
   |FP                     |                                              
   +-----------------------+                                              
   |dynamic allocations    | ----  expanding area which will push the outgoing
   +-----------------------+       args down during each allocation.
   |padding                |
   +-----------------------+
   |outgoing stack args    | ---- safety buffer of 8 bytes (aligned)
   +-----------------------+

By always defining an outgoing argument, alloca(0) effectively is safe to probe
at $sp due to the reserved buffer being there.  It will never corrupt the stack.

This is also safe for alloca(x) where x is 0 or x % page_size == 0.  In the
former it is the same case as alloca(0) while the latter is safe because any
allocation pushes the outgoing stack args down:

   |FP                     |                                              
   +-----------------------+                                              
   |                       |
   |dynamic allocations    | ----  alloca (x)
   |                       |
   +-----------------------+
   |padding                |
   +-----------------------+
   |outgoing stack args    | ---- safety buffer of 8 bytes (aligned)
   +-----------------------+

Which means when you probe for the residual, if it's 0 you'll again just probe
in the outgoing stack args range, which we know is non-zero (at least 8 bytes).

gcc/

	PR target/86486
	* config/aarch64/aarch64.h (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS,
	STACK_DYNAMIC_OFFSET): New.
	* config/aarch64/aarch64.c (aarch64_layout_frame):
	Update outgoing args size.
	(aarch64_stack_clash_protection_alloca_probe_range,
	TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE): New.

gcc/testsuite/

	PR target/86486
	* gcc.target/aarch64/stack-check-alloca-1.c: New.
	* gcc.target/aarch64/stack-check-alloca-10.c: New.
	* gcc.target/aarch64/stack-check-alloca-2.c: New.
	* gcc.target/aarch64/stack-check-alloca-3.c: New.
	* gcc.target/aarch64/stack-check-alloca-4.c: New.
	* gcc.target/aarch64/stack-check-alloca-5.c: New.
	* gcc.target/aarch64/stack-check-alloca-6.c: New.
	* gcc.target/aarch64/stack-check-alloca-7.c: New.
	* gcc.target/aarch64/stack-check-alloca-8.c: New.
	* gcc.target/aarch64/stack-check-alloca-9.c: New.
	* gcc.target/aarch64/stack-check-alloca.h: New.
	* gcc.target/aarch64/stack-check-14.c: New.
	* gcc.target/aarch64/stack-check-15.c: New.

From-SVN: r264751
parent 2c25083e
2018-10-01 Tamar Christina <tamar.christina@arm.com> 2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486 PR target/86486
* config/aarch64/aarch64.h (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS,
STACK_DYNAMIC_OFFSET): New.
* config/aarch64/aarch64.c (aarch64_layout_frame):
Update outgoing args size.
(aarch64_stack_clash_protection_alloca_probe_range,
TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE): New.
2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486
* explow.c (anti_adjust_stack_and_probe_stack_clash): Support custom * explow.c (anti_adjust_stack_and_probe_stack_clash): Support custom
probe ranges. probe ranges.
* target.def (stack_clash_protection_alloca_probe_range): New. * target.def (stack_clash_protection_alloca_probe_range): New.
...@@ -4129,6 +4129,10 @@ aarch64_layout_frame (void) ...@@ -4129,6 +4129,10 @@ aarch64_layout_frame (void)
cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain (); cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain ();
/* Adjust the outgoing arguments size if required. Keep it in sync with what
the mid-end is doing. */
crtl->outgoing_args_size = STACK_DYNAMIC_OFFSET (cfun);
#define SLOT_NOT_REQUIRED (-2) #define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED (-1) #define SLOT_REQUIRED (-1)
...@@ -4899,6 +4903,16 @@ aarch64_set_handled_components (sbitmap components) ...@@ -4899,6 +4903,16 @@ aarch64_set_handled_components (sbitmap components)
cfun->machine->reg_is_wrapped_separately[regno] = true; cfun->machine->reg_is_wrapped_separately[regno] = true;
} }
/* On AArch64 we have an ABI defined safe buffer. This constant is used to
determine the probe offset for alloca.
Implements the TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE hook: takes
no arguments and returns STACK_CLASH_CALLER_GUARD unconditionally. */
static HOST_WIDE_INT
aarch64_stack_clash_protection_alloca_probe_range (void)
{
return STACK_CLASH_CALLER_GUARD;
}
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch /* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
registers. If POLY_SIZE is not large enough to require a probe this function registers. If POLY_SIZE is not large enough to require a probe this function
will only adjust the stack. When allocating the stack space will only adjust the stack. When allocating the stack space
...@@ -18413,6 +18427,10 @@ aarch64_libgcc_floating_mode_supported_p ...@@ -18413,6 +18427,10 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_CONSTANT_ALIGNMENT #undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment #define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
#undef TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE
#define TARGET_STACK_CLASH_PROTECTION_ALLOCA_PROBE_RANGE \
aarch64_stack_clash_protection_alloca_probe_range
#undef TARGET_COMPUTE_PRESSURE_CLASSES #undef TARGET_COMPUTE_PRESSURE_CLASSES
#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes #define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
......
...@@ -88,6 +88,10 @@ ...@@ -88,6 +88,10 @@
before probing has to be done for stack clash protection. */ before probing has to be done for stack clash protection. */
#define STACK_CLASH_CALLER_GUARD 1024 #define STACK_CLASH_CALLER_GUARD 1024
/* This value represents the minimum number of bytes we expect the function's
outgoing arguments to be when stack-clash is enabled. */
#define STACK_CLASH_MIN_BYTES_OUTGOING_ARGS 8
/* This value controls how many pages we manually unroll the loop for when /* This value controls how many pages we manually unroll the loop for when
generating stack clash probes. */ generating stack clash probes. */
#define STACK_CLASH_MAX_UNROLL_PAGES 4 #define STACK_CLASH_MAX_UNROLL_PAGES 4
...@@ -1076,4 +1080,17 @@ extern poly_uint16 aarch64_sve_vg; ...@@ -1076,4 +1080,17 @@ extern poly_uint16 aarch64_sve_vg;
#define REGMODE_NATURAL_SIZE(MODE) aarch64_regmode_natural_size (MODE) #define REGMODE_NATURAL_SIZE(MODE) aarch64_regmode_natural_size (MODE)
/* Allocate a minimum of STACK_CLASH_MIN_BYTES_OUTGOING_ARGS bytes for the
outgoing arguments if stack clash protection is enabled. This is essential
as the extra arg space allows us to skip a check in alloca.
The minimum is used only when all three conditions hold:
flag_stack_clash_protection is set, the current function calls alloca, and
crtl->outgoing_args_size is known to be less than the minimum. The result is
then the minimum rounded up to STACK_BOUNDARY expressed in bytes. In every
other case the result is crtl->outgoing_args_size + STACK_POINTER_OFFSET.
FUNDECL is not referenced by the expansion; the state of the current
function is read directly through cfun and crtl.
NOTE(review): the first arm does not add STACK_POINTER_OFFSET, unlike the
second arm -- presumably harmless if that offset is 0 on this target;
confirm. */
#undef STACK_DYNAMIC_OFFSET
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
((flag_stack_clash_protection \
&& cfun->calls_alloca \
&& known_lt (crtl->outgoing_args_size, \
STACK_CLASH_MIN_BYTES_OUTGOING_ARGS)) \
? ROUND_UP (STACK_CLASH_MIN_BYTES_OUTGOING_ARGS, \
STACK_BOUNDARY / BITS_PER_UNIT) \
: (crtl->outgoing_args_size + STACK_POINTER_OFFSET))
#endif /* GCC_AARCH64_H */ #endif /* GCC_AARCH64_H */
2018-10-01 Tamar Christina <tamar.christina@arm.com> 2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486 PR target/86486
* gcc.target/aarch64/stack-check-alloca-1.c: New.
* gcc.target/aarch64/stack-check-alloca-10.c: New.
* gcc.target/aarch64/stack-check-alloca-2.c: New.
* gcc.target/aarch64/stack-check-alloca-3.c: New.
* gcc.target/aarch64/stack-check-alloca-4.c: New.
* gcc.target/aarch64/stack-check-alloca-5.c: New.
* gcc.target/aarch64/stack-check-alloca-6.c: New.
* gcc.target/aarch64/stack-check-alloca-7.c: New.
* gcc.target/aarch64/stack-check-alloca-8.c: New.
* gcc.target/aarch64/stack-check-alloca-9.c: New.
* gcc.target/aarch64/stack-check-alloca.h: New.
* gcc.target/aarch64/stack-check-14.c: New.
* gcc.target/aarch64/stack-check-15.c: New.
2018-10-01 Tamar Christina <tamar.christina@arm.com>
PR target/86486
* gcc.target/aarch64/stack-check-prologue-16.c: New test * gcc.target/aarch64/stack-check-prologue-16.c: New test
* gcc.target/aarch64/stack-check-cfa-3.c: New test. * gcc.target/aarch64/stack-check-cfa-3.c: New test.
* gcc.target/aarch64/sve/struct_vect_24.c: New test. * gcc.target/aarch64/sve/struct_vect_24.c: New test.
......
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
/* t1 is only declared here; no definition is part of this test file. */
int t1(int);
/* Allocate a constant 4050 bytes with __builtin_alloca, pass X through t1,
and return the byte of the allocation at the index t1 returned. The buffer
is never written; this file is a compile-only test (dg-do compile). */
int t2(int x)
{
char *p = __builtin_alloca (4050);
x = t1 (x);
return p[x];
}
/* This test has a constant sized alloca that is smaller than the
probe interval. Only one probe is required since the value is larger
than 1024 bytes but smaller than 63k.
The form can change quite a bit so we just check for one
probe without looking at the actual address. */
/* { dg-final { scan-assembler-times "str\\txzr," 1 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
/* t1 is only declared here; no definition is part of this test file. */
int t1(int);
/* Allocate X bytes (a run-time value) with __builtin_alloca, pass X through
t1, and return the byte of the allocation at the index t1 returned. The
buffer is never written; this file is a compile-only test (dg-do compile). */
int t2(int x)
{
char *p = __builtin_alloca (x);
x = t1 (x);
return p[x];
}
/* This test has a variable sized alloca. It requires 3 probes:
one in the loop and two for the residual, one for when it's < 1024 and one for
when it's not.
The form can change quite a bit so we just check for three
probes without looking at the actual address. */
/* { dg-final { scan-assembler-times "str\\txzr," 3 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE y
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 2 } } */
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp\]} 1 } } */
/* Dynamic alloca, expect loop, and 2 probes with 1kB offset and 1 at sp.
1st probe is inside the loop for the full guard-size allocations, second
probe is for the case where residual is zero and the final probe for when
residual is > 1024 bytes. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 127.5 * 64 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 2 } } */
/* Large alloca of an amount which isn't a multiple of a guard-size, and
residual is more than 1kB. Loop expected with one 1kB probe offset and
one residual probe at offset 1kB. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 0
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-not {str\s+xzr,} } } */
/* Alloca of 0 should emit no probes, boundary condition. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 100
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 8\]} 1 } } */
/* Alloca is less than 1kB, 1 probe expected at word offset. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 2 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is more than 1kB, but less than guard-size, 1 probe expected at
1kB offset. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 63 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is more than 1kB, but less than guard-size, 1 probe expected at
1kB offset. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 63.5 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is more than 1kB, but less than guard-size, 1 probe expected at 1kB
offset. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 64 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Alloca is exactly one guard-size, 1 probe expected at 1kB offset.
Boundary condition. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 65 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 8\]} 1 } } */
/* Alloca is more than one guard-page, and residual is exactly 1kB. 2 probes
expected. One at 1kB offset for the guard-size allocation and one at word
offset for the residual. */
/* { dg-do compile } */
/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
/* { dg-require-effective-target supports_stack_clash_protection } */
#define SIZE 127 * 64 * 1024
#include "stack-check-alloca.h"
/* { dg-final { scan-assembler-times {str\s+xzr, \[sp, 1024\]} 1 } } */
/* Large alloca of a constant amount which is a multiple of a guard-size,
no residual. Loop expected with one 1kB probe offset and no residual probe
because residual is at compile time known to be zero. */
#include <alloca.h>
/* Store a NUL byte at PTR[Y]. Declared noinline and noipa -- presumably so
the call from f_caller stays a real call in the generated code; confirm. */
__attribute__((noinline, noipa))
void g (char* ptr, int y)
{
ptr[y] = '\0';
}
/* Allocate SIZE bytes with alloca and pass the buffer together with Y to g.
SIZE is not defined in this header; each including test defines it before
the #include. */
void f_caller (int y)
{
char* pStr = alloca(SIZE);
g (pStr, y);
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment