Commit 5be6b295 by Wilco Dijkstra Committed by Wilco Dijkstra

Improve stack adjustment by reusing a temporary move immediate from the epilog...

Improve stack adjustment by reusing a temporary move immediate from the epilog
if the register is still valid in the epilog.  This generates smaller code for
leaf functions with a stack size of more than 4KB.

gcc/
	* config/aarch64/aarch64.c (aarch64_add_constant_internal):
	Add extra argument to allow emitting the move immediate.
	Use add/sub with positive immediate.
	(aarch64_add_constant): Add inline function.
	(aarch64_add_sp): Likewise.
	(aarch64_sub_sp): Likewise.
	(aarch64_expand_prologue): Call aarch64_sub_sp.
	(aarch64_expand_epilogue): Call aarch64_add_sp.
	Decide when to leave out move.
	(aarch64_output_mi_thunk): Call aarch64_add_constant.

testsuite/
	* gcc.target/aarch64/test_frame_17.c: New test.

From-SVN: r241420
parent 4b0685d9
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com> 2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (aarch64_add_constant_internal):
Add extra argument to allow emitting the move immediate.
Use add/sub with positive immediate.
(aarch64_add_constant): Add inline function.
(aarch64_add_sp): Likewise.
(aarch64_sub_sp): Likewise.
(aarch64_expand_prologue): Call aarch64_sub_sp.
(aarch64_expand_epilogue): Call aarch64_add_sp.
Decide when to leave out move.
(aarch64_output_mi_thunk): Call aarch64_add_constant.
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c (aarch64_layout_frame): * config/aarch64/aarch64.c (aarch64_layout_frame):
Align FP callee-saves. Align FP callee-saves.
......
...@@ -1954,26 +1954,31 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) ...@@ -1954,26 +1954,31 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest)); aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
} }
/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to held /* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
intermediate value if necessary. temporary value if necessary. FRAME_RELATED_P should be true if
the RTX_FRAME_RELATED flag should be set and CFA adjustments added
This function is sometimes used to adjust the stack pointer, so we must to the generated instructions. If SCRATCHREG is known to hold
ensure that it can never cause transient stack deallocation by writing an abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
invalid value into REGNUM. */ immediate again.
Since this function may be used to adjust the stack pointer, we must
ensure that it cannot cause transient stack deallocation (for example
by first incrementing SP and then decrementing when adjusting by a
large immediate). */
static void static void
aarch64_add_constant (machine_mode mode, int regnum, int scratchreg, aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg,
HOST_WIDE_INT delta, bool frame_related_p) HOST_WIDE_INT delta, bool frame_related_p,
bool emit_move_imm)
{ {
HOST_WIDE_INT mdelta = abs_hwi (delta); HOST_WIDE_INT mdelta = abs_hwi (delta);
rtx this_rtx = gen_rtx_REG (mode, regnum); rtx this_rtx = gen_rtx_REG (mode, regnum);
rtx_insn *insn; rtx_insn *insn;
/* Do nothing if mdelta is zero. */
if (!mdelta) if (!mdelta)
return; return;
/* We only need single instruction if the offset fit into add/sub. */ /* Single instruction adjustment. */
if (aarch64_uimm12_shift (mdelta)) if (aarch64_uimm12_shift (mdelta))
{ {
insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta))); insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta)));
...@@ -1981,11 +1986,10 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg, ...@@ -1981,11 +1986,10 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
return; return;
} }
/* We need two add/sub instructions, each one performing part of the /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
calculation. Don't do this if the addend can be loaded into register with Only do this if mdelta is not a 16-bit move as adjusting using a move
a single instruction, in that case we prefer a move to a scratch register is better. */
following by an addition. */ if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode))
if (mdelta < 0x1000000 && !aarch64_move_imm (delta, mode))
{ {
HOST_WIDE_INT low_off = mdelta & 0xfff; HOST_WIDE_INT low_off = mdelta & 0xfff;
...@@ -1997,10 +2001,12 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg, ...@@ -1997,10 +2001,12 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
return; return;
} }
/* Otherwise use generic function to handle all other situations. */ /* Emit a move immediate if required and an addition/subtraction. */
rtx scratch_rtx = gen_rtx_REG (mode, scratchreg); rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode); if (emit_move_imm)
insn = emit_insn (gen_add2_insn (this_rtx, scratch_rtx)); aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
: gen_add2_insn (this_rtx, scratch_rtx));
if (frame_related_p) if (frame_related_p)
{ {
RTX_FRAME_RELATED_P (insn) = frame_related_p; RTX_FRAME_RELATED_P (insn) = frame_related_p;
...@@ -2009,6 +2015,27 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg, ...@@ -2009,6 +2015,27 @@ aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
} }
} }
/* Add DELTA to register REGNUM in mode MODE, using SCRATCHREG to hold a
   temporary value if necessary.  This is the general-purpose wrapper
   around aarch64_add_constant_internal: it passes false for
   FRAME_RELATED_P and true for EMIT_MOVE_IMM, so the generated
   instructions are not marked frame-related and the move immediate is
   never suppressed.  */
static inline void
aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
HOST_WIDE_INT delta)
{
aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true);
}
/* Add DELTA to the stack pointer (SP_REGNUM in Pmode), using SCRATCHREG
   to hold a temporary value if necessary.  FRAME_RELATED_P is always
   passed as true.  EMIT_MOVE_IMM is forwarded unchanged; the caller may
   pass false when SCRATCHREG is known to already hold abs (DELTA), so
   that the immediate is not emitted again.  */
static inline void
aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm)
{
aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta,
true, emit_move_imm);
}
/* Subtract DELTA from the stack pointer (SP_REGNUM in Pmode) by adding
   -DELTA, using SCRATCHREG to hold a temporary value if necessary.
   FRAME_RELATED_P is forwarded unchanged.  EMIT_MOVE_IMM is always
   passed as true, so the move immediate is never suppressed here.  */
static inline void
aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p)
{
/* Negate DELTA: the internal helper adds its argument to the register.  */
aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta,
frame_related_p, true);
}
static bool static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
tree exp ATTRIBUTE_UNUSED) tree exp ATTRIBUTE_UNUSED)
...@@ -3230,7 +3257,7 @@ aarch64_expand_prologue (void) ...@@ -3230,7 +3257,7 @@ aarch64_expand_prologue (void)
aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size); aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
} }
aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, -initial_adjust, true); aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
if (callee_adjust != 0) if (callee_adjust != 0)
aarch64_push_regs (reg1, reg2, callee_adjust); aarch64_push_regs (reg1, reg2, callee_adjust);
...@@ -3251,8 +3278,7 @@ aarch64_expand_prologue (void) ...@@ -3251,8 +3278,7 @@ aarch64_expand_prologue (void)
callee_adjust != 0 || frame_pointer_needed); callee_adjust != 0 || frame_pointer_needed);
aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM,
callee_adjust != 0 || frame_pointer_needed); callee_adjust != 0 || frame_pointer_needed);
aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, -final_adjust, aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed);
!frame_pointer_needed);
} }
/* Return TRUE if we can use a simple_return insn. /* Return TRUE if we can use a simple_return insn.
...@@ -3317,7 +3343,7 @@ aarch64_expand_epilogue (bool for_sibcall) ...@@ -3317,7 +3343,7 @@ aarch64_expand_epilogue (bool for_sibcall)
RTX_FRAME_RELATED_P (insn) = callee_adjust == 0; RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
} }
else else
aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true); aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM));
aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
callee_adjust != 0, &cfi_ops); callee_adjust != 0, &cfi_ops);
...@@ -3340,7 +3366,7 @@ aarch64_expand_epilogue (bool for_sibcall) ...@@ -3340,7 +3366,7 @@ aarch64_expand_epilogue (bool for_sibcall)
cfi_ops = NULL; cfi_ops = NULL;
} }
aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true); aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM));
if (cfi_ops) if (cfi_ops)
{ {
...@@ -3435,7 +3461,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ...@@ -3435,7 +3461,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
emit_note (NOTE_INSN_PROLOGUE_END); emit_note (NOTE_INSN_PROLOGUE_END);
if (vcall_offset == 0) if (vcall_offset == 0)
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta, false); aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
else else
{ {
gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
...@@ -3451,7 +3477,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, ...@@ -3451,7 +3477,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx, addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
plus_constant (Pmode, this_rtx, delta)); plus_constant (Pmode, this_rtx, delta));
else else
aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta, false); aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta);
} }
if (Pmode == ptr_mode) if (Pmode == ptr_mode)
......
2016-10-21 Wilco Dijkstra <wdijkstr@arm.com>
* gcc.target/aarch64/test_frame_17.c: New test.
2016-10-21 Andris Pavenis <andris.pavenis@iki.fi> 2016-10-21 Andris Pavenis <andris.pavenis@iki.fi>
PR preprocessor/71681 PR preprocessor/71681
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment