Commit 5a200acb by Richard Earnshaw Committed by Richard Earnshaw

arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on initial store.

	* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on
	initial store.
	* thumb2.md (thumb2_storewb_pairsi): New pattern.

From-SVN: r202279
parent 5922847b
2013-09-05 Richard Earnshaw <rearnsha@arm.com>
* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on
initial store.
* thumb2.md (thumb2_storewb_pairsi): New pattern.
2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com> 2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com>
* config/aarch64/aarch64-option-extensions.def: Add * config/aarch64/aarch64-option-extensions.def: Add
......
...@@ -16814,123 +16814,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, ...@@ -16814,123 +16814,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
} }
} }
/* Generate and emit a pattern that will be recognized as STRD pattern. If even /* Generate and emit a sequence of insns equivalent to PUSH, but using
number of registers are being pushed, multiple STRD patterns are created for STR and STRD. If an even number of registers are being pushed, one
all register pairs. If odd number of registers are pushed, emit a or more STRD patterns are created for each register pair. If an
combination of STRDs and STR for the prologue saves. */ odd number of registers are pushed, emit an initial STR followed by
as many STRD instructions as are needed. This works best when the
stack is initially 64-bit aligned (the normal case), since it
ensures that each STRD is also 64-bit aligned. */
static void static void
thumb2_emit_strd_push (unsigned long saved_regs_mask) thumb2_emit_strd_push (unsigned long saved_regs_mask)
{ {
int num_regs = 0; int num_regs = 0;
int i, j; int i;
int regno;
rtx par = NULL_RTX; rtx par = NULL_RTX;
rtx insn = NULL_RTX;
rtx dwarf = NULL_RTX; rtx dwarf = NULL_RTX;
rtx tmp, reg, tmp1; rtx tmp;
bool first = true;
for (i = 0; i <= LAST_ARM_REGNUM; i++) num_regs = bit_count (saved_regs_mask);
if (saved_regs_mask & (1 << i))
num_regs++;
gcc_assert (num_regs && num_regs <= 16); /* Must be at least one register to save, and can't save SP or PC. */
gcc_assert (num_regs > 0 && num_regs <= 14);
gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
/* Pre-decrement the stack pointer, based on there being num_regs 4-byte /* Create sequence for DWARF info. All the frame-related data for
registers to push. */ debugging is held in this wrapper. */
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
/* Describe the stack adjustment. */
tmp = gen_rtx_SET (VOIDmode, tmp = gen_rtx_SET (VOIDmode,
stack_pointer_rtx, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
RTX_FRAME_RELATED_P (tmp) = 1; RTX_FRAME_RELATED_P (tmp) = 1;
insn = emit_insn (tmp); XVECEXP (dwarf, 0, 0) = tmp;
/* Create sequence for DWARF info. */
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
/* RTLs cannot be shared, hence create new copy for dwarf. */ /* Find the first register. */
tmp1 = gen_rtx_SET (VOIDmode, for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
stack_pointer_rtx, ;
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
RTX_FRAME_RELATED_P (tmp1) = 1;
XVECEXP (dwarf, 0, 0) = tmp1;
gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); i = 0;
gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
/* Var j iterates over all the registers to gather all the registers in /* If there's an odd number of registers to push, start off by
saved_regs_mask. Var i gives index of register R_j in stack frame. pushing a single register. This ensures that subsequent strd
A PARALLEL RTX of register-pair is created here, so that pattern for operations are dword aligned (assuming that SP was originally
STRD can be matched. If num_regs is odd, 1st register will be pushed 64-bit aligned). */
using STR and remaining registers will be pushed with STRD in pairs. if ((num_regs & 1) != 0)
If num_regs is even, all registers are pushed with STRD in pairs. {
Hence, skip first element for odd num_regs. */ rtx reg, mem, insn;
for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
if (saved_regs_mask & (1 << j))
{
/* Create RTX for store. New RTX is created for dwarf as
they are not sharable. */
reg = gen_rtx_REG (SImode, j);
tmp = gen_rtx_SET (SImode,
gen_frame_mem
(SImode,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
reg);
tmp1 = gen_rtx_SET (SImode,
gen_frame_mem
(SImode,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
reg);
RTX_FRAME_RELATED_P (tmp) = 1;
RTX_FRAME_RELATED_P (tmp1) = 1;
if (((i - (num_regs % 2)) % 2) == 1)
/* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to
be created. Hence create it first. The STRD pattern we are
generating is :
[ (SET (MEM (PLUS (SP) (NUM))) (reg_t1))
(SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ]
where the target registers need not be consecutive. */
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
/* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is reg = gen_rtx_REG (SImode, regno);
even, the reg_j is added as 0th element and if it is odd, reg_i is if (num_regs == 1)
added as 1st element of STRD pattern shown above. */ mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp; stack_pointer_rtx));
XVECEXP (dwarf, 0, (i + 1)) = tmp1; else
mem = gen_frame_mem (Pmode,
gen_rtx_PRE_MODIFY
(Pmode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-4 * num_regs)));
if (((i - (num_regs % 2)) % 2) == 0) tmp = gen_rtx_SET (VOIDmode, mem, reg);
/* When (i - (num_regs % 2)) is even, RTXs for both the registers RTX_FRAME_RELATED_P (tmp) = 1;
to be loaded are generated in above given STRD pattern, and the insn = emit_insn (tmp);
pattern can be emitted now. */ RTX_FRAME_RELATED_P (insn) = 1;
emit_insn (par); add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
reg);
RTX_FRAME_RELATED_P (tmp) = 1;
i++;
regno++;
XVECEXP (dwarf, 0, i) = tmp;
first = false;
}
i--; while (i < num_regs)
} if (saved_regs_mask & (1 << regno))
{
rtx reg1, reg2, mem1, mem2;
rtx tmp0, tmp1, tmp2;
int regno2;
if ((num_regs % 2) == 1) /* Find the register to pair with this one. */
{ for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
/* If odd number of registers are pushed, generate STR pattern to store regno2++)
lone register. */ ;
for (; (saved_regs_mask & (1 << j)) == 0; j--);
tmp1 = gen_frame_mem (SImode, plus_constant (Pmode, reg1 = gen_rtx_REG (SImode, regno);
stack_pointer_rtx, 4 * i)); reg2 = gen_rtx_REG (SImode, regno2);
reg = gen_rtx_REG (SImode, j);
tmp = gen_rtx_SET (SImode, tmp1, reg);
RTX_FRAME_RELATED_P (tmp) = 1;
emit_insn (tmp); if (first)
{
rtx insn;
first = false;
mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
-4 * num_regs));
mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
-4 * (num_regs - 1)));
tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-4 * (num_regs)));
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
RTX_FRAME_RELATED_P (tmp0) = 1;
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
XVECEXP (par, 0, 0) = tmp0;
XVECEXP (par, 0, 1) = tmp1;
XVECEXP (par, 0, 2) = tmp2;
insn = emit_insn (par);
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
else
{
mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
4 * i));
mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
4 * (i + 1)));
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
XVECEXP (par, 0, 0) = tmp1;
XVECEXP (par, 0, 1) = tmp2;
emit_insn (par);
}
tmp1 = gen_rtx_SET (SImode, /* Create unwind information. This is an approximation. */
gen_frame_mem tmp1 = gen_rtx_SET (VOIDmode,
(SImode, gen_frame_mem (Pmode,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)), plus_constant (Pmode,
reg); stack_pointer_rtx,
RTX_FRAME_RELATED_P (tmp1) = 1; 4 * i)),
XVECEXP (dwarf, 0, (i + 1)) = tmp1; reg1);
} tmp2 = gen_rtx_SET (VOIDmode,
gen_frame_mem (Pmode,
plus_constant (Pmode,
stack_pointer_rtx,
4 * (i + 1))),
reg2);
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
XVECEXP (dwarf, 0, i + 1) = tmp1;
XVECEXP (dwarf, 0, i + 2) = tmp2;
i += 2;
regno = regno2 + 1;
}
else
regno++;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
RTX_FRAME_RELATED_P (insn) = 1;
return; return;
} }
......
...@@ -325,6 +325,21 @@ ...@@ -325,6 +325,21 @@
(set_attr "neg_pool_range" "*,*,*,250")] (set_attr "neg_pool_range" "*,*,*,250")]
) )
;; Store a pair of SImode registers with base-register writeback.
;;
;; The RTL template is three sets:
;;   - operand 0 is set to operand 1 plus the constant operand 2; operand 1
;;     is tied to operand 0 by its "0" constraint;
;;   - operand 3 is stored to the SImode memory at (operand 0 + operand 2);
;;   - operand 4 is stored to the SImode memory at (operand 0 + operand 5).
;;
;; The insn condition restricts the pattern to TARGET_THUMB2 and requires
;; operand 5 to equal operand 2 + 4, i.e. the second store addresses the
;; word immediately following the first.  The output template is a single
;; STRD using operand 2 as the offset, with the "!" suffix on the address.
(define_insn "*thumb2_storewb_pairsi"
[(set (match_operand:SI 0 "register_operand" "=&kr")
(plus:SI (match_operand:SI 1 "register_operand" "0")
(match_operand:SI 2 "const_int_operand" "n")))
(set (mem:SI (plus:SI (match_dup 0) (match_dup 2)))
(match_operand:SI 3 "register_operand" "r"))
(set (mem:SI (plus:SI (match_dup 0)
(match_operand:SI 5 "const_int_operand" "n")))
(match_operand:SI 4 "register_operand" "r"))]
"TARGET_THUMB2
&& INTVAL (operands[5]) == INTVAL (operands[2]) + 4"
"strd\\t%3, %4, [%0, %2]!"
[(set_attr "type" "store2")]
)
(define_insn "*thumb2_cmpsi_neg_shiftsi" (define_insn "*thumb2_cmpsi_neg_shiftsi"
[(set (reg:CC CC_REGNUM) [(set (reg:CC CC_REGNUM)
(compare:CC (match_operand:SI 0 "s_register_operand" "r") (compare:CC (match_operand:SI 0 "s_register_operand" "r")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment