Commit 5a200acb by Richard Earnshaw Committed by Richard Earnshaw

arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on initial store.

	* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on
	initial store.
	* thumb2.md (thumb2_storewb_pairsi): New pattern.

From-SVN: r202279
parent 5922847b
2013-09-05 Richard Earnshaw <rearnsha@arm.com>
* arm.c (thumb2_emit_strd_push): Rewrite to use pre-decrement on
initial store.
* thumb2.md (thumb2_storewb_pairsi): New pattern.
2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com> 2013-09-05 Yufeng Zhang <yufeng.zhang@arm.com>
* config/aarch64/aarch64-option-extensions.def: Add * config/aarch64/aarch64-option-extensions.def: Add
......
...@@ -16814,123 +16814,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, ...@@ -16814,123 +16814,165 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
} }
} }
/* Generate and emit a pattern that will be recognized as STRD pattern. If even /* Generate and emit a sequence of insns equivalent to PUSH, but using
number of registers are being pushed, multiple STRD patterns are created for STR and STRD. If an even number of registers are being pushed, one
all register pairs. If odd number of registers are pushed, emit a or more STRD patterns are created for each register pair. If an
combination of STRDs and STR for the prologue saves. */ odd number of registers are pushed, emit an initial STR followed by
as many STRD instructions as are needed. This works best when the
stack is initially 64-bit aligned (the normal case), since it
ensures that each STRD is also 64-bit aligned. */
static void static void
thumb2_emit_strd_push (unsigned long saved_regs_mask) thumb2_emit_strd_push (unsigned long saved_regs_mask)
{ {
int num_regs = 0; int num_regs = 0;
int i, j; int i;
int regno;
rtx par = NULL_RTX; rtx par = NULL_RTX;
rtx insn = NULL_RTX;
rtx dwarf = NULL_RTX; rtx dwarf = NULL_RTX;
rtx tmp, reg, tmp1; rtx tmp;
bool first = true;
for (i = 0; i <= LAST_ARM_REGNUM; i++) num_regs = bit_count (saved_regs_mask);
if (saved_regs_mask & (1 << i))
num_regs++;
gcc_assert (num_regs && num_regs <= 16); /* Must be at least one register to save, and can't save SP or PC. */
gcc_assert (num_regs > 0 && num_regs <= 14);
gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
/* Create sequence for DWARF info. All the frame-related data for
debugging is held in this wrapper. */
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
/* Pre-decrement the stack pointer, based on there being num_regs 4-byte /* Describe the stack adjustment. */
registers to push. */
tmp = gen_rtx_SET (VOIDmode, tmp = gen_rtx_SET (VOIDmode,
stack_pointer_rtx, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs)); plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
RTX_FRAME_RELATED_P (tmp) = 1; RTX_FRAME_RELATED_P (tmp) = 1;
insn = emit_insn (tmp); XVECEXP (dwarf, 0, 0) = tmp;
/* Create sequence for DWARF info. */
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
/* RTLs cannot be shared, hence create new copy for dwarf. */ /* Find the first register. */
tmp1 = gen_rtx_SET (VOIDmode, for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
stack_pointer_rtx, ;
plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
RTX_FRAME_RELATED_P (tmp1) = 1;
XVECEXP (dwarf, 0, 0) = tmp1;
gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM))); i = 0;
gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
/* Var j iterates over all the registers to gather all the registers in /* If there's an odd number of registers to push, start off by
saved_regs_mask. Var i gives index of register R_j in stack frame. pushing a single register. This ensures that subsequent strd
A PARALLEL RTX of register-pair is created here, so that pattern for operations are dword aligned (assuming that SP was originally
STRD can be matched. If num_regs is odd, 1st register will be pushed 64-bit aligned). */
using STR and remaining registers will be pushed with STRD in pairs. if ((num_regs & 1) != 0)
If num_regs is even, all registers are pushed with STRD in pairs.
Hence, skip first element for odd num_regs. */
for (i = num_regs - 1, j = LAST_ARM_REGNUM; i >= (num_regs % 2); j--)
if (saved_regs_mask & (1 << j))
{ {
/* Create RTX for store. New RTX is created for dwarf as rtx reg, mem, insn;
they are not sharable. */
reg = gen_rtx_REG (SImode, j);
tmp = gen_rtx_SET (SImode,
gen_frame_mem
(SImode,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
reg);
tmp1 = gen_rtx_SET (SImode, reg = gen_rtx_REG (SImode, regno);
gen_frame_mem if (num_regs == 1)
(SImode, mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)), stack_pointer_rtx));
else
mem = gen_frame_mem (Pmode,
gen_rtx_PRE_MODIFY
(Pmode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-4 * num_regs)));
tmp = gen_rtx_SET (VOIDmode, mem, reg);
RTX_FRAME_RELATED_P (tmp) = 1;
insn = emit_insn (tmp);
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
reg); reg);
RTX_FRAME_RELATED_P (tmp) = 1; RTX_FRAME_RELATED_P (tmp) = 1;
RTX_FRAME_RELATED_P (tmp1) = 1; i++;
regno++;
XVECEXP (dwarf, 0, i) = tmp;
first = false;
}
if (((i - (num_regs % 2)) % 2) == 1) while (i < num_regs)
/* When (i - (num_regs % 2)) is odd, the RTX to be emitted is yet to if (saved_regs_mask & (1 << regno))
be created. Hence create it first. The STRD pattern we are {
generating is : rtx reg1, reg2, mem1, mem2;
[ (SET (MEM (PLUS (SP) (NUM))) (reg_t1)) rtx tmp0, tmp1, tmp2;
(SET (MEM (PLUS (SP) (NUM + 4))) (reg_t2)) ] int regno2;
where the target registers need not be consecutive. */
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
/* Register R_j is added in PARALLEL RTX. If (i - (num_regs % 2)) is /* Find the register to pair with this one. */
even, the reg_j is added as 0th element and if it is odd, reg_i is for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
added as 1st element of STRD pattern shown above. */ regno2++)
XVECEXP (par, 0, ((i - (num_regs % 2)) % 2)) = tmp; ;
XVECEXP (dwarf, 0, (i + 1)) = tmp1;
if (((i - (num_regs % 2)) % 2) == 0) reg1 = gen_rtx_REG (SImode, regno);
/* When (i - (num_regs % 2)) is even, RTXs for both the registers reg2 = gen_rtx_REG (SImode, regno2);
to be loaded are generated in above given STRD pattern, and the
pattern can be emitted now. */
emit_insn (par);
i--; if (first)
}
if ((num_regs % 2) == 1)
{ {
/* If odd number of registers are pushed, generate STR pattern to store rtx insn;
lone register. */
for (; (saved_regs_mask & (1 << j)) == 0; j--);
tmp1 = gen_frame_mem (SImode, plus_constant (Pmode, first = false;
stack_pointer_rtx, 4 * i)); mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
reg = gen_rtx_REG (SImode, j); stack_pointer_rtx,
tmp = gen_rtx_SET (SImode, tmp1, reg); -4 * num_regs));
RTX_FRAME_RELATED_P (tmp) = 1; mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
-4 * (num_regs - 1)));
tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-4 * (num_regs)));
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
RTX_FRAME_RELATED_P (tmp0) = 1;
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
XVECEXP (par, 0, 0) = tmp0;
XVECEXP (par, 0, 1) = tmp1;
XVECEXP (par, 0, 2) = tmp2;
insn = emit_insn (par);
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
}
else
{
mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
4 * i));
mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
stack_pointer_rtx,
4 * (i + 1)));
tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
RTX_FRAME_RELATED_P (tmp1) = 1;
RTX_FRAME_RELATED_P (tmp2) = 1;
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
XVECEXP (par, 0, 0) = tmp1;
XVECEXP (par, 0, 1) = tmp2;
emit_insn (par);
}
emit_insn (tmp); /* Create unwind information. This is an approximation. */
tmp1 = gen_rtx_SET (VOIDmode,
gen_frame_mem (Pmode,
plus_constant (Pmode,
stack_pointer_rtx,
4 * i)),
reg1);
tmp2 = gen_rtx_SET (VOIDmode,
gen_frame_mem (Pmode,
plus_constant (Pmode,
stack_pointer_rtx,
4 * (i + 1))),
reg2);
tmp1 = gen_rtx_SET (SImode,
gen_frame_mem
(SImode,
plus_constant (Pmode, stack_pointer_rtx, 4 * i)),
reg);
RTX_FRAME_RELATED_P (tmp1) = 1; RTX_FRAME_RELATED_P (tmp1) = 1;
XVECEXP (dwarf, 0, (i + 1)) = tmp1; RTX_FRAME_RELATED_P (tmp2) = 1;
XVECEXP (dwarf, 0, i + 1) = tmp1;
XVECEXP (dwarf, 0, i + 2) = tmp2;
i += 2;
regno = regno2 + 1;
} }
else
regno++;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
RTX_FRAME_RELATED_P (insn) = 1;
return; return;
} }
......
...@@ -325,6 +325,21 @@ ...@@ -325,6 +325,21 @@
(set_attr "neg_pool_range" "*,*,*,250")] (set_attr "neg_pool_range" "*,*,*,250")]
) )
;; Store a pair of SImode registers with base-register update.
;;
;; The pattern is a three-element parallel:
;;   1. operand 0 = operand 1 + operand 2, where operand 1 is tied to
;;      operand 0 by the "0" constraint and operand 2 is a constant;
;;   2. operand 3 is stored to the SImode word at operand 0 + operand 2;
;;   3. operand 4 is stored to the SImode word at operand 0 + operand 5.
;; The insn condition requires TARGET_THUMB2 and that operand 5 equals
;; operand 2 + 4, i.e. the two stored words are adjacent.
;;
;; The output template is a single STRD of operands 3 and 4 using the
;; "[%0, %2]!" addressing form.
;; NOTE(review): this appears to be the shape of the PARALLEL that
;; thumb2_emit_strd_push builds for its first register pair (stack
;; adjustment plus two stores) -- confirm against arm.c.
(define_insn "*thumb2_storewb_pairsi"
[(set (match_operand:SI 0 "register_operand" "=&kr")
(plus:SI (match_operand:SI 1 "register_operand" "0")
(match_operand:SI 2 "const_int_operand" "n")))
(set (mem:SI (plus:SI (match_dup 0) (match_dup 2)))
(match_operand:SI 3 "register_operand" "r"))
(set (mem:SI (plus:SI (match_dup 0)
(match_operand:SI 5 "const_int_operand" "n")))
(match_operand:SI 4 "register_operand" "r"))]
"TARGET_THUMB2
&& INTVAL (operands[5]) == INTVAL (operands[2]) + 4"
"strd\\t%3, %4, [%0, %2]!"
[(set_attr "type" "store2")]
)
(define_insn "*thumb2_cmpsi_neg_shiftsi" (define_insn "*thumb2_cmpsi_neg_shiftsi"
[(set (reg:CC CC_REGNUM) [(set (reg:CC CC_REGNUM)
(compare:CC (match_operand:SI 0 "s_register_operand" "r") (compare:CC (match_operand:SI 0 "s_register_operand" "r")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment