Commit e2e8d274 by Segher Boessenkool

rs6000: Remove duplicated code

A large portion of the code that was moved from rs6000.c to
rs6000-logue.c was accidentally retained in rs6000.c.  This removes it.


2019-06-26  Segher Boessenkool  <segher@kernel.crashing.org>

	* rs6000.c: Fix previous commit; it missed some changes.

From-SVN: r272690
@@ -23691,1285 +23691,6 @@ get_TOC_alias_set (void)
return set;
}
/* This ties together stack memory (MEM with an alias set of frame_alias_set)
and the change to the stack pointer. */
static void
rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
{
rtvec p;
int i;
rtx regs[3];
i = 0;
regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
if (hard_frame_needed)
regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
if (!(REGNO (fp) == STACK_POINTER_REGNUM
|| (hard_frame_needed
&& REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
regs[i++] = fp;
p = rtvec_alloc (i);
while (--i >= 0)
{
rtx mem = gen_frame_mem (BLKmode, regs[i]);
RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
}
emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
}
/* Allocate SIZE_INT bytes on the stack using a store-with-update style insn
and set the appropriate attributes for the generated insn. Return the
first insn which adjusts the stack pointer or the last insn before
the stack adjustment loop.
SIZE_INT is used to create the CFI note for the allocation.
SIZE_RTX is an rtx containing the size of the adjustment. Note that
since stacks grow to lower addresses, its runtime value is -SIZE_INT.
ORIG_SP contains the backchain value that must be stored at *sp. */
static rtx_insn *
rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
{
rtx_insn *insn;
rtx size_rtx = GEN_INT (-size_int);
if (size_int > 32767)
{
rtx tmp_reg = gen_rtx_REG (Pmode, 0);
/* Need a note here so that try_split doesn't get confused. */
if (get_last_insn () == NULL_RTX)
emit_note (NOTE_INSN_DELETED);
insn = emit_move_insn (tmp_reg, size_rtx);
try_split (PATTERN (insn), insn, 0);
size_rtx = tmp_reg;
}
if (TARGET_32BIT)
insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
stack_pointer_rtx,
size_rtx,
orig_sp));
else
insn = emit_insn (gen_movdi_update_stack (stack_pointer_rtx,
stack_pointer_rtx,
size_rtx,
orig_sp));
rtx par = PATTERN (insn);
gcc_assert (GET_CODE (par) == PARALLEL);
rtx set = XVECEXP (par, 0, 0);
gcc_assert (GET_CODE (set) == SET);
rtx mem = SET_DEST (set);
gcc_assert (MEM_P (mem));
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, get_frame_alias_set ());
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR,
gen_rtx_SET (stack_pointer_rtx,
gen_rtx_PLUS (Pmode,
stack_pointer_rtx,
GEN_INT (-size_int))));
/* Emit a blockage to ensure the allocation/probing insns are
not optimized, combined, removed, etc. Add REG_STACK_CHECK
note for similar reasons. */
if (flag_stack_clash_protection)
{
add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
emit_insn (gen_blockage ());
}
return insn;
}
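The 32767 cutoff above exists because the store-with-update instructions encode the adjustment as (roughly) a signed 16-bit displacement (for stdu, additionally a multiple of 4), so larger frames must stage the size through r0 first. A standalone sketch of that check, illustrative only and not part of the patch:

#include <stdio.h>

int
main (void)
{
  long long sizes[] = { 16384, 32767, 40000 };
  for (int i = 0; i < 3; i++)
    {
      long long d = -sizes[i];
      /* Does -size fit in a signed 16-bit displacement?  */
      int fits = d >= -32768 && d <= 32767;
      printf ("size %lld: %s\n", sizes[i],
              fits ? "immediate form" : "staged through r0");
    }
  return 0;
}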
static HOST_WIDE_INT
get_stack_clash_protection_probe_interval (void)
{
return (HOST_WIDE_INT_1U
<< PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
}
static HOST_WIDE_INT
get_stack_clash_protection_guard_size (void)
{
return (HOST_WIDE_INT_1U
<< PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
}
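For orientation, both helpers just shift 1 left by a --param value. An illustrative standalone computation, assuming the documented default of 12 (i.e. 4 KiB) for both parameters; the actual values are tunable and the backend may override them:

#include <stdio.h>

int
main (void)
{
  int probe_param = 12, guard_param = 12;   /* assumed param values */
  printf ("probe interval: %llu bytes\n", 1ULL << probe_param);
  printf ("guard size:     %llu bytes\n", 1ULL << guard_param);
  return 0;
}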
/* Allocate ORIG_SIZE bytes on the stack and probe the newly
allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
COPY_REG, if non-null, should contain a copy of the original
stack pointer at exit from this function.
This is subtly different from the Ada probing in that it tries hard to
prevent attacks that jump the stack guard. Thus it is never allowed to
allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
space without a suitable probe. */
static rtx_insn *
rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
rtx copy_reg)
{
rtx orig_sp = copy_reg;
HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
/* Round the size down to a multiple of PROBE_INTERVAL. */
HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
/* If explicitly requested,
or the rounded size is not the same as the original size,
or the rounded size is greater than a page,
then we will need a copy of the original stack pointer. */
if (rounded_size != orig_size
|| rounded_size > probe_interval
|| copy_reg)
{
/* If the caller did not request a copy of the incoming stack
pointer, then we use r0 to hold the copy. */
if (!copy_reg)
orig_sp = gen_rtx_REG (Pmode, 0);
emit_move_insn (orig_sp, stack_pointer_rtx);
}
/* There are three cases here.
One is a single probe, which is the most common and most efficiently
implemented as it does not have to have a copy of the original
stack pointer if there are no residuals.
Second is unrolled allocation/probes, which we use if there are just
a few of them. It needs to save the original stack pointer into a
temporary for use as a source register in the allocation/probe.
Last is a loop. This is the most uncommon case and least efficient. */
rtx_insn *retval = NULL;
if (rounded_size == probe_interval)
{
retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
}
else if (rounded_size <= 8 * probe_interval)
{
/* The ABI requires using the store-with-update insns to allocate
space and store the backchain into the stack.
So we save the current stack pointer into a temporary, then
emit the store-with-update insns to store the saved stack pointer
into the right location in each new page. */
for (int i = 0; i < rounded_size; i += probe_interval)
{
rtx_insn *insn
= rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
/* Save the first stack adjustment in RETVAL. */
if (i == 0)
retval = insn;
}
dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
}
else
{
/* Compute the ending address. */
rtx end_addr
= copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
rtx rs = GEN_INT (-rounded_size);
rtx_insn *insn;
if (add_operand (rs, Pmode))
insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
else
{
emit_move_insn (end_addr, GEN_INT (-rounded_size));
insn = emit_insn (gen_add3_insn (end_addr, end_addr,
stack_pointer_rtx));
/* Describe the effect of INSN to the CFI engine. */
add_reg_note (insn, REG_FRAME_RELATED_EXPR,
gen_rtx_SET (end_addr,
gen_rtx_PLUS (Pmode, stack_pointer_rtx,
rs)));
}
RTX_FRAME_RELATED_P (insn) = 1;
/* Emit the loop. */
if (TARGET_64BIT)
retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
stack_pointer_rtx, orig_sp,
end_addr));
else
retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
stack_pointer_rtx, orig_sp,
end_addr));
RTX_FRAME_RELATED_P (retval) = 1;
/* Describe the effect of INSN to the CFI engine. */
add_reg_note (retval, REG_FRAME_RELATED_EXPR,
gen_rtx_SET (stack_pointer_rtx, end_addr));
/* Emit a blockage to ensure the allocation/probing insns are
not optimized, combined, removed, etc. Other cases handle this
within their call to rs6000_emit_allocate_stack_1. */
emit_insn (gen_blockage ());
dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
}
if (orig_size != rounded_size)
{
/* Allocate (and implicitly probe) any residual space. */
HOST_WIDE_INT residual = orig_size - rounded_size;
rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
/* If the residual was the only allocation, then we can return the
allocating insn. */
if (!retval)
retval = insn;
}
return retval;
}
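A hypothetical worked example of the three-way split above; the 4 KiB interval, the reimplemented ROUND_DOWN, and the helper names are assumptions for illustration, not part of the patch. (Note the real code additionally skips probing entirely for frames smaller than the guard size; see rs6000_emit_allocate_stack below.)

#include <stdio.h>

#define ROUND_DOWN(n, d) ((n) / (d) * (d))

static const char *
strategy (long long rounded, long long interval)
{
  if (rounded == interval)
    return "single probe";
  else if (rounded <= 8 * interval)
    return "unrolled allocation/probes";
  else
    return "probe loop";
}

int
main (void)
{
  long long interval = 4096;   /* assumed probe interval */
  long long sizes[] = { 4096, 20000, 50000 };
  for (int i = 0; i < 3; i++)
    {
      long long rounded = ROUND_DOWN (sizes[i], interval);
      printf ("size %lld: %s, residual %lld\n",
              sizes[i], strategy (rounded, interval), sizes[i] - rounded);
    }
  return 0;
}

This prints a single probe for 4096, four unrolled probes plus a 3616-byte residual for 20000, and the loop form plus an 848-byte residual for 50000.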
/* Emit the correct code for allocating stack space, as insns.
If COPY_REG, make sure a copy of the old frame is left there.
The generated code may use hard register 0 as a temporary. */
static rtx_insn *
rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
{
rtx_insn *insn;
rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
rtx tmp_reg = gen_rtx_REG (Pmode, 0);
rtx todec = gen_int_mode (-size, Pmode);
if (INTVAL (todec) != -size)
{
warning (0, "stack frame too large");
emit_insn (gen_trap ());
return 0;
}
if (crtl->limit_stack)
{
if (REG_P (stack_limit_rtx)
&& REGNO (stack_limit_rtx) > 1
&& REGNO (stack_limit_rtx) <= 31)
{
rtx_insn *insn
= gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
gcc_assert (insn);
emit_insn (insn);
emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
}
else if (SYMBOL_REF_P (stack_limit_rtx)
&& TARGET_32BIT
&& DEFAULT_ABI == ABI_V4
&& !flag_pic)
{
rtx toload = gen_rtx_CONST (VOIDmode,
gen_rtx_PLUS (Pmode,
stack_limit_rtx,
GEN_INT (size)));
emit_insn (gen_elf_high (tmp_reg, toload));
emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
const0_rtx));
}
else
warning (0, "stack limit expression is not supported");
}
if (flag_stack_clash_protection)
{
if (size < get_stack_clash_protection_guard_size ())
dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
else
{
rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
copy_reg);
/* If we asked for a copy with an offset, then we still need to add in
the offset. */
if (copy_reg && copy_off)
emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
return insn;
}
}
if (copy_reg)
{
if (copy_off != 0)
emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
else
emit_move_insn (copy_reg, stack_reg);
}
/* Since gen_frame_mem was not used to generate the MEM,
rs6000_emit_allocate_stack_1 grabs it from the store-with-update
PARALLEL it generates (the MEM set is the first operation) and sets
the alias set/attributes there. */
insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
return insn;
}
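The INTVAL round-trip check near the top of this function catches frame sizes that do not survive truncation to Pmode. A standalone sketch of the idea for a 32-bit target; illustrative only, and note the out-of-range conversion is implementation-defined in C, though it wraps on typical targets:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  long long size = 3LL * 1024 * 1024 * 1024;   /* 3 GiB frame */
  int32_t todec = (int32_t) -size;   /* truncate to 32-bit Pmode (wraps) */
  printf ("round-trips: %d\n", (long long) todec == -size);   /* prints 0 */
  return 0;
}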
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
#if PROBE_INTERVAL > 32768
#error Cannot use indexed addressing mode for stack probing
#endif
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
inclusive. These are offsets from the current stack pointer. */
static void
rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
/* See if we have a constant small number of probes to generate. If so,
that's the easy case. */
if (first + size <= 32768)
{
HOST_WIDE_INT i;
/* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
it exceeds SIZE. If only one probe is needed, this will not
generate any code. Then probe at FIRST + SIZE. */
for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-(first + i)));
emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
-(first + size)));
}
/* Otherwise, do the same as above, but in a loop. Note that we must be
extra careful with variables wrapping around because we might be at
the very top (or the very bottom) of the address space and we have
to be able to handle this case properly; in particular, we use an
equality test for the loop condition. */
else
{
HOST_WIDE_INT rounded_size;
rtx r12 = gen_rtx_REG (Pmode, 12);
rtx r0 = gen_rtx_REG (Pmode, 0);
/* Sanity check for the addressing mode we're going to use. */
gcc_assert (first <= 32768);
/* Step 1: round SIZE to the previous multiple of the interval. */
rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
/* Step 2: compute initial and final value of the loop counter. */
/* TEST_ADDR = SP + FIRST. */
emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
-first)));
/* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
if (rounded_size > 32768)
{
emit_move_insn (r0, GEN_INT (-rounded_size));
emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
}
else
emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
-rounded_size)));
/* Step 3: the loop
do
{
TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
probe at TEST_ADDR
}
while (TEST_ADDR != LAST_ADDR)
probes at FIRST + N * PROBE_INTERVAL for values of N from 1
until it is equal to ROUNDED_SIZE. */
if (TARGET_64BIT)
emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
else
emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
/* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
that SIZE is equal to ROUNDED_SIZE. */
if (size != rounded_size)
emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
}
}
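A hypothetical trace of the small-constant case above, with assumed values FIRST = 16, SIZE = 10000, and a 4 KiB PROBE_INTERVAL; offsets are relative to the incoming stack pointer:

#include <stdio.h>

int
main (void)
{
  long long first = 16, size = 10000, interval = 4096;
  /* Probe at FIRST + N * PROBE_INTERVAL while below SIZE...  */
  for (long long i = interval; i < size; i += interval)
    printf ("probe at sp - %lld\n", first + i);
  /* ...then one last probe at FIRST + SIZE.  */
  printf ("probe at sp - %lld\n", first + size);
  return 0;
}

This emits probes at sp - 4112, sp - 8208, and finally sp - 10016.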
/* This function is called when rs6000_frame_related is processing
SETs within a PARALLEL, and returns whether the REGNO save ought to
be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
for out-of-line register save functions, store multiple, and the
Darwin world_save. They may contain registers that don't really
need saving. */
static bool
interesting_frame_related_regno (unsigned int regno)
{
/* Apparent saves of r0 are actually saves of LR. It doesn't make
sense to substitute the regno here to test save_reg_p (LR_REGNO).
We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
(set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
as frame related. */
if (regno == 0)
return true;
/* If we see CR2 then we are here on a Darwin world save. Saves of
CR2 signify the whole CR is being saved. This is a long-standing
ABI wart fixed by ELFv2. As for r0/lr there is no need to check
that CR needs to be saved. */
if (regno == CR2_REGNO)
return true;
/* Omit frame info for any user-defined global regs. If frame info
is supplied for them, frame unwinding will restore a user reg.
Also omit frame info for any reg we don't need to save, as that
bloats frame info and can cause problems with shrink wrapping.
Since global regs won't be seen as needing to be saved, both of
these conditions are covered by save_reg_p. */
return save_reg_p (regno);
}
/* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
addresses, not offsets.
REG2 contains the backchain that must be stored into *sp at each allocation.
This is subtly different from the Ada probing above in that it tries hard
to prevent attacks that jump the stack guard. Thus, it is never allowed
to allocate more than PROBE_INTERVAL bytes of stack space without a
suitable probe. */
static const char *
output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
{
static int labelno = 0;
char loop_lab[32];
rtx xops[3];
HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
/* This allocates and probes. */
xops[0] = reg1;
xops[1] = reg2;
xops[2] = GEN_INT (-probe_interval);
if (TARGET_64BIT)
output_asm_insn ("stdu %1,%2(%0)", xops);
else
output_asm_insn ("stwu %1,%2(%0)", xops);
/* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
xops[0] = reg1;
xops[1] = reg3;
if (TARGET_64BIT)
output_asm_insn ("cmpd 0,%0,%1", xops);
else
output_asm_insn ("cmpw 0,%0,%1", xops);
fputs ("\tbne 0,", asm_out_file);
assemble_name_raw (asm_out_file, loop_lab);
fputc ('\n', asm_out_file);
return "";
}
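For concreteness, a hypothetical rendering of the loop this template emits on a 64-bit target. The operand assignment (r1 as the test/allocation address, r0 as the backchain, r12 as the last address, and the ".LPSRL0" label) is inferred from the stack-clash loop emission earlier, not stated by the patch:

#include <stdio.h>

int
main (void)
{
  /* Mirrors the output_asm_insn calls above with substituted operands.  */
  puts (".LPSRL0:");
  puts ("\tstdu 0,-4096(1)");   /* allocate a page, store the backchain */
  puts ("\tcmpd 0,1,12");       /* test address vs. last address */
  puts ("\tbne 0,.LPSRL0");
  return 0;
}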
/* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
is not NULL. It would be nice if dwarf2out_frame_debug_expr could
deduce these equivalences by itself so it wasn't necessary to hold
its hand so much. Don't be tempted to always supply d2_f_d_e with
the actual cfa register, i.e. r31 when we are using a hard frame
pointer. That fails when saving regs off r1, and sched moves the
r31 setup past the reg saves. */
static rtx_insn *
rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
rtx reg2, rtx repl2)
{
rtx repl;
if (REGNO (reg) == STACK_POINTER_REGNUM)
{
gcc_checking_assert (val == 0);
repl = NULL_RTX;
}
else
repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
GEN_INT (val));
rtx pat = PATTERN (insn);
if (!repl && !reg2)
{
/* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
if (GET_CODE (pat) == PARALLEL)
for (int i = 0; i < XVECLEN (pat, 0); i++)
if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
{
rtx set = XVECEXP (pat, 0, i);
if (!REG_P (SET_SRC (set))
|| interesting_frame_related_regno (REGNO (SET_SRC (set))))
RTX_FRAME_RELATED_P (set) = 1;
}
RTX_FRAME_RELATED_P (insn) = 1;
return insn;
}
/* We expect that 'pat' is either a SET or a PARALLEL containing
SETs (and possibly other stuff). In a PARALLEL, all the SETs
are important so they all have to be marked RTX_FRAME_RELATED_P.
Call simplify_replace_rtx on the SETs rather than the whole insn
so as to leave the other stuff alone (for example USE of r12). */
set_used_flags (pat);
if (GET_CODE (pat) == SET)
{
if (repl)
pat = simplify_replace_rtx (pat, reg, repl);
if (reg2)
pat = simplify_replace_rtx (pat, reg2, repl2);
}
else if (GET_CODE (pat) == PARALLEL)
{
pat = shallow_copy_rtx (pat);
XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
for (int i = 0; i < XVECLEN (pat, 0); i++)
if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
{
rtx set = XVECEXP (pat, 0, i);
if (repl)
set = simplify_replace_rtx (set, reg, repl);
if (reg2)
set = simplify_replace_rtx (set, reg2, repl2);
XVECEXP (pat, 0, i) = set;
if (!REG_P (SET_SRC (set))
|| interesting_frame_related_regno (REGNO (SET_SRC (set))))
RTX_FRAME_RELATED_P (set) = 1;
}
}
else
gcc_unreachable ();
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
return insn;
}
/* Returns an insn that has a vrsave set operation with the
appropriate CLOBBERs. */
static rtx
generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
{
int nclobs, i;
rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
clobs[0]
= gen_rtx_SET (vrsave,
gen_rtx_UNSPEC_VOLATILE (SImode,
gen_rtvec (2, reg, vrsave),
UNSPECV_SET_VRSAVE));
nclobs = 1;
/* We need to clobber the registers in the mask so the scheduler
does not move sets to VRSAVE before sets of AltiVec registers.
However, if the function receives nonlocal gotos, reload will set
all call saved registers live. We will end up with:
(set (reg 999) (mem))
(parallel [ (set (reg vrsave) (unspec blah))
(clobber (reg 999))])
The clobber will cause the store into reg 999 to be dead, and
flow will attempt to delete an epilogue insn. In this case, we
need an unspec use/set of the register. */
for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
{
if (!epiloguep || call_used_regs [i])
clobs[nclobs++] = gen_hard_reg_clobber (V4SImode, i);
else
{
rtx reg = gen_rtx_REG (V4SImode, i);
clobs[nclobs++]
= gen_rtx_SET (reg,
gen_rtx_UNSPEC (V4SImode,
gen_rtvec (1, reg), 27));
}
}
insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
for (i = 0; i < nclobs; ++i)
XVECEXP (insn, 0, i) = clobs[i];
return insn;
}
static rtx
gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
{
rtx addr, mem;
addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
mem = gen_frame_mem (GET_MODE (reg), addr);
return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
}
static rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
return gen_frame_set (reg, frame_reg, offset, false);
}
static rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
return gen_frame_set (reg, frame_reg, offset, true);
}
/* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
static rtx_insn *
emit_frame_save (rtx frame_reg, machine_mode mode,
unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
{
rtx reg;
/* Some cases that need register indexed addressing. */
gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
|| (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
reg = gen_rtx_REG (mode, regno);
rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
NULL_RTX, NULL_RTX);
}
/* Emit an offset memory reference suitable for a frame store, while
converting to a valid addressing mode. */
static rtx
gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
{
return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
}
#ifndef TARGET_FIX_AND_CONTINUE
#define TARGET_FIX_AND_CONTINUE 0
#endif
/* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
#define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
#define LAST_SAVRES_REGISTER 31
#define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
enum {
SAVRES_LR = 0x1,
SAVRES_SAVE = 0x2,
SAVRES_REG = 0x0c,
SAVRES_GPR = 0,
SAVRES_FPR = 4,
SAVRES_VR = 8
};
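A standalone sketch (not part of the patch) of how the selector bits compose; SAVRES_REG acts as a two-bit field that masks out the register class:

#include <stdio.h>

enum {
  SAVRES_LR = 0x1, SAVRES_SAVE = 0x2, SAVRES_REG = 0x0c,
  SAVRES_GPR = 0, SAVRES_FPR = 4, SAVRES_VR = 8
};

int
main (void)
{
  /* "Save GPRs, LR variant" composes as 0x3; the class field is GPR.  */
  int sel = SAVRES_SAVE | SAVRES_GPR | SAVRES_LR;
  printf ("sel = 0x%x, class = %d, save = %d, lr = %d\n",
          sel, sel & SAVRES_REG, (sel & SAVRES_SAVE) != 0,
          (sel & SAVRES_LR) != 0);
  return 0;
}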
static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
/* Temporary holding space for an out-of-line register save/restore
routine name. */
static char savres_routine_name[30];
/* Return the name for an out-of-line register save/restore routine.
SEL selects save vs. restore, the register class, and LR handling;
REGNO is the first register saved/restored. */
static char *
rs6000_savres_routine_name (int regno, int sel)
{
const char *prefix = "";
const char *suffix = "";
/* Different targets are supposed to define
{SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
routine name could be defined with:
sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
This is a nice idea in theory, but in reality, things are
complicated in several ways:
- ELF targets have save/restore routines for GPRs.
- PPC64 ELF targets have routines for save/restore of GPRs that
differ in what they do with the link register, so having a set
prefix doesn't work. (We only use one of the save routines at
the moment, though.)
- PPC32 ELF targets have "exit" versions of the restore routines
that restore the link register and can save some extra space.
These require an extra suffix. (There are also "tail" versions
of the restore routines and "GOT" versions of the save routines,
but we don't generate those at present. Same problems apply,
though.)
We deal with all this by synthesizing our own prefix/suffix and
using that for the simple sprintf call shown above. */
if (DEFAULT_ABI == ABI_V4)
{
if (TARGET_64BIT)
goto aix_names;
if ((sel & SAVRES_REG) == SAVRES_GPR)
prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
else if ((sel & SAVRES_REG) == SAVRES_FPR)
prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
else if ((sel & SAVRES_REG) == SAVRES_VR)
prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
else
abort ();
if ((sel & SAVRES_LR))
suffix = "_x";
}
else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
{
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
/* No out-of-line save/restore routines for GPRs on AIX. */
gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
#endif
aix_names:
if ((sel & SAVRES_REG) == SAVRES_GPR)
prefix = ((sel & SAVRES_SAVE)
? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
: ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
else if ((sel & SAVRES_REG) == SAVRES_FPR)
{
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
if ((sel & SAVRES_LR))
prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
else
#endif
{
prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
}
}
else if ((sel & SAVRES_REG) == SAVRES_VR)
prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
else
abort ();
}
if (DEFAULT_ABI == ABI_DARWIN)
{
/* The Darwin approach is (slightly) different, in order to be
compatible with code generated by the system toolchain. There is a
single symbol for the start of the save sequence, and the code here
embeds an offset into that code on the basis of the first register
to be saved. */
prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
if ((sel & SAVRES_REG) == SAVRES_GPR)
sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
(regno - 13) * 4, prefix, regno);
else if ((sel & SAVRES_REG) == SAVRES_FPR)
sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
(regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
else if ((sel & SAVRES_REG) == SAVRES_VR)
sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
(regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
else
abort ();
}
else
sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
return savres_routine_name;
}
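For illustration, a hypothetical standalone rendering of the ABI_V4 naming scheme described above; the register number and the LR ("_x") variant are example choices, not part of the patch:

#include <stdio.h>

int
main (void)
{
  char name[30];
  /* ABI_V4, GPR save, LR variant, first saved register r29:  */
  sprintf (name, "%s%d%s", "_savegpr_", 29, "_x");
  printf ("%s\n", name);   /* prints _savegpr_29_x */
  return 0;
}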
/* Return an RTL SYMBOL_REF for an out-of-line register save/restore
routine, as selected by SEL. */
static rtx
rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
{
int regno = ((sel & SAVRES_REG) == SAVRES_GPR
? info->first_gp_reg_save
: (sel & SAVRES_REG) == SAVRES_FPR
? info->first_fp_reg_save - 32
: (sel & SAVRES_REG) == SAVRES_VR
? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
: -1);
rtx sym;
int select = sel;
/* Don't generate bogus routine names. */
gcc_assert (FIRST_SAVRES_REGISTER <= regno
&& regno <= LAST_SAVRES_REGISTER
&& select >= 0 && select <= 12);
sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
if (sym == NULL)
{
char *name;
name = rs6000_savres_routine_name (regno, sel);
sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
= gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
}
return sym;
}
/* Emit a sequence of insns, including a stack tie if needed, for
resetting the stack pointer. If UPDT_REGNO is not 1, then don't
reset the stack pointer, but move the base of the frame into
reg UPDT_REGNO for use by out-of-line register restore routines. */
static rtx
rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
unsigned updt_regno)
{
/* If there is nothing to do, don't do anything. */
if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
return NULL_RTX;
rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
/* This blockage is needed so that sched doesn't decide to move
the sp change before the register restores. */
if (DEFAULT_ABI == ABI_V4)
return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
GEN_INT (frame_off)));
/* If we are restoring registers out-of-line, we will be using the
"exit" variants of the restore routines, which will reset the
stack for us. But we do need to point updt_reg into the
right place for those routines. */
if (frame_off != 0)
return emit_insn (gen_add3_insn (updt_reg_rtx,
frame_reg_rtx, GEN_INT (frame_off)));
else
return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
return NULL_RTX;
}
/* Return the register number used as a pointer by out-of-line
save/restore functions. */
static inline unsigned
ptr_regno_for_savres (int sel)
{
if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
}
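A standalone sketch of the decision above, illustrative only: which pointer register the out-of-line routines expect under the AIX/ELFv2 branch, for a few example selectors.

#include <stdio.h>

enum { SAVRES_LR = 0x1, SAVRES_SAVE = 0x2, SAVRES_REG = 0x0c,
       SAVRES_GPR = 0, SAVRES_FPR = 4, SAVRES_VR = 8 };

/* Mirrors the AIX/ELFv2 branch: FPR routines and LR variants use r1,
   everything else uses r12.  */
static unsigned
ptr_regno_aix (int sel)
{
  return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
}

int
main (void)
{
  printf ("GPR save:      r%u\n", ptr_regno_aix (SAVRES_SAVE | SAVRES_GPR));
  printf ("GPR save + LR: r%u\n",
          ptr_regno_aix (SAVRES_SAVE | SAVRES_GPR | SAVRES_LR));
  printf ("FPR save:      r%u\n", ptr_regno_aix (SAVRES_SAVE | SAVRES_FPR));
  return 0;
}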
/* Construct a parallel rtx describing the effect of a call to an
out-of-line register save/restore routine, and emit the insn
or jump_insn as appropriate. */
static rtx_insn *
rs6000_emit_savres_rtx (rs6000_stack_t *info,
rtx frame_reg_rtx, int save_area_offset, int lr_offset,
machine_mode reg_mode, int sel)
{
int i;
int offset, start_reg, end_reg, n_regs, use_reg;
int reg_size = GET_MODE_SIZE (reg_mode);
rtx sym;
rtvec p;
rtx par;
rtx_insn *insn;
offset = 0;
start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
? info->first_gp_reg_save
: (sel & SAVRES_REG) == SAVRES_FPR
? info->first_fp_reg_save
: (sel & SAVRES_REG) == SAVRES_VR
? info->first_altivec_reg_save
: -1);
end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
? 32
: (sel & SAVRES_REG) == SAVRES_FPR
? 64
: (sel & SAVRES_REG) == SAVRES_VR
? LAST_ALTIVEC_REGNO + 1
: -1);
n_regs = end_reg - start_reg;
p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
+ ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
+ n_regs);
if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
RTVEC_ELT (p, offset++) = ret_rtx;
RTVEC_ELT (p, offset++) = gen_hard_reg_clobber (Pmode, LR_REGNO);
sym = rs6000_savres_routine_sym (info, sel);
RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
use_reg = ptr_regno_for_savres (sel);
if ((sel & SAVRES_REG) == SAVRES_VR)
{
/* Vector regs are saved/restored using [reg+reg] addressing. */
RTVEC_ELT (p, offset++) = gen_hard_reg_clobber (Pmode, use_reg);
RTVEC_ELT (p, offset++)
= gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
}
else
RTVEC_ELT (p, offset++)
= gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
for (i = 0; i < end_reg - start_reg; i++)
RTVEC_ELT (p, i + offset)
= gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
frame_reg_rtx, save_area_offset + reg_size * i,
(sel & SAVRES_SAVE) != 0);
if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
RTVEC_ELT (p, i + offset)
= gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
par = gen_rtx_PARALLEL (VOIDmode, p);
if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
{
insn = emit_jump_insn (par);
JUMP_LABEL (insn) = ret_rtx;
}
else
insn = emit_insn (par);
return insn;
}
/* Emit prologue code to store CR fields that need to be saved into REG. This
function should only be called when moving the non-volatile CRs to REG; it
is not a general-purpose routine to move the entire set of CRs to REG.
Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
volatile CRs. */
static void
rs6000_emit_prologue_move_from_cr (rtx reg)
{
/* Only the ELFv2 ABI allows storing only selected fields. */
if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
{
int i, cr_reg[8], count = 0;
/* Collect CR fields that must be saved. */
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
cr_reg[count++] = i;
/* If it's just a single one, use mfcrf. */
if (count == 1)
{
rtvec p = rtvec_alloc (1);
rtvec r = rtvec_alloc (2);
RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
RTVEC_ELT (p, 0)
= gen_rtx_SET (reg,
gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
return;
}
/* ??? It might be better to handle count == 2 / 3 cases here
as well, using logical operations to combine the values. */
}
emit_insn (gen_prologue_movesi_from_cr (reg));
}
/* Return whether the split-stack arg pointer (r12) is used. */
static bool
split_stack_arg_pointer_used_p (void)
{
/* If the pseudo holding the arg pointer is no longer a pseudo,
then the arg pointer is used. */
if (cfun->machine->split_stack_arg_pointer != NULL_RTX
&& (!REG_P (cfun->machine->split_stack_arg_pointer)
|| HARD_REGISTER_P (cfun->machine->split_stack_arg_pointer)))
return true;
/* Unfortunately we also need to do some code scanning, since
r12 may have been substituted for the pseudo. */
rtx_insn *insn;
basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
FOR_BB_INSNS (bb, insn)
if (NONDEBUG_INSN_P (insn))
{
/* A call destroys r12. */
if (CALL_P (insn))
return false;
df_ref use;
FOR_EACH_INSN_USE (use, insn)
{
rtx x = DF_REF_REG (use);
if (REG_P (x) && REGNO (x) == 12)
return true;
}
df_ref def;
FOR_EACH_INSN_DEF (def, insn)
{
rtx x = DF_REF_REG (def);
if (REG_P (x) && REGNO (x) == 12)
return false;
}
}
return bitmap_bit_p (DF_LR_OUT (bb), 12);
}
/* -mprofile-kernel code calls mcount before the function prolog,
so a profiled leaf function should stay a leaf function. */
static bool
rs6000_keep_leaf_when_profiled ()
{
return TARGET_PROFILE_KERNEL;
}
/* Non-zero if vmx regs are restored before the frame pop, zero if
we restore after the pop when possible. */
#define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
/* Restoring cr is a two-step process: loading a reg from the frame
save, then moving the reg to cr. For ABI_V4 we must let the
unwinder know that the stack location is no longer valid at or
before the stack deallocation, but we can't emit a cfa_restore for
cr at the stack deallocation like we do for other registers.
The trouble is that it is possible for the move to cr to be
scheduled after the stack deallocation. So say exactly where cr
is located on each of the two insns. */
static rtx
load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
{
rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
rtx reg = gen_rtx_REG (SImode, regno);
rtx_insn *insn = emit_move_insn (reg, mem);
if (!exit_func && DEFAULT_ABI == ABI_V4)
{
rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
rtx set = gen_rtx_SET (reg, cr);
add_reg_note (insn, REG_CFA_REGISTER, set);
RTX_FRAME_RELATED_P (insn) = 1;
}
return reg;
}
/* Reload CR from REG. */
static void
restore_saved_cr (rtx reg, bool using_mfcr_multiple, bool exit_func)
{
int count = 0;
int i;
if (using_mfcr_multiple)
{
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
count++;
gcc_assert (count);
}
if (using_mfcr_multiple && count > 1)
{
rtx_insn *insn;
rtvec p;
int ndx;
p = rtvec_alloc (count);
ndx = 0;
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
{
rtvec r = rtvec_alloc (2);
RTVEC_ELT (r, 0) = reg;
RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
RTVEC_ELT (p, ndx) =
gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
ndx++;
}
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
gcc_assert (ndx == count);
/* For the ELFv2 ABI we generate a CFA_RESTORE for each
CR field separately. */
if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
{
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
add_reg_note (insn, REG_CFA_RESTORE,
gen_rtx_REG (SImode, CR0_REGNO + i));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
else
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
{
rtx insn = emit_insn (gen_movsi_to_cr_one
(gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
/* For the ELFv2 ABI we generate a CFA_RESTORE for each
CR field separately, attached to the insn that in fact
restores this particular CR field. */
if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
{
add_reg_note (insn, REG_CFA_RESTORE,
gen_rtx_REG (SImode, CR0_REGNO + i));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
/* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
if (!exit_func && DEFAULT_ABI != ABI_ELFv2
&& (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
{
rtx_insn *insn = get_last_insn ();
rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
add_reg_note (insn, REG_CFA_RESTORE, cr);
RTX_FRAME_RELATED_P (insn) = 1;
}
}
/* Like cr, the move to lr instruction can be scheduled after the
stack deallocation, but unlike cr, its stack frame save is still
valid. So we only need to emit the cfa_restore on the correct
instruction. */
static void
load_lr_save (int regno, rtx frame_reg_rtx, int offset)
{
rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
rtx reg = gen_rtx_REG (Pmode, regno);
emit_move_insn (reg, mem);
}
static void
restore_saved_lr (int regno, bool exit_func)
{
rtx reg = gen_rtx_REG (Pmode, regno);
rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
rtx_insn *insn = emit_move_insn (lr, reg);
if (!exit_func && flag_shrink_wrap)
{
add_reg_note (insn, REG_CFA_RESTORE, lr);
RTX_FRAME_RELATED_P (insn) = 1;
}
}
static rtx
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
{
if (DEFAULT_ABI == ABI_ELFv2)
{
int i;
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
{
rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
cfa_restores);
}
}
else if (info->cr_save_p)
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (SImode, CR2_REGNO),
cfa_restores);
if (info->lr_save_p)
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (Pmode, LR_REGNO),
cfa_restores);
return cfa_restores;
}
/* Return true if OFFSET from the stack pointer can be clobbered by signals.
V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
below the stack pointer not clobbered by signals. */
static inline bool
offset_below_red_zone_p (HOST_WIDE_INT offset)
{
return offset < (DEFAULT_ABI == ABI_V4
? 0
: TARGET_32BIT ? -220 : -288);
}
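A standalone sketch of the 64-bit AIX/ELFv2 case for concreteness; illustrative only, and the 220-byte 32-bit cushion and V.4's zero cushion follow the same pattern:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the 64-bit AIX/ELFv2 case: the red zone is 288 bytes deep.  */
static bool
below_red_zone_64bit (long long offset)
{
  return offset < -288;
}

int
main (void)
{
  printf ("offset -300: %d\n", below_red_zone_64bit (-300));   /* 1 */
  printf ("offset -100: %d\n", below_red_zone_64bit (-100));   /* 0 */
  return 0;
}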
/* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
static void
emit_cfa_restores (rtx cfa_restores)
{
rtx_insn *insn = get_last_insn ();
rtx *loc = &REG_NOTES (insn);
while (*loc)
loc = &XEXP (*loc, 1);
*loc = cfa_restores;
RTX_FRAME_RELATED_P (insn) = 1;
}
/* -fsplit-stack support. */
/* A SYMBOL_REF for __morestack. */
static GTY(()) rtx morestack_ref;
static rtx
gen_add3_const (rtx rt, rtx ra, long c)
{
if (TARGET_64BIT)
return gen_adddi3 (rt, ra, GEN_INT (c));
else
return gen_addsi3 (rt, ra, GEN_INT (c));
}
/* Return the internal arg pointer used for function incoming
arguments. When -fsplit-stack, the arg pointer is r12 so we need
to copy it to a pseudo in order for it to be preserved over calls
......