Commit 6e21f3b5, authored and committed by Segher Boessenkool

rs6000: Separate shrink-wrapping for FPRs

This patch implements separate shrink-wrapping for the non-volatile
floating point registers (FPR14..FPR31).  It is pretty much exactly
analogous to the code for GPRs.


	* config/rs6000/rs6000.c (struct machine_function): Add field
	fpr_is_wrapped_separately.
	(rs6000_get_separate_components): Use 64 components.  Handle the
	new FPR components.
	(rs6000_components_for_bb): Handle the FPR components.
	(rs6000_emit_prologue_components): Handle the FPR components.
	(rs6000_emit_epilogue_components): Handle the FPR components.
	(rs6000_set_handled_components): Handle the FPR components.
	(rs6000_emit_prologue): Don't output prologue code for those FPRs
	that are already separately shrink-wrapped.
	(rs6000_emit_epilogue): Don't output epilogue code for those FPRs
	that are already separately shrink-wrapped.

From-SVN: r248256
parent afbc9ecb
2017-05-18 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.c (struct machine_function): Add field
fpr_is_wrapped_separately.
(rs6000_get_separate_components): Use 64 components. Handle the
new FPR components.
(rs6000_components_for_bb): Handle the FPR components.
(rs6000_emit_prologue_components): Handle the FPR components.
(rs6000_emit_epilogue_components): Handle the FPR components.
(rs6000_set_handled_components): Handle the FPR components.
(rs6000_emit_prologue): Don't output prologue code for those FPRs
that are already separately shrink-wrapped.
(rs6000_emit_epilogue): Don't output epilogue code for those FPRs
that are already separately shrink-wrapped.
2017-05-18 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/80510
......
...@@ -159,6 +159,7 @@ typedef struct GTY(()) machine_function ...@@ -159,6 +159,7 @@ typedef struct GTY(()) machine_function
/* The components already handled by separate shrink-wrapping, which should /* The components already handled by separate shrink-wrapping, which should
not be considered by the prologue and epilogue. */ not be considered by the prologue and epilogue. */
bool gpr_is_wrapped_separately[32]; bool gpr_is_wrapped_separately[32];
bool fpr_is_wrapped_separately[32];
bool lr_is_wrapped_separately; bool lr_is_wrapped_separately;
} machine_function; } machine_function;
...@@ -29221,17 +29222,25 @@ rs6000_get_separate_components (void) ...@@ -29221,17 +29222,25 @@ rs6000_get_separate_components (void)
if (TARGET_SPE_ABI) if (TARGET_SPE_ABI)
return NULL; return NULL;
sbitmap components = sbitmap_alloc (32);
bitmap_clear (components);
gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE) gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
&& !(info->savres_strategy & REST_MULTIPLE)); && !(info->savres_strategy & REST_MULTIPLE));
/* Component 0 is the save/restore of LR (done via GPR0).
Components 13..31 are the save/restore of GPR13..GPR31.
Components 46..63 are the save/restore of FPR14..FPR31. */
int n_components = 64;
sbitmap components = sbitmap_alloc (n_components);
bitmap_clear (components);
int reg_size = TARGET_32BIT ? 4 : 8;
int fp_reg_size = 8;
/* The GPRs we need saved to the frame. */ /* The GPRs we need saved to the frame. */
if ((info->savres_strategy & SAVE_INLINE_GPRS) if ((info->savres_strategy & SAVE_INLINE_GPRS)
&& (info->savres_strategy & REST_INLINE_GPRS)) && (info->savres_strategy & REST_INLINE_GPRS))
{ {
int reg_size = TARGET_32BIT ? 4 : 8;
int offset = info->gp_save_offset; int offset = info->gp_save_offset;
if (info->push_p) if (info->push_p)
offset += info->total_size; offset += info->total_size;
...@@ -29256,6 +29265,23 @@ rs6000_get_separate_components (void) ...@@ -29256,6 +29265,23 @@ rs6000_get_separate_components (void)
|| (flag_pic && DEFAULT_ABI == ABI_DARWIN)) || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM); bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
/* The FPRs we need saved to the frame. */
if ((info->savres_strategy & SAVE_INLINE_FPRS)
&& (info->savres_strategy & REST_INLINE_FPRS))
{
int offset = info->fp_save_offset;
if (info->push_p)
offset += info->total_size;
for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
{
if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
bitmap_set_bit (components, regno);
offset += fp_reg_size;
}
}
/* Optimize LR save and restore if we can. This is component 0. Any /* Optimize LR save and restore if we can. This is component 0. Any
out-of-line register save/restore routines need LR. */ out-of-line register save/restore routines need LR. */
if (info->lr_save_p if (info->lr_save_p
...@@ -29290,14 +29316,23 @@ rs6000_components_for_bb (basic_block bb) ...@@ -29290,14 +29316,23 @@ rs6000_components_for_bb (basic_block bb)
sbitmap components = sbitmap_alloc (32); sbitmap components = sbitmap_alloc (32);
bitmap_clear (components); bitmap_clear (components);
/* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
/* GPRs. */
for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++) for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
if (bitmap_bit_p (in, regno) if (bitmap_bit_p (in, regno)
|| bitmap_bit_p (gen, regno) || bitmap_bit_p (gen, regno)
|| bitmap_bit_p (kill, regno)) || bitmap_bit_p (kill, regno))
bitmap_set_bit (components, regno); bitmap_set_bit (components, regno);
/* LR needs to be saved around a bb if it is killed in that bb. */ /* FPRs. */
for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
if (bitmap_bit_p (in, regno)
|| bitmap_bit_p (gen, regno)
|| bitmap_bit_p (kill, regno))
bitmap_set_bit (components, regno);
/* The link register. */
if (bitmap_bit_p (in, LR_REGNO) if (bitmap_bit_p (in, LR_REGNO)
|| bitmap_bit_p (gen, LR_REGNO) || bitmap_bit_p (gen, LR_REGNO)
|| bitmap_bit_p (kill, LR_REGNO)) || bitmap_bit_p (kill, LR_REGNO))
...@@ -29331,13 +29366,18 @@ rs6000_emit_prologue_components (sbitmap components) ...@@ -29331,13 +29366,18 @@ rs6000_emit_prologue_components (sbitmap components)
rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
? HARD_FRAME_POINTER_REGNUM ? HARD_FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM); : STACK_POINTER_REGNUM);
machine_mode reg_mode = Pmode;
int reg_size = TARGET_32BIT ? 4 : 8; int reg_size = TARGET_32BIT ? 4 : 8;
machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
? DFmode : SFmode;
int fp_reg_size = 8;
/* Prologue for LR. */ /* Prologue for LR. */
if (bitmap_bit_p (components, 0)) if (bitmap_bit_p (components, 0))
{ {
rtx reg = gen_rtx_REG (Pmode, 0); rtx reg = gen_rtx_REG (reg_mode, 0);
rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO)); rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
RTX_FRAME_RELATED_P (insn) = 1; RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_CFA_REGISTER, NULL); add_reg_note (insn, REG_CFA_REGISTER, NULL);
...@@ -29347,7 +29387,7 @@ rs6000_emit_prologue_components (sbitmap components) ...@@ -29347,7 +29387,7 @@ rs6000_emit_prologue_components (sbitmap components)
insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
RTX_FRAME_RELATED_P (insn) = 1; RTX_FRAME_RELATED_P (insn) = 1;
rtx lr = gen_rtx_REG (Pmode, LR_REGNO); rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
rtx mem = copy_rtx (SET_DEST (single_set (insn))); rtx mem = copy_rtx (SET_DEST (single_set (insn)));
add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr)); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
} }
...@@ -29361,7 +29401,7 @@ rs6000_emit_prologue_components (sbitmap components) ...@@ -29361,7 +29401,7 @@ rs6000_emit_prologue_components (sbitmap components)
{ {
if (bitmap_bit_p (components, i)) if (bitmap_bit_p (components, i))
{ {
rtx reg = gen_rtx_REG (Pmode, i); rtx reg = gen_rtx_REG (reg_mode, i);
rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
RTX_FRAME_RELATED_P (insn) = 1; RTX_FRAME_RELATED_P (insn) = 1;
rtx set = copy_rtx (single_set (insn)); rtx set = copy_rtx (single_set (insn));
...@@ -29370,6 +29410,25 @@ rs6000_emit_prologue_components (sbitmap components) ...@@ -29370,6 +29410,25 @@ rs6000_emit_prologue_components (sbitmap components)
offset += reg_size; offset += reg_size;
} }
/* Prologue for the FPRs. */
offset = info->fp_save_offset;
if (info->push_p)
offset += info->total_size;
for (int i = info->first_fp_reg_save; i < 64; i++)
{
if (bitmap_bit_p (components, i))
{
rtx reg = gen_rtx_REG (fp_reg_mode, i);
rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
RTX_FRAME_RELATED_P (insn) = 1;
rtx set = copy_rtx (single_set (insn));
add_reg_note (insn, REG_CFA_OFFSET, set);
}
offset += fp_reg_size;
}
} }
/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
...@@ -29380,10 +29439,34 @@ rs6000_emit_epilogue_components (sbitmap components) ...@@ -29380,10 +29439,34 @@ rs6000_emit_epilogue_components (sbitmap components)
rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
? HARD_FRAME_POINTER_REGNUM ? HARD_FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM); : STACK_POINTER_REGNUM);
machine_mode reg_mode = Pmode;
int reg_size = TARGET_32BIT ? 4 : 8; int reg_size = TARGET_32BIT ? 4 : 8;
machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
? DFmode : SFmode;
int fp_reg_size = 8;
/* Epilogue for the FPRs. */
int offset = info->fp_save_offset;
if (info->push_p)
offset += info->total_size;
for (int i = info->first_fp_reg_save; i < 64; i++)
{
if (bitmap_bit_p (components, i))
{
rtx reg = gen_rtx_REG (fp_reg_mode, i);
rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_CFA_RESTORE, reg);
}
offset += fp_reg_size;
}
/* Epilogue for the GPRs. */ /* Epilogue for the GPRs. */
int offset = info->gp_save_offset; offset = info->gp_save_offset;
if (info->push_p) if (info->push_p)
offset += info->total_size; offset += info->total_size;
...@@ -29391,7 +29474,7 @@ rs6000_emit_epilogue_components (sbitmap components) ...@@ -29391,7 +29474,7 @@ rs6000_emit_epilogue_components (sbitmap components)
{ {
if (bitmap_bit_p (components, i)) if (bitmap_bit_p (components, i))
{ {
rtx reg = gen_rtx_REG (Pmode, i); rtx reg = gen_rtx_REG (reg_mode, i);
rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
RTX_FRAME_RELATED_P (insn) = 1; RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_CFA_RESTORE, reg); add_reg_note (insn, REG_CFA_RESTORE, reg);
...@@ -29407,7 +29490,7 @@ rs6000_emit_epilogue_components (sbitmap components) ...@@ -29407,7 +29490,7 @@ rs6000_emit_epilogue_components (sbitmap components)
if (info->push_p) if (info->push_p)
offset += info->total_size; offset += info->total_size;
rtx reg = gen_rtx_REG (Pmode, 0); rtx reg = gen_rtx_REG (reg_mode, 0);
rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
rtx lr = gen_rtx_REG (Pmode, LR_REGNO); rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
...@@ -29427,6 +29510,10 @@ rs6000_set_handled_components (sbitmap components) ...@@ -29427,6 +29510,10 @@ rs6000_set_handled_components (sbitmap components)
if (bitmap_bit_p (components, i)) if (bitmap_bit_p (components, i))
cfun->machine->gpr_is_wrapped_separately[i] = true; cfun->machine->gpr_is_wrapped_separately[i] = true;
for (int i = info->first_fp_reg_save; i < 64; i++)
if (bitmap_bit_p (components, i))
cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
if (bitmap_bit_p (components, 0)) if (bitmap_bit_p (components, 0))
cfun->machine->lr_is_wrapped_separately = true; cfun->machine->lr_is_wrapped_separately = true;
} }
...@@ -29439,6 +29526,9 @@ rs6000_emit_prologue (void) ...@@ -29439,6 +29526,9 @@ rs6000_emit_prologue (void)
rs6000_stack_t *info = rs6000_stack_info (); rs6000_stack_t *info = rs6000_stack_info ();
machine_mode reg_mode = Pmode; machine_mode reg_mode = Pmode;
int reg_size = TARGET_32BIT ? 4 : 8; int reg_size = TARGET_32BIT ? 4 : 8;
machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
? DFmode : SFmode;
int fp_reg_size = 8;
rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
rtx frame_reg_rtx = sp_reg_rtx; rtx frame_reg_rtx = sp_reg_rtx;
unsigned int cr_save_regno; unsigned int cr_save_regno;
...@@ -29736,15 +29826,16 @@ rs6000_emit_prologue (void) ...@@ -29736,15 +29826,16 @@ rs6000_emit_prologue (void)
it ourselves. Otherwise, call function. */ it ourselves. Otherwise, call function. */
if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS)) if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
{ {
int i; int offset = info->fp_save_offset + frame_off;
for (i = 0; i < 64 - info->first_fp_reg_save; i++) for (int i = info->first_fp_reg_save; i < 64; i++)
if (save_reg_p (info->first_fp_reg_save + i)) {
emit_frame_save (frame_reg_rtx, if (save_reg_p (i)
(TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT && !cfun->machine->fpr_is_wrapped_separately[i - 32])
? DFmode : SFmode), emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
info->first_fp_reg_save + i,
info->fp_save_offset + frame_off + 8 * i,
sp_off - frame_off); sp_off - frame_off);
offset += fp_reg_size;
}
} }
else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64) else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
{ {
...@@ -30818,6 +30909,9 @@ rs6000_emit_epilogue (int sibcall) ...@@ -30818,6 +30909,9 @@ rs6000_emit_epilogue (int sibcall)
rtx cr_save_reg = NULL_RTX; rtx cr_save_reg = NULL_RTX;
machine_mode reg_mode = Pmode; machine_mode reg_mode = Pmode;
int reg_size = TARGET_32BIT ? 4 : 8; int reg_size = TARGET_32BIT ? 4 : 8;
machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
? DFmode : SFmode;
int fp_reg_size = 8;
int i; int i;
bool exit_func; bool exit_func;
unsigned ptr_regno; unsigned ptr_regno;
...@@ -31564,16 +31658,22 @@ rs6000_emit_epilogue (int sibcall) ...@@ -31564,16 +31658,22 @@ rs6000_emit_epilogue (int sibcall)
/* Restore fpr's if we need to do it without calling a function. */ /* Restore fpr's if we need to do it without calling a function. */
if (restoring_FPRs_inline) if (restoring_FPRs_inline)
for (i = 0; i < 64 - info->first_fp_reg_save; i++)
if (save_reg_p (info->first_fp_reg_save + i))
{ {
rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT int offset = info->fp_save_offset + frame_off;
? DFmode : SFmode), for (i = info->first_fp_reg_save; i < 64; i++)
info->first_fp_reg_save + i); {
emit_insn (gen_frame_load (reg, frame_reg_rtx, if (save_reg_p (i)
info->fp_save_offset + frame_off + 8 * i)); && !cfun->machine->fpr_is_wrapped_separately[i - 32])
{
rtx reg = gen_rtx_REG (fp_reg_mode, i);
emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
cfa_restores);
}
offset += fp_reg_size;
}
} }
/* If we saved cr, restore it here. Just those that were used. */ /* If we saved cr, restore it here. Just those that were used. */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment