Commit 37a185d7 by Richard Henderson Committed by Richard Henderson

mn10300: Emit retf instruction

Now that we properly track the life of MDR, we can emit
the RETF instruction if MDR has not been modified.  This
insn is 3-4 cycles faster since the return address is
already loaded.

From-SVN: r169013
parent 040c5757
2011-01-19 Richard Henderson <rth@redhat.com> 2011-01-19 Richard Henderson <rth@redhat.com>
* config/mn10300/mn10300.c (mn10300_can_use_retf_insn): New.
(mn10300_can_use_rets_insn): Rename from mn10300_can_use_return_insn.
(mn10300_expand_epilogue): Use it. Compute REG_SAVE_BYTES once.
* config/mn10300/mn10300-protos.h: Update.
* config/mn10300/mn10300.md (return): Use mn10300_can_use_retf_insn.
(return_ret): Likewise. Rename from return_internal_regs.
(return_internal): Remove.
* config/mn10300/mn10300.c (mn10300_unspec_int_label_counter): Remove. * config/mn10300/mn10300.c (mn10300_unspec_int_label_counter): Remove.
(mn10300_asm_output_addr_const_extra): Don't handle UNSPEC_INT_LABEL. (mn10300_asm_output_addr_const_extra): Don't handle UNSPEC_INT_LABEL.
(mn10300_legitimate_constant_p): Likewise. (mn10300_legitimate_constant_p): Likewise.
......
...@@ -45,7 +45,8 @@ extern bool mn10300_match_ccmode (rtx, Mmode); ...@@ -45,7 +45,8 @@ extern bool mn10300_match_ccmode (rtx, Mmode);
#endif /* RTX_CODE */ #endif /* RTX_CODE */
extern bool mn10300_regno_in_class_p (unsigned, int, bool); extern bool mn10300_regno_in_class_p (unsigned, int, bool);
extern int mn10300_can_use_return_insn (void); extern bool mn10300_can_use_rets_insn (void);
extern bool mn10300_can_use_retf_insn (void);
extern void mn10300_expand_prologue (void); extern void mn10300_expand_prologue (void);
extern void mn10300_expand_epilogue (void); extern void mn10300_expand_epilogue (void);
extern int mn10300_initial_offset (int, int); extern int mn10300_initial_offset (int, int);
......
...@@ -623,8 +623,33 @@ mn10300_print_reg_list (FILE *file, int mask) ...@@ -623,8 +623,33 @@ mn10300_print_reg_list (FILE *file, int mask)
fputc (']', file); fputc (']', file);
} }
int /* If the MDR register is never clobbered, we can use the RETF instruction
mn10300_can_use_return_insn (void) which takes the address from the MDR register. This is 3 cycles faster
than having to load the address from the stack. */
bool
mn10300_can_use_retf_insn (void)
{
  /* RETF is ruled out as soon as one of these holds, tested in order:
       - we are not optimizing, in which case we won't have proper
         access to df_regs_ever_live_p;
       - the function calls eh_return, which alters the saved return
         address so that MDR is not current;
       - MDR is clobbered somewhere in the function.  */
  if (!optimize
      || crtl->calls_eh_return
      || df_regs_ever_live_p (MDR_REG))
    return false;

  /* ??? This must not be reached while a sequence is being built,
     e.g. during expand_epilogue.  */
  gcc_assert (!in_sequence_p ());

  /* With the checks above passed, the answer is whether this is a
     leaf function.  */
  return leaf_function_p ();
}
bool
mn10300_can_use_rets_insn (void)
{ {
return !mn10300_initial_offset (ARG_POINTER_REGNUM, STACK_POINTER_REGNUM); return !mn10300_initial_offset (ARG_POINTER_REGNUM, STACK_POINTER_REGNUM);
} }
...@@ -995,6 +1020,7 @@ void ...@@ -995,6 +1020,7 @@ void
mn10300_expand_epilogue (void) mn10300_expand_epilogue (void)
{ {
HOST_WIDE_INT size = mn10300_frame_size (); HOST_WIDE_INT size = mn10300_frame_size ();
int reg_save_bytes = REG_SAVE_BYTES;
if (TARGET_AM33_2 && fp_regs_to_save ()) if (TARGET_AM33_2 && fp_regs_to_save ())
{ {
...@@ -1026,14 +1052,14 @@ mn10300_expand_epilogue (void) ...@@ -1026,14 +1052,14 @@ mn10300_expand_epilogue (void)
this_strategy_size = SIZE_FMOV_SP (size, num_regs_to_save); this_strategy_size = SIZE_FMOV_SP (size, num_regs_to_save);
/* If size is too large, we'll have to adjust SP with an /* If size is too large, we'll have to adjust SP with an
add. */ add. */
if (size + 4 * num_regs_to_save + REG_SAVE_BYTES > 255) if (size + 4 * num_regs_to_save + reg_save_bytes > 255)
{ {
/* Insn: add size + 4 * num_regs_to_save, sp. */ /* Insn: add size + 4 * num_regs_to_save, sp. */
this_strategy_size += SIZE_ADD_SP (size + 4 * num_regs_to_save); this_strategy_size += SIZE_ADD_SP (size + 4 * num_regs_to_save);
} }
/* If we don't have to restore any non-FP registers, /* If we don't have to restore any non-FP registers,
we'll be able to save one byte by using rets. */ we'll be able to save one byte by using rets. */
if (! REG_SAVE_BYTES) if (! reg_save_bytes)
this_strategy_size--; this_strategy_size--;
if (this_strategy_size < strategy_size) if (this_strategy_size < strategy_size)
...@@ -1060,14 +1086,14 @@ mn10300_expand_epilogue (void) ...@@ -1060,14 +1086,14 @@ mn10300_expand_epilogue (void)
When size is close to 32Kb, we may be able to adjust SP When size is close to 32Kb, we may be able to adjust SP
with an imm16 add instruction while still using fmov with an imm16 add instruction while still using fmov
(d8,sp). */ (d8,sp). */
if (size + 4 * num_regs_to_save + REG_SAVE_BYTES > 255) if (size + 4 * num_regs_to_save + reg_save_bytes > 255)
{ {
/* Insn: add size + 4 * num_regs_to_save /* Insn: add size + 4 * num_regs_to_save
+ REG_SAVE_BYTES - 252,sp. */ + reg_save_bytes - 252,sp. */
this_strategy_size = SIZE_ADD_SP (size + 4 * num_regs_to_save this_strategy_size = SIZE_ADD_SP (size + 4 * num_regs_to_save
+ REG_SAVE_BYTES - 252); + reg_save_bytes - 252);
/* Insn: fmov (##,sp),fs#, for each fs# to be restored. */ /* Insn: fmov (##,sp),fs#, for each fs# to be restored. */
this_strategy_size += SIZE_FMOV_SP (252 - REG_SAVE_BYTES this_strategy_size += SIZE_FMOV_SP (252 - reg_save_bytes
- 4 * num_regs_to_save, - 4 * num_regs_to_save,
num_regs_to_save); num_regs_to_save);
/* We're going to use ret to release the FP registers /* We're going to use ret to release the FP registers
...@@ -1096,14 +1122,14 @@ mn10300_expand_epilogue (void) ...@@ -1096,14 +1122,14 @@ mn10300_expand_epilogue (void)
this_strategy_size += 3 * num_regs_to_save; this_strategy_size += 3 * num_regs_to_save;
/* If size is large enough, we may be able to save a /* If size is large enough, we may be able to save a
couple of bytes. */ couple of bytes. */
if (size + 4 * num_regs_to_save + REG_SAVE_BYTES > 255) if (size + 4 * num_regs_to_save + reg_save_bytes > 255)
{ {
/* Insn: mov a1,sp. */ /* Insn: mov a1,sp. */
this_strategy_size += 2; this_strategy_size += 2;
} }
/* If we don't have to restore any non-FP registers, /* If we don't have to restore any non-FP registers,
we'll be able to save one byte by using rets. */ we'll be able to save one byte by using rets. */
if (! REG_SAVE_BYTES) if (! reg_save_bytes)
this_strategy_size--; this_strategy_size--;
if (this_strategy_size < strategy_size) if (this_strategy_size < strategy_size)
...@@ -1129,8 +1155,8 @@ mn10300_expand_epilogue (void) ...@@ -1129,8 +1155,8 @@ mn10300_expand_epilogue (void)
emit_insn (gen_addsi3 (stack_pointer_rtx, emit_insn (gen_addsi3 (stack_pointer_rtx,
stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (size + 4 * num_regs_to_save GEN_INT (size + 4 * num_regs_to_save
+ REG_SAVE_BYTES - 252))); + reg_save_bytes - 252)));
size = 252 - REG_SAVE_BYTES - 4 * num_regs_to_save; size = 252 - reg_save_bytes - 4 * num_regs_to_save;
break; break;
case restore_a1: case restore_a1:
...@@ -1176,7 +1202,7 @@ mn10300_expand_epilogue (void) ...@@ -1176,7 +1202,7 @@ mn10300_expand_epilogue (void)
/* If we were using the restore_a1 strategy and the number of /* If we were using the restore_a1 strategy and the number of
bytes to be released won't fit in the `ret' byte, copy `a1' bytes to be released won't fit in the `ret' byte, copy `a1'
to `sp', to avoid having to use `add' to adjust it. */ to `sp', to avoid having to use `add' to adjust it. */
if (! frame_pointer_needed && reg && size + REG_SAVE_BYTES > 255) if (! frame_pointer_needed && reg && size + reg_save_bytes > 255)
{ {
emit_move_insn (stack_pointer_rtx, XEXP (reg, 0)); emit_move_insn (stack_pointer_rtx, XEXP (reg, 0));
size = 0; size = 0;
...@@ -1203,7 +1229,7 @@ mn10300_expand_epilogue (void) ...@@ -1203,7 +1229,7 @@ mn10300_expand_epilogue (void)
emit_move_insn (stack_pointer_rtx, frame_pointer_rtx); emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
size = 0; size = 0;
} }
else if (size + REG_SAVE_BYTES > 255) else if (size + reg_save_bytes > 255)
{ {
emit_insn (gen_addsi3 (stack_pointer_rtx, emit_insn (gen_addsi3 (stack_pointer_rtx,
stack_pointer_rtx, stack_pointer_rtx,
...@@ -1212,15 +1238,10 @@ mn10300_expand_epilogue (void) ...@@ -1212,15 +1238,10 @@ mn10300_expand_epilogue (void)
} }
/* Adjust the stack and restore callee-saved registers, if any. */ /* Adjust the stack and restore callee-saved registers, if any. */
if (size || df_regs_ever_live_p (2) || df_regs_ever_live_p (3) if (mn10300_can_use_rets_insn ())
|| df_regs_ever_live_p (6) || df_regs_ever_live_p (7) emit_jump_insn (gen_rtx_RETURN (VOIDmode));
|| df_regs_ever_live_p (14) || df_regs_ever_live_p (15)
|| df_regs_ever_live_p (16) || df_regs_ever_live_p (17)
|| frame_pointer_needed)
emit_jump_insn (gen_return_internal_regs
(GEN_INT (size + REG_SAVE_BYTES)));
else else
emit_jump_insn (gen_return_internal ()); emit_jump_insn (gen_return_ret (GEN_INT (size + REG_SAVE_BYTES)));
} }
/* Recognize the PARALLEL rtx generated by mn10300_gen_multiple_store(). /* Recognize the PARALLEL rtx generated by mn10300_gen_multiple_store().
......
...@@ -1607,31 +1607,28 @@ ...@@ -1607,31 +1607,28 @@
{ mn10300_expand_epilogue (); DONE; } { mn10300_expand_epilogue (); DONE; }
) )
(define_insn "return_internal" (define_insn "return"
[(const_int 2) [(return)]
(return)] "mn10300_can_use_rets_insn ()"
"" {
"rets" /* The RETF insn is 4 cycles faster than RETS, though 1 byte larger. */
[(set_attr "timings" "66")] if (optimize_insn_for_speed_p () && mn10300_can_use_retf_insn ())
) return "retf [],0";
else
return "rets";
})
;; This insn restores the callee saved registers and does a return, it (define_insn "return_ret"
;; can also deallocate stack space. [(return)
(define_insn "return_internal_regs" (use (match_operand:SI 0 "const_int_operand" ""))]
[(const_int 0)
(match_operand:SI 0 "const_int_operand" "i")
(return)]
"" ""
{ {
fputs ("\tret ", asm_out_file); /* The RETF insn is up to 3 cycles faster than RET. */
fputs ((mn10300_can_use_retf_insn () ? "\tretf " : "\tret "), asm_out_file);
mn10300_print_reg_list (asm_out_file, mn10300_get_live_callee_saved_regs ()); mn10300_print_reg_list (asm_out_file, mn10300_get_live_callee_saved_regs ());
fprintf (asm_out_file, ",%d\n", (int) INTVAL (operands[0])); fprintf (asm_out_file, ",%d\n", (int) INTVAL (operands[0]));
return ""; return "";
} })
;; Assumes that there will be no more than 8 regs to pop
[(set (attr "timings") (if_then_else (eq_attr "cpu" "am34")
(const_int 1414) (const_int 1313)))]
)
;; This instruction matches one generated by mn10300_gen_multiple_store() ;; This instruction matches one generated by mn10300_gen_multiple_store()
(define_insn "store_movm" (define_insn "store_movm"
...@@ -1651,13 +1648,6 @@ ...@@ -1651,13 +1648,6 @@
(const_int 99) (const_int 88)))] (const_int 99) (const_int 88)))]
) )
(define_insn "return"
[(return)]
"mn10300_can_use_return_insn ()"
"rets"
[(set_attr "timings" "66")]
)
(define_expand "load_pic" (define_expand "load_pic"
[(const_int 0)] [(const_int 0)]
"flag_pic" "flag_pic"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment