Commit 1c71e60e by Jan Hubicka Committed by Richard Henderson

i386.c (SAVED_REGS_FIRST): Remove.

        * i386.c (SAVED_REGS_FIRST): Remove.
        (ix86_initial_elimination_offset): Handle only SAVED_REGS_FIRST mode.
        (ix86_compute_frame_size): Likewise.
        (ix86_expand_prologue): Likewise.  Use pro_epilogue_adjust_stack.
        (ix86_emit_restore_regs): Remove.
        (ix86_emit_epilogue_esp_adjustment): Use pro_epilogue_adjust_stack
        when a frame pointer is in use.
        (ix86_expand_epilogue): Handle only SAVED_REGS_FIRST mode.  Use mov
        instead of pop to restore a register when profitable; emit leave
        when profitable.
        (ix86_attr_length_default): Handle pro_epilogue_adjust_stack
        as a TYPE_LEA insn.
        (ix86_adjust_cost): Handle pro_epilogue_adjust_stack as TYPE_ALU.
        * i386.md (prologue_allocate_stack): Remove.
        (epilogue_deallocate_stack): Remove.
        (pro_epilogue_adjust_stack): New.

Co-Authored-By: Richard Henderson <rth@cygnus.com>

From-SVN: r31792
parent c13fde05
2000-02-04 Jan Hubicka <jh@suse.cz>
Richard Henderson <rth@cygnus.com>
* i386.c (SAVED_REGS_FIRST): Remove.
(ix86_initial_elimination_offset): Handle only SAVED_REGS_FIRST mode.
(ix86_compute_frame_size): Likewise.
(ix86_expand_prologue): Likewise. Use pro_epilogue_adjust_stack.
(ix86_emit_restore_regs): Remove.
(ix86_emit_epilogue_esp_adjustment): Use pro_epilogue_adjust_stack
when a frame pointer is in use.
(ix86_expand_epilogue): Handle only SAVED_REGS_FIRST mode. Use mov
instead of pop to restore a register when profitable; emit leave
when profitable.
(ix86_attr_length_default): Handle pro_epilogue_adjust_stack
as a TYPE_LEA insn.
(ix86_adjust_cost): Handle pro_epilogue_adjust_stack as TYPE_ALU.
* i386.md (prologue_allocate_stack): Remove.
(epilogue_deallocate_stack): Remove.
(pro_epilogue_adjust_stack): New.
2000-02-04 Richard Henderson <rth@cygnus.com> 2000-02-04 Richard Henderson <rth@cygnus.com>
* function.c (diddle_return_value): Rework to use a callback function. * function.c (diddle_return_value): Rework to use a callback function.
......
...@@ -41,16 +41,6 @@ Boston, MA 02111-1307, USA. */ ...@@ -41,16 +41,6 @@ Boston, MA 02111-1307, USA. */
#include "basic-block.h" #include "basic-block.h"
#include "ggc.h" #include "ggc.h"
/* True when we want to do pushes before allocating stack to get better
scheduling.
Saving registers first is win in the most cases except for LEAVE
instruction. Macro is 0 iff we will use LEAVE. */
#define SAVED_REGS_FIRST \
(!frame_pointer_needed || (!TARGET_USE_LEAVE && !optimize_size))
#ifdef EXTRA_CONSTRAINT #ifdef EXTRA_CONSTRAINT
/* If EXTRA_CONSTRAINT is defined, then the 'S' /* If EXTRA_CONSTRAINT is defined, then the 'S'
constraint in REG_CLASS_FROM_LETTER will no longer work, and various constraint in REG_CLASS_FROM_LETTER will no longer work, and various
...@@ -411,7 +401,6 @@ static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT, ...@@ -411,7 +401,6 @@ static HOST_WIDE_INT ix86_compute_frame_size PARAMS((HOST_WIDE_INT,
int *, int *, int *)); int *, int *, int *));
static int ix86_nsaved_regs PARAMS((void)); static int ix86_nsaved_regs PARAMS((void));
static void ix86_emit_save_regs PARAMS((void)); static void ix86_emit_save_regs PARAMS((void));
static void ix86_emit_restore_regs PARAMS((void));
static void ix86_emit_epilogue_esp_adjustment PARAMS((int)); static void ix86_emit_epilogue_esp_adjustment PARAMS((int));
struct ix86_address struct ix86_address
...@@ -1705,16 +1694,13 @@ ix86_initial_elimination_offset (from, to) ...@@ -1705,16 +1694,13 @@ ix86_initial_elimination_offset (from, to)
saved frame pointer if frame_pointer_needed saved frame pointer if frame_pointer_needed
<- HARD_FRAME_POINTER <- HARD_FRAME_POINTER
[saved regs if SAVED_REGS_FIRST] [saved regs]
[padding1] \ [padding1] \
| <- FRAME_POINTER | <- FRAME_POINTER
[frame] > tsize [frame] > tsize
| |
[padding2] / [padding2] /
[saved regs if !SAVED_REGS_FIRST]
<- STACK_POINTER
*/ */
if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
...@@ -1725,7 +1711,6 @@ ix86_initial_elimination_offset (from, to) ...@@ -1725,7 +1711,6 @@ ix86_initial_elimination_offset (from, to)
&& to == HARD_FRAME_POINTER_REGNUM) && to == HARD_FRAME_POINTER_REGNUM)
{ {
ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0); ix86_compute_frame_size (get_frame_size (), &nregs, &padding1, (int *)0);
if (SAVED_REGS_FIRST)
padding1 += nregs * UNITS_PER_WORD; padding1 += nregs * UNITS_PER_WORD;
return -padding1; return -padding1;
} }
...@@ -1743,10 +1728,8 @@ ix86_initial_elimination_offset (from, to) ...@@ -1743,10 +1728,8 @@ ix86_initial_elimination_offset (from, to)
return tsize + nregs * UNITS_PER_WORD + frame_size; return tsize + nregs * UNITS_PER_WORD + frame_size;
else if (from != FRAME_POINTER_REGNUM) else if (from != FRAME_POINTER_REGNUM)
abort (); abort ();
else if (SAVED_REGS_FIRST)
return tsize - padding1;
else else
return tsize + nregs * UNITS_PER_WORD - padding1; return tsize - padding1;
} }
} }
...@@ -1788,13 +1771,10 @@ ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2) ...@@ -1788,13 +1771,10 @@ ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
if (stack_alignment_needed < 4) if (stack_alignment_needed < 4)
stack_alignment_needed = 4; stack_alignment_needed = 4;
if (stack_alignment_needed > preferred_alignment) if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
abort (); abort ();
if (SAVED_REGS_FIRST)
offset += nregs * UNITS_PER_WORD; offset += nregs * UNITS_PER_WORD;
else
total_size += nregs * UNITS_PER_WORD;
total_size += offset; total_size += offset;
...@@ -1807,7 +1787,6 @@ ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2) ...@@ -1807,7 +1787,6 @@ ix86_compute_frame_size (size, nregs_on_stack, rpadding1, rpadding2)
} }
/* Align stack boundary. */ /* Align stack boundary. */
if (!current_function_is_leaf)
padding2 = ((total_size + preferred_alignment - 1) padding2 = ((total_size + preferred_alignment - 1)
& -preferred_alignment) - total_size; & -preferred_alignment) - total_size;
} }
...@@ -1868,7 +1847,6 @@ ix86_expand_prologue () ...@@ -1868,7 +1847,6 @@ ix86_expand_prologue ()
RTX_FRAME_RELATED_P (insn) = 1; RTX_FRAME_RELATED_P (insn) = 1;
} }
if (SAVED_REGS_FIRST)
ix86_emit_save_regs (); ix86_emit_save_regs ();
if (tsize == 0) if (tsize == 0)
...@@ -1876,10 +1854,9 @@ ix86_expand_prologue () ...@@ -1876,10 +1854,9 @@ ix86_expand_prologue ()
else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT) else if (! TARGET_STACK_PROBE || tsize < CHECK_STACK_LIMIT)
{ {
if (frame_pointer_needed) if (frame_pointer_needed)
insn = emit_insn (gen_prologue_allocate_stack (stack_pointer_rtx, insn = emit_insn (gen_pro_epilogue_adjust_stack
stack_pointer_rtx, (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-tsize), GEN_INT (-tsize), hard_frame_pointer_rtx));
hard_frame_pointer_rtx));
else else
insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (-tsize))); GEN_INT (-tsize)));
...@@ -1903,9 +1880,6 @@ ix86_expand_prologue () ...@@ -1903,9 +1880,6 @@ ix86_expand_prologue ()
CALL_INSN_FUNCTION_USAGE (insn)); CALL_INSN_FUNCTION_USAGE (insn));
} }
if (!SAVED_REGS_FIRST)
ix86_emit_save_regs ();
#ifdef SUBTARGET_PROLOGUE #ifdef SUBTARGET_PROLOGUE
SUBTARGET_PROLOGUE; SUBTARGET_PROLOGUE;
#endif #endif
...@@ -1920,25 +1894,6 @@ ix86_expand_prologue () ...@@ -1920,25 +1894,6 @@ ix86_expand_prologue ()
emit_insn (gen_blockage ()); emit_insn (gen_blockage ());
} }
/* Emit code to pop all registers from stack. */
static void
ix86_emit_restore_regs ()
{
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool);
int limit = (frame_pointer_needed
? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
int regno;
for (regno = 0; regno < limit; regno++)
if ((regs_ever_live[regno] && !call_used_regs[regno])
|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
{
emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
}
}
/* Emit code to add TSIZE to esp value. Use POP instruction when /* Emit code to add TSIZE to esp value. Use POP instruction when
profitable. */ profitable. */
...@@ -1980,7 +1935,14 @@ ix86_emit_epilogue_esp_adjustment (tsize) ...@@ -1980,7 +1935,14 @@ ix86_emit_epilogue_esp_adjustment (tsize)
} }
else else
{ {
/* If there is no frame pointer, we must still release the frame. */ /* If a frame pointer is present, we must be sure to tie the sp
to the fp so that we don't mis-schedule. */
if (frame_pointer_needed)
emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
stack_pointer_rtx,
GEN_INT (tsize),
hard_frame_pointer_rtx));
else
emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
GEN_INT (tsize))); GEN_INT (tsize)));
} }
...@@ -1991,60 +1953,36 @@ ix86_emit_epilogue_esp_adjustment (tsize) ...@@ -1991,60 +1953,36 @@ ix86_emit_epilogue_esp_adjustment (tsize)
void void
ix86_expand_epilogue () ix86_expand_epilogue ()
{ {
int regno;
int nregs; int nregs;
int limit; int regno;
int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table int pic_reg_used = flag_pic && (current_function_uses_pic_offset_table
|| current_function_uses_const_pool); || current_function_uses_const_pool);
int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging; int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
HOST_WIDE_INT offset; HOST_WIDE_INT offset;
HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs, (int *)0, HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs,
(int *)0); (int *)0, (int *)0);
/* SP is often unreliable so we may have to go off the frame pointer. */
offset = -(tsize + nregs * UNITS_PER_WORD); /* Calculate start of saved registers relative to ebp. */
offset = -nregs * UNITS_PER_WORD;
if (SAVED_REGS_FIRST) #ifdef FUNCTION_BLOCK_PROFILER_EXIT
{ if (profile_block_flag == 2)
if (!sp_valid)
{ {
if (nregs) FUNCTION_BLOCK_PROFILER_EXIT;
emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
GEN_INT (- nregs * UNITS_PER_WORD))));
else
emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx,
hard_frame_pointer_rtx));
}
else if (tsize)
ix86_emit_epilogue_esp_adjustment (tsize);
ix86_emit_restore_regs ();
} }
#endif
/* If we're only restoring one register and sp is not valid then /* If we're only restoring one register and sp is not valid then
using a move instruction to restore the register since it's using a move instruction to restore the register since it's
less work than reloading sp and popping the register. Otherwise, less work than reloading sp and popping the register. */
restore sp (if necessary) and pop the registers. */ if (!sp_valid && nregs <= 1)
else if (nregs > 1 || sp_valid)
{ {
if (!sp_valid) if (!frame_pointer_needed)
{ abort();
rtx addr_offset;
addr_offset = adj_offsettable_operand (AT_BP (QImode), offset);
addr_offset = XEXP (addr_offset, 0);
emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, addr_offset)); for (regno = 0; regno < HARD_FRAME_POINTER_REGNUM; regno++)
}
ix86_emit_restore_regs ();
}
else
{
limit = (frame_pointer_needed
? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
for (regno = 0; regno < limit; regno++)
if ((regs_ever_live[regno] && ! call_used_regs[regno]) if ((regs_ever_live[regno] && ! call_used_regs[regno])
|| (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used)) || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
{ {
...@@ -2052,30 +1990,40 @@ ix86_expand_epilogue () ...@@ -2052,30 +1990,40 @@ ix86_expand_epilogue ()
adj_offsettable_operand (AT_BP (Pmode), offset)); adj_offsettable_operand (AT_BP (Pmode), offset));
offset += 4; offset += 4;
} }
}
if (frame_pointer_needed)
{
/* If not an i386, mov & pop is faster than "leave". */ /* If not an i386, mov & pop is faster than "leave". */
if (TARGET_USE_LEAVE || optimize_size) if (TARGET_USE_LEAVE || optimize_size)
emit_insn (gen_leave ()); emit_insn (gen_leave ());
else else
{ {
if (!SAVED_REGS_FIRST) emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
emit_insn (gen_epilogue_deallocate_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
const0_rtx,
hard_frame_pointer_rtx)); hard_frame_pointer_rtx));
emit_insn (gen_popsi1 (hard_frame_pointer_rtx)); emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
} }
} }
else if (!SAVED_REGS_FIRST && tsize) else
{
/* First step is to deallocate the stack frame so that we can
pop the registers. */
if (!sp_valid)
{
if (!frame_pointer_needed)
abort ();
emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
hard_frame_pointer_rtx,
GEN_INT (offset),
hard_frame_pointer_rtx));
}
else if (tsize)
ix86_emit_epilogue_esp_adjustment (tsize); ix86_emit_epilogue_esp_adjustment (tsize);
#ifdef FUNCTION_BLOCK_PROFILER_EXIT for (regno = 0; regno < STACK_POINTER_REGNUM; regno++)
if (profile_block_flag == 2) if ((regs_ever_live[regno] && !call_used_regs[regno])
{ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
FUNCTION_BLOCK_PROFILER_EXIT; emit_insn (gen_popsi1 (gen_rtx_REG (SImode, regno)));
} }
#endif
if (current_function_pops_args && current_function_args_size) if (current_function_pops_args && current_function_args_size)
{ {
...@@ -5853,14 +5801,14 @@ ix86_attr_length_default (insn) ...@@ -5853,14 +5801,14 @@ ix86_attr_length_default (insn)
{ {
/* Irritatingly, single_set doesn't work with REG_UNUSED present, /* Irritatingly, single_set doesn't work with REG_UNUSED present,
as we'll get from running life_analysis during reg-stack when as we'll get from running life_analysis during reg-stack when
not optimizing. */ not optimizing. Not that it matters anyway, now that
pro_epilogue_adjust_stack uses lea, and is by design not
single_set. */
rtx set = PATTERN (insn); rtx set = PATTERN (insn);
if (GET_CODE (set) == SET) if (GET_CODE (set) == SET)
; ;
else if (GET_CODE (set) == PARALLEL else if (GET_CODE (set) == PARALLEL
&& XVECLEN (set, 0) == 2 && GET_CODE (XVECEXP (set, 0, 0)) == SET)
&& GET_CODE (XVECEXP (set, 0, 0)) == SET
&& GET_CODE (XVECEXP (set, 0, 1)) == CLOBBER)
set = XVECEXP (set, 0, 0); set = XVECEXP (set, 0, 0);
else else
abort (); abort ();
...@@ -6011,18 +5959,17 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6011,18 +5959,17 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
return cost; return cost;
/* Prologue and epilogue allocators have false dependency on ebp. insn_type = get_attr_type (insn);
This results in one cycle extra stall on Pentium prologue scheduling, so dep_insn_type = get_attr_type (dep_insn);
handle this important case manually. */
if ((dep_insn_code_number == CODE_FOR_prologue_allocate_stack /* Prologue and epilogue allocators can have a false dependency on ebp.
|| dep_insn_code_number == CODE_FOR_epilogue_deallocate_stack) This results in one cycle extra stall on Pentium prologue scheduling,
so handle this important case manually. */
if (dep_insn_code_number == CODE_FOR_pro_epilogue_adjust_stack
&& dep_insn_type == TYPE_ALU
&& !reg_mentioned_p (stack_pointer_rtx, insn)) && !reg_mentioned_p (stack_pointer_rtx, insn))
return 0; return 0;
insn_type = get_attr_type (insn);
dep_insn_type = get_attr_type (dep_insn);
switch (ix86_cpu) switch (ix86_cpu)
{ {
case PROCESSOR_PENTIUM: case PROCESSOR_PENTIUM:
......
...@@ -8807,16 +8807,22 @@ ...@@ -8807,16 +8807,22 @@
;; ;;
;; in proper program order. ;; in proper program order.
(define_insn "prologue_allocate_stack" (define_insn "pro_epilogue_adjust_stack"
[(set (match_operand:SI 0 "register_operand" "=r") [(set (match_operand:SI 0 "register_operand" "=r,r")
(plus:SI (match_operand:SI 1 "register_operand" "0") (plus:SI (match_operand:SI 1 "register_operand" "0,r")
(match_operand:SI 2 "nonmemory_operand" "ri"))) (match_operand:SI 2 "immediate_operand" "i,i")))
(set (match_operand:SI 3 "register_operand" "=r") (set (match_operand:SI 3 "register_operand" "+r,r")
(match_dup 3)) (match_dup 3))
(clobber (reg:CC 17))] (clobber (reg:CC 17))]
"" ""
"* "*
{ {
switch (get_attr_type (insn))
{
case TYPE_IMOV:
return \"mov{l}\\t{%1, %0|%0, %1}\";
case TYPE_ALU:
if (GET_CODE (operands[2]) == CONST_INT if (GET_CODE (operands[2]) == CONST_INT
&& (INTVAL (operands[2]) == 128 && (INTVAL (operands[2]) == 128
|| (INTVAL (operands[2]) < 0 || (INTVAL (operands[2]) < 0
...@@ -8826,16 +8832,22 @@ ...@@ -8826,16 +8832,22 @@
return \"sub{l}\\t{%2, %0|%0, %2}\"; return \"sub{l}\\t{%2, %0|%0, %2}\";
} }
return \"add{l}\\t{%2, %0|%0, %2}\"; return \"add{l}\\t{%2, %0|%0, %2}\";
}"
[(set_attr "type" "alu")])
(define_insn "epilogue_deallocate_stack" case TYPE_LEA:
[(set (match_operand:SI 0 "register_operand" "=r") operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
(match_operand:SI 1 "register_operand" "+r")) return \"lea{l}\\t{%a2, %0|%0, %a2}\";
(set (match_dup 1) (match_dup 1))]
"" default:
"mov{l}\\t{%1, %0|%0, %1}" abort ();
[(set_attr "type" "imov")]) }
}"
[(set (attr "type")
(cond [(eq_attr "alternative" "0")
(const_string "alu")
(match_operand:SI 2 "const0_operand" "")
(const_string "imov")
]
(const_string "lea")))])
(define_insn "allocate_stack_worker" (define_insn "allocate_stack_worker"
[(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3) [(unspec:SI [(match_operand:SI 0 "register_operand" "a")] 3)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment