Commit bdeb029c by Jan Hubicka

i386.c (x86_sub_esp_4, [...]): New global variables.

	* i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
	New global variables.
	(ix86_emit_epilogue_adjustment): Do not attempt to use pop for the
	adjustment.
	* i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
	Declare.
	(TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4,
	 TARGET_ADD_ESP_8): New macros.
	* i386.md: Add peep2s to convert esp adjustments to push and pop
	instructions.
	(pushsi_prologue, popsi_epilogue): New patterns.

From-SVN: r33100
parent 2fc00b18
Fri Apr 7 12:23:04 MET DST 2000 Jan Hubicka <jh@suse.cz>
* i386.c (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
New global variables.
(ix86_emit_epilogue_adjustment): Do not attempt to use pop for the
adjustment.
* i386.h (x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8):
Declare.
(TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_ADD_ESP_4,
TARGET_ADD_ESP_8): New macros.
* i386.md: Add peep2s to convert esp adjustments to push and pop
instructions.
(pushsi_prologue, popsi_epilogue): New patterns.
2000-04-12 Jakub Jelinek <jakub@redhat.com>
* real.c (toe64): Remove stale #endif from the last change.
i386.c
@@ -218,6 +218,10 @@ const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
@@ -1968,51 +1972,16 @@ static void
 ix86_emit_epilogue_esp_adjustment (tsize)
      int tsize;
 {
-  /* Intel's docs say that for 4 or 8 bytes of stack frame one should
-     use `pop' and not `add'.  */
-  int use_pop = tsize == 4;
-  rtx edx = 0, ecx;
-
-  /* Use two pops only for the Pentium processors.  */
-  if (tsize == 8 && !TARGET_386 && !TARGET_486)
-    {
-      rtx retval = current_function_return_rtx;
-
-      edx = gen_rtx_REG (SImode, 1);
-
-      /* This case is a bit more complex.  Since we cannot pop into
-         %ecx twice we need a second register.  But this is only
-         available if the return value is not of DImode in which
-         case the %edx register is not available.  */
-      use_pop = (retval == NULL
-                 || !reg_overlap_mentioned_p (edx, retval));
-    }
-
-  if (use_pop)
-    {
-      ecx = gen_rtx_REG (SImode, 2);
-
-      /* We have to prevent the two pops here from being scheduled.
-         GCC otherwise would try in some situation to put other
-         instructions in between them which has a bad effect.  */
-      emit_insn (gen_blockage ());
-      emit_insn (gen_popsi1 (ecx));
-      if (tsize == 8)
-        emit_insn (gen_popsi1 (edx));
-    }
+  /* If a frame pointer is present, we must be sure to tie the sp
+     to the fp so that we don't mis-schedule.  */
+  if (frame_pointer_needed)
+    emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
+                                              stack_pointer_rtx,
+                                              GEN_INT (tsize),
+                                              hard_frame_pointer_rtx));
   else
-    {
-      /* If a frame pointer is present, we must be sure to tie the sp
-         to the fp so that we don't mis-schedule.  */
-      if (frame_pointer_needed)
-        emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
-                                                  stack_pointer_rtx,
-                                                  GEN_INT (tsize),
-                                                  hard_frame_pointer_rtx));
-      else
-        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
-                               GEN_INT (tsize)));
-    }
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+                           GEN_INT (tsize)));
 }
/* Emit code to restore saved registers using MOV insns. First register
i386.h
@@ -174,6 +174,7 @@ extern const int x86_read_modify, x86_split_long_moves;
extern const int x86_promote_QImode, x86_single_stringop;
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
extern const int x86_promote_hi_regs;
extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
@@ -201,6 +202,10 @@ extern const int x86_promote_hi_regs;
#define TARGET_HIMODE_MATH (x86_himode_math & CPUMASK)
#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & CPUMASK)
#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & CPUMASK)
#define TARGET_ADD_ESP_4 (x86_add_esp_4 & CPUMASK)
#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK)
#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
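To make the TARGET_* definitions above easier to follow, here is a minimal, self-contained C sketch of the tuning-flag idiom they rely on. The m_* values and the cpumask variable below are simplified stand-ins, not the real definitions from i386.h (there, CPUMASK is a one-bit mask for the CPU currently being tuned for, e.g. as selected with -mcpu).

```c
/* Minimal sketch of the CPU tuning-flag idiom.  The m_* masks and the
   cpumask value are stand-ins for the real definitions in i386.h.  */
#include <stdio.h>

#define m_386    (1 << 0)
#define m_486    (1 << 1)
#define m_PPRO   (1 << 2)
#define m_K6     (1 << 3)
#define m_ATHLON (1 << 4)

static const int x86_add_esp_4 = m_ATHLON | m_K6;
static const int x86_sub_esp_4 = m_ATHLON | m_PPRO;

int
main (void)
{
  int cpumask = m_K6;           /* pretend we are tuning for the K6 */

  /* Mirrors TARGET_ADD_ESP_4 and TARGET_SUB_ESP_4: nonzero exactly
     when the tuned-for CPU is listed in the corresponding mask.  */
  printf ("add_esp_4: %d\n", (x86_add_esp_4 & cpumask) != 0);  /* prints 1 */
  printf ("sub_esp_4: %d\n", (x86_sub_esp_4 & cpumask) != 0);  /* prints 0 */
  return 0;
}
```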
i386.md
@@ -1307,6 +1307,24 @@
"push{l}\\t%1"
[(set_attr "type" "push")])
(define_insn "*pushsi2_prologue"
[(set (match_operand:SI 0 "push_operand" "=<")
(match_operand:SI 1 "general_no_elim_operand" "ri*m"))
(set (reg:SI 6) (reg:SI 6))]
""
"push{l}\\t%1"
[(set_attr "type" "push")])
(define_insn "*popsi1_epilogue"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
(mem:SI (reg:SI 7)))
(set (reg:SI 7)
(plus:SI (reg:SI 7) (const_int 4)))
(set (reg:SI 6) (reg:SI 6))]
""
"pop{l}\\t%0"
[(set_attr "type" "pop")])
(define_insn "popsi1"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r*m")
(mem:SI (reg:SI 7)))
@@ -9752,6 +9770,140 @@
[(parallel [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC 17))])]
"operands[2] = GEN_INT (exact_log2 (INTVAL (operands[1])));")
;; The ESP adjustments can be done by push and pop instructions.  The resulting
;; code is shorter, since push is only 1 byte while add imm, %esp is 3 bytes.
;; On many CPUs it is also faster, since special hardware to avoid esp
;; dependencies is present.
;; While some of these conversions may be done using splitters, we use peepholes
;; in order to allow the combine_stack_adjustments pass to see non-obfuscated RTL.
;; Convert prologue esp subtractions to push.
;; We need a register to push.  In order to keep verify_flow_info happy we have
;; two choices:
;; - use a scratch register and clobber it in order to avoid dependencies
;; - use an already-live register
;; We can't use the second way right now, since there is no reliable way to
;; verify that a given register is live.  The first choice will most likely also
;; result in fewer dependencies.  At the point of the esp adjustments it is very
;; likely that call-clobbered registers are dead.  We may later want to use the
;; base pointer as an alternative when no register is available.
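As a concrete, purely illustrative reading of the size argument in the comment above, the sketch below shows a function that needs a small stack slot, with typical before/after instruction encodings given in the comment; the actual register chosen depends on which call-clobbered register the peephole finds dead at that point.

```c
/* Illustration only: a function needing a 4-byte stack slot.
   Without the peepholes, the frame adjustment is typically

       subl $4, %esp        ; 83 EC 04   (3 bytes)
       ...
       addl $4, %esp        ; 83 C4 04   (3 bytes)

   With the conversions below it can instead become

       pushl %eax           ; 50         (1 byte, pushed value is dead)
       ...
       popl  %ecx           ; 59         (1 byte, popped value discarded)

   which saves two bytes per 4-byte adjustment; per the comment above it
   can also be faster on CPUs whose hardware hides the resulting esp
   dependencies.  */
int
needs_a_slot (int x)
{
  volatile int slot = x;       /* force a stack slot for the local */
  return slot + 1;
}
```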
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4)))
(set (reg:SI 6) (reg:SI 6))
(clobber (reg:CC 17))])]
"optimize_size || !TARGET_SUB_ESP_4"
[(clobber (match_dup 0))
(parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
(set (reg:SI 6) (reg:SI 6))])])
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
(set (reg:SI 6) (reg:SI 6))
(clobber (reg:CC 17))])]
"optimize_size || !TARGET_SUB_ESP_8"
[(clobber (match_dup 0))
(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
(parallel [(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
(set (reg:SI 6) (reg:SI 6))])])
;; Convert esp subtractions to push.
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -4)))
(clobber (reg:CC 17))])]
"optimize_size || !TARGET_SUB_ESP_4"
[(clobber (match_dup 0))
(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))])
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int -8)))
(clobber (reg:CC 17))])]
"optimize_size || !TARGET_SUB_ESP_8"
[(clobber (match_dup 0))
(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))
(set (mem:SI (pre_dec:SI (reg:SI 7))) (match_dup 0))])
;; Convert epilogue deallocator to pop.
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
(set (reg:SI 6) (reg:SI 6))
(clobber (reg:CC 17))])]
"optimize_size || !TARGET_ADD_ESP_4"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
(set (reg:SI 6) (reg:SI 6))])]
"")
;; The two-pops case is tricky, since a pop creates a dependency on its
;; destination register.  We use two registers if they are available.
(define_peephole2
[(match_scratch:SI 0 "r")
(match_scratch:SI 1 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
(set (reg:SI 6) (reg:SI 6))
(clobber (reg:CC 17))])]
"optimize_size || !TARGET_ADD_ESP_8"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
(set (reg:SI 6) (reg:SI 6))])
(parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
"")
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
(set (reg:SI 6) (reg:SI 6))
(clobber (reg:CC 17))])]
"optimize_size"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
(set (reg:SI 6) (reg:SI 6))])
(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
"")
;; Convert esp additions to pop.
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))
(clobber (reg:CC 17))])]
""
[(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
"")
;; The two-pops case is tricky, since a pop creates a dependency on its
;; destination register.  We use two registers if they are available.
(define_peephole2
[(match_scratch:SI 0 "r")
(match_scratch:SI 1 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
(clobber (reg:CC 17))])]
""
[(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])
(parallel [(set (match_dup 1) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
"")
(define_peephole2
[(match_scratch:SI 0 "r")
(parallel [(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 8)))
(clobber (reg:CC 17))])]
"optimize_size"
[(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])
(parallel [(set (match_dup 0) (mem:SI (reg:SI 7)))
(set (reg:SI 7) (plus:SI (reg:SI 7) (const_int 4)))])]
"")
;; Call-value patterns last so that the wildcard operand does not
;; disrupt insn-recog's switch tables.