Commit 2ab0437e by Jan Hubicka Committed by Jan Hubicka

i386.c (size_cost): New static variable.

	* i386.c (size_cost): New static variable.
	(override_function): Use size_cost when -Os is specified.

	* i386.c (ix86_expand_prologue): Set use_fast_prologue_epilogue
	properly;  Use current_function_calls_eh_return.
	(ix86_expand_epilogue): Avoid dummy optimize_size tests;
	use leave to avoid dependency chain.

	* local-alloc.c (update_equiv_regs): Use CFG to iterate over INSN stream;
	get BB loop_depth instead of computing it from LOOP notes.

	* reg-stack.c (subst_stack_reg_pat): Handle reversal of conditional moves.

From-SVN: r45415
parent 76f81d95
Wed Sep 5 17:28:49 CEST 2001 Jan Hubicka <jh@suse.cz>
* i386.c (size_cost): New static variable.
(override_function): Use size_cost when -Os is specified.
* i386.c (ix86_expand_prologue): Set use_fast_prologue_epilogue
properly; Use current_function_calls_eh_return.
(ix86_expand_epilogue): Avoid dummy optimize_size tests;
use leave to avoid dependency chain.
* local-alloc.c (update_equiv_regs): Use CFG to iterate over INSN stream;
get BB loop_depth instead of computing it from LOOP notes.
* reg-stack.c (subst_stack_reg_pat): Handle reversal of conditional moves.
2001-09-05 John David Anglin <dave@hiauly1.hia.nrc.ca>
* som.h (ASM_PREFERRED_EH_DATA_FORMAT): Define.
......
......@@ -48,6 +48,38 @@ Boston, MA 02111-1307, USA. */
#endif
/* Processor costs (relative to an add) */
/* Cost table used when optimizing for size: override_options points
   ix86_cost at this table when optimize_size is set, instead of the
   per-CPU table.  The entries are positional, so they must stay in the
   field order of struct processor_costs (declared elsewhere).
   NOTE(review): the values look like approximate instruction sizes
   rather than latencies -- confirm before retuning.  */
struct processor_costs size_cost = { /* costs for tuning for size */
2, /* cost of an add instruction */
3, /* cost of a lea instruction */
2, /* variable shift costs */
3, /* constant shift costs */
3, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
3, /* cost of a divide/mod */
0, /* "large" insn */
2, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{2, 2, 2}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{2, 2, 2}, /* presumably cost of storing fp registers; the original
comment repeated "loading integer registers" here -- TODO confirm
against struct processor_costs */
3, /* cost of moving MMX register */
{3, 3}, /* cost of loading MMX registers
in SImode and DImode */
{3, 3}, /* cost of storing MMX registers
in SImode and DImode */
3, /* cost of moving SSE register */
{3, 3, 3}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{3, 3, 3}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = { /* 386 specific costs */
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
......@@ -798,6 +830,9 @@ override_options ()
error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);
}
if (optimize_size)
ix86_cost = &size_cost;
else
ix86_cost = processor_target_table[ix86_cpu].cost;
target_flags |= processor_target_table[ix86_cpu].target_enable;
target_flags &= ~processor_target_table[ix86_cpu].target_disable;
......@@ -2664,10 +2699,11 @@ ix86_expand_prologue ()
int use_mov = 0;
HOST_WIDE_INT allocate;
if (TARGET_PROLOGUE_USING_MOVE && !optimize_size)
if (!optimize_size)
{
use_fast_prologue_epilogue
= !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
if (TARGET_PROLOGUE_USING_MOVE)
use_mov = use_fast_prologue_epilogue;
}
ix86_compute_frame_layout (&frame);
......@@ -2807,13 +2843,13 @@ ix86_expand_epilogue (style)
and there is exactly one register to pop. This heuristic may need some
tuning in future. */
if ((!sp_valid && frame.nregs <= 1)
|| (TARGET_EPILOGUE_USING_MOVE && !optimize_size
|| (TARGET_EPILOGUE_USING_MOVE
&& use_fast_prologue_epilogue
&& (frame.nregs > 1 || frame.to_allocate))
|| (frame_pointer_needed && !frame.nregs && frame.to_allocate)
|| (frame_pointer_needed && TARGET_USE_LEAVE && !optimize_size
|| (frame_pointer_needed && TARGET_USE_LEAVE
&& use_fast_prologue_epilogue && frame.nregs == 1)
|| style == 2)
|| current_function_calls_eh_return)
{
/* Restore registers. We can use ebp or esp to address the memory
locations. If both are available, default to ebp, since offsets
......@@ -2899,7 +2935,11 @@ ix86_expand_epilogue (style)
}
if (frame_pointer_needed)
{
if (TARGET_64BIT)
/* Leave results in shorter dependency chains on CPUs that are
able to grok it fast. */
if (TARGET_USE_LEAVE)
emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
else if (TARGET_64BIT)
emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
else
emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
......
......@@ -821,26 +821,18 @@ update_equiv_regs ()
/* Scan the insns and find which registers have equivalences. Do this
in a separate scan of the insns because (due to -fcse-follow-jumps)
a register can be set below its use. */
loop_depth = 0;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
for (block = 0; block < n_basic_blocks; block++)
{
basic_block bb = BASIC_BLOCK (block);
loop_depth = bb->loop_depth;
for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn))
{
rtx note;
rtx set;
rtx dest, src;
int regno;
if (GET_CODE (insn) == NOTE)
{
if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
++loop_depth;
else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
{
if (! loop_depth)
abort ();
--loop_depth;
}
}
if (! INSN_P (insn))
continue;
......@@ -1024,14 +1016,8 @@ update_equiv_regs ()
even if the register is only used in one block so that
dependencies can be handled where the last register is
used in a different block (i.e. HIGH / LO_SUM sequences)
and to reduce the number of registers alive across calls.
It would be nice to use "loop_depth * 2" in the compare
below. Unfortunately, LOOP_DEPTH need not be constant within
a basic block so this would be too complicated.
This case normally occurs when a parameter is read from
memory and then used exactly once, not in a loop. */
and to reduce the number of registers alive across
calls. */
if (REG_N_REFS (regno) == 2
&& (rtx_equal_p (XEXP (note, 0), src)
......@@ -1042,6 +1028,7 @@ update_equiv_regs ()
}
}
}
}
/* Now scan all regs killed in an insn to see if any of them are
registers only used that once. If so, see if we can replace the
......@@ -1050,35 +1037,18 @@ update_equiv_regs ()
can't replace the reference, and the initializing reference is
within the same loop (or in an inner loop), then move the register
initialization just before the use, so that they are in the same
basic block.
basic block. */
for (block = n_basic_blocks - 1; block >= 0; block--)
{
basic_block bb = BASIC_BLOCK (block);
Skip this optimization if loop_depth isn't initially zero since
that indicates a mismatch between loop begin and loop end notes
(i.e. gcc.dg/noncompile/920721-2.c). */
block = n_basic_blocks - 1;
for (insn = (loop_depth == 0) ? get_last_insn () : NULL_RTX;
insn; insn = PREV_INSN (insn))
loop_depth = bb->loop_depth;
for (insn = bb->end; insn != PREV_INSN (bb->head); insn = PREV_INSN (insn))
{
rtx link;
if (! INSN_P (insn))
{
if (GET_CODE (insn) == NOTE)
{
if (NOTE_INSN_BASIC_BLOCK_P (insn))
block = NOTE_BASIC_BLOCK (insn)->index - 1;
else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
{
if (! loop_depth)
abort ();
--loop_depth;
}
else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
++loop_depth;
}
continue;
}
for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
{
......@@ -1182,6 +1152,7 @@ update_equiv_regs ()
}
}
}
}
/* Clear all dead REGNOs from all basic block's live info. */
if (clear_regnos)
......
......@@ -530,6 +530,7 @@ branch_prob ()
total_num_times_called++;
flow_call_edges_add (NULL);
add_noreturn_fake_exit_edges ();
/* We can't handle cyclic regions constructed using abnormal edges.
To avoid these we replace every source of abnormal edge by a fake
......
......@@ -1775,6 +1775,12 @@ subst_stack_regs_pat (insn, regstack, pat)
case IF_THEN_ELSE:
/* This insn requires the top of stack to be the destination. */
src1 = get_true_reg (&XEXP (pat_src, 1));
src2 = get_true_reg (&XEXP (pat_src, 2));
src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
src2_note = find_regno_note (insn, REG_DEAD, REGNO (*src2));
/* If the comparison operator is an FP comparison operator,
it is handled correctly by compare_for_stack_reg () which
will move the destination to the top of stack. But if the
......@@ -1782,13 +1788,35 @@ subst_stack_regs_pat (insn, regstack, pat)
have to handle it here. */
if (get_hard_regnum (regstack, *dest) >= FIRST_STACK_REG
&& REGNO (*dest) != regstack->reg[regstack->top])
{
/* In case one of the operands is the top of stack and that operand
dies, it is safe to make it the destination operand by reversing
the direction of cmove and avoid fxch. */
if ((REGNO (*src1) == regstack->reg[regstack->top]
&& src1_note)
|| (REGNO (*src2) == regstack->reg[regstack->top]
&& src2_note))
{
/* We know that both sources "die", as one dies and the other
is overwritten by the destination. Claim both sources
to be dead, as the code below will properly pop the
non-top-of-stack note and replace top-of-stack by the
result by popping source first and then pushing result. */
if (!src1_note)
src1_note = REG_NOTES (insn)
= gen_rtx_EXPR_LIST (REG_DEAD, *src1, REG_NOTES (insn));
if (!src2_note)
src2_note = REG_NOTES (insn)
= gen_rtx_EXPR_LIST (REG_DEAD, *src2, REG_NOTES (insn));
/* On i386 the comparison is always reversible. */
PUT_CODE (XEXP (pat_src, 0),
reversed_comparison_code (XEXP (pat_src, 0), insn));
}
else
emit_swap_insn (insn, regstack, *dest);
src1 = get_true_reg (&XEXP (pat_src, 1));
src2 = get_true_reg (&XEXP (pat_src, 2));
src1_note = find_regno_note (insn, REG_DEAD, REGNO (*src1));
src2_note = find_regno_note (insn, REG_DEAD, REGNO (*src2));
}
{
rtx src_note [3];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment