Commit be04394b by Jan Hubicka

i386.c (x86_four_jump_limit): New variable.


	* i386.c (x86_four_jump_limit): New variable.
	(k8_avoid_jump_misspredicts): Rename to ...
	(ix86_avoid_jump_misspredicts): ... this one.
	(ix86_pad_returns): Break out from ...
	(ix86_reorg): ... this one; do ix86_avoid_jump_misspredicts when asked
	to.
	* i386.h (TARGET_FOUR_JUMP_LIMIT): New macro.

From-SVN: r77814

gcc/ChangeLog

+2004-02-14  Jan Hubicka  <jh@suse.cz>
+
+	* i386.c (x86_four_jump_limit): New variable.
+	(k8_avoid_jump_misspredicts): Rename to ...
+	(ix86_avoid_jump_misspredicts): ... this one.
+	(ix86_pad_returns): Break out from ...
+	(ix86_reorg): ... this one; do ix86_avoid_jump_misspredicts when asked
+	to.
+	* i386.h (TARGET_FOUR_JUMP_LIMIT): New macro.
+
 2004-02-14  Josef Zlomek  <zlomekj@suse.cz>
 
 	* emit-rtl.c (set_decl_incoming_rtl): Check whether the 0th element of

gcc/config/i386/i386.c
@@ -524,6 +524,9 @@ const int x86_use_ffreep = m_ATHLON_K8;
 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
 const int x86_inter_unit_moves = ~(m_ATHLON_K8);
 const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
+/* Some CPU cores are not able to predict more than 4 branch instructions in
+   the 16 byte window.  */
+const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4;
 
 /* In case the average insn count for single function invocation is
    lower than this constant, emit fast (but longer) prologue and
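
To picture the limitation this flag encodes: if four or more jump starts land inside one 16-byte code window, the affected cores can no longer predict them all, so the new pass pads the instruction stream with NOPs until at most three fit. The following standalone sketch is an illustrative model, not the GCC implementation; insn sizes and the window scan are simplified, and all names here are made up.

/* Illustrative model only, not GCC code: count the worst-case number of
   jump instructions whose starts fall inside one 16-byte window.  */
#include <stdio.h>

struct insn { int size; int is_jump; };  /* minimum encoded size, jump flag */

static int
max_jumps_in_16_byte_window (const struct insn *s, int n)
{
  int best = 0;
  /* Offset of each insn start; the stream is assumed short for clarity.  */
  int off = 0, start[256];
  for (int i = 0; i < n && i < 256; i++)
    { start[i] = off; off += s[i].size; }
  /* For every window ending at the end of insn I, count jump starts.  */
  for (int i = 0; i < n; i++)
    {
      int end = start[i] + s[i].size, count = 0;
      for (int j = 0; j <= i; j++)
        if (s[j].is_jump && end - start[j] <= 16)
          count++;
      if (count > best)
        best = count;
    }
  return best;
}

int
main (void)
{
  /* Four 2-byte conditional jumps back to back: 8 bytes, one window.  */
  struct insn stream[] = { {2, 1}, {2, 1}, {2, 1}, {2, 1} };
  printf ("%d jumps share a window\n",
          max_jumps_in_16_byte_window (stream, 4));  /* prints 4 */
  return 0;
}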
@@ -883,7 +886,6 @@ static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
 static int extended_reg_mentioned_1 (rtx *, void *);
 static bool ix86_rtx_costs (rtx, int, int, int *);
 static int min_insn_size (rtx);
-static void k8_avoid_jump_misspredicts (void);
 
 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
 static void ix86_svr3_asm_out_constructor (rtx, int);
@@ -15714,7 +15716,7 @@ min_insn_size (rtx insn)
    window.  */
 static void
-k8_avoid_jump_misspredicts (void)
+ix86_avoid_jump_misspredicts (void)
 {
   rtx insn, start = get_insns ();
   int nbytes = 0, njumps = 0;
@@ -15774,18 +15776,15 @@ k8_avoid_jump_misspredicts (void)
     }
 }
 
-/* Implement machine specific optimizations.
-   At the moment we implement single transformation: AMD Athlon works faster
+/* AMD Athlon works faster
    when RET is not destination of conditional jump or directly preceded
    by other jump instruction.  We avoid the penalty by inserting NOP just
    before the RET instructions in such cases.  */
 static void
-ix86_reorg (void)
+ix86_pad_returns (void)
 {
   edge e;
 
-  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
-    return;
   for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
     {
       basic_block bb = e->src;
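
The rule ix86_pad_returns applies is the one stated in the comment above: a RET that is the target of a conditional jump, or that directly follows another jump, predicts poorly on Athlon/K8, and a NOP inserted in front of it avoids the penalty. A toy model of that decision, in which the insn kinds and the helper are hypothetical stand-ins rather than the real RTL predicates:

/* Toy model of the padding decision; INSN_* and ret_needs_nop are
   hypothetical stand-ins for the RTL tests the real pass performs.  */
enum insn_kind { INSN_OTHER, INSN_JUMP, INSN_RET };

static int
ret_needs_nop (enum insn_kind prev, int ret_is_branch_target)
{
  /* Penalized when reached as a jump target or right behind a jump:
         before:      after padding:
           jne .L2      jne .L2
           ret          nop
                        ret                                           */
  return prev == INSN_JUMP || ret_is_branch_target;
}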
@@ -15825,7 +15824,17 @@ ix86_reorg (void)
 	  delete_insn (ret);
 	}
     }
-  k8_avoid_jump_misspredicts ();
+}
+
+/* Implement machine specific optimizations.  We implement padding of returns
+   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
+static void
+ix86_reorg (void)
+{
+  if (TARGET_ATHLON_K8 && optimize && !optimize_size)
+    ix86_pad_returns ();
+  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
+    ix86_avoid_jump_misspredicts ();
 }
 
 /* Return nonzero when QImode register that must be represented via REX prefix

gcc/config/i386/i386.h
@@ -293,6 +293,7 @@ extern int x86_prefetch_sse;
 #define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
 #define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK)
 #define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
+#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
 #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
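
The new macro follows the tuning-mask idiom of its neighbours: each x86_* variable is a bitmask of CPU models, and TUNEMASK holds the single bit of the -mtune target, so one AND decides whether the knob is active for the current CPU. A minimal sketch of the pattern, with illustrative m_* bit values rather than the real i386.h definitions:

/* Minimal sketch of the TUNEMASK idiom; the m_* bit assignments are
   illustrative, not the real i386.h values.  */
#define m_386        (1 << 0)
#define m_PENT       (1 << 1)
#define m_PPRO       (1 << 2)
#define m_PENT4      (1 << 3)
#define m_ATHLON_K8  (1 << 4)

static const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4;

/* TUNEMASK would be the bit of the CPU chosen with -mtune; a feature is
   enabled exactly when its mask contains that bit.  */
#define TUNEMASK m_ATHLON_K8                    /* e.g. -mtune=k8 */
#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)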