Commit d2c49530 by Jan Hubicka Committed by Jan Hubicka

i386.c (min_insn_size, [...]): New functions

	* i386.c (min_insn_size, k8_avoid_jump_misspredicts): New functions
	(ix86_reorg): Use it.
	* i386.md (align): New insn pattern.

From-SVN: r67446
parent 05a0e2af
Wed Jun 4 18:39:33 CEST 2003 Jan Hubicka <jh@suse.cz>
* i386.c (min_insn_size, k8_avoid_jump_misspredicts): New functions
(ix86_reorg): Use it.
* i386.md (align): New insn pattern.
2003-06-04 Kaveh R. Ghazi <ghazi@caip.rutgers.edu> 2003-06-04 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* toplev.c (rest_of_type_compilation): Fix typo. * toplev.c (rest_of_type_compilation): Fix typo.
......
...@@ -878,6 +878,8 @@ static bool ix86_ms_bitfield_layout_p PARAMS ((tree)); ...@@ -878,6 +878,8 @@ static bool ix86_ms_bitfield_layout_p PARAMS ((tree));
static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *)); static tree ix86_handle_struct_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int extended_reg_mentioned_1 PARAMS ((rtx *, void *)); static int extended_reg_mentioned_1 PARAMS ((rtx *, void *));
static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *)); static bool ix86_rtx_costs PARAMS ((rtx, int, int, int *));
static int min_insn_size PARAMS ((rtx));
static void k8_avoid_jump_misspredicts PARAMS ((void));
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION) #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int)); static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
...@@ -15526,6 +15528,117 @@ x86_function_profiler (file, labelno) ...@@ -15526,6 +15528,117 @@ x86_function_profiler (file, labelno)
} }
} }
/* We don't have exact information about the insn sizes, but we may assume
quite safely that we are informed about all 1 byte insns and memory
address sizes. This is enought to elliminate unnecesary padding in
99% of cases. */
static int
min_insn_size (insn)
rtx insn;
{
int l = 0;
if (!INSN_P (insn) || !active_insn_p (insn))
return 0;
/* Discard alignments we've emit and jump instructions. */
if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
&& XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
return 0;
if (GET_CODE (insn) == JUMP_INSN
&& (GET_CODE (PATTERN (insn)) == ADDR_VEC
|| GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
return 0;
/* Important case - calls are always 5 bytes.
It is common to have many calls in the row. */
if (GET_CODE (insn) == CALL_INSN
&& symbolic_reference_mentioned_p (PATTERN (insn))
&& !SIBLING_CALL_P (insn))
return 5;
if (get_attr_length (insn) <= 1)
return 1;
/* For normal instructions we may rely on the sizes of addresses
and the presence of symbol to require 4 bytes of encoding.
This is not the case for jumps where references are PC relative. */
if (GET_CODE (insn) != JUMP_INSN)
{
l = get_attr_length_address (insn);
if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
l = 4;
}
if (l)
return 1+l;
else
return 2;
}
/* AMD K8 core misspredicts jumps when there are more than 3 jumps in 16 byte
window. */
static void
k8_avoid_jump_misspredicts ()
{
rtx insn, start = get_insns ();
int nbytes = 0, njumps = 0;
int isjump = 0;
/* Look for all minimal intervals of instructions containing 4 jumps.
The intervals are bounded by START and INSN. NBYTES is the total
size of instructions in the interval including INSN and not including
START. When the NBYTES is smaller than 16 bytes, it is possible
that the end of START and INSN ends up in the same 16byte page.
The smallest offset in the page INSN can start is the case where START
ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
*/
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
nbytes += min_insn_size (insn);
if (rtl_dump_file)
fprintf(stderr,"Insn %i estimated to %i bytes\n",
INSN_UID (insn), min_insn_size (insn));
if ((GET_CODE (insn) == JUMP_INSN
&& GET_CODE (PATTERN (insn)) != ADDR_VEC
&& GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
|| GET_CODE (insn) == CALL_INSN)
njumps++;
else
continue;
while (njumps > 3)
{
start = NEXT_INSN (start);
if ((GET_CODE (start) == JUMP_INSN
&& GET_CODE (PATTERN (start)) != ADDR_VEC
&& GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
|| GET_CODE (start) == CALL_INSN)
njumps--, isjump = 1;
else
isjump = 0;
nbytes -= min_insn_size (start);
}
if (njumps < 0)
abort ();
if (rtl_dump_file)
fprintf(stderr,"Interval %i to %i has %i bytes\n",
INSN_UID (start), INSN_UID (insn), nbytes);
if (njumps == 3 && isjump && nbytes < 16)
{
int padsize = 15 - nbytes + min_insn_size (insn);
if (rtl_dump_file)
fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
emit_insn_before (gen_align (GEN_INT (padsize)), insn);
}
}
}
/* Implement machine specific optimizations. /* Implement machine specific optimizations.
At the moment we implement single transformation: AMD Athlon works faster At the moment we implement single transformation: AMD Athlon works faster
when RET is not destination of conditional jump or directly preceded when RET is not destination of conditional jump or directly preceded
...@@ -15577,6 +15690,7 @@ ix86_reorg () ...@@ -15577,6 +15690,7 @@ ix86_reorg ()
delete_insn (ret); delete_insn (ret);
} }
} }
k8_avoid_jump_misspredicts ();
} }
/* Return nonzero when QImode register that must be represented via REX prefix /* Return nonzero when QImode register that must be represented via REX prefix
......
...@@ -126,6 +126,7 @@ ...@@ -126,6 +126,7 @@
(UNSPECV_STMXCSR 40) (UNSPECV_STMXCSR 40)
(UNSPECV_FEMMS 46) (UNSPECV_FEMMS 46)
(UNSPECV_CLFLUSH 57) (UNSPECV_CLFLUSH 57)
(UNSPECV_ALIGN 68)
]) ])
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
...@@ -14278,6 +14279,19 @@ ...@@ -14278,6 +14279,19 @@
(set_attr "modrm" "0") (set_attr "modrm" "0")
(set_attr "ppro_uops" "one")]) (set_attr "ppro_uops" "one")])
;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and
;; all of memory. This blocks insns from being moved across this point.
(define_insn "align"
[(unspec_volatile [(match_operand 0 "" "")] UNSPECV_ALIGN)]
""
{
#ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
return ".p2align\t4,,%c0";
#endif
}
[(set_attr "length" "16")])
(define_expand "prologue" (define_expand "prologue"
[(const_int 1)] [(const_int 1)]
"" ""
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment