Commit b0f86a7e by Andreas Krebbel Committed by Andreas Krebbel

s390.c (Z10_PREDICT_DISTANCE): New macro.

2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>

	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
	(s390_z10_fix_long_loop_prediction): New function.
	(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
	(s390_reorg): Walk over the INSNs and invoke
	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.

From-SVN: r150955
parent f1149235
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com> 2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
(s390_z10_fix_long_loop_prediction): New function.
(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
(s390_reorg): Walk over the INSNs and invoke
s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
"*brx_64bit", "*brx_31bit"): New patterns.
* config/s390/s390.c ('E'): New output modifier.
......
...@@ -345,6 +345,10 @@ struct GTY(()) machine_function ...@@ -345,6 +345,10 @@ struct GTY(()) machine_function
#define REGNO_PAIR_OK(REGNO, MODE) \ #define REGNO_PAIR_OK(REGNO, MODE) \
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1)) (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
/* That's the read ahead of the dynamic branch prediction unit in
bytes on a z10 CPU. */
#define Z10_PREDICT_DISTANCE 384
static enum machine_mode static enum machine_mode
s390_libgcc_cmp_return_mode (void) s390_libgcc_cmp_return_mode (void)
{ {
...@@ -9661,6 +9665,66 @@ s390_optimize_prologue (void) ...@@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
} }
} }
/* On z10 the dynamic branch prediction must see the backward jump in
   a window of 384 bytes.  If not it falls back to the static
   prediction.  This function rearranges the loop backward branch in a
   way which makes the static prediction always correct.  The function
   returns true if it added an instruction.

   INSN is expected to be a jump insn (callers are expected to check
   JUMP_P before calling).  The transformation turns

       cond-jump backward to CODE_LABEL          (long backward branch)

   into

       inverted cond-jump forward to NEW_LABEL   (short, statically
                                                  predicted not-taken)
       uncond-jump backward to CODE_LABEL        (statically predicted
                                                  taken)
     NEW_LABEL:                                  (fallthrough path)   */
static bool
s390_z10_fix_long_loop_prediction (rtx insn)
{
  rtx set = single_set (insn);
  rtx code_label, label_ref, new_label;
  rtx uncond_jump;
  rtx cur_insn;
  rtx tmp;
  int distance;

  /* This will exclude branch on count and branch on index patterns
     since these are correctly statically predicted.  (They are not
     single SETs of the PC with an IF_THEN_ELSE source.)  */
  if (!set
      || SET_DEST (set) != pc_rtx
      || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
    return false;

  /* The LABEL_REF may sit in either arm of the IF_THEN_ELSE; the
     other arm is (pc), i.e. fallthrough.  */
  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));

  gcc_assert (GET_CODE (label_ref) == LABEL_REF);

  code_label = XEXP (label_ref, 0);

  /* Bail out unless this really is a backward branch spanning at
     least Z10_PREDICT_DISTANCE bytes.  Addresses of -1 mean
     shorten_branches has not recorded a valid address for the insn.  */
  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
      || INSN_ADDRESSES (INSN_UID (insn)) == -1
      || (INSN_ADDRESSES (INSN_UID (insn))
	  - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
    return false;

  /* Scan backwards to make sure no other jump or label lies within
     the prediction window; otherwise the rewrite could perturb other
     branches.  The "- 6" leaves room for the jump we are about to
     insert (presumably its encoded length — TODO confirm).  */
  for (distance = 0, cur_insn = PREV_INSN (insn);
       distance < Z10_PREDICT_DISTANCE - 6;
       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
      return false;

  /* Emit the unconditional backward jump to CODE_LABEL right after
     INSN, followed by NEW_LABEL as the new fallthrough target.  */
  new_label = gen_label_rtx ();
  uncond_jump = emit_jump_insn_after (
		  gen_rtx_SET (VOIDmode, pc_rtx,
			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
		  insn);
  emit_label_after (new_label, uncond_jump);

  /* Swap the THEN/ELSE arms of the conditional branch (inverting
     which outcome branches) and retarget its label to NEW_LABEL, so
     the conditional jump now merely skips over UNCOND_JUMP.  Resetting
     INSN_CODE forces the modified insn to be re-recognized.  */
  tmp = XEXP (SET_SRC (set), 1);
  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
  XEXP (SET_SRC (set), 2) = tmp;
  INSN_CODE (insn) = -1;
  XEXP (label_ref, 0) = new_label;
  JUMP_LABEL (insn) = new_label;
  JUMP_LABEL (uncond_jump) = code_label;

  return true;
}
/* Returns 1 if INSN reads the value of REG for purposes not related /* Returns 1 if INSN reads the value of REG for purposes not related
to addressing of memory, and 0 otherwise. */ to addressing of memory, and 0 otherwise. */
static int static int
...@@ -9743,20 +9807,15 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn) ...@@ -9743,20 +9807,15 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
if that register's value is delivered via a bypass, then the if that register's value is delivered via a bypass, then the
pipeline recycles, thereby causing significant performance decline. pipeline recycles, thereby causing significant performance decline.
This function locates such situations and exchanges the two This function locates such situations and exchanges the two
operands of the compare. */ operands of the compare. The function return true whenever it
static void added an insn. */
s390_z10_optimize_cmp (void) static bool
s390_z10_optimize_cmp (rtx insn)
{ {
rtx insn, prev_insn, next_insn; rtx prev_insn, next_insn;
int added_NOPs = 0; bool insn_added_p = false;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
rtx cond, *op0, *op1; rtx cond, *op0, *op1;
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (GET_CODE (PATTERN (insn)) == PARALLEL) if (GET_CODE (PATTERN (insn)) == PARALLEL)
{ {
/* Handle compare and branch and branch on count /* Handle compare and branch and branch on count
...@@ -9766,7 +9825,7 @@ s390_z10_optimize_cmp (void) ...@@ -9766,7 +9825,7 @@ s390_z10_optimize_cmp (void)
if (!pattern if (!pattern
|| SET_DEST (pattern) != pc_rtx || SET_DEST (pattern) != pc_rtx
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE) || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
continue; return false;
cond = XEXP (SET_SRC (pattern), 0); cond = XEXP (SET_SRC (pattern), 0);
op0 = &XEXP (cond, 0); op0 = &XEXP (cond, 0);
...@@ -9783,7 +9842,7 @@ s390_z10_optimize_cmp (void) ...@@ -9783,7 +9842,7 @@ s390_z10_optimize_cmp (void)
if (!REG_P (dest) if (!REG_P (dest)
|| !CC_REGNO_P (REGNO (dest)) || !CC_REGNO_P (REGNO (dest))
|| GET_CODE (src) != COMPARE) || GET_CODE (src) != COMPARE)
continue; return false;
/* s390_swap_cmp will try to find the conditional /* s390_swap_cmp will try to find the conditional
jump when passing NULL_RTX as condition. */ jump when passing NULL_RTX as condition. */
...@@ -9792,10 +9851,10 @@ s390_z10_optimize_cmp (void) ...@@ -9792,10 +9851,10 @@ s390_z10_optimize_cmp (void)
op1 = &XEXP (src, 1); op1 = &XEXP (src, 1);
} }
else else
continue; return false;
if (!REG_P (*op0) || !REG_P (*op1)) if (!REG_P (*op0) || !REG_P (*op1))
continue; return false;
/* Swap the COMPARE arguments and its mask if there is a /* Swap the COMPARE arguments and its mask if there is a
conflicting access in the previous insn. */ conflicting access in the previous insn. */
...@@ -9821,19 +9880,14 @@ s390_z10_optimize_cmp (void) ...@@ -9821,19 +9880,14 @@ s390_z10_optimize_cmp (void)
emit_insn_after (gen_nop1 (), insn); emit_insn_after (gen_nop1 (), insn);
else else
emit_insn_after (gen_nop (), insn); emit_insn_after (gen_nop (), insn);
added_NOPs = 1; insn_added_p = true;
} }
else else
s390_swap_cmp (cond, op0, op1, insn); s390_swap_cmp (cond, op0, op1, insn);
} }
} return insn_added_p;
/* Adjust branches if we added new instructions. */
if (added_NOPs)
shorten_branches (get_insns ());
} }
/* Perform machine-dependent processing. */ /* Perform machine-dependent processing. */
static void static void
...@@ -9944,10 +9998,33 @@ s390_reorg (void) ...@@ -9944,10 +9998,33 @@ s390_reorg (void)
/* Try to optimize prologue and epilogue further. */ /* Try to optimize prologue and epilogue further. */
s390_optimize_prologue (); s390_optimize_prologue ();
/* Eliminate z10-specific pipeline recycles related to some compare /* Walk over the insns and do some z10 specific changes. */
instructions. */
if (s390_tune == PROCESSOR_2097_Z10) if (s390_tune == PROCESSOR_2097_Z10)
s390_z10_optimize_cmp (); {
rtx insn;
bool insn_added_p = false;
/* The insn lengths and addresses have to be up to date for the
following manipulations. */
shorten_branches (get_insns ());
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (JUMP_P (insn))
insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
if (GET_CODE (PATTERN (insn)) == PARALLEL
|| GET_CODE (PATTERN (insn)) == SET)
insn_added_p |= s390_z10_optimize_cmp (insn);
}
/* Adjust branches if we added new instructions. */
if (insn_added_p)
shorten_branches (get_insns ());
}
} }
......
...@@ -1046,6 +1046,64 @@ ...@@ -1046,6 +1046,64 @@
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg ; 10 byte for clgr/jg
; And now the same two patterns as above but with a negated CC mask.
; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
; The following instructions do a complementary access of their second
; operand (z10 only): crj, cgrj, cr, cgr
; Combined signed compare-and-branch with a negated CC mask: the
; IF_THEN_ELSE falls through (pc) when the comparison holds and
; branches to operand 3 otherwise — hence the %D0 (negated condition)
; in the output templates.
; Alternative 0 compares two registers (crj/cgrj), alternative 1 a
; register against an immediate (cij/cgij).  When the branch target is
; out of range for the fused 6-byte instruction, a separate compare
; plus a long conditional jump (jg) is emitted instead (12 bytes
; reserved; the comment below notes the actual encoded sizes).
(define_insn "*icmp_and_br_signed_<mode>"
  [(set (pc)
	(if_then_else (match_operator 0 "s390_signed_integer_comparison"
			[(match_operand:GPR 1 "register_operand"  "d,d")
			 (match_operand:GPR 2 "nonmemory_operand" "d,C")])
		      (pc)
		      (label_ref (match_operand 3 "" ""))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_Z10"
{
  if (get_attr_length (insn) == 6)
    return which_alternative ?
      "c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
  else
    return which_alternative ?
      "c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
}
  [(set_attr "op_type" "RIE")
   (set_attr "type"    "branch")
   (set_attr "z10prop" "z10_super_c,z10_super")
   ; Use the short fused form only while the displacement is safely in
   ; range of the relative branch; otherwise reserve 12 bytes.
   (set (attr "length")
	(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
		      (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
						       ; 10 byte for cgr/jg
; 10 byte for cgr/jg
; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
; The following instructions do a complementary access of their second
; operand (z10 only): clrj, clgrj, clr, clgr
; Combined unsigned compare-and-branch with a negated CC mask:
; falls through (pc) when the comparison holds, branches to operand 3
; otherwise — hence the %D0 (negated condition) output modifier.
; Alternative 0 compares two registers (clrj/clgrj), alternative 1 a
; register against an immediate (clij/clgij).  When the target is out
; of range of the fused 6-byte form, a separate compare plus a long
; conditional jump (jg) is used (12 bytes reserved; actual sizes in
; the trailing comment).
(define_insn "*icmp_and_br_unsigned_<mode>"
  [(set (pc)
	(if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
			[(match_operand:GPR 1 "register_operand"  "d,d")
			 (match_operand:GPR 2 "nonmemory_operand" "d,I")])
		      (pc)
		      (label_ref (match_operand 3 "" ""))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_Z10"
{
  if (get_attr_length (insn) == 6)
    return which_alternative ?
      "cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
  else
    return which_alternative ?
      "cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
}
  [(set_attr "op_type" "RIE")
   (set_attr "type"    "branch")
   (set_attr "z10prop" "z10_super_c,z10_super")
   ; Use the short fused form only while the displacement is safely in
   ; range of the relative branch; otherwise reserve 12 bytes.
   (set (attr "length")
	(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
		      (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
						       ; 10 byte for clgr/jg
; 10 byte for clgr/jg
;; ;;
;;- Move instructions. ;;- Move instructions.
;; ;;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment