Commit b0f86a7e by Andreas Krebbel Committed by Andreas Krebbel

s390.c (Z10_PREDICT_DISTANCE): New macro.

2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>

	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
	(s390_z10_fix_long_loop_prediction): New function.
	(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
	(s390_reorg): Walk over the INSNs and invoke
	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.

From-SVN: r150955
parent f1149235
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
(s390_z10_fix_long_loop_prediction): New function.
(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
(s390_reorg): Walk over the INSNs and invoke
s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
"*brx_64bit", "*brx_31bit"): New patterns.
* config/s390/s390.c ('E'): New output modifier.
......
......@@ -345,6 +345,10 @@ struct GTY(()) machine_function
#define REGNO_PAIR_OK(REGNO, MODE) \
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
/* That's the read ahead of the dynamic branch prediction unit in
bytes on a z10 CPU. */
#define Z10_PREDICT_DISTANCE 384
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
......@@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
}
}
/* On z10 the dynamic branch prediction must see the backward jump in
a window of 384 bytes. If not it falls back to the static
prediction. This function rearranges the loop backward branch in a
way which makes the static prediction always correct. The function
returns true if it added an instruction. */
static bool
s390_z10_fix_long_loop_prediction (rtx insn)
{
rtx set = single_set (insn);
rtx code_label, label_ref, new_label;
rtx uncond_jump;
rtx cur_insn;
rtx tmp;
int distance;
/* This will exclude branch on count and branch on index patterns
since these are correctly statically predicted. */
if (!set
|| SET_DEST (set) != pc_rtx
|| GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
return false;
/* INSN is a conditional jump: one arm of the IF_THEN_ELSE is the
branch target (a LABEL_REF), the other is (pc) for fall-through.
Pick whichever arm is the label.  */
label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
gcc_assert (GET_CODE (label_ref) == LABEL_REF);
code_label = XEXP (label_ref, 0);
/* Give up if insn addresses are not available (-1), or if the
backward distance from the jump to its target already fits in the
z10 prediction window -- then the dynamic predictor sees the
backward jump and nothing needs fixing.  */
if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
|| INSN_ADDRESSES (INSN_UID (insn)) == -1
|| (INSN_ADDRESSES (INSN_UID (insn))
- INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
return false;
/* Scan the bytes immediately preceding INSN.  Give up if another
jump or a label occurs in that window.  The "- 6" presumably
reserves room for the 6-byte unconditional jump added below --
TODO confirm against the z10 insn encodings.  */
for (distance = 0, cur_insn = PREV_INSN (insn);
distance < Z10_PREDICT_DISTANCE - 6;
distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
return false;
/* Rearrange the loop-closing branch:
- emit an unconditional jump to the original loop head right
after INSN, followed by a fresh label NEW_LABEL;
- invert INSN's condition (swap the IF_THEN_ELSE arms) and
retarget it at NEW_LABEL.
The backward jump is now unconditional, which the static
predictor always gets right.  */
new_label = gen_label_rtx ();
uncond_jump = emit_jump_insn_after (
gen_rtx_SET (VOIDmode, pc_rtx,
gen_rtx_LABEL_REF (VOIDmode, code_label)),
insn);
emit_label_after (new_label, uncond_jump);
tmp = XEXP (SET_SRC (set), 1);
XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
XEXP (SET_SRC (set), 2) = tmp;
/* Pattern was modified in place: force re-recognition.  */
INSN_CODE (insn) = -1;
XEXP (label_ref, 0) = new_label;
JUMP_LABEL (insn) = new_label;
JUMP_LABEL (uncond_jump) = code_label;
return true;
}
/* Returns 1 if INSN reads the value of REG for purposes not related
to addressing of memory, and 0 otherwise. */
static int
......@@ -9743,20 +9807,15 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
if that register's value is delivered via a bypass, then the
pipeline recycles, thereby causing significant performance decline.
This function locates such situations and exchanges the two
operands of the compare. */
static void
s390_z10_optimize_cmp (void)
operands of the compare. The function returns true whenever it
added an insn. */
static bool
s390_z10_optimize_cmp (rtx insn)
{
rtx insn, prev_insn, next_insn;
int added_NOPs = 0;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
rtx prev_insn, next_insn;
bool insn_added_p = false;
rtx cond, *op0, *op1;
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (GET_CODE (PATTERN (insn)) == PARALLEL)
{
/* Handle compare and branch and branch on count
......@@ -9766,7 +9825,7 @@ s390_z10_optimize_cmp (void)
if (!pattern
|| SET_DEST (pattern) != pc_rtx
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
continue;
return false;
cond = XEXP (SET_SRC (pattern), 0);
op0 = &XEXP (cond, 0);
......@@ -9783,7 +9842,7 @@ s390_z10_optimize_cmp (void)
if (!REG_P (dest)
|| !CC_REGNO_P (REGNO (dest))
|| GET_CODE (src) != COMPARE)
continue;
return false;
/* s390_swap_cmp will try to find the conditional
jump when passing NULL_RTX as condition. */
......@@ -9792,10 +9851,10 @@ s390_z10_optimize_cmp (void)
op1 = &XEXP (src, 1);
}
else
continue;
return false;
if (!REG_P (*op0) || !REG_P (*op1))
continue;
return false;
/* Swap the COMPARE arguments and its mask if there is a
conflicting access in the previous insn. */
......@@ -9821,19 +9880,14 @@ s390_z10_optimize_cmp (void)
emit_insn_after (gen_nop1 (), insn);
else
emit_insn_after (gen_nop (), insn);
added_NOPs = 1;
insn_added_p = true;
}
else
s390_swap_cmp (cond, op0, op1, insn);
}
}
/* Adjust branches if we added new instructions. */
if (added_NOPs)
shorten_branches (get_insns ());
return insn_added_p;
}
/* Perform machine-dependent processing. */
static void
......@@ -9944,10 +9998,33 @@ s390_reorg (void)
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
/* Eliminate z10-specific pipeline recycles related to some compare
instructions. */
/* Walk over the insns and do some z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10)
s390_z10_optimize_cmp ();
{
rtx insn;
bool insn_added_p = false;
/* The insn lengths and addresses have to be up to date for the
following manipulations. */
shorten_branches (get_insns ());
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (JUMP_P (insn))
insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
if (GET_CODE (PATTERN (insn)) == PARALLEL
|| GET_CODE (PATTERN (insn)) == SET)
insn_added_p |= s390_z10_optimize_cmp (insn);
}
/* Adjust branches if we added new instructions. */
if (insn_added_p)
shorten_branches (get_insns ());
}
}
......
......@@ -1046,6 +1046,64 @@
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg
; And now the same two patterns as above but with a negated CC mask.
; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
; The following instructions do a complementary access of their second
; operand (z10 only): crj, cgrj, cr, cgr
; Fused signed compare-and-branch (z10), negated-mask variant: the
; if_then_else arms are (pc) / label_ref, so the insn falls through
; when the comparison holds and branches to operand 3 otherwise.
; Short form (6 bytes) uses the fused c<g>rj / c<g>ij instructions;
; long form (12 bytes) splits into a compare plus a long conditional
; jump.  NOTE(review): %D0 is presumed to print the negated condition
; mask (cf. the "negated CC mask" remark above) -- confirm against
; print_operand in s390.c.
(define_insn "*icmp_and_br_signed_<mode>"
[(set (pc)
(if_then_else (match_operator 0 "s390_signed_integer_comparison"
[(match_operand:GPR 1 "register_operand" "d,d")
(match_operand:GPR 2 "nonmemory_operand" "d,C")])
(pc)
(label_ref (match_operand 3 "" ""))))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10"
{
/* Length 6: target reachable by the fused compare-and-branch;
otherwise emit compare + jg (long jump).  */
if (get_attr_length (insn) == 6)
return which_alternative ?
"c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
else
return which_alternative ?
"c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
}
[(set_attr "op_type" "RIE")
(set_attr "type" "branch")
(set_attr "z10prop" "z10_super_c,z10_super")
(set (attr "length")
(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
(const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
; 10 byte for cgr/jg
; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
; The following instructions do a complementary access of their second
; operand (z10 only): clrj, clgrj, clr, clgr
; Fused unsigned compare-and-branch (z10), negated-mask variant:
; falls through when the comparison holds, branches to operand 3
; otherwise.  Short form (6 bytes) uses the fused cl<g>rj / cl<g>ij
; instructions; long form (12 bytes) splits into compare + long
; conditional jump.  NOTE(review): %D0 is presumed to print the
; negated condition mask -- confirm against print_operand in s390.c.
(define_insn "*icmp_and_br_unsigned_<mode>"
[(set (pc)
(if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
[(match_operand:GPR 1 "register_operand" "d,d")
(match_operand:GPR 2 "nonmemory_operand" "d,I")])
(pc)
(label_ref (match_operand 3 "" ""))))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10"
{
/* Length 6: target reachable by the fused compare-and-branch;
otherwise emit compare + jg (long jump).  */
if (get_attr_length (insn) == 6)
return which_alternative ?
"cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
else
return which_alternative ?
"cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
}
[(set_attr "op_type" "RIE")
(set_attr "type" "branch")
(set_attr "z10prop" "z10_super_c,z10_super")
(set (attr "length")
(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg
;;
;;- Move instructions.
;;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment