Commit b0f86a7e by Andreas Krebbel Committed by Andreas Krebbel

s390.c (Z10_PREDICT_DISTANCE): New macro.

2009-08-20  Andreas Krebbel  <krebbel1@de.ibm.com>

	* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
	(s390_z10_fix_long_loop_prediction): New function.
	(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
	(s390_reorg): Walk over the INSNs and invoke
	s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.

From-SVN: r150955
parent f1149235
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.c (Z10_PREDICT_DISTANCE): New macro.
(s390_z10_fix_long_loop_prediction): New function.
(s390_z10_optimize_cmp): INSN walk moved to callee - s390_reorg.
(s390_reorg): Walk over the INSNs and invoke
s390_z10_fix_long_loop_prediction and s390_z10_optimize_cmp.
2009-08-20 Andreas Krebbel <krebbel1@de.ibm.com>
* config/s390/s390.md ("*brx_stage1_<GPR:mode>", "*brxg_64bit",
"*brx_64bit", "*brx_31bit"): New patterns.
* config/s390/s390.c ('E'): New output modifier.
......
......@@ -345,6 +345,10 @@ struct GTY(()) machine_function
#define REGNO_PAIR_OK(REGNO, MODE) \
(HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
/* That's the read ahead of the dynamic branch prediction unit in
bytes on a z10 CPU. */
#define Z10_PREDICT_DISTANCE 384
static enum machine_mode
s390_libgcc_cmp_return_mode (void)
{
......@@ -9661,6 +9665,66 @@ s390_optimize_prologue (void)
}
}
/* On z10 the dynamic branch prediction must see the backward jump in
   a window of 384 bytes.  If not, it falls back to the static
   prediction.  This function rearranges the loop backward branch in a
   way which makes the static prediction always correct.  The function
   returns true if it added an instruction.  */
static bool
s390_z10_fix_long_loop_prediction (rtx insn)
{
  rtx set = single_set (insn);
  rtx code_label, label_ref, new_label;
  rtx uncond_jump;
  rtx cur_insn;
  rtx tmp;
  int distance;

  /* This will exclude branch on count and branch on index patterns
     since these are correctly statically predicted.  */
  if (!set
      || SET_DEST (set) != pc_rtx
      || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
    return false;

  /* The branch target is whichever arm of the IF_THEN_ELSE holds the
     LABEL_REF (the other arm is the fall-through (pc)).  */
  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));

  gcc_assert (GET_CODE (label_ref) == LABEL_REF);

  code_label = XEXP (label_ref, 0);

  /* Only act on long backward branches: the target label must lie at
     least Z10_PREDICT_DISTANCE bytes before INSN.  Requires insn
     addresses to be computed (see shorten_branches in the caller).  */
  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
      || INSN_ADDRESSES (INSN_UID (insn)) == -1
      || (INSN_ADDRESSES (INSN_UID (insn))
	  - INSN_ADDRESSES (INSN_UID (code_label)) < Z10_PREDICT_DISTANCE))
    return false;

  /* Scan backwards through the prediction window; give up if another
     jump or a label falls inside it.  */
  for (distance = 0, cur_insn = PREV_INSN (insn);
       distance < Z10_PREDICT_DISTANCE - 6;
       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
      return false;

  new_label = gen_label_rtx ();

  /* Emit an unconditional backward jump to the original target right
     after INSN, followed by a fresh label for the fall-through.  */
  uncond_jump = emit_jump_insn_after (
		  gen_rtx_SET (VOIDmode, pc_rtx,
			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
		  insn);
  emit_label_after (new_label, uncond_jump);

  /* Exchange the two arms of the IF_THEN_ELSE, which inverts the
     branch sense of INSN, and retarget it at the new (short, forward)
     label.  Resetting INSN_CODE forces re-recognition of the changed
     pattern.  */
  tmp = XEXP (SET_SRC (set), 1);
  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
  XEXP (SET_SRC (set), 2) = tmp;

  INSN_CODE (insn) = -1;
  XEXP (label_ref, 0) = new_label;
  JUMP_LABEL (insn) = new_label;
  JUMP_LABEL (uncond_jump) = code_label;

  return true;
}
/* Returns 1 if INSN reads the value of REG for purposes not related
to addressing of memory, and 0 otherwise. */
static int
......@@ -9743,97 +9807,87 @@ s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
if that register's value is delivered via a bypass, then the
pipeline recycles, thereby causing significant performance decline.
This function locates such situations and exchanges the two
operands of the compare. */
static void
s390_z10_optimize_cmp (void)
operands of the compare. The function return true whenever it
added an insn. */
static bool
s390_z10_optimize_cmp (rtx insn)
{
rtx insn, prev_insn, next_insn;
int added_NOPs = 0;
rtx prev_insn, next_insn;
bool insn_added_p = false;
rtx cond, *op0, *op1;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (GET_CODE (PATTERN (insn)) == PARALLEL)
{
rtx cond, *op0, *op1;
/* Handle compare and branch and branch on count
instructions. */
rtx pattern = single_set (insn);
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (GET_CODE (PATTERN (insn)) == PARALLEL)
{
/* Handle compare and branch and branch on count
instructions. */
rtx pattern = single_set (insn);
if (!pattern
|| SET_DEST (pattern) != pc_rtx
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
continue;
if (!pattern
|| SET_DEST (pattern) != pc_rtx
|| GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
return false;
cond = XEXP (SET_SRC (pattern), 0);
op0 = &XEXP (cond, 0);
op1 = &XEXP (cond, 1);
}
else if (GET_CODE (PATTERN (insn)) == SET)
{
rtx src, dest;
cond = XEXP (SET_SRC (pattern), 0);
op0 = &XEXP (cond, 0);
op1 = &XEXP (cond, 1);
}
else if (GET_CODE (PATTERN (insn)) == SET)
{
rtx src, dest;
/* Handle normal compare instructions. */
src = SET_SRC (PATTERN (insn));
dest = SET_DEST (PATTERN (insn));
/* Handle normal compare instructions. */
src = SET_SRC (PATTERN (insn));
dest = SET_DEST (PATTERN (insn));
if (!REG_P (dest)
|| !CC_REGNO_P (REGNO (dest))
|| GET_CODE (src) != COMPARE)
continue;
if (!REG_P (dest)
|| !CC_REGNO_P (REGNO (dest))
|| GET_CODE (src) != COMPARE)
return false;
/* s390_swap_cmp will try to find the conditional
jump when passing NULL_RTX as condition. */
cond = NULL_RTX;
op0 = &XEXP (src, 0);
op1 = &XEXP (src, 1);
}
else
continue;
/* s390_swap_cmp will try to find the conditional
jump when passing NULL_RTX as condition. */
cond = NULL_RTX;
op0 = &XEXP (src, 0);
op1 = &XEXP (src, 1);
}
else
return false;
if (!REG_P (*op0) || !REG_P (*op1))
continue;
if (!REG_P (*op0) || !REG_P (*op1))
return false;
/* Swap the COMPARE arguments and its mask if there is a
conflicting access in the previous insn. */
prev_insn = PREV_INSN (insn);
/* Swap the COMPARE arguments and its mask if there is a
conflicting access in the previous insn. */
prev_insn = PREV_INSN (insn);
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& reg_referenced_p (*op1, PATTERN (prev_insn)))
s390_swap_cmp (cond, op0, op1, insn);
/* Check if there is a conflict with the next insn. If there
was no conflict with the previous insn, then swap the
COMPARE arguments and its mask. If we already swapped
the operands, or if swapping them would cause a conflict
with the previous insn, issue a NOP after the COMPARE in
order to separate the two instuctions. */
next_insn = NEXT_INSN (insn);
if (next_insn != NULL_RTX && INSN_P (next_insn)
&& s390_non_addr_reg_read_p (*op1, next_insn))
{
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& reg_referenced_p (*op1, PATTERN (prev_insn)))
s390_swap_cmp (cond, op0, op1, insn);
/* Check if there is a conflict with the next insn. If there
was no conflict with the previous insn, then swap the
COMPARE arguments and its mask. If we already swapped
the operands, or if swapping them would cause a conflict
with the previous insn, issue a NOP after the COMPARE in
order to separate the two instuctions. */
next_insn = NEXT_INSN (insn);
if (next_insn != NULL_RTX && INSN_P (next_insn)
&& s390_non_addr_reg_read_p (*op1, next_insn))
&& s390_non_addr_reg_read_p (*op0, prev_insn))
{
if (prev_insn != NULL_RTX && INSN_P (prev_insn)
&& s390_non_addr_reg_read_p (*op0, prev_insn))
{
if (REGNO (*op1) == 0)
emit_insn_after (gen_nop1 (), insn);
else
emit_insn_after (gen_nop (), insn);
added_NOPs = 1;
}
if (REGNO (*op1) == 0)
emit_insn_after (gen_nop1 (), insn);
else
s390_swap_cmp (cond, op0, op1, insn);
emit_insn_after (gen_nop (), insn);
insn_added_p = true;
}
else
s390_swap_cmp (cond, op0, op1, insn);
}
/* Adjust branches if we added new instructions. */
if (added_NOPs)
shorten_branches (get_insns ());
return insn_added_p;
}
/* Perform machine-dependent processing. */
static void
......@@ -9944,10 +9998,33 @@ s390_reorg (void)
/* Try to optimize prologue and epilogue further. */
s390_optimize_prologue ();
/* Eliminate z10-specific pipeline recycles related to some compare
instructions. */
/* Walk over the insns and do some z10 specific changes. */
if (s390_tune == PROCESSOR_2097_Z10)
s390_z10_optimize_cmp ();
{
rtx insn;
bool insn_added_p = false;
/* The insn lengths and addresses have to be up to date for the
following manipulations. */
shorten_branches (get_insns ());
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
continue;
if (JUMP_P (insn))
insn_added_p |= s390_z10_fix_long_loop_prediction (insn);
if (GET_CODE (PATTERN (insn)) == PARALLEL
|| GET_CODE (PATTERN (insn)) == SET)
insn_added_p |= s390_z10_optimize_cmp (insn);
}
/* Adjust branches if we added new instructions. */
if (insn_added_p)
shorten_branches (get_insns ());
}
}
......
......@@ -1046,6 +1046,64 @@
(const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
; 10 byte for clgr/jg
; And now the same two patterns as above but with a negated CC mask.
; cij, cgij, crj, cgrj, cfi, cgfi, cr, cgr
; The following instructions do a complementary access of their second
; operand (z10 only): crj, cgrj, cr, cgr
; Fused compare-and-branch for signed integer comparisons on z10.
; Branches when the comparison is FALSE (the label sits in the ELSE
; arm; see the "negated CC mask" note above).  If the target is close
; enough (length 6) a single c<g>rj/c<g>ij is emitted; otherwise the
; pattern splits into a compare followed by a long relative jump (jg).
(define_insn "*icmp_and_br_signed_<mode>"
  [(set (pc)
	(if_then_else (match_operator 0 "s390_signed_integer_comparison"
				      [(match_operand:GPR 1 "register_operand"  "d,d")
				       (match_operand:GPR 2 "nonmemory_operand" "d,C")])
		      (pc)
		      (label_ref (match_operand 3 "" ""))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_Z10"
{
  if (get_attr_length (insn) == 6)
    return which_alternative ?
      "c<g>ij%D0\t%1,%c2,%l3" : "c<g>rj%D0\t%1,%2,%l3";
  else
    return which_alternative ?
      "c<g>fi\t%1,%c2\;jg%D0\t%l3" : "c<g>r\t%1,%2\;jg%D0\t%l3";
}
  [(set_attr "op_type" "RIE")
   (set_attr "type" "branch")
   (set_attr "z10prop" "z10_super_c,z10_super")
   ; Length 12 is a conservative upper bound for the split form; the
   ; actual sequences are 8 or 10 bytes (see trailing comments).
   (set (attr "length")
	(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
		      (const_int 6) (const_int 12)))]) ; 8 byte for cr/jg
						       ; 10 byte for cgr/jg
; clij, clgij, clrj, clgrj, clfi, clgfi, clr, clgr
; The following instructions do a complementary access of their second
; operand (z10 only): clrj, clgrj, clr, clgr
; Fused compare-and-branch for unsigned integer comparisons on z10.
; Mirror image of *icmp_and_br_signed_<mode>: branches when the
; comparison is FALSE; emits a single cl<g>rj/cl<g>ij when the target
; is within range (length 6), otherwise a compare plus long relative
; jump (jg).
(define_insn "*icmp_and_br_unsigned_<mode>"
  [(set (pc)
	(if_then_else (match_operator 0 "s390_unsigned_integer_comparison"
				      [(match_operand:GPR 1 "register_operand"  "d,d")
				       (match_operand:GPR 2 "nonmemory_operand" "d,I")])
		      (pc)
		      (label_ref (match_operand 3 "" ""))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_Z10"
{
  if (get_attr_length (insn) == 6)
    return which_alternative ?
      "cl<g>ij%D0\t%1,%b2,%l3" : "cl<g>rj%D0\t%1,%2,%l3";
  else
    return which_alternative ?
      "cl<g>fi\t%1,%b2\;jg%D0\t%l3" : "cl<g>r\t%1,%2\;jg%D0\t%l3";
}
  [(set_attr "op_type" "RIE")
   (set_attr "type" "branch")
   (set_attr "z10prop" "z10_super_c,z10_super")
   ; Length 12 is a conservative upper bound for the split form; the
   ; actual sequences are 8 or 10 bytes (see trailing comments).
   (set (attr "length")
	(if_then_else (lt (abs (minus (pc) (match_dup 3))) (const_int 60000))
		      (const_int 6) (const_int 12)))]) ; 8 byte for clr/jg
						       ; 10 byte for clgr/jg
;;
;;- Move instructions.
;;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment