Commit f65590a9 by Jeff Law

pa.md (conditional branches): Use the full displacement range for the branch target.

        * pa.md (conditional branches): Use the full displacement range
        for the branch target.  Update length computations to match current
        reality.
        (branch on bit patterns): Likewise.
        (decrement_and_branch_until_zero): Re-enable pattern.  Rewrite to
        be simpler and more efficient.  Also handle case where loop counter
        is in a FP register.

From-SVN: r4874
parent e287977a
...@@ -85,10 +85,9 @@ ...@@ -85,10 +85,9 @@
;; Integer conditional branch delay slot description. ;; Integer conditional branch delay slot description.
;; Nullification of conditional branches on the PA is dependent on the ;; Nullification of conditional branches on the PA is dependent on the
;; direction of the branch. Forward branches nullify true (direction > 0), ;; direction of the branch. Forward branches nullify true and
;; and backward branches nullify false (direction < 0). ;; backward branches nullify false. If the direction is unknown
;; If direction == 0, then the direction is unknown and we do not allow ;; then nullification is not allowed.
;; any nullification.
(define_delay (eq_attr "type" "cbranch") (define_delay (eq_attr "type" "cbranch")
[(eq_attr "in_branch_delay" "true") [(eq_attr "in_branch_delay" "true")
(and (eq_attr "in_nullified_branch_delay" "true") (and (eq_attr "in_nullified_branch_delay" "true")
...@@ -760,14 +759,10 @@ ...@@ -760,14 +759,10 @@
}" }"
[(set_attr "type" "cbranch") [(set_attr "type" "cbranch")
(set (attr "length") (set (attr "length")
(cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 2)))) (if_then_else (lt (abs (minus (match_dup 0) (plus (pc) (const_int 2))))
(const_int 1023)) (const_int 2047))
(const_int 1) (const_int 1)
(and (lt (match_dup 0) (pc)) (const_int 2)))])
(eq (symbol_ref "INSN_ANNULLED_BRANCH_P (insn)")
(const_int 1)))
(const_int 3)]
(const_int 2)))])
;; Match the negated branch. ;; Match the negated branch.
...@@ -787,14 +782,10 @@ ...@@ -787,14 +782,10 @@
}" }"
[(set_attr "type" "cbranch") [(set_attr "type" "cbranch")
(set (attr "length") (set (attr "length")
(cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 2)))) (if_then_else (lt (abs (minus (match_dup 0) (plus (pc) (const_int 2))))
(const_int 1023)) (const_int 2047))
(const_int 1) (const_int 1)
(and (lt (match_dup 0) (pc)) (const_int 2)))])
(eq (symbol_ref "INSN_ANNULLED_BRANCH_P (insn)")
(const_int 1)))
(const_int 3)]
(const_int 2)))])
;; Branch on Bit patterns. ;; Branch on Bit patterns.
(define_insn "" (define_insn ""
...@@ -816,14 +807,10 @@ ...@@ -816,14 +807,10 @@
}" }"
[(set_attr "type" "cbranch") [(set_attr "type" "cbranch")
(set (attr "length") (set (attr "length")
(cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 2)))) (if_then_else (lt (abs (minus (match_dup 0) (plus (pc) (const_int 2))))
(const_int 1023)) (const_int 2047))
(const_int 1) (const_int 1)
(and (lt (match_dup 0) (pc)) (const_int 2)))])
(eq (symbol_ref "INSN_ANNULLED_BRANCH_P (insn)")
(const_int 1)))
(const_int 3)]
(const_int 2)))])
(define_insn "" (define_insn ""
[(set (pc) [(set (pc)
...@@ -844,17 +831,12 @@ ...@@ -844,17 +831,12 @@
}" }"
[(set_attr "type" "cbranch") [(set_attr "type" "cbranch")
(set (attr "length") (set (attr "length")
(cond [(lt (abs (minus (match_dup 0) (plus (pc) (const_int 2)))) (if_then_else (lt (abs (minus (match_dup 0) (plus (pc) (const_int 2))))
(const_int 1023)) (const_int 2047))
(const_int 1) (const_int 1)
(and (lt (match_dup 0) (pc)) (const_int 2)))])
(eq (symbol_ref "INSN_ANNULLED_BRANCH_P (insn)")
(const_int 1)))
(const_int 3)]
(const_int 2)))])
;; Floating point branches ;; Floating point branches
(define_insn "" (define_insn ""
[(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0)) [(set (pc) (if_then_else (ne (reg:CCFP 0) (const_int 0))
(label_ref (match_operand 0 "" "")) (label_ref (match_operand 0 "" ""))
...@@ -2927,113 +2909,120 @@ ...@@ -2927,113 +2909,120 @@
[(set (pc) [(set (pc)
(if_then_else (if_then_else
(match_operator 2 "comparison_operator" (match_operator 2 "comparison_operator"
[(plus:SI (match_operand:SI 0 "register_operand" "+!r,m") [(plus:SI (match_operand:SI 0 "register_operand" "+!r,!*fx,!*m")
(match_operand:SI 1 "int5_operand" "L,L")) (match_operand:SI 1 "int5_operand" "L,L,L"))
(const_int 0)]) (const_int 0)])
(label_ref (match_operand 3 "" "")) (label_ref (match_operand 3 "" ""))
(pc))) (pc)))
(set (match_dup 0) (set (match_dup 0)
(plus:SI (match_dup 0) (match_dup 1))) (plus:SI (match_dup 0) (match_dup 1)))
(clobber (match_scratch:SI 4 "=X,r"))] (clobber (match_scratch:SI 4 "=X,r,r"))]
"0" ""
"* "*
{ {
if (INSN_ANNULLED_BRANCH_P (insn))
if (which_alternative == 0)
{
int nullify = INSN_ANNULLED_BRANCH_P (insn);
int length = get_attr_length (insn);
/* If this is a long branch with its delay slot unfilled, set `nullify'
as it can nullify the delay slot and save a nop. */
if (length == 2 && dbr_sequence_length () == 0)
nullify = 1;
/* If this is a short forward conditional branch which did not get
its delay slot filled, the delay slot can still be nullified. */
if (! nullify && length == 1 && dbr_sequence_length () == 0)
nullify = forward_branch_p (insn);
/* Handle short versions first. */
if (length == 1 && nullify)
return \"addib,%C2,n %1,%0,%3\";
else if (length == 1 && ! nullify)
return \"addib,%C2 %1,%0,%3\";
else if (length == 2)
{
/* Handle weird backwards branch with a filled delay slot
which is nullified. */
if (dbr_sequence_length () != 0
&& ! forward_branch_p (insn)
&& nullify)
return \"addib,%N2,n %1,%0,.+12\;bl %3,0\";
/* Handle normal cases. */
if (nullify)
return \"addi,%N2 %1,%0,%0\;bl,n %3,0\";
else
return \"addi,%N2 %1,%0,%0\;bl %3,0\";
}
else
abort();
}
/* Deal with gross reload from FP register case. */
else if (which_alternative == 1)
{ {
/* Loop counter is in a register. */ /* Move loop counter from FP register to MEM then into a GR,
if (which_alternative == 0) increment the GR, store the GR into MEM, and finally reload
/* Short branch. Normal handling of nullification. */ the FP register from MEM from within the branch's delay slot. */
if (get_attr_length (insn) == 1) output_asm_insn (\"fstws %0,-16(0,%%r30)\;ldw -16(0,%%r30),%4\",operands);
return \"addib,%C2,n %1,%0,%3\"; output_asm_insn (\"ldo %1(%4),%4\;stw %4,-16(0,%%r30)\", operands);
/* Long Conditional branch forward with delay slot nullified if if (get_attr_length (insn) == 6)
branch is taken. */ return \"comb,%S2 0,%4,%3\;fldws -16(0,%%r30),%0\";
else if (get_attr_length (insn) == 2)
return \"addi,%N2 %1,%0,%0\;bl,n %3,0\";
/* Long Conditional branch backwards with delay slot nullified
if branch is not taken. */
else
return \"addib,%N2 %1,%0,.+16\;nop\;bl %3,0\";
else else
{ return \"comclr,%B2 0,%4,0\;bl %3,0\;fldws -16(0,%%r30),%0\";
/* Must reload loop counter from memory. Ugly. */
output_asm_insn (\"ldw %0,%4\;ldo %1(%4),%4\;stw %4,%0\", operands);
/* Short branch. Normal handling of nullification. */
if (get_attr_length (insn) == 4)
return \"comb,%S2,n 0,%4,%3\";
/* Long Conditional branch forward with delay slot nullified if
branch is taken. */
else if (get_attr_length (insn) == 5)
return \"comclr,%B2 0,%4,0\;bl,n %3,0\";
else
/* Long Conditional branch backwards with delay slot nullified
if branch is not taken. */
return \"comb,%B2 0,%4,.+16\;nop\;bl %3,0\";
}
} }
/* Deal with gross reload from memory case. */
else else
{ {
/* We are not nullifying the delay slot. Much simpler. */ /* Reload loop counter from memory, the store back to memory
if (which_alternative == 0) happens in the branch's delay slot. */
if (get_attr_length (insn) == 1) output_asm_insn (\"ldw %0,%4\", operands);
/* Short form. */ if (get_attr_length (insn) == 3)
return \"addib,%C2 %1,%0,%3%#\"; return \"addib,%C2 %1,%4,%3;stw %4,%0\";
else
/* Long form. */
return \"addi,%N2 %1,%0,%0\;bl%* %3,0\";
else else
{ return \"addi,%N2 %1,%4,%0\;bl %3,0\;stw %4,%0\";
/* Reload loop counter from memory. */
output_asm_insn (\"ldw %0,%4\;ldo %1(%4),%4\;stw %4,%0\", operands);
/* Short form. */
if (get_attr_length (insn) == 4)
return \"comb,%S2 0,%4,%3%#\";
/* Long form. */
else
return \"comclr,%B2 0,%4,0\;bl%* %3,0\";
}
} }
}" }"
;; Do not expect to understand this the first time through. ;; Do not expect to understand this the first time through.
[(set_attr "type" "cbranch") [(set_attr "type" "cbranch,multi,multi")
(set (attr "length") (set (attr "length")
(if_then_else (if_then_else (eq_attr "alternative" "0")
(eq_attr "alternative" "0") ;; Loop counter in register case
;; Loop counter in register case. ;; Short branch has length of 1
(cond [(lt (abs (minus (match_dup 1) (plus (pc) (const_int 2)))) ;; Long branch has length of 2
(const_int 1023)) (if_then_else (lt (abs (minus (match_dup 3) (plus (pc) (const_int 2))))
;; Short branch has a length of 1. (const_int 2047))
(const_int 1) (const_int 1)
;; Long backward branch with nullified delay slot has length of 3. (const_int 2))
(and (lt (match_dup 1) (pc))
(eq (symbol_ref "INSN_ANNULLED_BRANCH_P (insn)") ;; Loop counter in FP reg case.
(const_int 1))) ;; Extra goo to deal with additional reload insns.
(const_int 3)] (if_then_else (eq_attr "alternative" "1")
;; Default others to 2. (if_then_else (lt (match_dup 3) (pc))
;; Long branches with unfilled delay slots --or-- (if_then_else
;; Long forward with nullified delay slot. (lt (abs (minus (match_dup 3) (plus (pc) (const_int 6))))
(const_int 2)) (const_int 2047))
;; Loop counter in memory case. Similar to above except we pay (const_int 6)
;; 3 extra insns in each case for reloading the counter into a register. (const_int 7))
(if_then_else (lt (match_dup 1) (pc)) (if_then_else
(cond [(lt (abs (minus (match_dup 1) (plus (pc) (const_int 5)))) (lt (abs (minus (match_dup 3) (plus (pc) (const_int 2))))
(const_int 1023)) (const_int 2047))
;; Short branch has length of 4 (the reloading costs 3 insns) (const_int 6)
(const_int 4) (const_int 7)))
(and (lt (match_dup 1) (pc)) ;; Loop counter in memory case.
(eq (symbol_ref "INSN_ANNULLED_BRANCH_P (insn)") ;; Extra goo to deal with additional reload insns.
(const_int 1))) (if_then_else (lt (match_dup 3) (pc))
;; Long backward branch with nullified delay slot has length of 6. (if_then_else
(const_int 6)] (lt (abs (minus (match_dup 3) (plus (pc) (const_int 3))))
;; Default others to 5. (const_int 2047))
;; Long branches with unfilled delay slots --or-- (const_int 3)
;; Long forward with nullified delay slot. (const_int 4))
(const_int 5)) (if_then_else
(if_then_else (lt (abs (minus (match_dup 1) (lt (abs (minus (match_dup 3) (plus (pc) (const_int 2))))
(plus (pc) (const_int 2)))) (const_int 2047))
(const_int 1023)) (const_int 3)
(const_int 4) (const_int 4))))))])
(const_int 5)))))])
;; The next four peepholes take advantage of the new 5 operand ;; The next four peepholes take advantage of the new 5 operand
;; fmpy{add,sub} instructions available on 1.1 CPUS. Basically ;; fmpy{add,sub} instructions available on 1.1 CPUS. Basically
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment