Commit 74835ed8 authored by Richard Henderson, committed by Richard Henderson

alpha.h (ISSUE_RATE): Define.

        * alpha.h (ISSUE_RATE): Define.
        * alpha.c (alpha_adjust_cost): Handle EV5 mult delay; don't apply
        EV4 adjustments to EV5.
        * alpha.md: Remove all scaling from function unit delays.  Rework
        EV5 function units to match the CPU.
        (umuldi3_highpart): EV5 added the IMULH insn class.

From-SVN: r15916
parent ee80685a
Wed Oct 15 18:16:05 1997 Richard Henderson <rth@cygnus.com>
Tune Haifa scheduler for Alpha:
* alpha.h (ISSUE_RATE): Define.
* alpha.c (alpha_adjust_cost): Handle EV5 mult delay; don't apply
EV4 adjustments to EV5.
* alpha.md: Remove all scaling from function unit delays. Rework
EV5 function units to match the CPU.
(umuldi3_highpart): EV5 added the IMULH insn class.
Wed Oct 15 17:42:41 1997 Jeffrey A Law (law@cygnus.com) Wed Oct 15 17:42:41 1997 Jeffrey A Law (law@cygnus.com)
* pa.c (following_call): Fail if the CALL_INSN is an indirect * pa.c (following_call): Fail if the CALL_INSN is an indirect
......
...@@ -1150,7 +1150,7 @@ alpha_adjust_cost (insn, link, dep_insn, cost) ...@@ -1150,7 +1150,7 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
rtx dep_insn; rtx dep_insn;
int cost; int cost;
{ {
rtx set; rtx set, set_src;
/* If the dependence is an anti-dependence, there is no cost. For an /* If the dependence is an anti-dependence, there is no cost. For an
output dependence, there is sometimes a cost, but it doesn't seem output dependence, there is sometimes a cost, but it doesn't seem
...@@ -1159,12 +1159,12 @@ alpha_adjust_cost (insn, link, dep_insn, cost) ...@@ -1159,12 +1159,12 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
if (REG_NOTE_KIND (link) != 0) if (REG_NOTE_KIND (link) != 0)
return 0; return 0;
/* EV5 costs are as given in alpha.md; exceptions are given here. */
if (alpha_cpu == PROCESSOR_EV5) if (alpha_cpu == PROCESSOR_EV5)
{ {
/* And the lord DEC sayeth: "A special bypass provides an effective /* On EV5, "A special bypass provides an effective latency of 0
latency of 0 cycles for an ICMP or ILOG insn producing the test cycles for an ICMP or ILOG insn producing the test operand of an
operand of an IBR or CMOV insn." */ IBR or CMOV insn." */
if (recog_memoized (dep_insn) >= 0 if (recog_memoized (dep_insn) >= 0
&& (get_attr_type (dep_insn) == TYPE_ICMP && (get_attr_type (dep_insn) == TYPE_ICMP
|| get_attr_type (dep_insn) == TYPE_ILOG) || get_attr_type (dep_insn) == TYPE_ILOG)
...@@ -1173,67 +1173,104 @@ alpha_adjust_cost (insn, link, dep_insn, cost) ...@@ -1173,67 +1173,104 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
|| (get_attr_type (insn) == TYPE_CMOV || (get_attr_type (insn) == TYPE_CMOV
&& !((set = single_set (dep_insn)) != 0 && !((set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET && GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (SET_SRC (PATTERN (insn))) == IF_THEN_ELSE && (set_src = SET_SRC (PATTERN (insn)),
&& (rtx_equal_p (SET_DEST (set), GET_CODE (set_src) == IF_THEN_ELSE)
XEXP (SET_SRC (PATTERN (insn)), 1)) && (set = SET_DEST (set),
|| rtx_equal_p (SET_DEST (set), rtx_equal_p (set, XEXP (set_src, 1))
XEXP (SET_SRC (PATTERN (insn)), 2))))))) || rtx_equal_p (set, XEXP (set_src, 2)))))))
return 1; return 0;
return cost;
}
/* If INSN is a store insn and DEP_INSN is setting the data being stored,
we can sometimes lower the cost. */
if (recog_memoized (insn) >= 0 && get_attr_type (insn) == TYPE_ST
&& (set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
switch (get_attr_type (dep_insn))
{
case TYPE_LD:
/* No savings here. */
return cost;
case TYPE_IMULL:
case TYPE_IMULQ:
/* In these cases, we save one cycle. */
return cost - 2;
default: /* On EV5 it takes longer to get data to the multiplier than to
/* In all other cases, we save two cycles. */ anywhere else, so increase costs. */
return MAX (0, cost - 4);
} if (recog_memoized (insn) >= 0
&& recog_memoized (dep_insn) >= 0
&& (get_attr_type (insn) == TYPE_IMULL
|| get_attr_type (insn) == TYPE_IMULQ
|| get_attr_type (insn) == TYPE_IMULH)
&& (set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& (set_src = SET_SRC (PATTERN (insn)),
GET_CODE (set_src) == MULT)
&& (set = SET_DEST (set),
rtx_equal_p (set, XEXP (set_src, 0))
|| rtx_equal_p (set, XEXP (set_src, 1))))
{
switch (get_attr_type (insn))
{
case TYPE_LD:
case TYPE_CMOV:
case TYPE_IMULL:
case TYPE_IMULQ:
case TYPE_IMULH:
return cost + 1;
case TYPE_JSR:
case TYPE_IADD:
case TYPE_ILOG:
case TYPE_SHIFT:
case TYPE_ICMP:
return cost + 2;
}
}
}
else
{
/* On EV4, if INSN is a store insn and DEP_INSN is setting the data
being stored, we can sometimes lower the cost. */
/* Another case that needs adjustment is an arithmetic or logical if (recog_memoized (insn) >= 0 && get_attr_type (insn) == TYPE_ST
operation. Its cost is usually one cycle, but we default it to && (set = single_set (dep_insn)) != 0
two in the MD file. The only case that it is actually two is && GET_CODE (PATTERN (insn)) == SET
for the address in loads and stores. */ && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
{
switch (get_attr_type (dep_insn))
{
case TYPE_LD:
/* No savings here. */
return cost;
case TYPE_IMULL:
case TYPE_IMULQ:
case TYPE_IMULH:
/* In these cases, we save one cycle. */
return cost - 1;
default:
/* In all other cases, we save two cycles. */
return MAX (0, cost - 2);
}
}
if (recog_memoized (dep_insn) >= 0 /* Another case that needs adjustment is an arithmetic or logical
&& (get_attr_type (dep_insn) == TYPE_IADD operation. Its cost is usually one cycle, but we default it to
|| get_attr_type (dep_insn) == TYPE_ILOG)) two in the MD file. The only case that it is actually two is
switch (get_attr_type (insn)) for the address in loads and stores. */
{
case TYPE_LD:
case TYPE_ST:
return cost;
default: if (recog_memoized (dep_insn) >= 0
return 2; && (get_attr_type (dep_insn) == TYPE_IADD
} || get_attr_type (dep_insn) == TYPE_ILOG))
{
switch (get_attr_type (insn))
{
case TYPE_LD:
case TYPE_ST:
return cost;
default:
return 1;
}
}
/* The final case is when a compare feeds into an integer branch. The cost /* The final case is when a compare feeds into an integer branch;
is only one cycle in that case. */ the cost is only one cycle in that case. */
if (recog_memoized (dep_insn) >= 0 if (recog_memoized (dep_insn) >= 0
&& get_attr_type (dep_insn) == TYPE_ICMP && get_attr_type (dep_insn) == TYPE_ICMP
&& recog_memoized (insn) >= 0 && recog_memoized (insn) >= 0
&& get_attr_type (insn) == TYPE_IBR) && get_attr_type (insn) == TYPE_IBR)
return 2; return 1;
}
/* Otherwise, return the default cost. */ /* Otherwise, return the default cost. */
return cost; return cost;
} }
......
...@@ -1595,6 +1595,9 @@ extern void final_prescan_insn (); ...@@ -1595,6 +1595,9 @@ extern void final_prescan_insn ();
our own exit function. */ our own exit function. */
#define HAVE_ATEXIT #define HAVE_ATEXIT
/* The EV4 is dual issue; EV5 is quad issue. */
#define ISSUE_RATE (alpha_cpu == PROCESSOR_EV4 ? 2 : 4)
/* Compute the cost of computing a constant rtl expression RTX /* Compute the cost of computing a constant rtl expression RTX
whose rtx-code is CODE. The body of this macro is a portion whose rtx-code is CODE. The body of this macro is a portion
of a switch statement. If the code is computed here, of a switch statement. If the code is computed here,
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
;; separately. ;; separately.
(define_attr "type" (define_attr "type"
"ld,st,ibr,fbr,jsr,iadd,ilog,shift,cmov,icmp,imull,imulq,fadd,fmul,fcpys,fdivs,fdivt,ldsym,isubr" "ld,st,ibr,fbr,jsr,iadd,ilog,shift,cmov,icmp,imull,imulq,imulh,fadd,fmul,fcpys,fdivs,fdivt,ldsym,isubr,misc"
(const_string "iadd")) (const_string "iadd"))
;; The TRAP_TYPE attribute marks instructions that may generate traps ;; The TRAP_TYPE attribute marks instructions that may generate traps
...@@ -41,35 +41,30 @@ ...@@ -41,35 +41,30 @@
;; is desired). ;; is desired).
(define_attr "trap" "yes,no" (const_string "no")) (define_attr "trap" "yes,no" (const_string "no"))
;; For the EV4 we include four function units: ABOX, which computes the address, ;; For the EV4 we include four function units: ABOX, which computes
;; BBOX, used for branches, EBOX, used for integer operations, and FBOX, ;; the address, BBOX, used for branches, EBOX, used for integer
;; used for FP operations. ;; operations, and FBOX, used for FP operations.
;;
;; We assume that we have been successful in getting double issues and
;; hence multiply all costs by two insns per cycle. The minimum time in
;; a function unit is 2 cycle, which will tend to produce the double
;; issues.
;; Memory delivers its result in three cycles. ;; Memory delivers its result in three cycles.
(define_function_unit "ev4_abox" 1 0 (define_function_unit "ev4_abox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "ld,st")) (eq_attr "type" "ld,ldsym,st"))
6 2) 3 1)
;; Branches have no delay cost, but do tie up the unit for two cycles. ;; Branches have no delay cost, but do tie up the unit for two cycles.
(define_function_unit "ev4_bbox" 1 1 (define_function_unit "ev4_bbox" 1 1
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "ibr,fbr,jsr")) (eq_attr "type" "ibr,fbr,jsr"))
4 4) 2 2)
;; Arithmetic insns normally have their results available after two ;; Arithmetic insns normally have their results available after
;; cycles. There are a number of exceptions. They are encoded in ;; two cycles. There are a number of exceptions. They are encoded in
;; ADJUST_COST. Some of the other insns have similar exceptions. ;; ADJUST_COST. Some of the other insns have similar exceptions.
(define_function_unit "ev4_ebox" 1 0 (define_function_unit "ev4_ebox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "iadd,ilog,ldsym,shift,cmov,icmp")) (eq_attr "type" "iadd,ilog,shift,cmov,icmp"))
4 2) 2 1)
;; These really don't take up the integer pipeline, but they do occupy ;; These really don't take up the integer pipeline, but they do occupy
;; IBOX1; we approximate here. ;; IBOX1; we approximate here.
...@@ -77,135 +72,145 @@ ...@@ -77,135 +72,145 @@
(define_function_unit "ev4_ebox" 1 0 (define_function_unit "ev4_ebox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "imull")) (eq_attr "type" "imull"))
42 2) 21 1)
(define_function_unit "ev4_ebox" 1 0 (define_function_unit "ev4_ebox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "imulq")) (eq_attr "type" "imulq,imulh"))
46 2) 23 1)
(define_function_unit "ev4_imult" 1 0 (define_function_unit "ev4_imult" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "imull")) (eq_attr "type" "imull"))
42 38) 21 19)
(define_function_unit "ev4_imult" 1 0 (define_function_unit "ev4_imult" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "imulq")) (eq_attr "type" "imulq,imulh"))
46 42) 23 21)
(define_function_unit "ev4_fbox" 1 0 (define_function_unit "ev4_fbox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "fadd,fmul,fcpys")) (eq_attr "type" "fadd,fmul,fcpys"))
12 2) 6 1)
(define_function_unit "ev4_fbox" 1 0 (define_function_unit "ev4_fbox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivs")) (eq_attr "type" "fdivs"))
68 0) 34 0)
(define_function_unit "ev4_fbox" 1 0 (define_function_unit "ev4_fbox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivt")) (eq_attr "type" "fdivt"))
126 0) 63 0)
(define_function_unit "ev4_divider" 1 0 (define_function_unit "ev4_divider" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivs")) (eq_attr "type" "fdivs"))
68 60) 34 30)
(define_function_unit "ev4_divider" 1 0 (define_function_unit "ev4_divider" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivt")) (eq_attr "type" "fdivt"))
126 118) 63 59)
;; EV5 scheduling. EV5 can issue 4 insns per clock. ;; EV5 scheduling. EV5 can issue 4 insns per clock.
;; Multiply all costs by 4.
;; EV5 has two integer units. ;; EV5 has two asymmetric integer units. Model this with ebox,e0,e1.
;; Everything uses ebox, and those that require particular pipes grab
;; those as well.
(define_function_unit "ev5_ebox" 2 0 (define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "iadd,ilog,icmp,ldsym")) (eq_attr "type" "iadd,ilog,icmp,st,shift,imull,imulq,imulh"))
4 4) 1 1)
;; Memory takes at least 2 clocks. ;; Memory takes at least 2 clocks, and load cannot dual issue with stores.
;; Conditional moves always take 2 ticks.
(define_function_unit "ev5_ebox" 2 0 (define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld,cmov")) (eq_attr "type" "ld,ldsym"))
8 4) 2 1)
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld,ldsym"))
0 1
[(eq_attr "type" "st")])
;; Loads can dual issue. Store cannot; nor can loads + stores. ;; Conditional moves always take 2 ticks.
;; Model this with a mythical load/store unit. (define_function_unit "ev5_ebox" 2 0
(define_function_unit "ev5_ldst" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld")) (eq_attr "type" "cmov"))
8 4 [(eq_attr "type" "st")]) 2 1)
(define_function_unit "ev5_ldst" 1 0 ;; Stores, shifts, and multiplies can only issue to E0
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "st")) (eq_attr "type" "st"))
4 4) 1 1)
(define_function_unit "ev5_ebox" 2 0 ;; But shifts and multiplies don't conflict with loads.
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "imull")) (eq_attr "type" "shift,imull,imulq,imulh"))
32 4) 1 1
[(eq_attr "type" "st,shift,imull,imulq,imulh")])
(define_function_unit "ev5_ebox" 2 0 ;; Branches can only issue to E1
(define_function_unit "ev5_e1" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "imulq")) (eq_attr "type" "ibr,jsr"))
48 4) 1 1)
;; Multiplies also use the integer multiplier. ;; Multiplies also use the integer multiplier.
(define_function_unit "ev5_imult" 1 0 (define_function_unit "ev5_imult" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "imull")) (eq_attr "type" "imull"))
16 8) 8 4)
(define_function_unit "ev5_imult" 1 0 (define_function_unit "ev5_imult" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "imulq")) (eq_attr "type" "imulq"))
48 32) 12 8)
;; There is only 1 shifter/zapper. (define_function_unit "ev5_imult" 1 0
(define_function_unit "ev5_shift" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "shift")) (eq_attr "type" "imulh"))
4 4) 14 8)
;; Similarly for the FPU we have two asymmetric units. But fcpys can issue
;; on either so we have to play the game again.
;; We pretend EV5 has symmetrical 2 fpus,
;; even though cpys is the only insn that can issue on either unit.
(define_function_unit "ev5_fpu" 2 0 (define_function_unit "ev5_fpu" 2 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "fadd,fmul,fcpys")) (eq_attr "type" "fadd,fmul,fcpys,fbr,fdivs,fdivt"))
16 4) 4 1)
;; Multiplies (resp. adds) also use the fmul (resp. fadd) units. ;; Multiplies (resp. adds) also use the fmul (resp. fadd) units.
(define_function_unit "ev5_fpmul" 1 0 (define_function_unit "ev5_fm" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "fmul")) (eq_attr "type" "fmul"))
16 4) 4 1)
(define_function_unit "ev5_fpadd" 1 0 (define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "fadd")) (eq_attr "type" "fadd"))
16 4) 4 1)
(define_function_unit "ev5_fpadd" 1 0 (define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "fbr")) (eq_attr "type" "fbr"))
4 4) 1 1)
(define_function_unit "ev5_fpadd" 1 0 (define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "fdivs")) (eq_attr "type" "fdivs"))
60 4) 15 1)
(define_function_unit "ev5_fpadd" 1 0 (define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "fdivt")) (eq_attr "type" "fdivt"))
88 4) 22 1)
;; First define the arithmetic insns. Note that the 32-bit forms also ;; First define the arithmetic insns. Note that the 32-bit forms also
;; sign-extend. ;; sign-extend.
...@@ -607,7 +612,7 @@ ...@@ -607,7 +612,7 @@
(const_int 64))))] (const_int 64))))]
"" ""
"umulh %1,%2,%0" "umulh %1,%2,%0"
[(set_attr "type" "imulq")]) [(set_attr "type" "imulh")])
(define_insn "" (define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r") [(set (match_operand:DI 0 "register_operand" "=r")
...@@ -618,7 +623,7 @@ ...@@ -618,7 +623,7 @@
(const_int 64))))] (const_int 64))))]
"" ""
"umulh %1,%2,%0" "umulh %1,%2,%0"
[(set_attr "type" "imulq")]) [(set_attr "type" "imulh")])
;; The divide and remainder operations always take their inputs from ;; The divide and remainder operations always take their inputs from
;; r24 and r25, put their output in r27, and clobber r23 and r28. ;; r24 and r25, put their output in r27, and clobber r23 and r28.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment