Commit 74835ed8 by Richard Henderson Committed by Richard Henderson

alpha.h (ISSUE_RATE): Define.

        * alpha.h (ISSUE_RATE): Define.
        * alpha.c (alpha_adjust_cost): Handle EV5 mult delay; don't apply
        EV4 adjustments to EV5.
        * alpha.md: Remove all scaling from function unit delays.  Rework
        EV5 function units to match the CPU.
        (umuldi3_highpart): EV5 added the IMULH insn class.

From-SVN: r15916
parent ee80685a
Wed Oct 15 18:16:05 1997 Richard Henderson <rth@cygnus.com>
Tune Haifa scheduler for Alpha:
* alpha.h (ISSUE_RATE): Define.
* alpha.c (alpha_adjust_cost): Handle EV5 mult delay; don't apply
EV4 adjustments to EV5.
* alpha.md: Remove all scaling from function unit delays. Rework
EV5 function units to match the CPU.
(umuldi3_highpart): EV5 added the IMULH insn class.
Wed Oct 15 17:42:41 1997 Jeffrey A Law (law@cygnus.com)
* pa.c (following_call): Fail if the CALL_INSN is an indirect
......
......@@ -1150,7 +1150,7 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
rtx dep_insn;
int cost;
{
rtx set;
rtx set, set_src;
/* If the dependence is an anti-dependence, there is no cost. For an
output dependence, there is sometimes a cost, but it doesn't seem
......@@ -1159,12 +1159,12 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
if (REG_NOTE_KIND (link) != 0)
return 0;
/* EV5 costs are as given in alpha.md; exceptions are given here. */
if (alpha_cpu == PROCESSOR_EV5)
{
/* And the lord DEC sayeth: "A special bypass provides an effective
latency of 0 cycles for an ICMP or ILOG insn producing the test
operand of an IBR or CMOV insn." */
/* On EV5, "A special bypass provides an effective latency of 0
cycles for an ICMP or ILOG insn producing the test operand of an
IBR or CMOV insn." */
if (recog_memoized (dep_insn) >= 0
&& (get_attr_type (dep_insn) == TYPE_ICMP
|| get_attr_type (dep_insn) == TYPE_ILOG)
......@@ -1173,22 +1173,56 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
|| (get_attr_type (insn) == TYPE_CMOV
&& !((set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& GET_CODE (SET_SRC (PATTERN (insn))) == IF_THEN_ELSE
&& (rtx_equal_p (SET_DEST (set),
XEXP (SET_SRC (PATTERN (insn)), 1))
|| rtx_equal_p (SET_DEST (set),
XEXP (SET_SRC (PATTERN (insn)), 2)))))))
return 1;
return cost;
}
&& (set_src = SET_SRC (PATTERN (insn)),
GET_CODE (set_src) == IF_THEN_ELSE)
&& (set = SET_DEST (set),
rtx_equal_p (set, XEXP (set_src, 1))
|| rtx_equal_p (set, XEXP (set_src, 2)))))))
return 0;
/* On EV5 it takes longer to get data to the multiplier than to
anywhere else, so increase costs. */
/* If INSN is a store insn and DEP_INSN is setting the data being stored,
we can sometimes lower the cost. */
if (recog_memoized (insn) >= 0
&& recog_memoized (dep_insn) >= 0
&& (get_attr_type (insn) == TYPE_IMULL
|| get_attr_type (insn) == TYPE_IMULQ
|| get_attr_type (insn) == TYPE_IMULH)
&& (set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& (set_src = SET_SRC (PATTERN (insn)),
GET_CODE (set_src) == MULT)
&& (set = SET_DEST (set),
rtx_equal_p (set, XEXP (set_src, 0))
|| rtx_equal_p (set, XEXP (set_src, 1))))
{
switch (get_attr_type (insn))
{
case TYPE_LD:
case TYPE_CMOV:
case TYPE_IMULL:
case TYPE_IMULQ:
case TYPE_IMULH:
return cost + 1;
case TYPE_JSR:
case TYPE_IADD:
case TYPE_ILOG:
case TYPE_SHIFT:
case TYPE_ICMP:
return cost + 2;
}
}
}
else
{
/* On EV4, if INSN is a store insn and DEP_INSN is setting the data
being stored, we can sometimes lower the cost. */
if (recog_memoized (insn) >= 0 && get_attr_type (insn) == TYPE_ST
&& (set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
{
switch (get_attr_type (dep_insn))
{
case TYPE_LD:
......@@ -1197,12 +1231,14 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
case TYPE_IMULL:
case TYPE_IMULQ:
case TYPE_IMULH:
/* In these cases, we save one cycle. */
return cost - 2;
return cost - 1;
default:
/* In all other cases, we save two cycles. */
return MAX (0, cost - 4);
return MAX (0, cost - 2);
}
}
/* Another case that needs adjustment is an arithmetic or logical
......@@ -1213,27 +1249,28 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
if (recog_memoized (dep_insn) >= 0
&& (get_attr_type (dep_insn) == TYPE_IADD
|| get_attr_type (dep_insn) == TYPE_ILOG))
{
switch (get_attr_type (insn))
{
case TYPE_LD:
case TYPE_ST:
return cost;
default:
return 2;
return 1;
}
}
/* The final case is when a compare feeds into an integer branch. The cost
is only one cycle in that case. */
/* The final case is when a compare feeds into an integer branch;
the cost is only one cycle in that case. */
if (recog_memoized (dep_insn) >= 0
&& get_attr_type (dep_insn) == TYPE_ICMP
&& recog_memoized (insn) >= 0
&& get_attr_type (insn) == TYPE_IBR)
return 2;
return 1;
}
/* Otherwise, return the default cost. */
return cost;
}
......
......@@ -1595,6 +1595,9 @@ extern void final_prescan_insn ();
our own exit function. */
#define HAVE_ATEXIT
/* The EV4 is dual issue; EV5 is quad issue. */
#define ISSUE_RATE (alpha_cpu == PROCESSOR_EV4 ? 2 : 4)
/* Compute the cost of computing a constant rtl expression RTX
whose rtx-code is CODE. The body of this macro is a portion
of a switch statement. If the code is computed here,
......
......@@ -33,7 +33,7 @@
;; separately.
(define_attr "type"
"ld,st,ibr,fbr,jsr,iadd,ilog,shift,cmov,icmp,imull,imulq,fadd,fmul,fcpys,fdivs,fdivt,ldsym,isubr"
"ld,st,ibr,fbr,jsr,iadd,ilog,shift,cmov,icmp,imull,imulq,imulh,fadd,fmul,fcpys,fdivs,fdivt,ldsym,isubr,misc"
(const_string "iadd"))
;; The TRAP_TYPE attribute marks instructions that may generate traps
......@@ -41,35 +41,30 @@
;; is desired).
(define_attr "trap" "yes,no" (const_string "no"))
;; For the EV4 we include four function units: ABOX, which computes the address,
;; BBOX, used for branches, EBOX, used for integer operations, and FBOX,
;; used for FP operations.
;;
;; We assume that we have been successful in getting double issues and
;; hence multiply all costs by two insns per cycle. The minimum time in
;; a function unit is 2 cycle, which will tend to produce the double
;; issues.
;; For the EV4 we include four function units: ABOX, which computes
;; the address, BBOX, used for branches, EBOX, used for integer
;; operations, and FBOX, used for FP operations.
;; Memory delivers its result in three cycles.
(define_function_unit "ev4_abox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ld,st"))
6 2)
(eq_attr "type" "ld,ldsym,st"))
3 1)
;; Branches have no delay cost, but do tie up the unit for two cycles.
(define_function_unit "ev4_bbox" 1 1
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ibr,fbr,jsr"))
4 4)
2 2)
;; Arithmetic insns normally have their results available after two
;; cycles. There are a number of exceptions. They are encoded in
;; Arithmetic insns normally have their results available after
;; two cycles. There are a number of exceptions. They are encoded in
;; ADJUST_COST. Some of the other insns have similar exceptions.
(define_function_unit "ev4_ebox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "iadd,ilog,ldsym,shift,cmov,icmp"))
4 2)
(eq_attr "type" "iadd,ilog,shift,cmov,icmp"))
2 1)
;; These really don't take up the integer pipeline, but they do occupy
;; IBOX1; we approximate here.
......@@ -77,135 +72,145 @@
(define_function_unit "ev4_ebox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "imull"))
42 2)
21 1)
(define_function_unit "ev4_ebox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "imulq"))
46 2)
(eq_attr "type" "imulq,imulh"))
23 1)
(define_function_unit "ev4_imult" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "imull"))
42 38)
21 19)
(define_function_unit "ev4_imult" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "imulq"))
46 42)
(eq_attr "type" "imulq,imulh"))
23 21)
(define_function_unit "ev4_fbox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fadd,fmul,fcpys"))
12 2)
6 1)
(define_function_unit "ev4_fbox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivs"))
68 0)
34 0)
(define_function_unit "ev4_fbox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivt"))
126 0)
63 0)
(define_function_unit "ev4_divider" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivs"))
68 60)
34 30)
(define_function_unit "ev4_divider" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fdivt"))
126 118)
64 59)
;; EV5 scheduling. EV5 can issue 4 insns per clock.
;; Multiply all costs by 4.
;; EV5 has two integer units.
;; EV5 has two asymmetric integer units. Model this with ebox,e0,e1.
;; Everything uses ebox, and those that require particular pipes grab
;; those as well.
(define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "iadd,ilog,icmp,ldsym"))
4 4)
(eq_attr "type" "iadd,ilog,icmp,st,shift,imull,imulq,imulh"))
1 1)
;; Memory takes at least 2 clocks.
;; Conditional moves always take 2 ticks.
;; Memory takes at least 2 clocks, and loads cannot dual issue with stores.
(define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld,cmov"))
8 4)
(eq_attr "type" "ld,ldsym"))
2 1)
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld,ldsym"))
0 1
[(eq_attr "type" "st")])
;; Loads can dual issue. Store cannot; nor can loads + stores.
;; Model this with a mythical load/store unit.
(define_function_unit "ev5_ldst" 1 0
;; Conditional moves always take 2 ticks.
(define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld"))
8 4 [(eq_attr "type" "st")])
(eq_attr "type" "cmov"))
2 1)
(define_function_unit "ev5_ldst" 1 0
;; Stores, shifts, and multiplies can only issue to E0
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "st"))
4 4)
1 1)
(define_function_unit "ev5_ebox" 2 0
;; But shifts and multiplies don't conflict with loads.
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "imull"))
32 4)
(eq_attr "type" "shift,imull,imulq,imulh"))
1 1
[(eq_attr "type" "st,shift,imull,imulq,imulh")])
(define_function_unit "ev5_ebox" 2 0
;; Branches can only issue to E1
(define_function_unit "ev5_e1" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "imulq"))
48 4)
(eq_attr "type" "ibr,jsr"))
1 1)
;; Multiplies also use the integer multiplier.
(define_function_unit "ev5_imult" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "imull"))
16 8)
8 4)
(define_function_unit "ev5_imult" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "imulq"))
48 32)
12 8)
;; There is only 1 shifter/zapper.
(define_function_unit "ev5_shift" 1 0
(define_function_unit "ev5_imult" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "shift"))
4 4)
(eq_attr "type" "imulh"))
14 8)
;; Similarly for the FPU we have two asymmetric units. But fcpys can issue
;; on either so we have to play the game again.
;; We pretend EV5 has symmetrical 2 fpus,
;; even though cpys is the only insn that can issue on either unit.
(define_function_unit "ev5_fpu" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fadd,fmul,fcpys"))
16 4)
(eq_attr "type" "fadd,fmul,fcpys,fbr,fdivs,fdivt"))
4 1)
;; Multiplies (resp. adds) also use the fmul (resp. fadd) units.
(define_function_unit "ev5_fpmul" 1 0
(define_function_unit "ev5_fm" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fmul"))
16 4)
4 1)
(define_function_unit "ev5_fpadd" 1 0
(define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fadd"))
16 4)
4 1)
(define_function_unit "ev5_fpadd" 1 0
(define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fbr"))
4 4)
1 1)
(define_function_unit "ev5_fpadd" 1 0
(define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fdivs"))
60 4)
15 1)
(define_function_unit "ev5_fpadd" 1 0
(define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fdivt"))
88 4)
22 1)
;; First define the arithmetic insns. Note that the 32-bit forms also
;; sign-extend.
......@@ -607,7 +612,7 @@
(const_int 64))))]
""
"umulh %1,%2,%0"
[(set_attr "type" "imulq")])
[(set_attr "type" "imulh")])
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=r")
......@@ -618,7 +623,7 @@
(const_int 64))))]
""
"umulh %1,%2,%0"
[(set_attr "type" "imulq")])
[(set_attr "type" "imulh")])
;; The divide and remainder operations always take their inputs from
;; r24 and r25, put their output in r27, and clobber r23 and r28.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment