Commit 98791e3a by Richard Henderson Committed by Richard Henderson

alpha.c (alpha_adjust_cost): Remove everything but memory latency adjustments.

        * config/alpha/alpha.c (alpha_adjust_cost): Remove everything but
        memory latency adjustments.
        (alpha_variable_issue): Remove.
        (alpha_use_dfa_pipeline_interface): New.
        (alpha_multipass_dfa_lookahead): New.
        * config/alpha/alpha.md: Remove define_function_unit scheduling;
        include new dfa scheduling.
        (attr type): Add none.
        (blockage): Use it.
        * config/alpha/ev4.md: New.
        * config/alpha/ev5.md: New.
        * config/alpha/ev6.md: New.

From-SVN: r53196
parent 309527ce
2002-05-05 Richard Henderson <rth@redhat.com>
* config/alpha/alpha.c (alpha_adjust_cost): Remove everything but
memory latency adjustments.
(alpha_variable_issue): Remove.
(alpha_use_dfa_pipeline_interface): New.
(alpha_multipass_dfa_lookahead): New.
* config/alpha/alpha.md: Remove define_function_unit scheduling;
include new dfa scheduling.
(attr type): Add none.
(blockage): Use it.
* config/alpha/ev4.md: New.
* config/alpha/ev5.md: New.
* config/alpha/ev6.md: New.
2002-05-05 David S. Miller <davem@redhat.com>
* recog.c (store_data_bypass_p): Handle CLOBBER inside PARALLEL.
......
......@@ -157,8 +157,10 @@ static int alpha_adjust_cost
PARAMS ((rtx, rtx, rtx, int));
static int alpha_issue_rate
PARAMS ((void));
static int alpha_variable_issue
PARAMS ((FILE *, int, rtx, int));
static int alpha_use_dfa_pipeline_interface
PARAMS ((void));
static int alpha_multipass_dfa_lookahead
PARAMS ((void));
#if TARGET_ABI_UNICOSMK
static void alpha_init_machine_status
......@@ -231,8 +233,12 @@ static unsigned int unicosmk_section_type_flags PARAMS ((tree, const char *,
#define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE alpha_variable_issue
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
alpha_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
alpha_multipass_dfa_lookahead
struct gcc_target targetm = TARGET_INITIALIZER;
......@@ -4828,9 +4834,8 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
/* If the dependence is an anti-dependence, there is no cost. For an
output dependence, there is sometimes a cost, but it doesn't seem
worth handling those few cases. */
if (REG_NOTE_KIND (link) != 0)
return 0;
return cost;
/* If we can't recognize the insns, we can't really do anything. */
if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
......@@ -4845,122 +4850,13 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
|| dep_insn_type == TYPE_LDSYM)
cost += alpha_memory_latency-1;
switch (alpha_cpu)
{
case PROCESSOR_EV4:
/* On EV4, if INSN is a store insn and DEP_INSN is setting the data
being stored, we can sometimes lower the cost. */
if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
&& (set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
{
switch (dep_insn_type)
{
case TYPE_ILD:
case TYPE_FLD:
/* No savings here. */
return cost;
case TYPE_IMUL:
/* In these cases, we save one cycle. */
return cost - 1;
default:
/* In all other cases, we save two cycles. */
return MAX (0, cost - 2);
}
}
/* Everything else handled in DFA bypasses now. */
/* Another case that needs adjustment is an arithmetic or logical
operation. Its cost is usually one cycle, but we default it to
two in the MD file. The only case in which it is actually two is
for the address in loads, stores, and jumps. */
if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
{
switch (insn_type)
{
case TYPE_ILD:
case TYPE_IST:
case TYPE_FLD:
case TYPE_FST:
case TYPE_JSR:
return cost;
default:
return 1;
}
}
/* The final case is when a compare feeds into an integer branch;
the cost is only one cycle in that case. */
if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
return 1;
break;
case PROCESSOR_EV5:
/* And the lord DEC saith: "A special bypass provides an effective
latency of 0 cycles for an ICMP or ILOG insn producing the test
operand of an IBR or ICMOV insn." */
if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
&& (set = single_set (dep_insn)) != 0)
{
/* A branch only has one input. This must be it. */
if (insn_type == TYPE_IBR)
return 0;
/* A conditional move has three, make sure it is the test. */
if (insn_type == TYPE_ICMOV
&& GET_CODE (set_src = PATTERN (insn)) == SET
&& GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
&& rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
return 0;
}
/* "The multiplier is unable to receive data from IEU bypass paths.
The instruction issues at the expected time, but its latency is
increased by the time it takes for the input data to become
available to the multiplier" -- which happens in pipeline stage
six, when results are committed to the register file. */
if (insn_type == TYPE_IMUL)
{
switch (dep_insn_type)
{
/* These insns produce their results in pipeline stage five. */
case TYPE_ILD:
case TYPE_ICMOV:
case TYPE_IMUL:
case TYPE_MVI:
return cost + 1;
/* Other integer insns produce results in pipeline stage four. */
default:
return cost + 2;
}
}
break;
case PROCESSOR_EV6:
/* There is additional latency to move the result of (most) FP
operations anywhere but the FP register file. */
if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
&& (dep_insn_type == TYPE_FADD ||
dep_insn_type == TYPE_FMUL ||
dep_insn_type == TYPE_FCMOV))
return cost + 2;
break;
}
/* Otherwise, return the default cost. */
return cost;
}
/* Function to initialize the issue rate used by the scheduler. */
/* The number of instructions that can be issued per cycle. */
static int
alpha_issue_rate ()
{
......@@ -4968,18 +4864,24 @@ alpha_issue_rate ()
}
static int
alpha_variable_issue (dump, verbose, insn, cim)
FILE *dump ATTRIBUTE_UNUSED;
int verbose ATTRIBUTE_UNUSED;
rtx insn;
int cim;
alpha_use_dfa_pipeline_interface ()
{
if (recog_memoized (insn) < 0 || get_attr_type (insn) == TYPE_MULTI)
return 0;
return cim - 1;
return true;
}
/* Return the number of alternative schedules the first-cycle multipass
   scheduler should try.  This should be as wide as the scheduling
   freedom in the DFA, but no wider; making the value too large results
   in extra work for the scheduler.

   For EV4, loads can be issued to either IB0 or IB1, thus we have 2
   alternative schedules.  For EV5, we can choose between E0/E1 and
   FA/FM, again 2.  For EV6, an arithmetic insn can be issued to
   U0/U1/L0/L1, giving 4.  */
static int
alpha_multipass_dfa_lookahead ()
{
  /* Only EV6 has the four-way choice; every other CPU gets 2.  */
  if (alpha_cpu == PROCESSOR_EV6)
    return 4;

  return 2;
}
/* Register global variables and machine-specific functions with the
garbage collector. */
......
......@@ -78,7 +78,7 @@
(define_attr "type"
"ild,fld,ldsym,ist,fst,ibr,fbr,jsr,iadd,ilog,shift,icmov,fcmov,icmp,imul,\
fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
(const_string "iadd"))
;; Describe a user's asm statement.
......@@ -123,313 +123,11 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
(define_attr "length" ""
(const_int 4))
;; On EV4 there are two classes of resources to consider: resources needed
;; to issue, and resources needed to execute. IBUS[01] are in the first
;; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
;; (There are a few other register-like resources, but ...)
; First, describe all of the issue constraints with single cycle delays.
; All insns need a bus, but all except loads require one or the other.
(define_function_unit "ev4_ibus0" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fst,fbr,iadd,imul,ilog,shift,icmov,icmp"))
1 1)
(define_function_unit "ev4_ibus1" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ist,ibr,jsr,fadd,fcmov,fcpys,fmul,fdiv,misc"))
1 1)
; Memory delivers its result in three cycles. Actually return one and
; take care of this in adjust_cost, since we want to handle user-defined
; memory latencies.
(define_function_unit "ev4_abox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ild,fld,ldsym,ist,fst"))
1 1)
; Branches have no delay cost, but do tie up the unit for two cycles.
(define_function_unit "ev4_bbox" 1 1
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ibr,fbr,jsr"))
2 2)
; Arithmetic insns normally have their results available after
; two cycles. There are a number of exceptions. They are encoded in
; ADJUST_COST. Some of the other insns have similar exceptions.
(define_function_unit "ev4_ebox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "iadd,ilog,shift,icmov,icmp,misc"))
2 1)
(define_function_unit "imul" 1 0
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "si")))
21 19)
(define_function_unit "imul" 1 0
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "!si")))
23 21)
(define_function_unit "ev4_fbox" 1 0
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fadd,fmul,fcpys,fcmov"))
6 1)
(define_function_unit "fdiv" 1 0
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "si")))
34 30)
(define_function_unit "fdiv" 1 0
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "di")))
63 59)
;; EV5 scheduling. EV5 can issue 4 insns per clock.
;;
;; EV5 has two asymmetric integer units. Model this with E0 & E1 along
;; with the combined resource EBOX.
(define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv"))
1 1)
; Memory takes at least 2 clocks. Return one from here and fix up with
; user-defined latencies in adjust_cost.
(define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ild,fld,ldsym"))
1 1)
; Loads can dual issue with one another, but loads and stores do not mix.
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ild,fld,ldsym"))
1 1
[(eq_attr "type" "ist,fst")])
; Stores, shifts, multiplies can only issue to E0
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ist,fst,shift,imul"))
1 1)
; Motion video insns also issue only to E0, and take two ticks.
(define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "mvi"))
2 1)
; Conditional moves always take 2 ticks.
(define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "icmov"))
2 1)
; Branches can only issue to E1
(define_function_unit "ev5_e1" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ibr,jsr"))
1 1)
; Multiplies also use the integer multiplier.
; ??? How to: "No instruction can be issued to pipe E0 exactly two
; cycles before an integer multiplication completes."
(define_function_unit "imul" 1 0
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "si")))
8 4)
(define_function_unit "imul" 1 0
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "di")))
12 8)
(define_function_unit "imul" 1 0
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "udi")))
14 8)
;; Similarly for the FPU we have two asymmetric units. But fcpys can issue
;; on either so we have to play the game again.
(define_function_unit "ev5_fbox" 2 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fadd,fcmov,fmul,fcpys,fbr,fdiv"))
4 1)
(define_function_unit "ev5_fm" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fmul"))
4 1)
; Add and cmov as you would expect; fbr never produces a result;
; fdiv issues through fa to the divider,
(define_function_unit "ev5_fa" 1 0
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fadd,fcmov,fbr,fdiv"))
4 1)
; ??? How to: "No instruction can be issued to pipe FA exactly five
; cycles before a floating point divide completes."
(define_function_unit "fdiv" 1 0
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "si")))
15 15) ; 15 to 31 data dependent
(define_function_unit "fdiv" 1 0
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "di")))
22 22) ; 22 to 60 data dependent
;; EV6 scheduling. EV6 can issue 4 insns per clock.
;;
;; EV6 has two symmetric pairs ("clusters") of two asymmetric integer units
;; ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
;; Conditional moves decompose into two independent primitives, each
;; taking one cycle. Since ev6 is out-of-order, we can't see anything
;; but two cycles.
(define_function_unit "ev6_ebox" 4 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "icmov"))
2 1)
(define_function_unit "ev6_ebox" 4 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "!fbr,fcmov,fadd,fmul,fcpys,fdiv,fsqrt"))
1 1)
;; Integer loads take at least 3 clocks, and only issue to lower units.
;; Return one from here and fix up with user-defined latencies in adjust_cost.
(define_function_unit "ev6_l" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "ild,ldsym,ist,fst"))
1 1)
;; FP loads take at least 4 clocks. Return two from here...
(define_function_unit "ev6_l" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fld"))
2 1)
;; Motion video insns also issue only to U0, and take three ticks.
(define_function_unit "ev6_u0" 1 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "mvi"))
3 1)
(define_function_unit "ev6_u" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "mvi"))
3 1)
;; Shifts issue to either upper pipe.
(define_function_unit "ev6_u" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "shift"))
1 1)
;; Multiplies issue only to U1, and all take 7 ticks.
;; Rather than create a new function unit just for U1, reuse IMUL
(define_function_unit "imul" 1 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "imul"))
7 1)
(define_function_unit "ev6_u" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "imul"))
7 1)
;; Branches issue to either upper pipe
(define_function_unit "ev6_u" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "ibr"))
3 1)
;; Calls only issue to L0.
(define_function_unit "ev6_l0" 1 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "jsr"))
1 1)
(define_function_unit "ev6_l" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "jsr"))
1 1)
;; Ftoi/itof only issue to lower pipes
(define_function_unit "ev6_l" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "ftoi"))
3 1)
(define_function_unit "ev6_l" 2 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "itof"))
4 1)
;; For the FPU we are very similar to EV5, except there's no insn that
;; can issue to fm & fa, so we get to leave that out.
(define_function_unit "ev6_fm" 1 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fmul"))
4 1)
(define_function_unit "ev6_fa" 1 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fadd,fcpys,fbr,fdiv,fsqrt"))
4 1)
(define_function_unit "ev6_fa" 1 0
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fcmov"))
8 1)
(define_function_unit "fdiv" 1 0
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "si")))
12 10)
(define_function_unit "fdiv" 1 0
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "di")))
15 13)
(define_function_unit "fsqrt" 1 0
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fsqrt")
(eq_attr "opsize" "si")))
16 14)
(define_function_unit "fsqrt" 1 0
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fsqrt")
(eq_attr "opsize" "di")))
32 30)
; ??? The FPU communicates with memory and the integer register file
; via two fp store units. We need a slot in the fst immediately, and
; a slot in LOW after the operand data is ready. At which point the
; data may be moved either to the store queue or the integer register
; file and the insn retired.
;; Include scheduling descriptions.
(include "ev4.md")
(include "ev5.md")
(include "ev6.md")
;; First define the arithmetic insns. Note that the 32-bit forms also
;; sign-extend.
......@@ -5018,7 +4716,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi"
[(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
""
""
[(set_attr "length" "0")])
[(set_attr "length" "0")
(set_attr "type" "none")])
(define_insn "jump"
[(set (pc)
......
;; Scheduling description for Alpha EV4.
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING. If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
; On EV4 there are two classes of resources to consider: resources needed
; to issue, and resources needed to execute. IBUS[01] are in the first
; category. ABOX, BBOX, EBOX, FBOX, IMUL & FDIV make up the second.
; (There are a few other register-like resources, but ...)
; Split the units across three automata: issue/memory/branch units in
; ev4_0, integer execution units in ev4_1, and FP units in ev4_2.
(define_automaton "ev4_0,ev4_1,ev4_2")
(define_cpu_unit "ev4_ib0,ev4_ib1,ev4_abox,ev4_bbox" "ev4_0")
(define_cpu_unit "ev4_ebox,ev4_imul" "ev4_1")
(define_cpu_unit "ev4_fbox,ev4_fdiv" "ev4_2")
; Shorthand for insns that may issue on either instruction bus.
(define_reservation "ev4_ib01" "ev4_ib0|ev4_ib1")
; Assume type "multi" single issues: reserving both issue buses at once
; leaves no bus free for any other insn in the same cycle.
(define_insn_reservation "ev4_multi" 1
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "multi"))
"ev4_ib0+ev4_ib1")
; Loads from L0 complete in three cycles. adjust_cost still factors
; in user-specified memory latency, so return 1 here. Loads may issue
; on either bus and always occupy the abox.
(define_insn_reservation "ev4_ld" 1
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ild,fld,ldsym"))
"ev4_ib01+ev4_abox")
; Stores can issue before the data (but not address) is ready.
; Integer stores issue on IB1, FP stores on IB0; both occupy the abox.
(define_insn_reservation "ev4_ist" 1
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ist"))
"ev4_ib1+ev4_abox")
(define_insn_reservation "ev4_fst" 1
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fst"))
"ev4_ib0+ev4_abox")
; Branches have no delay cost, but do tie up the unit for two cycles:
; the bbox is reserved in the issue cycle and again in the next one.
; Integer branches and calls issue on IB1, FP branches on IB0.
(define_insn_reservation "ev4_ibr" 2
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "ibr,jsr"))
"ev4_ib1+ev4_bbox,ev4_bbox")
(define_insn_reservation "ev4_fbr" 2
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fbr"))
"ev4_ib0+ev4_bbox,ev4_bbox")
; Arithmetic insns normally have their results available after
; two cycles. There are a number of exceptions.
(define_insn_reservation "ev4_iaddlog" 2
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "iadd,ilog"))
"ev4_ib0+ev4_ebox")
; Exception: add/logical results are available after one cycle to
; integer branches and calls, to the other ebox arithmetic classes,
; and to multiplies.
(define_bypass 1
"ev4_iaddlog"
"ev4_ibr,ev4_iaddlog,ev4_shiftcm,ev4_icmp,ev4_imulsi,ev4_imuldi")
; Shifts and conditional moves: two-cycle latency, issued on IB0 to
; the ebox.
(define_insn_reservation "ev4_shiftcm" 2
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "shift,icmov"))
"ev4_ib0+ev4_ebox")
; Compares also take two cycles, except when feeding an integer branch
; or call (ev4_ibr), which sees the result after one.
(define_insn_reservation "ev4_icmp" 2
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "icmp"))
"ev4_ib0+ev4_ebox")
(define_bypass 1 "ev4_icmp" "ev4_ibr")
; An ebox result feeding an integer store has zero latency whenever
; store_data_bypass_p accepts the pair (presumably when the result is
; the stored data rather than the address -- see recog.c).
(define_bypass 0
"ev4_iaddlog,ev4_shiftcm,ev4_icmp"
"ev4_ist"
"store_data_bypass_p")
; Multiplies use a non-pipelined imul unit. Also, "no [ebox] insn can
; be issued exactly three cycles before an integer multiply completes".
; The trailing ev4_ebox reservation presumably models that restriction.
(define_insn_reservation "ev4_imulsi" 21
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "si")))
"ev4_ib0+ev4_imul,ev4_imul*18,ev4_ebox")
; One cycle less to an integer store accepted by store_data_bypass_p.
(define_bypass 20 "ev4_imulsi" "ev4_ist" "store_data_bypass_p")
; Multiplies of any operand size other than "si": latency 23, with the
; imul unit held for 21 cycles before the final ebox slot.
(define_insn_reservation "ev4_imuldi" 23
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "!si")))
"ev4_ib0+ev4_imul,ev4_imul*20,ev4_ebox")
; One cycle less to an integer store accepted by store_data_bypass_p.
(define_bypass 22 "ev4_imuldi" "ev4_ist" "store_data_bypass_p")
; Most FP insns have a 6 cycle latency, but with a 4 cycle bypass back in
; (i.e. when the consumer is itself one of these FP ops).
(define_insn_reservation "ev4_fpop" 6
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "fadd,fmul,fcpys,fcmov"))
"ev4_ib1+ev4_fbox")
(define_bypass 4 "ev4_fpop" "ev4_fpop")
; The floating point divider is not pipelined. Also, "no FPOP insn can be
; issued exactly five or exactly six cycles before an fdiv insn completes".
; The ev4_fbox reservations in the last two cycles of each reservation
; presumably model that restriction.
(define_insn_reservation "ev4_fdivsf" 34
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "si")))
"ev4_ib1+ev4_fdiv,ev4_fdiv*28,ev4_fdiv+ev4_fbox,ev4_fbox")
; Divides with opsize "di": latency 63, with a correspondingly longer
; hold on the divider.
(define_insn_reservation "ev4_fdivdf" 63
(and (eq_attr "cpu" "ev4")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "di")))
"ev4_ib1+ev4_fdiv,ev4_fdiv*57,ev4_fdiv+ev4_fbox,ev4_fbox")
; Traps don't consume or produce data. Type "misc" issues on IB1 only
; and reserves no execution unit.
(define_insn_reservation "ev4_misc" 1
(and (eq_attr "cpu" "ev4")
(eq_attr "type" "misc"))
"ev4_ib1")
;; Scheduling description for Alpha EV5.
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING. If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;; EV5 has two asymmetric integer units, E0 and E1, plus separate
;; FP add and multiply units.
(define_automaton "ev5_0,ev5_1")
(define_cpu_unit "ev5_e0,ev5_e1,ev5_fa,ev5_fm" "ev5_0")
; Shorthands for "either integer pipe" and "either FP pipe".
(define_reservation "ev5_e01" "ev5_e0|ev5_e1")
(define_reservation "ev5_fam" "ev5_fa|ev5_fm")
; The integer multiplier lives in ev5_0 with the pipes; the FP divider
; is placed in ev5_1.
(define_cpu_unit "ev5_imul" "ev5_0")
(define_cpu_unit "ev5_fdiv" "ev5_1")
; Assume type "multi" single issues: it claims both integer pipes and
; both FP pipes in the same cycle.
(define_insn_reservation "ev5_multi" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "multi"))
"ev5_e0+ev5_e1+ev5_fa+ev5_fm")
; Stores can only issue to E0, and may not issue with loads.
; Model this with some fake units: a load takes ev5_l0 or ev5_l1 (via
; the ev5_ld reservation), a store takes ev5_st, and the exclusion set
; forbids reserving a load unit and the store unit in the same cycle.
(define_cpu_unit "ev5_l0,ev5_l1,ev5_st" "ev5_0")
(define_reservation "ev5_ld" "ev5_l0|ev5_l1")
(exclusion_set "ev5_l0,ev5_l1" "ev5_st")
(define_insn_reservation "ev5_st" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ist,fst"))
"ev5_e0+ev5_st")
; Loads from L0 complete in two cycles. adjust_cost still factors
; in user-specified memory latency, so return 1 here.
; A load uses either integer pipe plus one of the two fake load units.
(define_insn_reservation "ev5_ld" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ild,fld,ldsym"))
"ev5_e01+ev5_ld")
; Integer branches slot only to E1.
(define_insn_reservation "ev5_ibr" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ibr"))
"ev5_e1")
; Calls (type "jsr") likewise slot only to E1.
(define_insn_reservation "ev5_jsr" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "jsr"))
"ev5_e1")
; Shifts and mvi insns issue only to E0 and have a two-cycle latency.
(define_insn_reservation "ev5_shiftmvi" 2
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "shift,mvi"))
"ev5_e0")
; Integer conditional moves take two cycles on either integer pipe.
(define_insn_reservation "ev5_cmov" 2
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "icmov"))
"ev5_e01")
; Adds take one cycle on either integer pipe.
(define_insn_reservation "ev5_iadd" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "iadd"))
"ev5_e01")
; Logicals and compares take one cycle on either integer pipe.  They
; are kept apart from iadd because only they get the bypass below.
(define_insn_reservation "ev5_ilogcmp" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "ilog,icmp"))
"ev5_e01")
; Conditional move and branch can issue the same cycle as the test
; (when if_test_bypass_p accepts the pair).
(define_bypass 0 "ev5_ilogcmp" "ev5_ibr,ev5_cmov" "if_test_bypass_p")
; Multiplies use a non-pipelined imul unit. Also, "no insn can be issued
; to E0 exactly two cycles before an integer multiply completes".
; The final ev5_e0 in each reservation presumably models that late
; E0 restriction.
(define_insn_reservation "ev5_imull" 8
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "si")))
"ev5_e0+ev5_imul,ev5_imul*3,nothing,ev5_e0")
(define_insn_reservation "ev5_imulq" 12
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "di")))
"ev5_e0+ev5_imul,ev5_imul*7,nothing,ev5_e0")
(define_insn_reservation "ev5_imulh" 14
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "imul")
(eq_attr "opsize" "udi")))
"ev5_e0+ev5_imul,ev5_imul*7,nothing*3,ev5_e0")
; The multiplier is unable to receive data from Ebox bypass paths. The
; instruction issues at the expected time, but its latency is increased
; by the time it takes for the input data to become available to the
; multiplier. For example, an IMULL instruction issued one cycle later
; than an ADDL instruction, which produced one of its operands, has a
; latency of 10 (8 + 2). If the IMULL instruction is issued two cycles
; later than the ADDL instruction, the latency is 9 (8 + 1).
;
; Model this instead with increased latency on the input instruction.
; Loads and ordinary integer ops feeding a multiply are given latency 3.
(define_bypass 3
"ev5_ld,ev5_shiftmvi,ev5_cmov,ev5_iadd,ev5_ilogcmp"
"ev5_imull,ev5_imulq,ev5_imulh")
; A multiply feeding another multiply costs one cycle more than the
; producer's normal latency (8, 12 and 14 respectively).
(define_bypass 9 "ev5_imull" "ev5_imull,ev5_imulq,ev5_imulh")
(define_bypass 13 "ev5_imulq" "ev5_imull,ev5_imulq,ev5_imulh")
(define_bypass 15 "ev5_imulh" "ev5_imull,ev5_imulq,ev5_imulh")
; Similarly for the FPU we have two asymmetric units.  FA takes adds,
; conditional moves and FP branches; FM takes multiplies; fcpys may
; use either.
(define_insn_reservation "ev5_fadd" 4
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fadd,fcmov"))
"ev5_fa")
(define_insn_reservation "ev5_fbr" 1
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fbr"))
"ev5_fa")
(define_insn_reservation "ev5_fcpys" 4
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fcpys"))
"ev5_fam")
(define_insn_reservation "ev5_fmul" 4
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "fmul"))
"ev5_fm")
; The floating point divider is not pipelined. Also, "no insn can be issued
; to FA exactly five cycles before an fdiv insn completes".
;
; ??? Do not model this late reservation due to the enormously increased
; size of the resulting DFA.
;
; ??? Putting ev5_fa and ev5_fdiv alone into the same automata produces
; a DFA of acceptable size, but putting ev5_fm and ev5_fa into separate
; automata produces incorrect results for insns that can choose one or
; the other, i.e. ev5_fcpys.
;
; In each reservation below the commented-out string is the form that
; includes the late FA reservation; the live string omits it and only
; holds the divider for the full latency.
(define_insn_reservation "ev5_fdivsf" 15
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "si")))
; "ev5_fa+ev5_fdiv,ev5_fdiv*9,ev5_fa+ev5_fdiv,ev5_fdiv*4"
"ev5_fa+ev5_fdiv,ev5_fdiv*14")
(define_insn_reservation "ev5_fdivdf" 22
(and (eq_attr "cpu" "ev5")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "di")))
; "ev5_fa+ev5_fdiv,ev5_fdiv*17,ev5_fa+ev5_fdiv,ev5_fdiv*4"
"ev5_fa+ev5_fdiv,ev5_fdiv*21")
; Traps don't consume or produce data; rpcc is latency 2 if we ever add it.
; Type "misc" is issued to E0.
(define_insn_reservation "ev5_misc" 2
(and (eq_attr "cpu" "ev5")
(eq_attr "type" "misc"))
"ev5_e0")
;; Scheduling description for Alpha EV6.
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING. If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
; EV6 can issue 4 insns per clock. It's out-of-order, so this isn't
; expected to help over-much, but a precise description can be important
; for software pipelining.
;
; EV6 has two symmetric pairs ("clusters") of two asymmetric integer
; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1.
;
; ??? The clusters have independent register files that are re-synced
; every cycle. Thus there is one additional cycle of latency between
; insns issued on different clusters. Possibly model that by duplicating
; all EBOX insn_reservations that can issue to either cluster, increasing
; all latencies by one, and adding bypasses within the cluster.
;
; ??? In addition, instruction order affects cluster issue.
(define_automaton "ev6_0,ev6_1")
; The four integer pipes, with shorthands for "either upper pipe",
; "either lower pipe" and "any integer pipe".
(define_cpu_unit "ev6_u0,ev6_u1,ev6_l0,ev6_l1" "ev6_0")
(define_reservation "ev6_u" "ev6_u0|ev6_u1")
(define_reservation "ev6_l" "ev6_l0|ev6_l1")
(define_reservation "ev6_ebox" "ev6_u|ev6_l")
; ev6_fa is placed alone in ev6_1; ev6_fm and the two fp store units
; share ev6_0 with the integer pipes.
(define_cpu_unit "ev6_fa" "ev6_1")
(define_cpu_unit "ev6_fm,ev6_fst0,ev6_fst1" "ev6_0")
(define_reservation "ev6_fst" "ev6_fst0|ev6_fst1")
; Assume type "multi" single issues: it claims every declared unit in
; the same cycle.
(define_insn_reservation "ev6_multi" 1
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "multi"))
"ev6_u0+ev6_u1+ev6_l0+ev6_l1+ev6_fa+ev6_fm+ev6_fst0+ev6_fst1")
; Integer loads take at least 3 clocks, and only issue to lower units.
; adjust_cost still factors in user-specified memory latency, so return 1 here.
(define_insn_reservation "ev6_ild" 1
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "ild,ldsym"))
"ev6_l")
; Integer stores likewise issue to either lower unit.
(define_insn_reservation "ev6_ist" 1
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "ist"))
"ev6_l")
; FP loads take at least 4 clocks. adjust_cost still factors
; in user-specified memory latency, so return 2 here.
; Like integer loads, they issue to either lower unit.
(define_insn_reservation "ev6_fld" 2
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fld"))
"ev6_l")
; The FPU communicates with memory and the integer register file
; via two fp store units. We need a slot in the fst immediately, and
; a slot in LOW after the operand data is ready. At which point the
; data may be moved either to the store queue or the integer register
; file and the insn retired.
; Hence the reservation: an fp store unit in the first cycle, one idle
; cycle, then a lower pipe.
(define_insn_reservation "ev6_fst" 3
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fst"))
"ev6_fst,nothing,ev6_l")
; Arithmetic goes anywhere: adds, logicals and compares may use any of
; the four integer pipes, with one-cycle latency.
(define_insn_reservation "ev6_arith" 1
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "iadd,ilog,icmp"))
"ev6_ebox")
; Motion video insns issue only to U0, and take three ticks.
(define_insn_reservation "ev6_mvi" 3
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "mvi"))
"ev6_u0")
; Shifts issue to either upper unit, with one-cycle latency.
(define_insn_reservation "ev6_shift" 1
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "shift"))
"ev6_u")
; Multiplies issue only to U1, and all take 7 ticks.
(define_insn_reservation "ev6_imul" 7
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "imul"))
"ev6_u1")
; Conditional moves decompose into two independent primitives, each taking
; one cycle. Since ev6 is out-of-order, we can't see anything but two cycles.
; The reservation therefore takes an integer pipe (any of the four) in
; each of two consecutive cycles.
(define_insn_reservation "ev6_icmov" 2
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "icmov"))
"ev6_ebox,ev6_ebox")
; Integer branches issue to upper units
(define_insn_reservation "ev6_ibr" 1
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "ibr"))
"ev6_u")
; Calls only issue to L0.
(define_insn_reservation "ev6_jsr" 1
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "jsr"))
"ev6_l0")
; Ftoi/itof only issue to lower pipes.
(define_insn_reservation "ev6_itof" 3
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "itof"))
"ev6_l")
; ftoi uses the same reservation as an FP store: an fp store unit
; first, one idle cycle, then a lower pipe.
(define_insn_reservation "ev6_ftoi" 3
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "ftoi"))
"ev6_fst,nothing,ev6_l")
; FP multiplies use FM; adds, fcpys and FP branches use FA.  All have
; a four-cycle latency and occupy their unit for one cycle.
(define_insn_reservation "ev6_fmul" 4
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fmul"))
"ev6_fm")
(define_insn_reservation "ev6_fadd" 4
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fadd,fcpys,fbr"))
"ev6_fa")
; FP conditional moves take eight cycles and use FA twice: in the
; first cycle and again in the fifth.
(define_insn_reservation "ev6_fcmov" 8
(and (eq_attr "cpu" "ev6")
(eq_attr "type" "fcmov"))
"ev6_fa,nothing*3,ev6_fa")
; Divides and square roots issue to FA and keep it reserved for all but
; the last three cycles of their latency, so nothing else can use FA
; in the meantime.
(define_insn_reservation "ev6_fdivsf" 12
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "si")))
"ev6_fa*9")
(define_insn_reservation "ev6_fdivdf" 15
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fdiv")
(eq_attr "opsize" "di")))
"ev6_fa*12")
(define_insn_reservation "ev6_sqrtsf" 18
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fsqrt")
(eq_attr "opsize" "si")))
"ev6_fa*15")
(define_insn_reservation "ev6_sqrtdf" 33
(and (eq_attr "cpu" "ev6")
(and (eq_attr "type" "fsqrt")
(eq_attr "opsize" "di")))
"ev6_fa*30")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment