2003-01-09 Vladimir Makarov <vmakarov@redhat.com>

Merging changes from itanium-sched-branch: From-SVN: r61132

2003-01-09 Vladimir Makarov <vmakarov@redhat.com>
Merging changes from itanium-sched-branch: From-SVN: r61132
30028c85 · Vladimir Makarov · Vladimir Makarov · 6ff2fe39 · 30028c85 · 30028c85
Commit 30028c85 authored Jan 09, 2003 by Vladimir Makarov Committed by Vladimir Makarov Jan 09, 2003
21 changed files
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -27,7 +27,11 @@ extern GTY(()) rtx ia64_compare_op1;
 /* Functions defined in ia64.c */
+extern int bundling_p;
 #ifdef RTX_CODE
+extern int ia64_st_address_bypass_p PARAMS((rtx, rtx));
+extern int ia64_ld_address_bypass_p PARAMS((rtx, rtx));
+extern int ia64_produce_address_p PARAMS((rtx));
 extern int call_operand PARAMS((rtx, enum machine_mode));
 extern int sdata_symbolic_operand PARAMS((rtx, enum machine_mode));
 extern int got_symbolic_operand PARAMS((rtx, enum machine_mode));

--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -95,6 +95,8 @@ extern int target_flags;
 #define MASK_DWARF2_ASM 0x40000000	/* test dwarf2 line info via gas.  */
+#define MASK_EARLY_STOP_BITS 0x00002000 /* tune stop bits for the model.  */
 #define TARGET_BIG_ENDIAN	(target_flags & MASK_BIG_ENDIAN)
 #define TARGET_GNU_AS		(target_flags & MASK_GNU_AS)
@@ -137,6 +139,7 @@ extern int ia64_tls_size;
 #define TARGET_TLS14		(ia64_tls_size == 14)
 #define TARGET_TLS22		(ia64_tls_size == 22)
 #define TARGET_TLS64		(ia64_tls_size == 64)
+#define TARGET_EARLY_STOP_BITS	(target_flags & MASK_EARLY_STOP_BITS)
 #define TARGET_HPUX_LD		0
@@ -188,6 +191,10 @@ extern int ia64_tls_size;
      N_("Enable Dwarf 2 line debug info via GNU as")},			\
  { "no-dwarf2-asm", 	-MASK_DWARF2_ASM,				\
      N_("Disable Dwarf 2 line debug info via GNU as")},		\
+  { "early-stop-bits", MASK_EARLY_STOP_BITS,				\
+      N_("Enable earlier placing stop bits for better scheduling")},	\
+  { "no-early-stop-bits", -MASK_EARLY_STOP_BITS,			\
+      N_("Disable earlier placing stop bits")},				\
  SUBTARGET_SWITCHES							\
  { "",			TARGET_DEFAULT | TARGET_CPU_DEFAULT,		\
      NULL }								\
@@ -213,12 +220,30 @@ extern int ia64_tls_size;
 extern const char *ia64_fixed_range_string;
 extern const char *ia64_tls_size_string;
+/* Which processor to schedule for. The cpu attribute defines a list
+   that mirrors this list, so changes to ia64.md must be made at the
+   same time.  */
+enum processor_type
+{
+  PROCESSOR_ITANIUM,			/* Original Itanium. */
+  PROCESSOR_ITANIUM2,
+  PROCESSOR_max
+};
+extern enum processor_type ia64_tune;
+extern const char *ia64_tune_string;
 #define TARGET_OPTIONS \
 {									\
  { "fixed-range=", 	&ia64_fixed_range_string,			\
      N_("Specify range of registers to make fixed")},			\
  { "tls-size=",	&ia64_tls_size_string,				\
      N_("Specify bit size of immediate TLS offsets")},			\
+  { "tune=",		&ia64_tune_string,				\
+      N_("Schedule code for given CPU")},				\
 }
 /* Sometimes certain combinations of command options do not make sense on a
@@ -2485,4 +2510,9 @@ enum fetchop_code {
 #undef  PROFILE_BEFORE_PROLOGUE
 #define PROFILE_BEFORE_PROLOGUE 1
+/* Switch on code for querying unit reservations.  */
+#define CPU_UNITS_QUERY 1
 /* End of ia64.h */
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -91,6 +91,10 @@
 ;; ::
 ;; ::::::::::::::::::::
+;; Processor type.  This attribute must exactly match the processor_type
+;; enumeration in ia64.h.
+(define_attr "cpu" "itanium,itanium2" (const (symbol_ref "ia64_tune")))
 ;; Instruction type.  This primarily determines how instructions can be
 ;; packed in bundles, and secondarily affects scheduling to function units.
@@ -110,8 +114,8 @@
 (define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,
 	fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,
 	chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,
-	syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,
+	syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop,nop_b,nop_f,
-	nop_i,nop_m,nop_x,lfetch"
+	nop_i,nop_m,nop_x,lfetch,pre_cycle"
  (const_string "unknown"))
 ;; chk_s has an I and an M form; use type A for convenience.
@@ -146,76 +150,23 @@
 (define_attr "predicable" "no,yes" (const_string "yes"))
-;; ::::::::::::::::::::
-;; ::
-;; :: Function Units
-;; ::
-;; ::::::::::::::::::::
-;; We define 6 "dummy" functional units.  All the real work to decide which
+;; DFA descriptions of ia64 processors used for insn scheduling and
-;; insn uses which unit is done by our MD_SCHED_REORDER hooks.  We only
+;; bundling.
-;; have to ensure here that there are enough copies of the dummy unit so
-;; that the scheduler doesn't get confused by MD_SCHED_REORDER.
+(automata_option "ndfa")
-;; Other than the 6 dummies for normal insns, we also add a single dummy unit
-;; for stop bits.
+;; Uncomment the following line to output automata for debugging.
+;; (automata_option "v")
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "br")     0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "scall")  0 0)
+(automata_option "w")
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcmp")   2 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcvtfx") 7 0)
+;;(automata_option "no-minimization")
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fld")    9 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmac")   5 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmisc")  5 0)
+(include "itanium1.md")
+(include "itanium2.md")
-;; There is only one insn `mov = ar.bsp' for frar_i:
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_i") 13 0)
-;; There is only ony insn `mov = ar.unat' for frar_m:
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_m") 6 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frbr")   2 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frfr")   2 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frpr")   2 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ialu")   1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "icmp")   1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ilog")   1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ishf")   1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ld")     2 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "long_i") 1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmmul")  2 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshf")  2 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshfi")  2 0)
-;; Now we have only one insn (flushrs) of such class.  We assume that flushrs
-;; is the 1st syllable of the bundle after stop bit.
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "rse_m")  0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "sem")   11 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "stf")    1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "st")     1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m0") 1 0)
-;; Now we use only one insn `mf'.  Therfore latency time is set up to 0.
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m") 0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tbit")   1 0)
-;; There is only one insn `mov ar.pfs =' for toar_i therefore we use
-;; latency time equal to 0:
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_i") 0 0)
-;; There are only ony 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m:
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_m") 5 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tobr")   1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tofr")   9 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "topr")   1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xmpy")   7 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xtd")    1 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_m")  0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_i")  0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_f")  0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_b")  0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_x")  0 0)
-(define_function_unit "stop_bit" 1 1 (eq_attr "itanium_class" "stop_bit") 0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ignore") 0 0)
-(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "unknown") 0 0)
 ;; ::::::::::::::::::::
 ;; ::
@@ -5089,7 +5040,7 @@
  [(const_int 0)]
  ""
  "nop 0"
-  [(set_attr "itanium_class" "unknown")])
+  [(set_attr "itanium_class" "nop")])
 (define_insn "nop_m"
  [(const_int 1)]
@@ -5121,6 +5072,14 @@
  ""
  [(set_attr "itanium_class" "nop_x")])
+;; The following insn will never be generated.  It is used only by
+;; the insn scheduler to change state before advancing the cycle.
+(define_insn "pre_cycle"
+  [(const_int 6)]
+  ""
+  ""
+  [(set_attr "itanium_class" "pre_cycle")])
 (define_insn "bundle_selector"
  [(unspec [(match_operand 0 "const_int_operand" "")] UNSPEC_BUNDLE_SELECTOR)]
  ""

--- a/gcc/config/ia64/itanium1.md
+++ b/gcc/config/ia64/itanium1.md
--- a/gcc/config/ia64/itanium2.md
+++ b/gcc/config/ia64/itanium2.md
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9519,6 +9519,14 @@ A fixed register is one that the register allocator can not use.  This is
 useful when compiling kernel code.  A register range is specified as
 two registers separated by a dash.  Multiple register ranges can be
 specified separated by a comma.
+@item -mearly-stop-bits
+@itemx -mno-early-stop-bits
+@opindex mearly-stop-bits
+@opindex mno-early-stop-bits
+Allow stop bits to be placed earlier than immediately preceding the
+instruction that triggered the stop bit.  This can improve instruction
+scheduling, but does not always do so.
 @end table
 @node D30V Options

--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5586,16 +5586,23 @@ which the unit is bound.  The automaton should be described in
 construction @code{define_automaton}.  You should give
 @dfn{automaton-name}, if there is a defined automaton.
+The assignment of units to automata is constrained by the uses of the
+units in insn reservations.  The most important constraint is: if a
+unit reservation is present on a particular cycle of an alternative
+for an insn reservation, then some unit from the same automaton must
+be present on the same cycle for the other alternatives of the insn
+reservation.  The rest of the constraints are mentioned in the
+description of the subsequent constructions.
 @findex define_query_cpu_unit
 @cindex querying function unit reservations
 The following construction describes CPU functional units analogously
-to @code{define_cpu_unit}.  If we use automata without their
+to @code{define_cpu_unit}.  The reservation of such units can be
-minimization, the reservation of such units can be queried for an
+queried for an automaton state.  The instruction scheduler never
-automaton state.  The instruction scheduler never queries reservation
+queries reservation of functional units for given automaton state.  So
-of functional units for given automaton state.  So as a rule, you
+as a rule, you don't need this construction.  This construction could
-don't need this construction.  This construction could be used for
+be used for future code generation goals (e.g. to generate
-future code generation goals (e.g. to generate @acronym{VLIW} insn
+@acronym{VLIW} insn templates).
-templates).
 @smallexample
 (define_query_cpu_unit @var{unit-names} [@var{automaton-name}])
@@ -5744,7 +5751,9 @@ of insn @samp{store} (not a stored value).
 @findex exclusion_set
 @findex presence_set
+@findex final_presence_set
 @findex absence_set
+@findex final_absence_set
 @cindex VLIW
 @cindex RISC
 Usually the following three constructions are used to describe
@@ -5754,13 +5763,19 @@ used for @acronym{RISC} processors too.
 @smallexample
 (exclusion_set @var{unit-names} @var{unit-names})
-(presence_set @var{unit-names} @var{unit-names})
+(presence_set @var{unit-names} @var{patterns})
-(absence_set @var{unit-names} @var{unit-names})
+(final_presence_set @var{unit-names} @var{patterns})
+(absence_set @var{unit-names} @var{patterns})
+(final_absence_set @var{unit-names} @var{patterns})
 @end smallexample
 @var{unit-names} is a string giving names of functional units
 separated by commas.
+@var{patterns} is a string giving patterns of functional units
+separated by commas.  Currently a pattern is one unit or units
+separated by white-spaces.
 The first construction (@samp{exclusion_set}) means that each
 functional unit in the first string can not be reserved simultaneously
 with a unit whose name is in the second string and vice versa.  For
@@ -5771,22 +5786,75 @@ point insns or only double floating point insns.
 The second construction (@samp{presence_set}) means that each
 functional unit in the first string can not be reserved unless at
-least one of units whose names are in the second string is reserved.
+least one pattern of units whose names are in the second string is
-This is an asymmetric relation.  For example, it is useful for
+reserved.  This is an asymmetric relation.  For example, it is useful
-description that @acronym{VLIW} @samp{slot1} is reserved after
+for description that @acronym{VLIW} @samp{slot1} is reserved after
-@samp{slot0} reservation.
+@samp{slot0} reservation.  We could describe it by the following
+construction
-The third construction (@samp{absence_set}) means that each functional
-unit in the first string can be reserved only if each unit whose name
+@smallexample
-is in the second string is not reserved.  This is an asymmetric
+(presence_set "slot1" "slot0")
-relation (actually @samp{exclusion_set} is analogous to this one but
+@end smallexample
-it is symmetric).  For example, it is useful for description that
-@acronym{VLIW} @samp{slot0} can not be reserved after @samp{slot1} or
+Or @samp{slot1} is reserved only after @samp{slot0} and unit @samp{b0}
-@samp{slot2} reservation.
+reservation.  In this case we could write
+@smallexample
+(presence_set "slot1" "slot0 b0")
+@end smallexample
+The third construction (@samp{final_presence_set}) is analogous to
+@samp{presence_set}.  The difference between them is when checking is
+done.  When an instruction is issued in given automaton state
+reflecting all current and planned unit reservations, the automaton
+state is changed.  The first state is a source state, the second one
+is a result state.  Checking for @samp{presence_set} is done on the
+source state reservation, checking for @samp{final_presence_set} is
+done on the result reservation.  This construction is useful to
+describe a reservation which is actually two subsequent reservations.
+For example, if we use
+@smallexample
+(presence_set "slot1" "slot0")
+@end smallexample
+the following insn will never be issued (because @samp{slot1} requires
+@samp{slot0} which is absent in the source state).
+@smallexample
+(define_reservation "insn_and_nop" "slot0 + slot1")
+@end smallexample
+but it can be issued if we use analogous @samp{final_presence_set}.
+The fourth construction (@samp{absence_set}) means that each functional
+unit in the first string can be reserved only if each pattern of units
+whose names are in the second string is not reserved.  This is an
+asymmetric relation (actually @samp{exclusion_set} is analogous to
+this one but it is symmetric).  For example, it is useful for
+description that @acronym{VLIW} @samp{slot0} can not be reserved after
+@samp{slot1} or @samp{slot2} reservation.  We could describe it by the
+following construction
+@smallexample
+(absence_set "slot2" "slot0, slot1")
+@end smallexample
+Or @samp{slot2} can not be reserved if @samp{slot0} and unit @samp{b0}
+are reserved or @samp{slot1} and unit @samp{b1} are reserved.  In
+this case we could write
+@smallexample
+(absence_set "slot2" "slot0 b0, slot1 b1")
+@end smallexample
 All functional units mentioned in a set should belong to the same
 automaton.
+The last construction (@samp{final_absence_set}) is analogous to
+@samp{absence_set} but checking is done on the result (state)
+reservation.  See comments for @samp{final_presence_set}.
 @findex automata_option
 @cindex deterministic finite state automaton
 @cindex nondeterministic finite state automaton
@@ -5804,8 +5872,8 @@ code.  Currently there are the following options:
 @itemize @bullet
 @item
 @dfn{no-minimization} makes no minimization of the automaton.  This is
-only worth to do when we are going to query CPU functional unit
+only worth to do when we are debugging the description and need to
-reservations in an automaton state.
+look more accurately at reservations of states.
 @item
 @dfn{time} means printing additional time statistics about

--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5629,6 +5629,16 @@ scheduling one insn causes other insns to become ready in the same
 cycle.  These other insns can then be taken into account properly.
 @end deftypefn
+@deftypefn {Target Hook} void TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx @var{tail})
+This hook is called after evaluation of forward dependencies of insns in
+chain given by two parameter values (@var{head} and @var{tail}
+correspondingly) but before insns scheduling of the insn chain.  For
+example, it can be used for better insn classification if it requires
+analysis of dependencies.  This hook can use backward and forward
+dependencies of the insn scheduler because they are already
+calculated.
+@end deftypefn
 @deftypefn {Target Hook} void TARGET_SCHED_INIT (FILE *@var{file}, int @var{verbose}, int @var{max_ready})
 This hook is executed by the scheduler at the beginning of each block of
 instructions that are to be scheduled.  @var{file} is either a null
@@ -5715,6 +5725,30 @@ schedules to choose the best one.
 The default is no multipass scheduling.
 @end deftypefn
+@deftypefn {Target Hook} int TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD (rtx)
+This hook controls what insns from the ready insn queue will be
+considered for the multipass insn scheduling.  If the hook returns
+zero for insn passed as the parameter, the insn will be not chosen to
+be issued.
+The default is that any ready insn can be chosen to be issued.
+@end deftypefn
+@deftypefn {Target Hook} int TARGET_SCHED_DFA_NEW_CYCLE (FILE *, int, rtx, int, int, int *)
+This hook is called by the insn scheduler before issuing insn passed
+as the third parameter on given cycle.  If the hook returns nonzero,
+the insn is not issued on given processors cycle.  Instead of that,
+the processor cycle is advanced.  If the value passed through the last
+parameter is zero, the insn ready queue is not sorted on the new cycle
+start as usual.  The first parameter passes the file for debugging
+output.  The second one passes the scheduler verbose level of the
+debugging output.  The fourth and the fifth parameter values are
+correspondingly processor cycle on which the previous insn has been
+issued and the current processor cycle.
+@end deftypefn
 @deftypefn {Target Hook} void TARGET_SCHED_INIT_DFA_BUBBLES (void)
 The @acronym{DFA}-based scheduler could take the insertion of nop
 operations for better insn scheduling into account.  It can be done

--- a/gcc/genattr.c
+++ b/gcc/genattr.c
@@ -441,6 +441,11 @@ main (argc, argv)
      printf ("   unit with given code is currently reserved in given\n");
      printf ("   DFA state.  */\n");
      printf ("extern int cpu_unit_reservation_p PARAMS ((state_t, int));\n");
+      printf ("/* Clean insn code cache.  It should be called if there\n");
+      printf ("   is a chance that condition value in a\n");
+      printf ("   define_insn_reservation will be changed after\n");
+      printf ("   last call of dfa_start.  */\n");
+      printf ("extern void dfa_clean_insn_cache PARAMS ((void));\n\n");
      printf ("#endif\n\n");
      printf ("/* Initiate and finish work with DFA.  They should be\n");
      printf ("   called as the first and the last interface\n");

--- a/gcc/genattrtab.c
+++ b/gcc/genattrtab.c
@@ -6115,10 +6115,18 @@ from the machine description file `md'.  */\n\n");
 	  gen_presence_set (desc);
 	  break;
+	case FINAL_PRESENCE_SET:
+	  gen_final_presence_set (desc);
+	  break;
 	case ABSENCE_SET:
 	  gen_absence_set (desc);
 	  break;
+	case FINAL_ABSENCE_SET:
+	  gen_final_absence_set (desc);
+	  break;
 	case DEFINE_AUTOMATON:
 	  gen_automaton (desc);
 	  break;

--- a/gcc/genattrtab.h
+++ b/gcc/genattrtab.h
@@ -33,7 +33,9 @@ extern void gen_query_cpu_unit		PARAMS ((rtx));
 extern void gen_bypass			PARAMS ((rtx));
 extern void gen_excl_set		PARAMS ((rtx));
 extern void gen_presence_set		PARAMS ((rtx));
+extern void gen_final_presence_set	PARAMS ((rtx));
 extern void gen_absence_set		PARAMS ((rtx));
+extern void gen_final_absence_set	PARAMS ((rtx));
 extern void gen_automaton		PARAMS ((rtx));
 extern void gen_automata_option		PARAMS ((rtx));
 extern void gen_reserv   		PARAMS ((rtx));

--- a/gcc/genautomata.c
+++ b/gcc/genautomata.c
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
--- a/gcc/rtl.def
+++ b/gcc/rtl.def
@@ -358,9 +358,8 @@ DEF_RTL_EXPR(ADDRESS, "address", "e", 'm')
 DEF_RTL_EXPR(DEFINE_CPU_UNIT, "define_cpu_unit", "sS", 'x')
 /* (define_query_cpu_unit string [string]) describes cpu functional
-   units analogously to define_cpu_unit.  If we use automaton without
+   units analogously to define_cpu_unit.  The reservation of such
-   minimization, the reservation of such units can be queried for
+   units can be queried for automaton state.  */
-   automaton state.  */
 DEF_RTL_EXPR(DEFINE_QUERY_CPU_UNIT, "define_query_cpu_unit", "sS", 'x')
 /* (exclusion_set string string) means that each CPU functional unit
@@ -370,28 +369,80 @@ DEF_RTL_EXPR(DEFINE_QUERY_CPU_UNIT, "define_query_cpu_unit", "sS", 'x')
   for description CPU with fully pipelined floating point functional
   unit which can execute simultaneously only single floating point
   insns or only double floating point insns.  All CPU functional
-   units in a set should belong the same automaton.  */
+   units in a set should belong to the same automaton.  */
 DEF_RTL_EXPR(EXCLUSION_SET, "exclusion_set", "ss", 'x')
 /* (presence_set string string) means that each CPU functional unit in
-   the first string can not be reserved unless at least one of units
+   the first string can not be reserved unless at least one pattern
-   whose names are in the second string is reserved.  This is an
+   of units whose names are in the second string is reserved.  This is
-   asymmetric relation.  CPU units in the string are separated by
+   an asymmetric relation.  CPU units or unit patterns in the strings
-   commas.  For example, it is useful for description that slot1 is
+   are separated by commas.  Pattern is one unit name or unit names
-   reserved after slot0 reservation for VLIW processor.  All CPU
+   separated by white-spaces.
-   functional units in a set should belong the same automaton.  */
+   For example, it is useful for description that slot1 is reserved
+   after slot0 reservation for a VLIW processor.  We could describe it
+   by the following construction
+      (presence_set "slot1" "slot0")
+   Or slot1 is reserved only after slot0 and unit b0 reservation.  In
+   this case we could write
+      (presence_set "slot1" "slot0 b0")
+   All CPU functional units in a set should belong to the same
+   automaton.  */
 DEF_RTL_EXPR(PRESENCE_SET, "presence_set", "ss", 'x')
+/* (final_presence_set string string) is analogous to `presence_set'.
+   The difference between them is when checking is done.  When an
+   instruction is issued in given automaton state reflecting all
+   current and planned unit reservations, the automaton state is
+   changed.  The first state is a source state, the second one is a
+   result state.  Checking for `presence_set' is done on the source
+   state reservation, checking for `final_presence_set' is done on the
+   result reservation.  This construction is useful to describe a
+   reservation which is actually two subsequent reservations.  For
+   example, if we use 
+      (presence_set "slot1" "slot0")
+   the following insn will never be issued (because slot1 requires
+   slot0 which is absent in the source state).
+      (define_reservation "insn_and_nop" "slot0 + slot1")
+   but it can be issued if we use analogous `final_presence_set'.  */
+DEF_RTL_EXPR(FINAL_PRESENCE_SET, "final_presence_set", "ss", 'x')
 /* (absence_set string string) means that each CPU functional unit in
-   the first string can not be reserved only if each unit whose name
+   the first string can be reserved only if each pattern of units
-   is in the second string is not reserved.  This is an asymmetric
+   whose names are in the second string is not reserved.  This is an
-   relation (actually exclusion set is analogous to this one but it is
+   asymmetric relation (actually exclusion set is analogous to this
-   symmetric).  CPU units in the string are separated by commas.  For
+   one but it is symmetric).  CPU units or unit patterns in the string
-   example, it is useful for description that slot0 can not be
+   are separated by commas.  Pattern is one unit name or unit names
-   reserved after slot1 or slot2 reservation for VLIW processor.  All
+   separated by white-spaces.
-   CPU functional units in a set should belong the same automaton.  */
+   For example, it is useful for description that slot0 can not be
+   reserved after slot1 or slot2 reservation for a VLIW processor.  We
+   could describe it by the following construction
+      (absence_set "slot2" "slot0, slot1")
+   Or slot2 can not be reserved if slot0 and unit b0 are reserved or
+   slot1 and unit b1 are reserved.  In this case we could write
+      (absence_set "slot2" "slot0 b0, slot1 b1")
+   All CPU functional units in a set should belong to the same
+   automaton.  */
 DEF_RTL_EXPR(ABSENCE_SET, "absence_set", "ss", 'x')
+/* (final_absence_set string string) is analogous to `absence_set' but
+   checking is done on the result (state) reservation.  See comments
+   for `final_presence_set'.  */
+DEF_RTL_EXPR(FINAL_ABSENCE_SET, "final_absence_set", "ss", 'x')
 /* (define_bypass number out_insn_names in_insn_names) names bypass
   with given latency (the first number) from insns given by the first
   string (see define_insn_reservation) into insns given by the second
@@ -416,8 +467,8 @@ DEF_RTL_EXPR(DEFINE_AUTOMATON, "define_automaton", "s", 'x')
   automata.  Currently there are the following options:
   o "no-minimization" which makes no minimization of automata.  This
-     is only worth to do when we are going to query CPU functional
+     is only worth to do when we are debugging the description and
-     unit reservations in an automaton state.
+     need to look more accurately at reservations of states.
   o "time" which means printing additional time statistics about
      generation of automata.

--- a/gcc/sched-deps.c
+++ b/gcc/sched-deps.c
@@ -83,14 +83,12 @@ static sbitmap *forward_dependency_cache;
 static int deps_may_trap_p PARAMS ((rtx));
 static void add_dependence_list PARAMS ((rtx, rtx, enum reg_note));
 static void add_dependence_list_and_free PARAMS ((rtx, rtx *, enum reg_note));
-static void remove_dependence PARAMS ((rtx, rtx));
 static void set_sched_group_p PARAMS ((rtx));
 static void flush_pending_lists PARAMS ((struct deps *, rtx, int, int));
 static void sched_analyze_1 PARAMS ((struct deps *, rtx, rtx));
 static void sched_analyze_2 PARAMS ((struct deps *, rtx, rtx));
 static void sched_analyze_insn PARAMS ((struct deps *, rtx, rtx, rtx));
-static rtx group_leader PARAMS ((rtx));
 static rtx get_condition PARAMS ((rtx));
 static int conditions_mutex_p PARAMS ((rtx, rtx));
@@ -237,18 +235,16 @@ add_dependence (insn, elem, dep_type)
      rtx nnext;
      while ((nnext = next_nonnote_insn (next)) != NULL
 	     && INSN_P (nnext)
+	     && next != insn
 	     && SCHED_GROUP_P (nnext))
 	next = nnext;
-      /* Again, don't depend an insn on itself.  */
+      if (insn != next)
-      if (insn == next)
+	add_dependence (insn, next, REG_DEP_ANTI);
-	return;
-      /* Make the dependence to NEXT, the last insn of the group, instead
-         of the original ELEM.  */
-      elem = next;
    }
  present_p = 1;
 #ifdef INSN_SCHEDULING
  /* ??? No good way to tell from here whether we're doing interblock
@@ -384,76 +380,6 @@ add_dependence_list_and_free (insn, listp, dep_type)
    }
 }
-/* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
-   of INSN.  Abort if not found.  */
-static void
-remove_dependence (insn, elem)
-     rtx insn;
-     rtx elem;
-{
-  rtx prev, link, next;
-  int found = 0;
-  for (prev = 0, link = LOG_LINKS (insn); link; link = next)
-    {
-      next = XEXP (link, 1);
-      if (XEXP (link, 0) == elem)
-	{
-	  if (prev)
-	    XEXP (prev, 1) = next;
-	  else
-	    LOG_LINKS (insn) = next;
-#ifdef INSN_SCHEDULING
-	  /* If we are removing a dependency from the LOG_LINKS list,
-	     make sure to remove it from the cache too.  */
-	  if (true_dependency_cache != NULL)
-	    {
-	      if (REG_NOTE_KIND (link) == 0)
-		RESET_BIT (true_dependency_cache[INSN_LUID (insn)],
-			   INSN_LUID (elem));
-	      else if (REG_NOTE_KIND (link) == REG_DEP_ANTI)
-		RESET_BIT (anti_dependency_cache[INSN_LUID (insn)],
-			   INSN_LUID (elem));
-	      else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
-		RESET_BIT (output_dependency_cache[INSN_LUID (insn)],
-			   INSN_LUID (elem));
-	    }
-#endif
-	  free_INSN_LIST_node (link);
-	  found = 1;
-	}
-      else
-	prev = link;
-    }
-  if (!found)
-    abort ();
-  return;
-}
-/* Return an insn which represents a SCHED_GROUP, which is
-   the last insn in the group.  */
-static rtx
-group_leader (insn)
-     rtx insn;
-{
-  rtx prev;
-  do
-    {
-      prev = insn;
-      insn = next_nonnote_insn (insn);
-    }
-  while (insn && INSN_P (insn) && SCHED_GROUP_P (insn));
-  return prev;
-}
 /* Set SCHED_GROUP_P and care for the rest of the bookkeeping that
   goes along with that.  */
@@ -465,21 +391,21 @@ set_sched_group_p (insn)
  SCHED_GROUP_P (insn) = 1;
-  /* There may be a note before this insn now, but all notes will
+  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
-     be removed before we actually try to schedule the insns, so
+    {
-     it won't cause a problem later.  We must avoid it here though.  */
+      prev = insn;
+      do
+	{
+	  prev = prev_nonnote_insn (prev);
+	  if (XEXP (link, 0) == prev)
+	    break;
+	}
+      while (SCHED_GROUP_P (prev));
+      if (XEXP (link, 0) != prev)
+	add_dependence (prev, XEXP (link, 0), REG_DEP_ANTI);
+    }
  prev = prev_nonnote_insn (insn);
+  add_dependence (insn, prev, REG_DEP_ANTI);
-  /* Make a copy of all dependencies on the immediately previous insn,
-     and add to this insn.  This is so that all the dependencies will
-     apply to the group.  Remove an explicit dependence on this insn
-     as SCHED_GROUP_P now represents it.  */
-  if (find_insn_list (prev, LOG_LINKS (insn)))
-    remove_dependence (insn, prev);
-  for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
-    add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
 }
 /* Process an insn's memory dependencies.  There are four kinds of
@@ -983,7 +909,15 @@ sched_analyze_insn (deps, x, insn, loop_notes)
 	  INIT_REG_SET (&tmp);
 	  (*current_sched_info->compute_jump_reg_dependencies) (insn, &tmp);
-	  IOR_REG_SET (reg_pending_uses, &tmp);
+	  /* Make latency of jump equal to 0 by using anti-dependence.  */
+	  EXECUTE_IF_SET_IN_REG_SET (&tmp, 0, i,
+	    {
+	      struct deps_reg *reg_last = &deps->reg_last[i];
+	      add_dependence_list (insn, reg_last->sets, REG_DEP_ANTI);
+	      add_dependence_list (insn, reg_last->clobbers, REG_DEP_ANTI);
+	      reg_last->uses_length++;
+	      reg_last->uses = alloc_INSN_LIST (insn, reg_last->uses);
+	    });
 	  CLEAR_REG_SET (&tmp);
 	  /* All memory writes and volatile reads must happen before the
@@ -1049,14 +983,16 @@ sched_analyze_insn (deps, x, insn, loop_notes)
  /* Add dependencies if a scheduling barrier was found.  */
  if (reg_pending_barrier)
    {
+      /* In the case of a barrier, most of the added dependencies are
+         not real, so we use anti-dependence here.  */
      if (GET_CODE (PATTERN (insn)) == COND_EXEC)
 	{
 	  EXECUTE_IF_SET_IN_REG_SET (&deps->reg_last_in_use, 0, i,
 	    {
 	      struct deps_reg *reg_last = &deps->reg_last[i];
 	      add_dependence_list (insn, reg_last->uses, REG_DEP_ANTI);
-	      add_dependence_list (insn, reg_last->sets, 0);
+	      add_dependence_list (insn, reg_last->sets, REG_DEP_ANTI);
-	      add_dependence_list (insn, reg_last->clobbers, 0);
+	      add_dependence_list (insn, reg_last->clobbers, REG_DEP_ANTI);
 	    });
 	}
      else
@@ -1066,8 +1002,10 @@ sched_analyze_insn (deps, x, insn, loop_notes)
 	      struct deps_reg *reg_last = &deps->reg_last[i];
 	      add_dependence_list_and_free (insn, &reg_last->uses,
 					    REG_DEP_ANTI);
-	      add_dependence_list_and_free (insn, &reg_last->sets, 0);
+	      add_dependence_list_and_free (insn, &reg_last->sets,
-	      add_dependence_list_and_free (insn, &reg_last->clobbers, 0);
+					    REG_DEP_ANTI);
+	      add_dependence_list_and_free (insn, &reg_last->clobbers,
+					    REG_DEP_ANTI);
 	      reg_last->uses_length = 0;
 	      reg_last->clobbers_length = 0;
 	    });
@@ -1432,11 +1370,9 @@ compute_forward_dependences (head, tail)
      if (! INSN_P (insn))
 	continue;
-      insn = group_leader (insn);
      for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
 	{
-	  rtx x = group_leader (XEXP (link, 0));
+	  rtx x = XEXP (link, 0);
 	  rtx new_link;
 	  if (x != XEXP (link, 0))

--- a/gcc/sched-ebb.c
+++ b/gcc/sched-ebb.c
@@ -40,6 +40,7 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include "recog.h"
 #include "cfglayout.h"
 #include "sched-int.h"
+#include "target.h"
 /* The number of insns to be scheduled in total.  */
 static int target_n_insns;
@@ -89,14 +90,7 @@ init_ready_list (ready)
     Count number of insns in the target block being scheduled.  */
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
    {
-      rtx next;
+      if (INSN_DEP_COUNT (insn) == 0)
-      if (! INSN_P (insn))
-	continue;
-      next = NEXT_INSN (insn);
-      if (INSN_DEP_COUNT (insn) == 0
-	  && (! INSN_P (next) || SCHED_GROUP_P (next) == 0))
 	ready_add (ready, insn);
      if (!(SCHED_GROUP_P (insn)))
 	target_n_insns++;
@@ -222,6 +216,9 @@ schedule_ebb (head, tail)
  /* Compute INSN_DEPEND.  */
  compute_forward_dependences (head, tail);
+  if (targetm.sched.dependencies_evaluation_hook)
+    targetm.sched.dependencies_evaluation_hook (head, tail);
  /* Set priorities.  */
  n_insns = set_priorities (head, tail);

--- a/gcc/sched-rgn.c
+++ b/gcc/sched-rgn.c
@@ -2023,17 +2023,9 @@ init_ready_list (ready)
     Count number of insns in the target block being scheduled.  */
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
    {
-      rtx next;
+      if (INSN_DEP_COUNT (insn) == 0)
-      if (! INSN_P (insn))
-	continue;
-      next = NEXT_INSN (insn);
-      if (INSN_DEP_COUNT (insn) == 0
-	  && (! INSN_P (next) || SCHED_GROUP_P (next) == 0))
 	ready_add (ready, insn);
-      if (!(SCHED_GROUP_P (insn)))
+      target_n_insns++;
-	target_n_insns++;
    }
  /* Add to ready list all 'ready' insns in valid source blocks.
@@ -2067,19 +2059,8 @@ init_ready_list (ready)
 							     insn, insn) <= 3)))
 			&& check_live (insn, bb_src)
 			&& is_exception_free (insn, bb_src, target_bb))))
-	      {
+	      if (INSN_DEP_COUNT (insn) == 0)
-		rtx next;
+		ready_add (ready, insn);
-		/* Note that we haven't squirreled away the notes for
-		   blocks other than the current.  So if this is a
-		   speculative insn, NEXT might otherwise be a note.  */
-		next = next_nonnote_insn (insn);
-		if (INSN_DEP_COUNT (insn) == 0
-		    && (! next
-			|| ! INSN_P (next)
-			|| SCHED_GROUP_P (next) == 0))
-		  ready_add (ready, insn);
-	      }
 	  }
      }
 }
@@ -2097,7 +2078,6 @@ can_schedule_ready_p (insn)
  /* An interblock motion?  */
  if (INSN_BB (insn) != target_bb)
    {
-      rtx temp;
      basic_block b1;
      if (IS_SPECULATIVE_INSN (insn))
@@ -2114,18 +2094,9 @@ can_schedule_ready_p (insn)
 	}
      nr_inter++;
-      /* Find the beginning of the scheduling group.  */
-      /* ??? Ought to update basic block here, but later bits of
-	 schedule_block assumes the original insn block is
-	 still intact.  */
-      temp = insn;
-      while (SCHED_GROUP_P (temp))
-	temp = PREV_INSN (temp);
      /* Update source block boundaries.  */
-      b1 = BLOCK_FOR_INSN (temp);
+      b1 = BLOCK_FOR_INSN (insn);
-      if (temp == b1->head && insn == b1->end)
+      if (insn == b1->head && insn == b1->end)
 	{
 	  /* We moved all the insns in the basic block.
 	     Emit a note after the last insn and update the
@@ -2139,9 +2110,9 @@ can_schedule_ready_p (insn)
 	  /* We took insns from the end of the basic block,
 	     so update the end of block boundary so that it
 	     points to the first insn we did not move.  */
-	  b1->end = PREV_INSN (temp);
+	  b1->end = PREV_INSN (insn);
 	}
-      else if (temp == b1->head)
+      else if (insn == b1->head)
 	{
 	  /* We took insns from the start of the basic block,
 	     so update the start of block boundary so that
@@ -2361,17 +2332,6 @@ add_branch_dependences (head, tail)
 	  CANT_MOVE (insn) = 1;
 	  last = insn;
-	  /* Skip over insns that are part of a group.
-	     Make each insn explicitly depend on the previous insn.
-	     This ensures that only the group header will ever enter
-	     the ready queue (and, when scheduled, will automatically
-	     schedule the SCHED_GROUP_P block).  */
-	  while (SCHED_GROUP_P (insn))
-	    {
-	      rtx temp = prev_nonnote_insn (insn);
-	      add_dependence (insn, temp, REG_DEP_ANTI);
-	      insn = temp;
-	    }
 	}
      /* Don't overrun the bounds of the basic block.  */
@@ -2393,10 +2353,6 @@ add_branch_dependences (head, tail)
 	add_dependence (last, insn, REG_DEP_ANTI);
 	INSN_REF_COUNT (insn) = 1;
-	/* Skip over insns that are part of a group.  */
-	while (SCHED_GROUP_P (insn))
-	  insn = prev_nonnote_insn (insn);
      }
 }
@@ -2728,6 +2684,10 @@ schedule_region (rgn)
      get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
      compute_forward_dependences (head, tail);
+      if (targetm.sched.dependencies_evaluation_hook)
+	targetm.sched.dependencies_evaluation_hook (head, tail);
    }
  /* Set priorities.  */

--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -194,12 +194,15 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 #define TARGET_SCHED_FINISH 0
 #define TARGET_SCHED_REORDER 0
 #define TARGET_SCHED_REORDER2 0
+#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK 0
 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE 0
 #define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN 0
 #define TARGET_SCHED_DFA_PRE_CYCLE_INSN 0
 #define TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN 0
 #define TARGET_SCHED_DFA_POST_CYCLE_INSN 0
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 0
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD 0
+#define TARGET_SCHED_DFA_NEW_CYCLE 0
 #define TARGET_SCHED_INIT_DFA_BUBBLES 0
 #define TARGET_SCHED_DFA_BUBBLE 0
@@ -212,12 +215,15 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   TARGET_SCHED_FINISH,						\
   TARGET_SCHED_REORDER,					\
   TARGET_SCHED_REORDER2,					\
+   TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK,			\
   TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE,			\
   TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN,			\
   TARGET_SCHED_DFA_PRE_CYCLE_INSN,				\
   TARGET_SCHED_INIT_DFA_POST_CYCLE_INSN,			\
   TARGET_SCHED_DFA_POST_CYCLE_INSN,				\
   TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD,		\
+   TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD,	\
+   TARGET_SCHED_DFA_NEW_CYCLE,					\
   TARGET_SCHED_INIT_DFA_BUBBLES,				\
   TARGET_SCHED_DFA_BUBBLE}

--- a/gcc/target.h
+++ b/gcc/target.h
@@ -177,6 +177,11 @@ struct gcc_target
    int (* reorder)  PARAMS ((FILE *, int, rtx *, int *, int));
    int (* reorder2) PARAMS ((FILE *, int, rtx *, int *, int));
+    /* The following member value is a pointer to a function called
+       after evaluation of forward dependencies of insns in the chain
+       given by two parameter values (head and tail respectively).  */
+    void (* dependencies_evaluation_hook) PARAMS ((rtx, rtx));
    /* The following member value is a pointer to a function returning
       nonzero if we should use DFA based scheduling.  The default is
       to use the old pipeline scheduler.  */
@@ -206,6 +211,25 @@ struct gcc_target
       try to choose ready insn which permits to start maximum number of
       insns on the same cycle.  */
    int (* first_cycle_multipass_dfa_lookahead) PARAMS ((void));
+    /* The following member value is a pointer to a function controlling
+       what insns from the ready insn queue will be considered for the
+       multipass insn scheduling.  If the hook returns zero for the insn
+       passed as the parameter, the insn will not be chosen to be
+       issued.  */
+    int (* first_cycle_multipass_dfa_lookahead_guard) PARAMS ((rtx));
+    /* The following member value is a pointer to a function called by
+       the insn scheduler before issuing the insn passed as the third
+       parameter on the given cycle.  If the hook returns nonzero, the
+       insn is not issued on the given processor cycle.  Instead, the
+       processor cycle is advanced.  If the value passed through
+       the last parameter is zero, the insn ready queue is not sorted
+       on the new cycle start as usual.  The first parameter passes the
+       file for debugging output.  The second one passes the scheduler
+       verbose level of the debugging output.  The fourth and the fifth
+       parameter values are respectively the processor cycle on which
+       the previous insn has been issued and the current processor
+       cycle.  */
+    int (* dfa_new_cycle) PARAMS ((FILE *, int, rtx, int, int, int *));
    /* The values of the following members are pointers to functions
       used to improve the first cycle multipass scheduling by
       inserting nop insns.  dfa_scheduler_bubble gives a function