Commit c4bb6b38 by Jeff Law

pa.c (fmpy_operands): Remove.

        * pa/pa.c (fmpy_operands): Remove.  No longer needed.
        (combinable_add, combinable_copy, combinable_fmpy): Likewise.
        (combinable_fadd, combinable_fsub): Likewise.
        (pa_reorg): Call pa_combine_instructions.
        (pa_combine_instructions): Combine instructions to make things
        like fmpyadd and fmpysub.
        (pa_can_combine_p): Helper function for pa_combine_instructions.
        * pa/pa.md (pa_combine_type): New attribute.  Set it appropriately
        for various insns.
        (define_delays): Use a separate define_delay for unconditional
        branches.
        (fmpyadd, fmpysub peepholes): Remove, no longer needed.
        (fmpyadd, fmpysub insns): Add variant with fadd/fsub first,
        then the fmpy.
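
For illustration, the kind of rewrite the new pass performs, shown as a
hedged assembly sketch (register assignments are hypothetical; the pass
actually combines RTL patterns, not assembly text):

        fmpy,dbl %fr4,%fr5,%fr6
        fadd,dbl %fr7,%fr8,%fr7

becomes the single PA1.1 instruction

        fmpyadd,dbl %fr4,%fr5,%fr6,%fr8,%fr7

provided the multiply and the add have independent operands.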

From-SVN: r13346
parent 54552651
@@ -62,14 +62,6 @@ static int out_of_line_prologue_epilogue;
static rtx find_addr_reg ();
/* Kludgery. We hold the operands to a fmpy insn here so we can
compare them with the operands for an fadd/fsub to determine if
they can be combined into a fmpyadd/fmpysub insn.
This _WILL_ disappear as the code to combine independent insns
matures. */
static rtx fmpy_operands[3];
/* Keep track of the number of bytes we have output in the CODE subspaces
during this compilation so we'll know when to emit inline long-calls. */
@@ -1347,7 +1339,7 @@ emit_move_sequence (operands, mode, scratch_reg)
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
it will need a link/runtime reloc. */
it will need a link/runtime reloc). */
int
reloc_needed (exp)
@@ -5484,70 +5476,6 @@ output_parallel_addb (operands, length)
}
}
/* Return nonzero if INSN represents an integer add which might be
combinable with an unconditional branch. */
combinable_add (insn)
rtx insn;
{
rtx src, dest, prev, pattern = PATTERN (insn);
/* Must be a (set (reg) (plus (reg) (reg/5_bit_int))) */
if (GET_CODE (pattern) != SET
|| GET_CODE (SET_SRC (pattern)) != PLUS
|| GET_CODE (SET_DEST (pattern)) != REG)
return 0;
src = SET_SRC (pattern);
dest = SET_DEST (pattern);
/* Must be an integer add. */
if (GET_MODE (src) != SImode
|| GET_MODE (dest) != SImode)
return 0;
/* Each operand must be an integer register and/or 5 bit immediate. */
if (!ireg_or_int5_operand (dest, VOIDmode)
|| !ireg_or_int5_operand (XEXP (src, 0), VOIDmode)
|| !ireg_or_int5_operand (XEXP (src, 1), VOIDmode))
return 0;
/* The destination must also be one of the sources. */
return (dest == XEXP (src, 0) || dest == XEXP (src, 1));
}
/* Return nonzero if INSN represents an integer load/copy which might be
combinable with an unconditional branch. */
combinable_copy (insn)
rtx insn;
{
rtx src, dest, pattern = PATTERN (insn);
enum machine_mode mode;
/* Must be a (set (reg) (reg/5_bit_int)). */
if (GET_CODE (pattern) != SET)
return 0;
src = SET_SRC (pattern);
dest = SET_DEST (pattern);
/* Must be a mode that corresponds to a single integer register. */
mode = GET_MODE (dest);
if (mode != SImode
&& mode != SFmode
&& mode != HImode
&& mode != QImode)
return 0;
/* Each operand must be a register or 5 bit integer. */
if (!ireg_or_int5_operand (dest, VOIDmode)
|| !ireg_or_int5_operand (src, VOIDmode))
return 0;
return 1;
}
/* Return nonzero if INSN (a jump insn) immediately follows a call. This
is used to discourage creating parallel movb/addb insns since a jump
which immediately follows a call can execute in the delay slot of the
@@ -5574,170 +5502,6 @@ following_call (insn)
return 0;
}
/* Return nonzero if this is a floating point multiply (fmpy) which
could be combined with a suitable floating point add or sub insn. */
combinable_fmpy (insn)
rtx insn;
{
rtx src, dest, pattern = PATTERN (insn);
enum machine_mode mode;
/* Only on 1.1 and later CPUs. */
if (!TARGET_SNAKE)
return 0;
/* Must be a (set (reg) (mult (reg) (reg))). */
if (GET_CODE (pattern) != SET
|| GET_CODE (SET_SRC (pattern)) != MULT
|| GET_CODE (SET_DEST (pattern)) != REG)
return 0;
src = SET_SRC (pattern);
dest = SET_DEST (pattern);
/* Must be registers. */
if (GET_CODE (XEXP (src, 0)) != REG
|| GET_CODE (XEXP (src, 1)) != REG)
return 0;
/* Must be a floating point mode. Must match the mode of the fmul. */
mode = GET_MODE (dest);
if (mode != DFmode && mode != SFmode)
return 0;
/* SFmode limits the registers which can be used to the upper
32 32bit FP registers. */
if (mode == SFmode
&& (REGNO (dest) < 57
|| REGNO (XEXP (src, 0)) < 57
|| REGNO (XEXP (src, 1)) < 57))
return 0;
/* Save our operands; we'll need to verify they don't conflict with
those in the fadd or fsub. XXX This needs to disappear soon. */
fmpy_operands[0] = dest;
fmpy_operands[1] = XEXP (src, 0);
fmpy_operands[2] = XEXP (src, 1);
return 1;
}
/* Return nonzero if INSN is a floating point add suitable for combining
with the most recently examined floating point multiply. */
combinable_fadd (insn)
rtx insn;
{
rtx src, dest, pattern = PATTERN (insn);
enum machine_mode mode;
/* Must be a (set (reg) (plus (reg) (reg))). */
if (GET_CODE (pattern) != SET
|| GET_CODE (SET_SRC (pattern)) != PLUS
|| GET_CODE (SET_DEST (pattern)) != REG)
return 0;
src = SET_SRC (pattern);
dest = SET_DEST (pattern);
/* Must be registers. */
if (GET_CODE (XEXP (src, 0)) != REG
|| GET_CODE (XEXP (src, 1)) != REG)
return 0;
/* Must be a floating point mode. Must match the mode of the fmul. */
mode = GET_MODE (dest);
if (mode != DFmode && mode != SFmode)
return 0;
if (mode != GET_MODE (fmpy_operands[0]))
return 0;
/* SFmode limits the registers which can be used to the upper
32 32bit FP registers. */
if (mode == SFmode
&& (REGNO (dest) < 57
|| REGNO (XEXP (src, 0)) < 57
|| REGNO (XEXP (src, 1)) < 57))
return 0;
/* Only 2 real operands to the addition. One of the input operands
must be the same as the output operand. */
if (! rtx_equal_p (dest, XEXP (src, 0))
&& ! rtx_equal_p (dest, XEXP (src, 1)))
return 0;
/* Inout operand of the add can not conflict with any operands from the
multiply. */
if (rtx_equal_p (dest, fmpy_operands[0])
|| rtx_equal_p (dest, fmpy_operands[1])
|| rtx_equal_p (dest, fmpy_operands[2]))
return 0;
/* The multiply can not feed into the addition. */
if (rtx_equal_p (fmpy_operands[0], XEXP (src, 0))
|| rtx_equal_p (fmpy_operands[0], XEXP (src, 1)))
return 0;
return 1;
}
/* Return nonzero if INSN is a floating point sub suitable for combining
with the most recently examined floating point multiply. */
combinable_fsub (insn)
rtx insn;
{
rtx src, dest, pattern = PATTERN (insn);
enum machine_mode mode;
/* Must be (set (reg) (minus (reg) (reg))). */
if (GET_CODE (pattern) != SET
|| GET_CODE (SET_SRC (pattern)) != MINUS
|| GET_CODE (SET_DEST (pattern)) != REG)
return 0;
src = SET_SRC (pattern);
dest = SET_DEST (pattern);
if (GET_CODE (XEXP (src, 0)) != REG
|| GET_CODE (XEXP (src, 1)) != REG)
return 0;
/* Must be a floating point mode. Must match the mode of the fmul. */
mode = GET_MODE (dest);
if (mode != DFmode && mode != SFmode)
return 0;
if (mode != GET_MODE (fmpy_operands[0]))
return 0;
/* SFmode limits the registers which can be used to the upper
32 32bit FP registers. */
if (mode == SFmode && (REGNO (dest) < 57 || REGNO (XEXP (src, 1)) < 57))
return 0;
/* Only 2 real operands to the subtraction. Output must be the
same as the first operand of the MINUS. */
if (! rtx_equal_p (dest, XEXP (src, 0)))
return 0;
/* Inout operand of the sub can not conflict with any operands from the
multiply. */
if (rtx_equal_p (dest, fmpy_operands[0])
|| rtx_equal_p (dest, fmpy_operands[1])
|| rtx_equal_p (dest, fmpy_operands[2]))
return 0;
/* The multiply can not feed into the subtraction. */
if (rtx_equal_p (fmpy_operands[0], XEXP (src, 0))
|| rtx_equal_p (fmpy_operands[0], XEXP (src, 1)))
return 0;
return 1;
}
/* We use this hook to perform a PA specific optimization which is difficult
to do in earlier passes.
@@ -5771,6 +5535,8 @@ pa_reorg (insns)
remove_useless_addtr_insns (insns, 1);
pa_combine_instructions (get_insns ());
/* This is fairly cheap, so always run it if optimizing. */
if (optimize > 0)
{
@@ -5840,3 +5606,290 @@ pa_reorg (insns)
}
}
}
/* The PA has a number of odd instructions which can perform multiple
tasks at once. On first generation PA machines (PA1.0 and PA1.1)
it may be profitable to combine two instructions into one instruction
with two outputs. It's not profitable on PA2.0 machines because the
two outputs would take two slots in the reorder buffers.
This routine finds instructions which can be combined and combines
them. We only support some of the potential combinations, and we
only try common ways to find suitable instructions.
* addb can add two registers or a register and a small integer
and jump to a nearby (+-8k) location. Normally the jump to the
nearby location is conditional on the result of the add, but by
using the "true" condition we can make the jump unconditional.
Thus addb can perform two independent operations in one insn.
* movb is similar to addb in that it can perform a reg->reg
or small immediate->reg copy and jump to a nearby (+-8k) location.
* fmpyadd and fmpysub can perform a FP multiply and either an
FP add or FP sub if the operands of the multiply and add/sub are
independent (there are other minor restrictions). Note both
the fmpy and fadd/fsub can in theory move to better spots according
to data dependencies, but for now we require the fmpy stay at a
fixed location.
* Many of the memory operations can perform pre & post updates
of index registers. GCC's pre/post increment/decrement addressing
is far too simple to take advantage of all the possibilities. This
pass may not be suitable since those insns may not be independent.
* comclr can compare two ints or an int and a register, nullify
the following instruction and zero some other register. This
is more difficult to use as it's harder to find an insn which
will generate a comclr than finding something like an unconditional
branch. (conditional moves & long branches create comclr insns).
* Most arithmetic operations can conditionally skip the next
instruction. They can be viewed as "perform this operation
and conditionally jump to this nearby location" (where nearby
is one insn away). These are difficult to use due to the
branch length restrictions. */
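/* For example (a hedged sketch; registers and the target label are
hypothetical), an independent two-address add followed by a backward
unconditional branch
       add %r4,%r3,%r3
       bl somewhere,0
could become the single instruction
       addb,tr %r4,%r3,somewhere
where the always-true ",tr" completer keeps the branch unconditional.  */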
pa_combine_instructions (insns)
rtx insns;
{
rtx anchor, new;
/* This can get expensive since the basic algorithm is on the
order of O(n^2) (or worse). Only do it for -O2 or higher
levels of optimization. */
if (optimize < 2)
return;
/* Walk down the list of insns looking for "anchor" insns which
may be combined with "floating" insns. As the name implies,
"anchor" instructions don't move, while "floating" insns may
move around. */
new = gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX));
new = make_insn_raw (new);
for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor))
{
enum attr_pa_combine_type anchor_attr;
enum attr_pa_combine_type floater_attr;
/* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
Also ignore any special USE insns. */
if (GET_CODE (anchor) != INSN
&& GET_CODE (anchor) != JUMP_INSN
&& GET_CODE (anchor) != CALL_INSN
|| GET_CODE (PATTERN (anchor)) == USE
|| GET_CODE (PATTERN (anchor)) == CLOBBER
|| GET_CODE (PATTERN (anchor)) == ADDR_VEC
|| GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC)
continue;
anchor_attr = get_attr_pa_combine_type (anchor);
/* See if anchor is an insn suitable for combination. */
if (anchor_attr == PA_COMBINE_TYPE_FMPY
|| anchor_attr == PA_COMBINE_TYPE_FADDSUB
|| (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
&& ! forward_branch_p (anchor)))
{
rtx floater;
for (floater = PREV_INSN (anchor);
floater;
floater = PREV_INSN (floater))
{
if (GET_CODE (floater) == NOTE
|| (GET_CODE (floater) == INSN
&& (GET_CODE (PATTERN (floater)) == USE
|| GET_CODE (PATTERN (floater)) == CLOBBER)))
continue;
/* Anything except a regular INSN will stop our search. */
if (GET_CODE (floater) != INSN
|| GET_CODE (PATTERN (floater)) == ADDR_VEC
|| GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
{
floater = NULL_RTX;
break;
}
/* See if FLOATER is suitable for combination with the
anchor. */
floater_attr = get_attr_pa_combine_type (floater);
if ((anchor_attr == PA_COMBINE_TYPE_FMPY
&& floater_attr == PA_COMBINE_TYPE_FADDSUB)
|| (anchor_attr == PA_COMBINE_TYPE_FADDSUB
&& floater_attr == PA_COMBINE_TYPE_FMPY))
{
/* If ANCHOR and FLOATER can be combined, then we're
done with this pass. */
if (pa_can_combine_p (new, anchor, floater, 0,
SET_DEST (PATTERN (floater)),
XEXP (SET_SRC (PATTERN (floater)), 0),
XEXP (SET_SRC (PATTERN (floater)), 1)))
break;
}
else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH
&& floater_attr == PA_COMBINE_TYPE_ADDMOVE)
{
if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS)
{
if (pa_can_combine_p (new, anchor, floater, 0,
SET_DEST (PATTERN (floater)),
XEXP (SET_SRC (PATTERN (floater)), 0),
XEXP (SET_SRC (PATTERN (floater)), 1)))
break;
}
else
{
if (pa_can_combine_p (new, anchor, floater, 0,
SET_DEST (PATTERN (floater)),
SET_SRC (PATTERN (floater)),
SET_SRC (PATTERN (floater))))
break;
}
}
}
/* If we didn't find anything on the backwards scan, try forwards. */
if (!floater
&& (anchor_attr == PA_COMBINE_TYPE_FMPY
|| anchor_attr == PA_COMBINE_TYPE_FADDSUB))
{
for (floater = anchor; floater; floater = NEXT_INSN (floater))
{
if (GET_CODE (floater) == NOTE
|| (GET_CODE (floater) == INSN
&& (GET_CODE (PATTERN (floater)) == USE
|| GET_CODE (PATTERN (floater)) == CLOBBER)))
continue;
/* Anything except a regular INSN will stop our search. */
if (GET_CODE (floater) != INSN
|| GET_CODE (PATTERN (floater)) == ADDR_VEC
|| GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC)
{
floater = NULL_RTX;
break;
}
/* See if FLOATER is suitable for combination with the
anchor. */
floater_attr = get_attr_pa_combine_type (floater);
if ((anchor_attr == PA_COMBINE_TYPE_FMPY
&& floater_attr == PA_COMBINE_TYPE_FADDSUB)
|| (anchor_attr == PA_COMBINE_TYPE_FADDSUB
&& floater_attr == PA_COMBINE_TYPE_FMPY))
{
/* If ANCHOR and FLOATER can be combined, then we're
done with this pass. */
if (pa_can_combine_p (new, anchor, floater, 1,
SET_DEST (PATTERN (floater)),
XEXP (SET_SRC (PATTERN (floater)), 0),
XEXP (SET_SRC (PATTERN (floater)), 1)))
break;
}
}
}
/* FLOATER will be nonzero if we found a suitable floating
insn for combination with ANCHOR. */
if (floater
&& (anchor_attr == PA_COMBINE_TYPE_FADDSUB
|| anchor_attr == PA_COMBINE_TYPE_FMPY))
{
/* Emit the new instruction and delete the old anchor. */
emit_insn_before (gen_rtx (PARALLEL, VOIDmode,
gen_rtvec (2, PATTERN (anchor),
PATTERN (floater))),
anchor);
PUT_CODE (anchor, NOTE);
NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
NOTE_SOURCE_FILE (anchor) = 0;
/* Emit a special USE insn for FLOATER, then delete
the floating insn. */
emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
delete_insn (floater);
continue;
}
else if (floater
&& anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH)
{
rtx temp;
/* Emit the new jump instruction and delete the old anchor. */
temp = emit_jump_insn_before (gen_rtx (PARALLEL, VOIDmode,
gen_rtvec (2, PATTERN (anchor),
PATTERN (floater))),
anchor);
JUMP_LABEL (temp) = JUMP_LABEL (anchor);
PUT_CODE (anchor, NOTE);
NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED;
NOTE_SOURCE_FILE (anchor) = 0;
/* Emit a special USE insn for FLOATER, then delete
the floating insn. */
emit_insn_before (gen_rtx (USE, VOIDmode, floater), floater);
delete_insn (floater);
continue;
}
}
}
}
int
pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2)
rtx new, anchor, floater;
int reversed;
rtx dest, src1, src2;
{
int insn_code_number;
rtx start, end;
/* Create a PARALLEL with the patterns of ANCHOR and
FLOATER, try to recognize it, then test constraints
for the resulting pattern.
If the pattern doesn't match or the constraints
aren't met, keep searching for a suitable floater
insn. */
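/* For the fmpy/fadd case the combined pattern has roughly this shape
(a sketch with hypothetical hard register numbers):
       (parallel [(set (reg:DF 38) (mult:DF (reg:DF 40) (reg:DF 42)))
                  (set (reg:DF 44) (plus:DF (reg:DF 44) (reg:DF 46)))])  */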
XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor);
XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater);
INSN_CODE (new) = -1;
insn_code_number = recog_memoized (new);
if (insn_code_number < 0
|| !constrain_operands (insn_code_number, 1))
return 0;
if (reversed)
{
start = anchor;
end = floater;
}
else
{
start = floater;
end = anchor;
}
/* There are up to three operands to consider: one
output and two inputs.
The output must not be used between FLOATER & ANCHOR
exclusive. The inputs must not be set between
FLOATER and ANCHOR exclusive. */
if (reg_used_between_p (dest, start, end))
return 0;
if (reg_set_between_p (src1, start, end))
return 0;
if (reg_set_between_p (src2, start, end))
return 0;
/* If we get here, then everything is good. */
return 1;
}
@@ -34,6 +34,10 @@
"move,unary,binary,shift,nullshift,compare,load,store,uncond_branch,branch,cbranch,fbranch,call,dyncall,fpload,fpstore,fpalu,fpcc,fpmulsgl,fpmuldbl,fpdivsgl,fpdivdbl,fpsqrtsgl,fpsqrtdbl,multi,milli,parallel_branch"
(const_string "binary"))
(define_attr "pa_combine_type"
"fmpy,faddsub,uncond_branch,addmove,none"
(const_string "none"))
;; Processor type (for scheduling, not code generation) -- this attribute
;; must exactly match the processor_type enumeration in pa.h.
;;
@@ -97,7 +101,7 @@
;; Call delay slot description.
(define_delay (eq_attr "type" "uncond_branch,call")
(define_delay (eq_attr "type" "call")
[(eq_attr "in_call_delay" "true") (nil) (nil)])
;; millicode call delay slot description. Note it disallows delay slot
@@ -129,6 +133,11 @@
(and (eq_attr "in_nullified_branch_delay" "true")
(attr_flag "backward"))])
(define_delay (and (eq_attr "type" "uncond_branch")
(eq (symbol_ref "following_call (insn)")
(const_int 0)))
[(eq_attr "in_branch_delay" "true") (nil) (nil)])
;; Function units of the HPPA. The following data is for the 700 CPUs
;; (Mustang CPU + Timex FPU aka PA-89) because that's what I have the docs for.
;; Scheduling instructions for PA-83 machines according to the Snake
@@ -1337,6 +1346,7 @@
fldw%F1 %1,%0
fstw%F0 %1,%0"
[(set_attr "type" "move,move,move,shift,load,store,move,fpalu,fpload,fpstore")
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4,4,4,4,4,4,4,4,4,4")])
(define_insn ""
@@ -1356,6 +1366,7 @@
stw%M0 %r1,%0
mtsar %r1"
[(set_attr "type" "move,move,move,move,load,store,move")
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4,4,4,4,4,4,4")])
(define_insn ""
@@ -1735,6 +1746,7 @@
mtsar %r1
fcpy,sgl %r1,%0"
[(set_attr "type" "move,move,move,shift,load,store,move,fpalu")
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4,4,4,4,4,4,4,4")])
(define_insn ""
@@ -1896,6 +1908,7 @@
mtsar %r1
fcpy,sgl %r1,%0"
[(set_attr "type" "move,move,move,shift,load,store,move,fpalu")
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4,4,4,4,4,4,4,4")])
(define_insn ""
@@ -2535,6 +2548,7 @@
fstw%F0 %r1,%0
stw%M0 %r1,%0"
[(set_attr "type" "fpalu,move,fpload,load,fpstore,store")
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4,4,4,4,4,4")])
(define_insn ""
@@ -2550,6 +2564,7 @@
ldw%M1 %1,%0
stw%M0 %r1,%0"
[(set_attr "type" "move,load,store")
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4,4,4")])
(define_insn ""
@@ -2932,6 +2947,7 @@
addl %1,%2,%0
ldo %2(%1),%0"
[(set_attr "type" "binary,binary")
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4,4")])
;; Disgusting kludge to work around reload bugs with frame pointer
@@ -3452,6 +3468,7 @@
"! TARGET_SOFT_FLOAT"
"fadd,dbl %1,%2,%0"
[(set_attr "type" "fpalu")
(set_attr "pa_combine_type" "faddsub")
(set_attr "length" "4")])
(define_insn "addsf3"
@@ -3461,6 +3478,7 @@
"! TARGET_SOFT_FLOAT"
"fadd,sgl %1,%2,%0"
[(set_attr "type" "fpalu")
(set_attr "pa_combine_type" "faddsub")
(set_attr "length" "4")])
(define_insn "subdf3"
@@ -3470,6 +3488,7 @@
"! TARGET_SOFT_FLOAT"
"fsub,dbl %1,%2,%0"
[(set_attr "type" "fpalu")
(set_attr "pa_combine_type" "faddsub")
(set_attr "length" "4")])
(define_insn "subsf3"
@@ -3479,6 +3498,7 @@
"! TARGET_SOFT_FLOAT"
"fsub,sgl %1,%2,%0"
[(set_attr "type" "fpalu")
(set_attr "pa_combine_type" "faddsub")
(set_attr "length" "4")])
(define_insn "muldf3"
@@ -3488,6 +3508,7 @@
"! TARGET_SOFT_FLOAT"
"fmpy,dbl %1,%2,%0"
[(set_attr "type" "fpmuldbl")
(set_attr "pa_combine_type" "fmpy")
(set_attr "length" "4")])
(define_insn "mulsf3"
@@ -3497,6 +3518,7 @@
"! TARGET_SOFT_FLOAT"
"fmpy,sgl %1,%2,%0"
[(set_attr "type" "fpmulsgl")
(set_attr "pa_combine_type" "fmpy")
(set_attr "length" "4")])
(define_insn "divdf3"
@@ -3892,6 +3914,7 @@
""
"bl%* %l0,0"
[(set_attr "type" "uncond_branch")
(set_attr "pa_combine_type" "uncond_branch")
(set (attr "length")
(cond [(eq (symbol_ref "jump_in_call_delay (insn)") (const_int 0))
(const_int 4)
@@ -4738,64 +4761,14 @@
(set_attr "length" "4")])
(define_insn ""
[(set (match_operand 0 "register_operand" "=f")
(mult (match_operand 1 "register_operand" "f")
(match_operand 2 "register_operand" "f")))
(set (match_operand 3 "register_operand" "+f")
(minus (match_operand 4 "register_operand" "f")
(match_operand 5 "register_operand" "f")))]
"TARGET_SNAKE && ! TARGET_SOFT_FLOAT
&& reload_completed && fmpysuboperands (operands)"
"*
{
if (GET_MODE (operands[0]) == DFmode)
return \"fmpysub,dbl %1,%2,%0,%5,%3\";
else
return \"fmpysub,sgl %1,%2,%0,%5,%3\";
}"
[(set_attr "type" "fpalu")
(set_attr "length" "4")])
;; The next four peepholes take advantage of the new 5 operand
;; fmpy{add,sub} instructions available on 1.1 CPUs. Basically
;; fmpyadd performs a multiply and add/sub of independent operands
;; at the same time. Because the operands must be independent,
;; combine will not try to combine such insns... Thus we have
;; to use a peephole.
(define_peephole
[(set (match_operand 0 "register_operand" "=f")
(mult (match_operand 1 "register_operand" "f")
(match_operand 2 "register_operand" "f")))
(set (match_operand 3 "register_operand" "+f")
(plus (match_operand 4 "register_operand" "f")
(match_operand 5 "register_operand" "f")))]
"! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpyaddoperands (operands)"
"*
{
if (GET_MODE (operands[0]) == DFmode)
{
if (rtx_equal_p (operands[5], operands[3]))
return \"fmpyadd,dbl %1,%2,%0,%4,%3\";
else
return \"fmpyadd,dbl %1,%2,%0,%5,%3\";
}
else
{
if (rtx_equal_p (operands[5], operands[3]))
return \"fmpyadd,sgl %1,%2,%0,%4,%3\";
else
return \"fmpyadd,sgl %1,%2,%0,%5,%3\";
}
}")
(define_peephole
[(set (match_operand 3 "register_operand" "+f")
(plus (match_operand 4 "register_operand" "f")
(match_operand 5 "register_operand" "f")))
(set (match_operand 0 "register_operand" "=f")
(mult (match_operand 1 "register_operand" "f")
(match_operand 2 "register_operand" "f")))]
"! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpyaddoperands (operands)"
"TARGET_SNAKE && ! TARGET_SOFT_FLOAT
&& reload_completed && fmpyaddoperands (operands)"
"*
{
if (GET_MODE (operands[0]) == DFmode)
@@ -4812,41 +4785,47 @@
else
return \"fmpyadd,sgl %1,%2,%0,%5,%3\";
}
}")
}"
[(set_attr "type" "fpalu")
(set_attr "length" "4")])
;; Note fsub subtracts the second operand from the first while fmpysub
;; does the opposite for the subtraction operands!
(define_peephole
(define_insn ""
[(set (match_operand 0 "register_operand" "=f")
(mult (match_operand 1 "register_operand" "f")
(match_operand 2 "register_operand" "f")))
(set (match_operand 3 "register_operand" "+f")
(minus (match_operand 4 "register_operand" "f")
(match_operand 5 "register_operand" "f")))]
"! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpysuboperands (operands)"
"TARGET_SNAKE && ! TARGET_SOFT_FLOAT
&& reload_completed && fmpysuboperands (operands)"
"*
{
if (GET_MODE (operands[0]) == DFmode)
return \"fmpysub,dbl %1,%2,%0,%5,%3\";
else
return \"fmpysub,sgl %1,%2,%0,%5,%3\";
}")
}"
[(set_attr "type" "fpalu")
(set_attr "length" "4")])
(define_peephole
(define_insn ""
[(set (match_operand 3 "register_operand" "+f")
(minus (match_operand 4 "register_operand" "f")
(match_operand 5 "register_operand" "f")))
(set (match_operand 0 "register_operand" "=f")
(mult (match_operand 1 "register_operand" "f")
(match_operand 2 "register_operand" "f")))]
"! TARGET_SOFT_FLOAT && TARGET_SNAKE && fmpysuboperands (operands)"
"TARGET_SNAKE && ! TARGET_SOFT_FLOAT
&& reload_completed && fmpysuboperands (operands)"
"*
{
if (GET_MODE (operands[0]) == DFmode)
return \"fmpysub,dbl %1,%2,%0,%5,%3\";
else
return \"fmpysub,sgl %1,%2,%0,%5,%3\";
}")
}"
[(set_attr "type" "fpalu")
(set_attr "length" "4")])
;; Clean up turds left by reload.
(define_peephole