* config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>,

*fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>, *fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns. From-SVN: r190304

* config/i386/sse.md (fma_fmadd_<mode>, fma_fmsub_<mode>,
*fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>, *fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns. From-SVN: r190304
b0d5396c · Uros Bizjak · Uros Bizjak · a2a40ee8 · b0d5396c · b0d5396c
Commit b0d5396c authored Aug 10, 2012 by Uros Bizjak Committed by Uros Bizjak Aug 10, 2012
Hide whitespace changes
Inline Side-by-side

Showing with 174 additions and 172 deletions

gcc/ChangeLog
+6 -0

gcc/config/i386/sse.md
+168 -172

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
 2012-08-10  Uros Bizjak  <ubizjak@gmail.com>
+	* config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>,
+	*fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>,
+	*fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns.
+2012-08-10  Uros Bizjak  <ubizjak@gmail.com>
 	* config/i386/i386.md (simple LEA peephole2s): Add zero-extend
 	variants of PLUS and MULT simple LEA patterns.  Disable PLUS
 	patterns for TARGET_OPT_AGU.

--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -403,8 +403,6 @@
 ;; Mix-n-match
 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
 ;; Mapping of immediate bits for blend instructions
 (define_mode_attr blendbits
  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
@@ -1886,12 +1884,13 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-;; FMA4 floating point multiply/accumulate instructions.  This
+;; FMA floating point multiply/accumulate instructions.  These include
-;; includes the scalar version of the instructions as well as the
+;; scalar versions of the instructions as well as vector versions.
-;; vector.
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
 ;; combine to generate a multiply/add with two memory references.  We then
 ;; split this insn, into loading up the destination register with one of the
@@ -1907,8 +1906,6 @@
 ;; We could now properly represent that only one memory operand is
 ;; allowed and not be penalized during optimization.
-;; Intrinsic FMA operations.
 ;; The standard names for fma is only available with SSE math enabled.
 (define_expand "fma<mode>4"
  [(set (match_operand:FMAMODE 0 "register_operand")
@@ -1942,7 +1939,7 @@
 	  (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
  "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
-;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
+;; The builtin for intrinsics is not constrained by SSE math enabled.
 (define_expand "fma4i_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand")
 	(fma:FMAMODE
@@ -1951,7 +1948,71 @@
 	  (match_operand:FMAMODE 3 "nonimmediate_operand")))]
  "TARGET_FMA || TARGET_FMA4")
-(define_insn "*fma4i_fmadd_<mode>"
+;; FMA3 version
+(define_insn "*fma_fmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+	(fma:FMAMODE
+	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+	  (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+	  (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+  "TARGET_FMA"
+  "@
+   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+(define_insn "*fma_fmsub_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+	(fma:FMAMODE
+	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0,x")
+	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
+	  (neg:FMAMODE
+	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+  "TARGET_FMA"
+  "@
+   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+(define_insn "*fma_fnmadd_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+	(fma:FMAMODE
+	  (neg:FMAMODE
+	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
+	  (match_operand:FMAMODE   3 "nonimmediate_operand" " x,xm,0")))]
+  "TARGET_FMA"
+  "@
+   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+(define_insn "*fma_fnmsub_<mode>"
+  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+	(fma:FMAMODE
+	  (neg:FMAMODE
+	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
+	  (neg:FMAMODE
+	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+  "TARGET_FMA"
+  "@
+   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+;; FMA4 version
+(define_insn "*fma4_fmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
 	(fma:FMAMODE
 	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
@@ -1962,7 +2023,7 @@
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fmsub_<mode>"
+(define_insn "*fma4_fmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
 	(fma:FMAMODE
 	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
@@ -1974,7 +2035,7 @@
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmadd_<mode>"
+(define_insn "*fma4_fnmadd_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
 	(fma:FMAMODE
 	  (neg:FMAMODE
@@ -1986,7 +2047,7 @@
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmsub_<mode>"
+(define_insn "*fma4_fnmsub_<mode>"
  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
 	(fma:FMAMODE
 	  (neg:FMAMODE
@@ -1999,22 +2060,88 @@
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
-;; Scalar versions of the above.  Unlike ADDSS et al, these write the
+;; FMA parallel floating point multiply addsub and subadd operations.
-;; entire destination register, with the high-order elements zeroed.
-(define_expand "fma4i_vmfmadd_<mode>"
+;; It would be possible to represent these without the UNSPEC as
-  [(set (match_operand:VF_128 0 "register_operand")
+;;
-	(vec_merge:VF_128
+;; (vec_merge
-	  (fma:VF_128
+;;   (fma op1 op2 op3)
-	    (match_operand:VF_128 1 "nonimmediate_operand")
+;;   (fma op1 op2 (neg op3))
-	    (match_operand:VF_128 2 "nonimmediate_operand")
+;;   (merge-const))
-	    (match_operand:VF_128 3 "nonimmediate_operand"))
+;;
-	  (match_dup 4)
+;; But this doesn't seem useful in practice.
-	  (const_int 1)))]
+(define_expand "fmaddsub_<mode>"
+  [(set (match_operand:VF 0 "register_operand")
+	(unspec:VF
+	  [(match_operand:VF 1 "nonimmediate_operand")
+	   (match_operand:VF 2 "nonimmediate_operand")
+	   (match_operand:VF 3 "nonimmediate_operand")]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_FMA || TARGET_FMA4")
+;; FMA3 version
+(define_insn "*fma_fmaddsub_<mode>"
+  [(set (match_operand:VF 0 "register_operand" "=x,x,x")
+	(unspec:VF
+	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
+	   (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
+	   (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_FMA"
+  "@
+   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+(define_insn "*fma_fmsubadd_<mode>"
+  [(set (match_operand:VF 0 "register_operand" "=x,x,x")
+	(unspec:VF
+	  [(match_operand:VF   1 "nonimmediate_operand" "%0, 0,x")
+	   (match_operand:VF   2 "nonimmediate_operand" "xm, x,xm")
+	   (neg:VF
+	     (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_FMA"
+  "@
+   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+;; FMA4 version
+(define_insn "*fma4_fmaddsub_<mode>"
+  [(set (match_operand:VF 0 "register_operand" "=x,x")
+	(unspec:VF
+	  [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
+	   (match_operand:VF 2 "nonimmediate_operand" " x,m")
+	   (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
+	  UNSPEC_FMADDSUB))]
  "TARGET_FMA4"
-{
+  "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  operands[4] = CONST0_RTX (<MODE>mode);
+  [(set_attr "type" "ssemuladd")
-})
+   (set_attr "mode" "<MODE>")])
+(define_insn "*fma4_fmsubadd_<mode>"
+  [(set (match_operand:VF 0 "register_operand" "=x,x")
+	(unspec:VF
+	  [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
+	   (match_operand:VF 2 "nonimmediate_operand" " x,m")
+	   (neg:VF
+	     (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
+	  UNSPEC_FMADDSUB))]
+  "TARGET_FMA4"
+  "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssemuladd")
+   (set_attr "mode" "<MODE>")])
+;; FMA3 floating point scalar intrinsics. These merge result with
+;; high-order elements from the destination register.
 (define_expand "fmai_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
@@ -2099,6 +2226,21 @@
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])
+;; FMA4 floating point scalar intrinsics.  These write the
+;; entire destination register, with the high-order elements zeroed.
+(define_expand "fma4i_vmfmadd_<mode>"
+  [(set (match_operand:VF_128 0 "register_operand")
+	(vec_merge:VF_128
+	  (fma:VF_128
+	    (match_operand:VF_128 1 "nonimmediate_operand")
+	    (match_operand:VF_128 2 "nonimmediate_operand")
+	    (match_operand:VF_128 3 "nonimmediate_operand"))
+	  (match_dup 4)
+	  (const_int 1)))]
+  "TARGET_FMA4"
+  "operands[4] = CONST0_RTX (<MODE>mode);")
 (define_insn "*fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
 	(vec_merge:VF_128
@@ -2161,152 +2303,6 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
-;; FMA4 Parallel floating point multiply addsub and subadd operations.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; It would be possible to represent these without the UNSPEC as
-;;
-;; (vec_merge
-;;   (fma op1 op2 op3)
-;;   (fma op1 op2 (neg op3))
-;;   (merge-const))
-;;
-;; But this doesn't seem useful in practice.
-(define_expand "fmaddsub_<mode>"
-  [(set (match_operand:VF 0 "register_operand")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand")
-	   (match_operand:VF 2 "nonimmediate_operand")
-	   (match_operand:VF 3 "nonimmediate_operand")]
-	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA || TARGET_FMA4")
-(define_insn "*fma4_fmaddsub_<mode>"
-  [(set (match_operand:VF 0 "register_operand" "=x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
-	   (match_operand:VF 2 "nonimmediate_operand" " x,m")
-	   (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
-	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA4"
-  "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-(define_insn "*fma4_fmsubadd_<mode>"
-  [(set (match_operand:VF 0 "register_operand" "=x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
-	   (match_operand:VF 2 "nonimmediate_operand" " x,m")
-	   (neg:VF
-	     (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
-	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA4"
-  "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; FMA3 floating point multiply/accumulate instructions.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-(define_insn "*fma_fmadd_<mode>"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
-	(fma:FMAMODE
-	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
-	  (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
-	  (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
-  "TARGET_FMA"
-  "@
-   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-(define_insn "*fma_fmsub_<mode>"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
-	(fma:FMAMODE
-	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0, 0,x")
-	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
-  "TARGET_FMA"
-  "@
-   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-(define_insn "*fma_fnmadd_<mode>"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
-	(fma:FMAMODE
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
-	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
-	  (match_operand:FMAMODE   3 "nonimmediate_operand" " x,xm,0")))]
-  "TARGET_FMA"
-  "@
-   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-(define_insn "*fma_fnmsub_<mode>"
-  [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
-	(fma:FMAMODE
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
-	  (match_operand:FMAMODE   2 "nonimmediate_operand" "xm, x,xm")
-	  (neg:FMAMODE
-	    (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
-  "TARGET_FMA"
-  "@
-   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-(define_insn "*fma_fmaddsub_<mode>"
-  [(set (match_operand:VF 0 "register_operand" "=x,x,x")
-	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
-	   (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
-	   (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
-	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA"
-  "@
-   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-(define_insn "*fma_fmsubadd_<mode>"
-  [(set (match_operand:VF 0 "register_operand" "=x,x,x")
-	(unspec:VF
-	  [(match_operand:VF   1 "nonimmediate_operand" "%0, 0,x")
-	   (match_operand:VF   2 "nonimmediate_operand" "xm, x,xm")
-	   (neg:VF
-	     (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
-	  UNSPEC_FMADDSUB))]
-  "TARGET_FMA"
-  "@
-   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
-   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
-   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssemuladd")
-   (set_attr "mode" "<MODE>")])
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
 ;; Parallel single-precision floating point conversion operations
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;