Commit b0d5396c by Uros Bizjak Committed by Uros Bizjak

* config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>,

	*fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>,
	*fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns.

From-SVN: r190304
parent a2a40ee8
2012-08-10 Uros Bizjak <ubizjak@gmail.com>
* config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>,
*fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>,
*fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns.
2012-08-10 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (simple LEA peephole2s): Add zero-extend
variants of PLUS and MULT simple LEA patterns. Disable PLUS
patterns for TARGET_OPT_AGU.
......
......@@ -403,8 +403,6 @@
;; Mix-n-match
;; Iterates over three vector modes: V8SI (eight SImode integers), V8SF
;; (eight single-precision floats) and V4DF (four double-precision floats).
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
;; Modes covered by the fused multiply-add patterns: scalar SF/DF plus the
;; V4SF, V2DF, V8SF and V4DF vector modes.
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Mapping of immediate bits for blend instructions
;; Per-mode immediate value given as a decimal string.  Each value equals
;; 2^N - 1 for an N-element vector (V8SF -> 255, V4SF/V4DF -> 15, V2DF -> 3),
;; i.e. one set bit per element.
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
......@@ -1886,12 +1884,13 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA4 floating point multiply/accumulate instructions. This
;; includes the scalar version of the instructions as well as the
;; vector.
;; FMA floating point multiply/accumulate instructions. These include
;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Modes covered by the fused multiply-add patterns below: scalar SF/DF plus
;; the V4SF, V2DF, V8SF and V4DF vector modes.
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
;; combine to generate a multiply/add with two memory references. We then
;; split this insn, into loading up the destination register with one of the
......@@ -1907,8 +1906,6 @@
;; We could now properly represent that only one memory operand is
;; allowed and not be penalized during optimization.
;; Intrinsic FMA operations.
;; The standard names for fma are only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
......@@ -1942,7 +1939,7 @@
(neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
;; The builtin for intrinsics is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
......@@ -1951,7 +1948,71 @@
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"TARGET_FMA || TARGET_FMA4")
(define_insn "*fma4i_fmadd_<mode>"
;; FMA3 version
;; Fused multiply-add for every mode in FMAMODE:
;;   operand 0 = operand 1 * operand 2 + operand 3.
;; The destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
"TARGET_FMA"
"@
vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Fused multiply-subtract for every mode in FMAMODE:
;;   operand 0 = operand 1 * operand 2 - operand 3
;; (the addend is wrapped in NEG).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the last one; each
;; alternative permits exactly one memory source and selects the matching
;; 132, 213 or 231 instruction form.
(define_insn "*fma_fmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(neg:FMAMODE
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
"TARGET_FMA"
"@
vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Negated fused multiply-add for every mode in FMAMODE:
;;   operand 0 = -(operand 1) * operand 2 + operand 3
;; (the first multiplicand is wrapped in NEG).  The destination is tied to
;; operand 1 in the first two alternatives and to operand 3 in the last one;
;; each alternative permits exactly one memory source and selects the
;; matching 132, 213 or 231 instruction form.
(define_insn "*fma_fnmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(neg:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
"TARGET_FMA"
"@
vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract for every mode in FMAMODE:
;;   operand 0 = -(operand 1) * operand 2 - operand 3
;; (both the first multiplicand and the addend are wrapped in NEG).  The
;; destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fnmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(neg:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(neg:FMAMODE
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
"TARGET_FMA"
"@
vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA4 version
(define_insn "*fma4_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
......@@ -1962,7 +2023,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fma4i_fmsub_<mode>"
(define_insn "*fma4_fmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
......@@ -1974,7 +2035,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fma4i_fnmadd_<mode>"
(define_insn "*fma4_fnmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(neg:FMAMODE
......@@ -1986,7 +2047,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fma4i_fnmsub_<mode>"
(define_insn "*fma4_fnmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(neg:FMAMODE
......@@ -1999,22 +2060,88 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Scalar versions of the above. Unlike ADDSS et al, these write the
;; entire destination register, with the high-order elements zeroed.
;; FMA parallel floating point multiply addsub and subadd operations.
(define_expand "fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
(fma:VF_128
(match_operand:VF_128 1 "nonimmediate_operand")
(match_operand:VF_128 2 "nonimmediate_operand")
(match_operand:VF_128 3 "nonimmediate_operand"))
(match_dup 4)
(const_int 1)))]
;; It would be possible to represent these without the UNSPEC as
;;
;; (vec_merge
;; (fma op1 op2 op3)
;; (fma op1 op2 (neg op3))
;; (merge-const))
;;
;; But this doesn't seem useful in practice.
;; Named expander for the fmaddsub operation over the VF modes.  It wraps
;; operands 1-3 in UNSPEC_FMADDSUB, has no preparation statements, and is
;; available when either TARGET_FMA or TARGET_FMA4 is enabled.
(define_expand "fmaddsub_<mode>"
[(set (match_operand:VF 0 "register_operand")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand")
(match_operand:VF 2 "nonimmediate_operand")
(match_operand:VF 3 "nonimmediate_operand")]
UNSPEC_FMADDSUB))]
"TARGET_FMA || TARGET_FMA4")
;; FMA3 version
;; FMA3 instruction pattern for UNSPEC_FMADDSUB over the VF modes.  The
;; destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fmaddsub_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
UNSPEC_FMADDSUB))]
"TARGET_FMA"
"@
vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA3 instruction pattern for the subadd variant: the same UNSPEC_FMADDSUB
;; as the fmaddsub pattern, but with operand 3 wrapped in NEG.  The
;; destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fmsubadd_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
(neg:VF
(match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
UNSPEC_FMADDSUB))]
"TARGET_FMA"
"@
vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA4 version
;; FMA4 instruction pattern for UNSPEC_FMADDSUB over the VF modes.  The
;; four-operand form does not tie the destination to any input.  The two
;; alternatives allow the memory operand to be operand 3 (first alternative)
;; or operand 2 (second alternative).  The pattern has exactly four operands
;; (0-3) and, being a define_insn, carries no preparation statements: the
;; condition string is followed directly by the output template.
(define_insn "*fma4_fmaddsub_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%x,x")
(match_operand:VF 2 "nonimmediate_operand" " x,m")
(match_operand:VF 3 "nonimmediate_operand" "xm,x")]
UNSPEC_FMADDSUB))]
"TARGET_FMA4"
"vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA4 instruction pattern for the subadd variant: UNSPEC_FMADDSUB with
;; operand 3 wrapped in NEG.  The four-operand form does not tie the
;; destination to any input.  The two alternatives allow the memory operand
;; to be operand 3 (first alternative) or operand 2 (second alternative).
(define_insn "*fma4_fmsubadd_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%x,x")
(match_operand:VF 2 "nonimmediate_operand" " x,m")
(neg:VF
(match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
UNSPEC_FMADDSUB))]
"TARGET_FMA4"
"vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA3 floating point scalar intrinsics. These merge result with
;; high-order elements from the destination register.
(define_expand "fmai_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
......@@ -2099,6 +2226,21 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA4 floating point scalar intrinsics. These write the
;; entire destination register, with the high-order elements zeroed.
;; Expander for the FMA4 scalar multiply-add intrinsic over the VF_128 modes.
;; Callers supply operands 0-3; operand 4 is filled in by the preparation
;; statement with the all-zero constant of the mode.  The vec_merge with mask
;; (const_int 1) takes element 0 from the fma result and every other element
;; from operand 4, i.e. from zero.
(define_expand "fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
(fma:VF_128
(match_operand:VF_128 1 "nonimmediate_operand")
(match_operand:VF_128 2 "nonimmediate_operand")
(match_operand:VF_128 3 "nonimmediate_operand"))
(match_dup 4)
(const_int 1)))]
"TARGET_FMA4"
"operands[4] = CONST0_RTX (<MODE>mode);")
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
......@@ -2161,152 +2303,6 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA4 Parallel floating point multiply addsub and subadd operations.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; It would be possible to represent these without the UNSPEC as
;;
;; (vec_merge
;; (fma op1 op2 op3)
;; (fma op1 op2 (neg op3))
;; (merge-const))
;;
;; But this doesn't seem useful in practice.
;; Named expander for the fmaddsub operation over the VF modes.  It wraps
;; operands 1-3 in UNSPEC_FMADDSUB, has no preparation statements, and is
;; available when either TARGET_FMA or TARGET_FMA4 is enabled.
(define_expand "fmaddsub_<mode>"
[(set (match_operand:VF 0 "register_operand")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand")
(match_operand:VF 2 "nonimmediate_operand")
(match_operand:VF 3 "nonimmediate_operand")]
UNSPEC_FMADDSUB))]
"TARGET_FMA || TARGET_FMA4")
;; FMA4 instruction pattern for UNSPEC_FMADDSUB over the VF modes.  The
;; four-operand form does not tie the destination to any input.  The two
;; alternatives allow the memory operand to be operand 3 (first alternative)
;; or operand 2 (second alternative).
(define_insn "*fma4_fmaddsub_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%x,x")
(match_operand:VF 2 "nonimmediate_operand" " x,m")
(match_operand:VF 3 "nonimmediate_operand" "xm,x")]
UNSPEC_FMADDSUB))]
"TARGET_FMA4"
"vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA4 instruction pattern for the subadd variant: UNSPEC_FMADDSUB with
;; operand 3 wrapped in NEG.  The four-operand form does not tie the
;; destination to any input.  The two alternatives allow the memory operand
;; to be operand 3 (first alternative) or operand 2 (second alternative).
(define_insn "*fma4_fmsubadd_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%x,x")
(match_operand:VF 2 "nonimmediate_operand" " x,m")
(neg:VF
(match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
UNSPEC_FMADDSUB))]
"TARGET_FMA4"
"vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; FMA3 floating point multiply/accumulate instructions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Fused multiply-add for every mode in FMAMODE:
;;   operand 0 = operand 1 * operand 2 + operand 3.
;; The destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
"TARGET_FMA"
"@
vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Fused multiply-subtract for every mode in FMAMODE:
;;   operand 0 = operand 1 * operand 2 - operand 3
;; (the addend is wrapped in NEG).  The destination is tied to operand 1 in
;; the first two alternatives and to operand 3 in the last one; each
;; alternative permits exactly one memory source and selects the matching
;; 132, 213 or 231 instruction form.
(define_insn "*fma_fmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(neg:FMAMODE
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
"TARGET_FMA"
"@
vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Negated fused multiply-add for every mode in FMAMODE:
;;   operand 0 = -(operand 1) * operand 2 + operand 3
;; (the first multiplicand is wrapped in NEG).  The destination is tied to
;; operand 1 in the first two alternatives and to operand 3 in the last one;
;; each alternative permits exactly one memory source and selects the
;; matching 132, 213 or 231 instruction form.
(define_insn "*fma_fnmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(neg:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
"TARGET_FMA"
"@
vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Negated fused multiply-subtract for every mode in FMAMODE:
;;   operand 0 = -(operand 1) * operand 2 - operand 3
;; (both the first multiplicand and the addend are wrapped in NEG).  The
;; destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fnmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
(fma:FMAMODE
(neg:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
(match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
(neg:FMAMODE
(match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
"TARGET_FMA"
"@
vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA3 instruction pattern for UNSPEC_FMADDSUB over the VF modes.  The
;; destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fmaddsub_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
UNSPEC_FMADDSUB))]
"TARGET_FMA"
"@
vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; FMA3 instruction pattern for the subadd variant: the same UNSPEC_FMADDSUB
;; as the fmaddsub pattern, but with operand 3 wrapped in NEG.  The
;; destination is tied to operand 1 in the first two alternatives and to
;; operand 3 in the last one; each alternative permits exactly one memory
;; source and selects the matching 132, 213 or 231 instruction form.
(define_insn "*fma_fmsubadd_<mode>"
[(set (match_operand:VF 0 "register_operand" "=x,x,x")
(unspec:VF
[(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
(neg:VF
(match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
UNSPEC_FMADDSUB))]
"TARGET_FMA"
"@
vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment