Commit d31fd1e8 by Uros Bizjak Committed by Uros Bizjak

re PR target/89071 (AVX vcvtsd2ss lets us avoid PXOR dependency breaking for…

re PR target/89071 (AVX vcvtsd2ss lets us avoid PXOR dependency breaking for scalar float<->double and other scalar xmm,xmm instructions)

	PR target/89071
	* config/i386/i386.md (*sqrt<mode>2_sse): Add (v,0) alternative.
	Do not prefer (v,v) alternative for non-AVX targets and (v,m)
	alternative for speed when TARGET_SSE_PARTIAL_REG_DEPENDENCY is set.
	(*rcpsf2_sse): Ditto.
	(*rsqrtsf2_sse): Ditto.
	(sse4_1_round<mode>2): Ditto.

From-SVN: r268496
parent c8eaf058
2019-02-03 Uroš Bizjak <ubizjak@gmail.com>
PR target/89071
* config/i386/i386.md (*sqrt<mode>2_sse): Add (v,0) alternative.
Do not prefer (v,v) alternative for non-AVX targets and (v,m)
alternative for speed when TARGET_SSE_PARTIAL_REG_DEPENDENCY is set.
(*rcpsf2_sse): Ditto.
(*rsqrtsf2_sse): Ditto.
(sse4_1_round<mode>2): Ditto.
2019-02-03 Richard Biener <rguenther@suse.de> 2019-02-03 Richard Biener <rguenther@suse.de>
PR debug/87295 PR debug/87295
......
...@@ -4472,9 +4472,9 @@ ...@@ -4472,9 +4472,9 @@
(set (match_dup 0) (float_extend:DF (match_dup 2)))] (set (match_dup 0) (float_extend:DF (match_dup 2)))]
"operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);") "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
;; Break partial reg stall for cvtss2sd. This splitter should split ;; Break partial SSE register dependency stall. This splitter should split
;; late in the pass sequence (after register rename pass), ;; late in the pass sequence (after register rename pass), so allocated
;; so allocated registers won't change anymore. ;; registers won't change anymore
(define_split (define_split
[(set (match_operand:DF 0 "sse_reg_operand") [(set (match_operand:DF 0 "sse_reg_operand")
...@@ -4632,9 +4632,9 @@ ...@@ -4632,9 +4632,9 @@
(set (match_dup 0) (float_truncate:SF (match_dup 2)))] (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
"operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);") "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
;; Break partial reg stall for cvtsd2ss. This splitter should split ;; Break partial SSE register dependency stall. This splitter should split
;; late in the pass sequence (after register rename pass), ;; late in the pass sequence (after register rename pass), so allocated
;; so allocated registers won't change anymore. ;; registers won't change anymore
(define_split (define_split
[(set (match_operand:SF 0 "sse_reg_operand") [(set (match_operand:SF 0 "sse_reg_operand")
...@@ -5137,7 +5137,7 @@ ...@@ -5137,7 +5137,7 @@
(set_attr "unit" "i387") (set_attr "unit" "i387")
(set_attr "fp_int_src" "true")]) (set_attr "fp_int_src" "true")])
;; Avoid partial SSE register dependency stalls. This splitter should split ;; Break partial SSE register dependency stall. This splitter should split
;; late in the pass sequence (after register rename pass), so allocated ;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore ;; registers won't change anymore
...@@ -14765,18 +14765,26 @@ ...@@ -14765,18 +14765,26 @@
(symbol_ref "false"))))]) (symbol_ref "false"))))])
(define_insn "*rcpsf2_sse" (define_insn "*rcpsf2_sse"
[(set (match_operand:SF 0 "register_operand" "=x,x") [(set (match_operand:SF 0 "register_operand" "=x,x,x")
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")] (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
UNSPEC_RCP))] UNSPEC_RCP))]
"TARGET_SSE && TARGET_SSE_MATH" "TARGET_SSE && TARGET_SSE_MATH"
"@ "@
%vrcpss\t{%d1, %0|%0, %d1} %vrcpss\t{%d1, %0|%0, %d1}
%vrcpss\t{%d1, %0|%0, %d1}
%vrcpss\t{%1, %d0|%d0, %1}" %vrcpss\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse") [(set_attr "type" "sse")
(set_attr "atom_sse_attr" "rcp") (set_attr "atom_sse_attr" "rcp")
(set_attr "btver2_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex") (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")]) (set_attr "mode" "SF")
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_insn "*fop_xf_1_i387" (define_insn "*fop_xf_1_i387"
[(set (match_operand:XF 0 "register_operand" "=f,f") [(set (match_operand:XF 0 "register_operand" "=f,f")
...@@ -15003,18 +15011,26 @@ ...@@ -15003,18 +15011,26 @@
(set_attr "bdver1_decode" "direct")]) (set_attr "bdver1_decode" "direct")])
(define_insn "*rsqrtsf2_sse" (define_insn "*rsqrtsf2_sse"
[(set (match_operand:SF 0 "register_operand" "=x,x") [(set (match_operand:SF 0 "register_operand" "=x,x,x")
(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "x,m")] (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "0,x,m")]
UNSPEC_RSQRT))] UNSPEC_RSQRT))]
"TARGET_SSE && TARGET_SSE_MATH" "TARGET_SSE && TARGET_SSE_MATH"
"@ "@
%vrsqrtss\t{%d1, %0|%0, %d1} %vrsqrtss\t{%d1, %0|%0, %d1}
%vrsqrtss\t{%d1, %0|%0, %d1}
%vrsqrtss\t{%1, %d0|%d0, %1}" %vrsqrtss\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse") [(set_attr "type" "sse")
(set_attr "atom_sse_attr" "rcp") (set_attr "atom_sse_attr" "rcp")
(set_attr "btver2_sse_attr" "rcp") (set_attr "btver2_sse_attr" "rcp")
(set_attr "prefix" "maybe_vex") (set_attr "prefix" "maybe_vex")
(set_attr "mode" "SF")]) (set_attr "mode" "SF")
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_expand "rsqrtsf2" (define_expand "rsqrtsf2"
[(set (match_operand:SF 0 "register_operand") [(set (match_operand:SF 0 "register_operand")
...@@ -15027,21 +15043,26 @@ ...@@ -15027,21 +15043,26 @@
}) })
(define_insn "*sqrt<mode>2_sse" (define_insn "*sqrt<mode>2_sse"
[(set (match_operand:MODEF 0 "register_operand" "=v,v") [(set (match_operand:MODEF 0 "register_operand" "=v,v,v")
(sqrt:MODEF (sqrt:MODEF
(match_operand:MODEF 1 "nonimmediate_operand" "v,m")))] (match_operand:MODEF 1 "nonimmediate_operand" "0,v,m")))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH" "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
"@ "@
%vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1} %vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
%vsqrt<ssemodesuffix>\t{%d1, %0|%0, %d1}
%vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}" %vsqrt<ssemodesuffix>\t{%1, %d0|%d0, %1}"
[(set_attr "type" "sse") [(set_attr "type" "sse")
(set_attr "atom_sse_attr" "sqrt") (set_attr "atom_sse_attr" "sqrt")
(set_attr "btver2_sse_attr" "sqrt") (set_attr "btver2_sse_attr" "sqrt")
(set_attr "prefix" "maybe_vex") (set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>") (set_attr "mode" "<MODE>")
(set_attr "athlon_decode" "*") (set (attr "preferred_for_speed")
(set_attr "amdfam10_decode" "*") (cond [(eq_attr "alternative" "1")
(set_attr "bdver1_decode" "*")]) (symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_expand "sqrt<mode>2" (define_expand "sqrt<mode>2"
[(set (match_operand:MODEF 0 "register_operand") [(set (match_operand:MODEF 0 "register_operand")
...@@ -16175,21 +16196,30 @@ ...@@ -16175,21 +16196,30 @@
(define_insn "sse4_1_round<mode>2" (define_insn "sse4_1_round<mode>2"
[(set (match_operand:MODEF 0 "register_operand" "=x,x,v") [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,v")
(unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "x,m,vm") (unspec:MODEF
(match_operand:SI 2 "const_0_to_15_operand" "n,n,n")] [(match_operand:MODEF 1 "nonimmediate_operand" "0,x,m,vm")
UNSPEC_ROUND))] (match_operand:SI 2 "const_0_to_15_operand" "n,n,n,n")]
UNSPEC_ROUND))]
"TARGET_SSE4_1" "TARGET_SSE4_1"
"@ "@
%vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2} %vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
%vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2} %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}" vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
[(set_attr "type" "ssecvt") [(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1,1,*") (set_attr "prefix_extra" "1,1,1,*")
(set_attr "length_immediate" "*,*,1") (set_attr "length_immediate" "*,*,*,1")
(set_attr "prefix" "maybe_vex,maybe_vex,evex") (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex")
(set_attr "isa" "noavx512f,noavx512f,avx512f") (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX || !TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(eq_attr "alternative" "2")
(symbol_ref "!TARGET_SSE_PARTIAL_REG_DEPENDENCY")
]
(symbol_ref "true")))])
(define_insn "rintxf2" (define_insn "rintxf2"
[(set (match_operand:XF 0 "register_operand" "=f") [(set (match_operand:XF 0 "register_operand" "=f")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment