Commit 1d4b4f49 by Uros Bizjak Committed by Uros Bizjak

re PR target/89071 (AVX vcvtsd2ss lets us avoid PXOR dependency breaking for…

re PR target/89071 (AVX vcvtsd2ss lets us avoid PXOR dependency breaking for scalar float<->double and other scalar xmm,xmm instructions)

	PR target/89071
	* config/i386/i386.md (*extendsfdf2): Split out reg->reg
	alternative to avoid partial SSE register stall for TARGET_AVX.
	(truncdfsf2): Ditto.
	(sse4_1_round<mode>2): Ditto.

From-SVN: r268427
parent ec2be203
2019-01-31 Uroš Bizjak <ubizjak@gmail.com>
PR target/89071
* config/i386/i386.md (*extendsfdf2): Split out reg->reg
alternative to avoid partial SSE register stall for TARGET_AVX.
(truncdfsf2): Ditto.
(sse4_1_round<mode>2): Ditto.
2018-01-31 Bill Schmidt <wschmidt@linux.ibm.com> 2018-01-31 Bill Schmidt <wschmidt@linux.ibm.com>
PR tree-optimization/89008 PR tree-optimization/89008
......
...@@ -4370,9 +4370,9 @@ ...@@ -4370,9 +4370,9 @@
}) })
(define_insn "*extendsfdf2" (define_insn "*extendsfdf2"
[(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") [(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v,v")
(float_extend:DF (float_extend:DF
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] (match_operand:SF 1 "nonimmediate_operand" "fm,f,v,m")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{ {
switch (which_alternative) switch (which_alternative)
...@@ -4382,15 +4382,17 @@ ...@@ -4382,15 +4382,17 @@
return output_387_reg_move (insn, operands); return output_387_reg_move (insn, operands);
case 2: case 2:
return "%vcvtss2sd\t{%d1, %0|%0, %d1}";
case 3:
return "%vcvtss2sd\t{%1, %d0|%d0, %1}"; return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
default: default:
gcc_unreachable (); gcc_unreachable ();
} }
} }
[(set_attr "type" "fmov,fmov,ssecvt") [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
(set_attr "prefix" "orig,orig,maybe_vex") (set_attr "prefix" "orig,orig,maybe_vex,maybe_vex")
(set_attr "mode" "SF,XF,DF") (set_attr "mode" "SF,XF,DF,DF")
(set (attr "enabled") (set (attr "enabled")
(if_then_else (if_then_else
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH")) (match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
...@@ -4481,7 +4483,7 @@ ...@@ -4481,7 +4483,7 @@
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun) && optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1]) && (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1])) || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
&& (!EXT_REX_SSE_REG_P (operands[0]) && (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)" || TARGET_AVX512VL)"
[(set (match_dup 0) [(set (match_dup 0)
...@@ -4534,9 +4536,9 @@ ...@@ -4534,9 +4536,9 @@
;; Conversion from DFmode to SFmode. ;; Conversion from DFmode to SFmode.
(define_insn "truncdfsf2" (define_insn "truncdfsf2"
[(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v") [(set (match_operand:SF 0 "nonimm_ssenomem_operand" "=m,f,v,v")
(float_truncate:SF (float_truncate:SF
(match_operand:DF 1 "register_ssemem_operand" "f,f,vm")))] (match_operand:DF 1 "register_ssemem_operand" "f,f,v,m")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{ {
switch (which_alternative) switch (which_alternative)
...@@ -4546,13 +4548,15 @@ ...@@ -4546,13 +4548,15 @@
return output_387_reg_move (insn, operands); return output_387_reg_move (insn, operands);
case 2: case 2:
return "%vcvtsd2ss\t{%d1, %0|%0, %d1}";
case 3:
return "%vcvtsd2ss\t{%1, %d0|%d0, %1}"; return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
default: default:
gcc_unreachable (); gcc_unreachable ();
} }
} }
[(set_attr "type" "fmov,fmov,ssecvt") [(set_attr "type" "fmov,fmov,ssecvt,ssecvt")
(set_attr "mode" "SF") (set_attr "mode" "SF")
(set (attr "enabled") (set (attr "enabled")
(if_then_else (if_then_else
...@@ -4639,7 +4643,7 @@ ...@@ -4639,7 +4643,7 @@
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun) && optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1]) && (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1])) || (!TARGET_AVX && REGNO (operands[0]) != REGNO (operands[1])))
&& (!EXT_REX_SSE_REG_P (operands[0]) && (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)" || TARGET_AVX512VL)"
[(set (match_dup 0) [(set (match_dup 0)
...@@ -16171,19 +16175,20 @@ ...@@ -16171,19 +16175,20 @@
(define_insn "sse4_1_round<mode>2" (define_insn "sse4_1_round<mode>2"
[(set (match_operand:MODEF 0 "register_operand" "=x,v") [(set (match_operand:MODEF 0 "register_operand" "=x,x,v")
(unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "xm,vm") (unspec:MODEF [(match_operand:MODEF 1 "nonimmediate_operand" "x,m,vm")
(match_operand:SI 2 "const_0_to_15_operand" "n,n")] (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
UNSPEC_ROUND))] UNSPEC_ROUND))]
"TARGET_SSE4_1" "TARGET_SSE4_1"
"@ "@
%vround<ssemodesuffix>\t{%2, %d1, %0|%0, %d1, %2}
%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2} %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}" vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
[(set_attr "type" "ssecvt") [(set_attr "type" "ssecvt")
(set_attr "prefix_extra" "1,*") (set_attr "prefix_extra" "1,1,*")
(set_attr "length_immediate" "*,1") (set_attr "length_immediate" "*,*,1")
(set_attr "prefix" "maybe_vex,evex") (set_attr "prefix" "maybe_vex,maybe_vex,evex")
(set_attr "isa" "noavx512f,avx512f") (set_attr "isa" "noavx512f,noavx512f,avx512f")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "rintxf2" (define_insn "rintxf2"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment