Commit 4801cc61 by Uros Bizjak Committed by Uros Bizjak

i386.md (float partial SSE register stall splitter): Move splitter near its instruction pattern.

	* config/i386/i386.md (float partial SSE register stall splitter): Move
	splitter near its instruction pattern.
	(float_extend partial SSE register stall splitter): Ditto.
	(float_truncate partial SSE register stall splitter): Ditto.

From-SVN: r264185
parent 8eb7aec1
2018-09-09 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (float partial SSE register stall splitter): Move
splitter near its instruction pattern.
(float_extend partial SSE register stall splitter): Ditto.
(float_truncate partial SSE register stall splitter): Ditto.
2018-09-09 Hans-Peter Nilsson <hp@bitrange.com> 2018-09-09 Hans-Peter Nilsson <hp@bitrange.com>
PR target/86794 PR target/86794
......
...@@ -4477,6 +4477,40 @@ ...@@ -4477,6 +4477,40 @@
} }
}) })
;; Extend an SFmode value (operand 1) to DFmode (operand 0).
;; The three constraint alternatives pair destination/source as
;; "=f"/"fm", "m"/"f" and "v"/"vm".  Alternatives 0 and 1 are output by
;; output_387_reg_move; alternative 2 uses the %vcvtss2sd template.
(define_insn "*extendsfdf2"
[(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
switch (which_alternative)
{
case 0:
case 1:
return output_387_reg_move (insn, operands);
case 2:
return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "fmov,fmov,ssecvt")
(set_attr "prefix" "orig,orig,maybe_vex")
(set_attr "mode" "SF,XF,DF")
;; Alternative enablement:
;;   - with TARGET_SSE2 && TARGET_SSE_MATH, alternatives 0 and 1 are
;;     enabled only when TARGET_MIX_SSE_I387 holds, and alternative 2
;;     is always enabled;
;;   - otherwise alternatives 0 and 1 are enabled and alternative 2 is
;;     disabled.
(set (attr "enabled")
(if_then_else
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
(if_then_else
(eq_attr "alternative" "0,1")
(symbol_ref "TARGET_MIX_SSE_I387")
(symbol_ref "true"))
(if_then_else
(eq_attr "alternative" "0,1")
(symbol_ref "true")
(symbol_ref "false"))))])
/* For converting SF(xmm2) to DF(xmm1), use the following code instead of /* For converting SF(xmm2) to DF(xmm1), use the following code instead of
cvtss2sd: cvtss2sd:
unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs unpcklps xmm2,xmm2 ; packed conversion might crash on signaling NaNs
...@@ -4544,39 +4578,31 @@ ...@@ -4544,39 +4578,31 @@
(set (match_dup 0) (float_extend:DF (match_dup 2)))] (set (match_dup 0) (float_extend:DF (match_dup 2)))]
"operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);") "operands[2] = lowpart_subreg (SFmode, operands[0], DFmode);")
(define_insn "*extendsfdf2" ;; Break partial reg stall for cvtss2sd. This splitter should split
[(set (match_operand:DF 0 "nonimm_ssenomem_operand" "=f,m,v") ;; late in the pass sequence (after register rename pass),
;; so allocated registers won't change anymore.
(define_split
[(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF (float_extend:DF
(match_operand:SF 1 "nonimmediate_operand" "fm,f,vm")))] (match_operand:SF 1 "nonimmediate_operand")))]
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)" "TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V2DF
(vec_duplicate:V2DF
(float_extend:DF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{ {
switch (which_alternative) operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
{ emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
case 0: })
case 1:
return output_387_reg_move (insn, operands);
case 2:
return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "fmov,fmov,ssecvt")
(set_attr "prefix" "orig,orig,maybe_vex")
(set_attr "mode" "SF,XF,DF")
(set (attr "enabled")
(if_then_else
(match_test ("TARGET_SSE2 && TARGET_SSE_MATH"))
(if_then_else
(eq_attr "alternative" "0,1")
(symbol_ref "TARGET_MIX_SSE_I387")
(symbol_ref "true"))
(if_then_else
(eq_attr "alternative" "0,1")
(symbol_ref "true")
(symbol_ref "false"))))])
(define_expand "extend<mode>xf2" (define_expand "extend<mode>xf2"
[(set (match_operand:XF 0 "nonimmediate_operand") [(set (match_operand:XF 0 "nonimmediate_operand")
...@@ -4710,6 +4736,32 @@ ...@@ -4710,6 +4736,32 @@
(set (match_dup 0) (float_truncate:SF (match_dup 2)))] (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
"operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);") "operands[2] = lowpart_subreg (DFmode, operands[0], SFmode);")
;; Break partial reg stall for cvtsd2ss.  This splitter should split
;; late in the pass sequence (after register rename pass),
;; so allocated registers won't change anymore.
;;
;; Matches a DFmode -> SFmode float_truncate whose destination satisfies
;; sse_reg_operand and rewrites it as a V4SFmode vec_merge of the
;; converted value with the destination itself (mask const_int 1).
;; Before the replacement insn, the preparation code emits a move of the
;; V4SFmode zero constant into the V4SFmode lowpart view of operand 0.
(define_split
[(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
;; Only split after epilogue_completed, when optimizing the function for
;; speed, when operand 1 is not a register or is a register with a
;; different number than operand 0, and when operand 0 is not an
;; EXT_REX SSE register unless TARGET_AVX512VL is set.
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V4SF
(vec_duplicate:V4SF
(float_truncate:SF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
})
;; Conversion from XFmode to {SF,DF}mode ;; Conversion from XFmode to {SF,DF}mode
(define_insn "truncxf<mode>2" (define_insn "truncxf<mode>2"
...@@ -5152,83 +5204,6 @@ ...@@ -5152,83 +5204,6 @@
DONE; DONE;
}) })
;; Avoid partial SSE register dependency stalls.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore.
;;
;; Matches a `float' conversion from an SWI48-mode operand into a
;; MODEF-mode destination that satisfies sse_reg_operand, and rewrites it
;; as a vec_merge (in the <MODEF:ssevecmode> vector mode) of the converted
;; value with the destination itself (mask const_int 1).  Before the
;; replacement insn, the preparation code emits a move of the vector-mode
;; zero constant into the vector-mode lowpart view of operand 0.
(define_split
[(set (match_operand:MODEF 0 "sse_reg_operand")
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
;; Only split after epilogue_completed, when optimizing the function for
;; speed, and when operand 0 is not an EXT_REX SSE register unless
;; TARGET_AVX512VL is set.
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:<MODEF:ssevecmode>
(vec_duplicate:<MODEF:ssevecmode>
(float:MODEF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
const machine_mode vmode = <MODEF:ssevecmode>mode;
operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
emit_move_insn (operands[0], CONST0_RTX (vmode));
})
;; Break partial reg stall for cvtsd2ss.  This splitter should split
;; late in the pass sequence (after register rename pass),
;; so allocated registers won't change anymore.
;;
;; Matches a DFmode -> SFmode float_truncate whose destination satisfies
;; sse_reg_operand and rewrites it as a V4SFmode vec_merge of the
;; converted value with the destination itself (mask const_int 1).
;; Before the replacement insn, the preparation code emits a move of the
;; V4SFmode zero constant into the V4SFmode lowpart view of operand 0.
(define_split
[(set (match_operand:SF 0 "sse_reg_operand")
(float_truncate:SF
(match_operand:DF 1 "nonimmediate_operand")))]
;; Only split after epilogue_completed, when optimizing the function for
;; speed, when operand 1 is not a register or is a register with a
;; different number than operand 0, and when operand 0 is not an
;; EXT_REX SSE register unless TARGET_AVX512VL is set.
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V4SF
(vec_duplicate:V4SF
(float_truncate:SF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
operands[0] = lowpart_subreg (V4SFmode, operands[0], SFmode);
emit_move_insn (operands[0], CONST0_RTX (V4SFmode));
})
;; Break partial reg stall for cvtss2sd.  This splitter should split
;; late in the pass sequence (after register rename pass),
;; so allocated registers won't change anymore.
;;
;; Matches an SFmode -> DFmode float_extend whose destination satisfies
;; sse_reg_operand and rewrites it as a V2DFmode vec_merge of the
;; converted value with the destination itself (mask const_int 1).
;; Before the replacement insn, the preparation code emits a move of the
;; V2DFmode zero constant into the V2DFmode lowpart view of operand 0.
(define_split
[(set (match_operand:DF 0 "sse_reg_operand")
(float_extend:DF
(match_operand:SF 1 "nonimmediate_operand")))]
;; Only split after epilogue_completed, when optimizing the function for
;; speed, when operand 1 is not a register or is a register with a
;; different number than operand 0, and when operand 0 is not an
;; EXT_REX SSE register unless TARGET_AVX512VL is set.
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!REG_P (operands[1])
|| REGNO (operands[0]) != REGNO (operands[1]))
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:V2DF
(vec_duplicate:V2DF
(float_extend:DF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
operands[0] = lowpart_subreg (V2DFmode, operands[0], DFmode);
emit_move_insn (operands[0], CONST0_RTX (V2DFmode));
})
;; Avoid store forwarding (partial memory) stall penalty ;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers. */ ;; by passing DImode value through XMM registers. */
...@@ -5279,6 +5254,31 @@ ...@@ -5279,6 +5254,31 @@
(set_attr "unit" "i387") (set_attr "unit" "i387")
(set_attr "fp_int_src" "true")]) (set_attr "fp_int_src" "true")])
;; Avoid partial SSE register dependency stalls.  This splitter should split
;; late in the pass sequence (after register rename pass), so allocated
;; registers won't change anymore.
;;
;; Matches a `float' conversion from an SWI48-mode operand into a
;; MODEF-mode destination that satisfies sse_reg_operand, and rewrites it
;; as a vec_merge (in the <MODEF:ssevecmode> vector mode) of the converted
;; value with the destination itself (mask const_int 1).  Before the
;; replacement insn, the preparation code emits a move of the vector-mode
;; zero constant into the vector-mode lowpart view of operand 0.
(define_split
[(set (match_operand:MODEF 0 "sse_reg_operand")
(float:MODEF (match_operand:SWI48 1 "nonimmediate_operand")))]
;; Only split after epilogue_completed, when optimizing the function for
;; speed, and when operand 0 is not an EXT_REX SSE register unless
;; TARGET_AVX512VL is set.
"TARGET_SSE_PARTIAL_REG_DEPENDENCY && epilogue_completed
&& optimize_function_for_speed_p (cfun)
&& (!EXT_REX_SSE_REG_P (operands[0])
|| TARGET_AVX512VL)"
[(set (match_dup 0)
(vec_merge:<MODEF:ssevecmode>
(vec_duplicate:<MODEF:ssevecmode>
(float:MODEF
(match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
const machine_mode vmode = <MODEF:ssevecmode>mode;
operands[0] = lowpart_subreg (vmode, operands[0], <MODEF:MODE>mode);
emit_move_insn (operands[0], CONST0_RTX (vmode));
})
(define_expand "floatuns<SWI12:mode><MODEF:mode>2" (define_expand "floatuns<SWI12:mode><MODEF:mode>2"
[(set (match_operand:MODEF 0 "register_operand") [(set (match_operand:MODEF 0 "register_operand")
(unsigned_float:MODEF (unsigned_float:MODEF
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment