Commit 576e2f37 by Uros Bizjak

re PR target/57954 (AVX missing vxorps (zeroing) before vcvtsi2s %edx, slow down AVX code)

2013-07-29  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (float post-reload splitters): Do not check
	for subregs of SSE registers.

2013-07-29  Uros Bizjak  <ubizjak@gmail.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	PR target/57954
	PR target/57988
	* config/i386/i386.md (post-reload splitter
	to avoid partial SSE reg dependency stalls): New pattern.

From-SVN: r201308
parent bb80c2eb
2013-07-29 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (float post-reload splitters): Do not check
for subregs of SSE registers.
2013-07-29 Uros Bizjak <ubizjak@gmail.com>
H.J. Lu <hongjiu.lu@intel.com>
PR target/57954
PR target/57988
* config/i386/i386.md (post-reload splitter
to avoid partial SSE reg dependency stalls): New pattern.
2013-07-29  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	* config/s390/s390.md ("movcc"): Swap load and store instructions.

@@ -28,10 +41,10 @@

	* config/aarch64/iterators.md: Add attributes rtn and vas.

2013-07-26  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
	    Richard Earnshaw  <richard.earnshaw@arm.com>

	* combine.c (simplify_comparison): Re-canonicalize operands
	where appropriate.
	* config/arm/arm.md (movcond_addsi): New splitter.

2013-07-25  Sterling Augustine  <saugustine@google.com>
......
@@ -4596,10 +4596,7 @@
     (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
    && TARGET_INTER_UNIT_CONVERSIONS
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 0) (float:MODEF (match_dup 1)))])

 (define_split
@@ -4608,10 +4605,7 @@
     (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
    && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (float:MODEF (match_dup 2)))])

@@ -4697,10 +4691,7 @@
     (clobber (match_operand:SI 2 "memory_operand"))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   rtx op1 = operands[1];

@@ -4740,10 +4731,7 @@
     (clobber (match_operand:SI 2 "memory_operand"))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],

@@ -4764,10 +4752,7 @@
 	(float:MODEF (match_operand:SI 1 "register_operand")))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   rtx op1 = operands[1];

@@ -4810,10 +4795,7 @@
 	(float:MODEF (match_operand:SI 1 "memory_operand")))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(const_int 0)]
 {
   operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],

@@ -4872,10 +4854,7 @@
     (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 0) (float:MODEF (match_dup 1)))])

 (define_insn "*float<SWI48:mode><MODEF:mode>2_sse_nointerunit"
@@ -4905,10 +4884,7 @@
     (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (float:MODEF (match_dup 2)))])

@@ -4917,10 +4893,7 @@
 	(float:MODEF (match_operand:SWI48 1 "memory_operand")))
     (clobber (match_operand:SWI48 2 "memory_operand"))]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
-   && reload_completed
-   && (SSE_REG_P (operands[0])
-       || (GET_CODE (operands[0]) == SUBREG
-	   && SSE_REG_P (SUBREG_REG (operands[0]))))"
+   && reload_completed && SSE_REG_P (operands[0])"
   [(set (match_dup 0) (float:MODEF (match_dup 1)))])

 (define_insn "*float<SWI48x:mode><X87MODEF:mode>2_i387_with_temp"
@@ -4968,6 +4941,46 @@
    && reload_completed"
   [(set (match_dup 0) (float:X87MODEF (match_dup 1)))])
;; Avoid partial SSE register dependency stalls
;;
;; Post-reload splitter (new in this commit, PR target/57954/57988):
;; rewrite "xmm = (float) SImode reg-or-mem" as a vec_merge of a
;; vec_duplicate into the destination vector register, after first
;; zeroing that register (see emit_move_insn below), so the scalar
;; int->float conversion (vcvtsi2ss/sd per the PR title) no longer
;; carries a false dependency on the register's stale upper elements.
;; Fires only when tuning sets TARGET_SSE_PARTIAL_REG_DEPENDENCY, the
;; function is optimized for speed, reload has run, and operand 0
;; landed in an SSE hard register (plain SSE_REG_P -- the subreg
;; check was dropped by the companion change in this commit).
(define_split
[(set (match_operand:MODEF 0 "register_operand")
(float:MODEF (match_operand:SI 1 "nonimmediate_operand")))]
"TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
&& reload_completed && SSE_REG_P (operands[0])"
;; Mask (const_int 1) keeps element 0 from the duplicated conversion
;; result and the remaining elements from (the freshly zeroed)
;; operand 0.
[(set (match_dup 0)
(vec_merge:<ssevecmode>
(vec_duplicate:<ssevecmode>
(float:MODEF (match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
/* Re-view the scalar destination as the full vector register, then
   zero the whole register to break the dependency on its previous
   contents before the merged conversion above is emitted.  */
operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
<MODE>mode, 0);
emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
})
;; DImode variant of the preceding partial-SSE-dependency splitter:
;; same vec_merge/vec_duplicate rewrite for "xmm = (float) DImode
;; reg-or-mem", additionally gated on TARGET_64BIT since the DImode
;; source must live in a 64-bit general register or memory.
(define_split
[(set (match_operand:MODEF 0 "register_operand")
(float:MODEF (match_operand:DI 1 "nonimmediate_operand")))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_SSE_PARTIAL_REG_DEPENDENCY
&& optimize_function_for_speed_p (cfun)
&& reload_completed && SSE_REG_P (operands[0])"
;; Mask (const_int 1) selects element 0 from the duplicated
;; conversion result; other elements come from the zeroed operand 0.
[(set (match_dup 0)
(vec_merge:<ssevecmode>
(vec_duplicate:<ssevecmode>
(float:MODEF (match_dup 1)))
(match_dup 0)
(const_int 1)))]
{
/* View the scalar destination as the full vector register and zero
   it first, breaking the false dependency on stale upper elements.  */
operands[0] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
<MODE>mode, 0);
emit_move_insn (operands[0], CONST0_RTX (<ssevecmode>mode));
})
;; Avoid store forwarding (partial memory) stall penalty
;; by passing DImode value through XMM registers. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment