Commit 63705578 by Uros Bizjak Committed by Uros Bizjak

i386.md (*movti_internal_rex64): Avoid MOVAPS size optimization for TARGET_AVX.

	* config/i386/i386.md (*movti_internal_rex64): Avoid MOVAPS size
	optimization for TARGET_AVX.
	(*movti_internal_sse): Ditto.
	(*movdi_internal_rex64): Handle TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
	(*movdi_internal): Ditto.
	(*movsi_internal): Ditto.
	(*movtf_internal): Avoid MOVAPS size optimization for TARGET_AVX.
	(*movdf_internal_rex64): Ditto.
	(*movdf_internal): Ditto.
	(*movsf_internal): Ditto.
	* config/i386/sse.md (mov<mode>): Handle TARGET_SSE_LOAD0_BY_PXOR.

From-SVN: r187386
parent e580d8ee
2012-05-11 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (*movti_internal_rex64): Avoid MOVAPS size
optimization for TARGET_AVX.
(*movti_internal_sse): Ditto.
(*movdi_internal_rex64): Handle TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL.
(*movdi_internal): Ditto.
(*movsi_internal): Ditto.
(*movtf_internal): Avoid MOVAPS size optimization for TARGET_AVX.
(*movdf_internal_rex64): Ditto.
(*movdf_internal): Ditto.
(*movsf_internal): Ditto.
* config/i386/sse.md (mov<mode>): Handle TARGET_SSE_LOAD0_BY_PXOR.
2012-05-10 Eric Botcazou <ebotcazou@adacore.com>
* dwarf2out.c (add_byte_size_attribute) <RECORD_TYPE>: Handle variable
......
......@@ -1890,12 +1890,15 @@
(set (attr "mode")
(cond [(eq_attr "alternative" "0,1")
(const_string "DI")
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "optimize_function_for_size_p (cfun)"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(and (eq_attr "alternative" "4")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
(const_string "TI")))])
......@@ -1943,13 +1946,15 @@
[(set_attr "type" "sselog1,ssemov,ssemov")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "optimize_function_for_size_p (cfun)"))
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(const_string "V4SF")
(not (match_test "TARGET_SSE2"))
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI")))])
......@@ -1970,8 +1975,11 @@
return "movdq2q\t{%1, %0|%0, %1}";
case TYPE_SSEMOV:
if (get_attr_mode (insn) == MODE_TI)
if (get_attr_mode (insn) == MODE_V4SF)
return "%vmovaps\t{%1, %0|%0, %1}";
else if (get_attr_mode (insn) == MODE_TI)
return "%vmovdqa\t{%1, %0|%0, %1}";
/* Handle broken assemblers that require movd instead of movq. */
if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
return "%vmovd\t{%1, %0|%0, %1}";
......@@ -2048,7 +2056,20 @@
(if_then_else (eq_attr "alternative" "10,11,12,13,14,15")
(const_string "maybe_vex")
(const_string "orig")))
(set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,TI,DI,TI,DI,DI,DI,DI,DI")])
(set (attr "mode")
(cond [(eq_attr "alternative" "0,4")
(const_string "SI")
(eq_attr "alternative" "10,12")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
(const_string "TI"))
]
(const_string "DI")))])
;; Reload patterns to support multi-word load/store
;; with non-offsetable address.
......@@ -2142,7 +2163,7 @@
case MODE_DI:
return "%vmovq\t{%1, %0|%0, %1}";
case MODE_V4SF:
return "movaps\t{%1, %0|%0, %1}";
return "%vmovaps\t{%1, %0|%0, %1}";
case MODE_V2SF:
return "movlps\t{%1, %0|%0, %1}";
default:
......@@ -2189,7 +2210,22 @@
(if_then_else (eq_attr "alternative" "5,6,7,8")
(const_string "maybe_vex")
(const_string "orig")))
(set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF,DI,DI")])
(set (attr "mode")
(cond [(eq_attr "alternative" "9,11")
(const_string "V4SF")
(eq_attr "alternative" "10,12")
(const_string "V2SF")
(eq_attr "alternative" "5,7")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
(const_string "TI"))
]
(const_string "DI")))])
(define_split
[(set (match_operand:DI 0 "nonimmediate_operand")
......@@ -2271,10 +2307,15 @@
(cond [(eq_attr "alternative" "2,3")
(const_string "DI")
(eq_attr "alternative" "6,7")
(if_then_else
(not (match_test "TARGET_SSE2"))
(const_string "V4SF")
(const_string "TI"))
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI"))
(and (eq_attr "alternative" "8,9,10,11")
(not (match_test "TARGET_SSE2")))
(const_string "SF")
......@@ -2881,12 +2922,15 @@
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4")
(const_string "DI")
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "optimize_function_for_size_p (cfun)"))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
(const_string "TI")))])
......@@ -3030,9 +3074,11 @@
(eq_attr "alternative" "3,4,5,6,11,12")
(const_string "DI")
/* xorps is one byte shorter. */
/* xorps is one byte shorter for !TARGET_AVX. */
(eq_attr "alternative" "7")
(cond [(match_test "optimize_function_for_size_p (cfun)")
(cond [(match_test "TARGET_AVX")
(const_string "V2DF")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
(match_test "TARGET_SSE_LOAD0_BY_PXOR")
(const_string "TI")
......@@ -3043,14 +3089,16 @@
whole SSE registers use APD move to break dependency
chains, otherwise use short move to avoid extra work.
movaps encodes one byte shorter. */
movaps encodes one byte shorter for !TARGET_AVX. */
(eq_attr "alternative" "8")
(cond
[(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(const_string "V2DF")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(const_string "V2DF")
(match_test "TARGET_AVX")
(const_string "DF")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
(const_string "DF"))
/* For architectures resolving dependencies on register
......@@ -3165,9 +3213,11 @@
(const_string "V4SF")
(const_string "V2SF"))
/* xorps is one byte shorter. */
/* xorps is one byte shorter for !TARGET_AVX. */
(eq_attr "alternative" "5,9")
(cond [(match_test "optimize_function_for_size_p (cfun)")
(cond [(match_test "TARGET_AVX")
(const_string "V2DF")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
(match_test "TARGET_SSE_LOAD0_BY_PXOR")
(const_string "TI")
......@@ -3178,16 +3228,19 @@
whole SSE registers use APD move to break dependency
chains, otherwise use short move to avoid extra work.
movaps encodes one byte shorter. */
movaps encodes one byte shorter for !TARGET_AVX. */
(eq_attr "alternative" "6,10")
(cond
[(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(const_string "V2DF")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
(const_string "V2DF")
(match_test "TARGET_AVX")
(const_string "DF")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
(const_string "DF"))
/* For architectures resolving dependencies on register
parts we may avoid extra work to zero out upper part
of register. */
......@@ -3277,12 +3330,16 @@
(cond [(eq_attr "alternative" "3,4,9,10")
(const_string "SI")
(eq_attr "alternative" "5")
(if_then_else
(and (and (match_test "TARGET_SSE_LOAD0_BY_PXOR")
(match_test "TARGET_SSE2"))
(not (match_test "optimize_function_for_size_p (cfun)")))
(const_string "TI")
(const_string "V4SF"))
(cond [(match_test "TARGET_AVX")
(const_string "V4SF")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
(match_test "TARGET_SSE_LOAD0_BY_PXOR")
(const_string "TI")
]
(const_string "V4SF"))
/* For architectures resolving dependencies on
whole SSE registers use APS move to break dependency
chains, otherwise use short move to avoid extra work.
......
......@@ -491,6 +491,9 @@
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
(and (eq_attr "alternative" "0")
(match_test "TARGET_SSE_LOAD0_BY_PXOR"))
(const_string "TI")
]
(const_string "<sseinsnmode>")))])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment