Commit 860f5e77 by Uros Bizjak

sse.md (UNSPEC_MOVU): Remove.

	* config/i386/sse.md (UNSPEC_MOVU): Remove.
	(UNSPEC_LOADU): New.
	(UNSPEC_STOREU): Ditto.
	(<sse>_movu<ssemodesuffix><avxsizesuffix>): Split to ...
	(<sse>_loadu<ssemodesuffix><avxsizesuffix>): ... this and ...
	(<sse>_storeu<ssemodesuffix><avxsizesuffix>): ... this.
	(<sse2>_movdqu<avxsizesuffix>): Split to ...
	(<sse2>_loaddqu<avxsizesuffix>): ... this and ...
	(<sse2>_storedqu<avxsizesuffix>): ... this.
	(*sse4_2_pcmpestr_unaligned): Update.
	(*sse4_2_pcmpistr_unaligned): Ditto.

	* config/i386/i386.c (ix86_avx256_split_vector_move_misalign): Use
	gen_avx_load{dqu,ups,upd}256 to load from unaligned memory and
	gen_avx_store{dqu,ups,upd}256 to store to unaligned memory.
	(ix86_expand_vector_move_misalign): Use gen_sse_loadups or
	gen_sse2_load{dqu,upd} to load from unaligned memory and
	gen_sse_storeups or gen_sse2_store{dqu,upd} to store to
	unaligned memory.
	(struct builtin_description bdesc_special_args)
	<IX86_BUILTIN_LOADUPS>:
	Use CODE_FOR_sse_loadups.
	<IX86_BUILTIN_LOADUPD>: Use CODE_FOR_sse2_loadupd.
	<IX86_BUILTIN_LOADDQU>: Use CODE_FOR_sse2_loaddqu.
	<IX86_BUILTIN_STOREUPS>: Use CODE_FOR_sse_storeups.
	<IX86_BUILTIN_STOREUPD>: Use CODE_FOR_sse2_storeupd.
	<IX86_BUILTIN_STOREDQU>: Use CODE_FOR_sse2_storedqu.
	<IX86_BUILTIN_LOADUPS256>: Use CODE_FOR_avx_loadups256.
	<IX86_BUILTIN_LOADUPD256>: Use CODE_FOR_avx_loadupd256.
	<IX86_BUILTIN_LOADDQU256>: Use CODE_FOR_avx_loaddqu256.
	<IX86_BUILTIN_STOREUPS256>: Use CODE_FOR_avx_storeups256.
	<IX86_BUILTIN_STOREUPD256>: Use CODE_FOR_avx_storeupd256.
	<IX86_BUILTIN_STOREDQU256>: Use CODE_FOR_avx_storedqu256.

testsuite/ChangeLog:

	* gcc.target/i386/avx256-unaligned-load-1.c: Update asm scan patterns.
	* gcc.target/i386/avx256-unaligned-load-2.c: Ditto.
	* gcc.target/i386/avx256-unaligned-load-3.c: Ditto.
	* gcc.target/i386/avx256-unaligned-load-4.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-1.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-2.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-3.c: Ditto.
	* gcc.target/i386/avx256-unaligned-store-4.c: Ditto.

From-SVN: r192468
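
The builtin remapping above is easiest to see from the intrinsics side. As a
minimal sketch (illustrative, not part of the commit): _mm_loadu_ps and
_mm_storeu_ps expand through __builtin_ia32_loadups/__builtin_ia32_storeups,
i.e. IX86_BUILTIN_LOADUPS and IX86_BUILTIN_STOREUPS, which after this change
use the direction-specific sse_loadups and sse_storeups insns instead of the
two alternatives of the old combined movu pattern:

#include <immintrin.h>

/* Illustrative example only: an unaligned SSE load and store that go
   through IX86_BUILTIN_LOADUPS / IX86_BUILTIN_STOREUPS, i.e. the new
   sse_loadups and sse_storeups patterns.  */
void
scale2 (float *dst, const float *src)	/* no alignment assumed */
{
  __m128 v = _mm_loadu_ps (src);	/* sse_loadups */
  v = _mm_mul_ps (v, _mm_set1_ps (2.0f));
  _mm_storeu_ps (dst, v);		/* sse_storeups */
}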
gcc/config/i386/sse.md:

@@ -21,7 +21,8 @@
 (define_c_enum "unspec" [
   ;; SSE
   UNSPEC_MOVNT
-  UNSPEC_MOVU
+  UNSPEC_LOADU
+  UNSPEC_STOREU

   ;; SSE3
   UNSPEC_LDDQU
@@ -586,12 +587,12 @@
   DONE;
 })

-(define_insn "<sse>_movu<ssemodesuffix><avxsizesuffix>"
-  [(set (match_operand:VF 0 "nonimmediate_operand" "=x,m")
+(define_insn "<sse>_loadu<ssemodesuffix><avxsizesuffix>"
+  [(set (match_operand:VF 0 "register_operand" "=x")
 	(unspec:VF
-	  [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
-	  UNSPEC_MOVU))]
-  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+	  [(match_operand:VF 1 "memory_operand" "m")]
+	  UNSPEC_LOADU))]
+  "TARGET_SSE"
 {
   switch (get_attr_mode (insn))
     {
@@ -618,11 +619,79 @@
 	]
 	(const_string "<MODE>")))])

-(define_insn "<sse2>_movdqu<avxsizesuffix>"
-  [(set (match_operand:VI1 0 "nonimmediate_operand" "=x,m")
-	(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
-		    UNSPEC_MOVU))]
-  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
+  [(set (match_operand:VF 0 "memory_operand" "=m")
+	(unspec:VF
+	  [(match_operand:VF 1 "register_operand" "x")]
+	  UNSPEC_STOREU))]
+  "TARGET_SSE"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+    }
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (const_string "<ssePSmode>")
+	       (and (eq_attr "alternative" "1")
+		    (match_test "TARGET_SSE_TYPELESS_STORES"))
+		 (const_string "<ssePSmode>")
+	       (match_test "TARGET_AVX")
+		 (const_string "<MODE>")
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "V4SF")
+	      ]
+	      (const_string "<MODE>")))])
+
+(define_insn "<sse2>_loaddqu<avxsizesuffix>"
+  [(set (match_operand:VI1 0 "register_operand" "=x")
+	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
+		    UNSPEC_LOADU))]
+  "TARGET_SSE2"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovdqu\t{%1, %0|%0, %1}";
+    }
+}
+  [(set_attr "type" "ssemov")
+   (set_attr "movu" "1")
+   (set (attr "prefix_data16")
+     (if_then_else
+       (match_test "TARGET_AVX")
+     (const_string "*")
+     (const_string "1")))
+   (set_attr "prefix" "maybe_vex")
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (const_string "<ssePSmode>")
+	       (and (eq_attr "alternative" "1")
+		    (match_test "TARGET_SSE_TYPELESS_STORES"))
+		 (const_string "<ssePSmode>")
+	       (match_test "TARGET_AVX")
+		 (const_string "<sseinsnmode>")
+	       (match_test "optimize_function_for_size_p (cfun)")
+		 (const_string "V4SF")
+	      ]
+	      (const_string "<sseinsnmode>")))])
+
+(define_insn "<sse2>_storedqu<avxsizesuffix>"
+  [(set (match_operand:VI1 0 "memory_operand" "=m")
+	(unspec:VI1 [(match_operand:VI1 1 "register_operand" "x")]
+		    UNSPEC_STOREU))]
+  "TARGET_SSE2"
 {
   switch (get_attr_mode (insn))
     {
@@ -9307,7 +9376,7 @@
 	    (match_operand:SI 3 "register_operand" "a")
 	    (unspec:V16QI
 	      [(match_operand:V16QI 4 "memory_operand" "m")]
-	      UNSPEC_MOVU)
+	      UNSPEC_LOADU)
 	    (match_operand:SI 5 "register_operand" "d")
 	    (match_operand:SI 6 "const_0_to_255_operand" "n")]
 	   UNSPEC_PCMPESTR))
@@ -9315,7 +9384,7 @@
 	   (unspec:V16QI
 	     [(match_dup 2)
 	      (match_dup 3)
-	      (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+	      (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
 	      (match_dup 5)
 	      (match_dup 6)]
 	    UNSPEC_PCMPESTR))
@@ -9323,7 +9392,7 @@
 	   (unspec:CC
 	     [(match_dup 2)
 	      (match_dup 3)
-	      (unspec:V16QI [(match_dup 4)] UNSPEC_MOVU)
+	      (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
 	      (match_dup 5)
 	      (match_dup 6)]
 	    UNSPEC_PCMPESTR))]
@@ -9498,19 +9567,19 @@
 	   [(match_operand:V16QI 2 "register_operand" "x")
 	    (unspec:V16QI
 	      [(match_operand:V16QI 3 "memory_operand" "m")]
-	      UNSPEC_MOVU)
+	      UNSPEC_LOADU)
 	    (match_operand:SI 4 "const_0_to_255_operand" "n")]
 	   UNSPEC_PCMPISTR))
    (set (match_operand:V16QI 1 "register_operand" "=Yz")
 	(unspec:V16QI
 	  [(match_dup 2)
-	   (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
 	   (match_dup 4)]
 	  UNSPEC_PCMPISTR))
    (set (reg:CC FLAGS_REG)
 	(unspec:CC
 	  [(match_dup 2)
-	   (unspec:V16QI [(match_dup 3)] UNSPEC_MOVU)
+	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
 	   (match_dup 4)]
 	  UNSPEC_PCMPISTR))]
   "TARGET_SSE4_2
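The i386.c hunks are not shown in this view; as a rough, hedged sketch of
what the ChangeLog describes (a simplified stand-in, not the actual GCC
code), ix86_expand_vector_move_misalign now picks a direction-specific
generator instead of expanding one movu pattern for both directions:

/* Hedged sketch only -- simplified stand-in for the logic in
   ix86_expand_vector_move_misalign; the real function also handles
   integer modes, the AVX 256-bit paths and further fallbacks.  */
static void
expand_unaligned_move_sketch (rtx op0, rtx op1, enum machine_mode mode)
{
  if (MEM_P (op1))
    /* Load from unaligned memory.  */
    emit_insn (mode == V4SFmode
	       ? gen_sse_loadups (op0, op1)
	       : gen_sse2_loadupd (op0, op1));
  else
    /* Store to unaligned memory.  */
    emit_insn (mode == V4SFmode
	       ? gen_sse_storeups (op0, op1)
	       : gen_sse2_storeupd (op0, op1));
}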
gcc/testsuite/gcc.target/i386/avx256-unaligned-load-1.c:

@@ -14,6 +14,6 @@ avx_test (void)
     c[i] = a[i] * b[i+3];
 }

-/* { dg-final { scan-assembler-not "avx_movups256/1" } } */
-/* { dg-final { scan-assembler "sse_movups/1" } } */
+/* { dg-final { scan-assembler-not "avx_loadups256" } } */
+/* { dg-final { scan-assembler "sse_loadups" } } */
 /* { dg-final { scan-assembler "vinsertf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c:

@@ -24,6 +24,6 @@ avx_test (void)
     }
 }

-/* { dg-final { scan-assembler-not "avx_movdqu256/1" } } */
-/* { dg-final { scan-assembler "sse2_movdqu/1" } } */
+/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
+/* { dg-final { scan-assembler "sse2_loaddqu" } } */
 /* { dg-final { scan-assembler "vinsert.128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-3.c:

@@ -14,6 +14,6 @@ avx_test (void)
     c[i] = a[i] * b[i+3];
 }

-/* { dg-final { scan-assembler-not "avx_movupd256/1" } } */
-/* { dg-final { scan-assembler "sse2_movupd/1" } } */
+/* { dg-final { scan-assembler-not "avx_loadupd256" } } */
+/* { dg-final { scan-assembler "sse2_loadupd" } } */
 /* { dg-final { scan-assembler "vinsertf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-load-4.c:

@@ -14,6 +14,6 @@ avx_test (void)
     b[i] = a[i+3] * 2;
 }

-/* { dg-final { scan-assembler "avx_movups256/1" } } */
-/* { dg-final { scan-assembler-not "avx_movups/1" } } */
+/* { dg-final { scan-assembler "avx_loadups256" } } */
+/* { dg-final { scan-assembler-not "sse_loadups" } } */
 /* { dg-final { scan-assembler-not "vinsertf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c:

@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }

-/* { dg-final { scan-assembler-not "avx_movups256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storeups256" } } */
 /* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c:

@@ -24,6 +24,6 @@ avx_test (void)
     }
 }

-/* { dg-final { scan-assembler-not "avx_movdqu256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storedqu256" } } */
 /* { dg-final { scan-assembler "vmovdqu.*\\*movv16qi_internal/3" } } */
 /* { dg-final { scan-assembler "vextract.128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c:

@@ -17,6 +17,6 @@ avx_test (void)
     d[i] = c[i] * 20.0;
 }

-/* { dg-final { scan-assembler-not "avx_movupd256/2" } } */
+/* { dg-final { scan-assembler-not "avx_storeupd256" } } */
 /* { dg-final { scan-assembler "vmovupd.*\\*movv2df_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */

gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c:

@@ -14,7 +14,7 @@ avx_test (void)
     b[i+3] = a[i] * c[i];
 }

-/* { dg-final { scan-assembler "avx_movups256/2" } } */
-/* { dg-final { scan-assembler-not "avx_movups/2" } } */
+/* { dg-final { scan-assembler "avx_storeups256" } } */
+/* { dg-final { scan-assembler-not "sse_storeups" } } */
 /* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler-not "vextractf128" } } */
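
One note on these scan strings: the tests are compiled with -dp, so each
emitted asm insn is annotated with its RTL pattern name and matched
alternative. The old "/1" and "/2" suffixes therefore selected the load and
store alternatives of the combined movu pattern; with separate load and
store insns, the scans can name the pattern directly. A hypothetical
miniature test in the same style (array sizes and options here are
illustrative, not taken from the commit):

/* { dg-do compile } */
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-store" } */

#define N 1024
float a[N], b[N+3];

void
avx_test (void)
{
  int i;
  for (i = 0; i < N; i++)
    b[i+3] = a[i] * 2.0f;
}

/* With 256-bit unaligned stores split, no avx_storeups256 pattern should
   appear; the store is emitted as two 128-bit halves instead.  */
/* { dg-final { scan-assembler-not "avx_storeups256" } } */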