Commit 7121e32b, authored and committed by Uros Bizjak

re PR target/66560 (Fails to generate ADDSUBPS)

	PR target/66560
	* config/i386/predicates.md (addsub_vm_operator): New predicate.
	(addsub_vs_operator): Ditto.
	(addsub_vs_parallel): Ditto.
	* config/i386/sse.md (ssedoublemode): Add V4SF and V2DF modes.
	(avx_addsubv4df3, avx_addsubv8sf3, sse3_addsubv2df3, sse3_addsubv4sf3):
	Put minus RTX before plus and adjust vec_merge selector.
	(*avx_addsubv4df3_1, *avx_addsubv4df3_1s, *sse3_addsubv2df3_1)
	(*sse3_addsubv2df3_1s, *avx_addsubv8sf3_1, *avx_addsubv8sf3_1s)
	(*sse3_addsubv4sf3_1, *sse3_addsubv4sf3_1s): Remove insn patterns.
	(addsub vec_merge splitters): New combiner splitters.
	(addsub vec_select/vec_concat splitters): Ditto.

testsuite/ChangeLog:

	PR target/66560
	* gcc.target/i386/pr66560-1.c: New test.
	* gcc.target/i386/pr66560-2.c: Ditto.
	* gcc.target/i386/pr66560-3.c: Ditto.
	* gcc.target/i386/pr66560-4.c: Ditto.

From-SVN: r224824
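For background: the ADDSUBPS/ADDSUBPD instructions subtract in the
even-numbered lanes and add in the odd-numbered lanes, and the PR is that
GCC failed to combine a separate subtract, add and shuffle into one such
instruction.  A minimal sketch of the reference semantics in plain C
(editorial illustration, not part of the patch; addsub_ps is a
hypothetical name):

#include <stdio.h>

/* Reference semantics of ADDSUBPS: even lanes subtract, odd lanes add.  */
static void
addsub_ps (const float *x, const float *y, float *r)
{
  for (int i = 0; i < 4; i++)
    r[i] = (i & 1) ? x[i] + y[i] : x[i] - y[i];
}

int
main (void)
{
  float x[4] = { 1, 2, 3, 4 }, y[4] = { 10, 20, 30, 40 };
  float r[4];

  addsub_ps (x, y, r);
  printf ("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  /* -9 22 -27 44 */
  return 0;
}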
gcc/config/i386/predicates.md:
@@ -1426,8 +1426,105 @@
   (and (match_code "unspec_volatile")
        (match_test "XINT (op, 1) == UNSPECV_VZEROUPPER")))
 
+;; Return true if OP is an addsub vec_merge operation
+(define_predicate "addsub_vm_operator"
+  (match_code "vec_merge")
+{
+  rtx op0, op1;
+  int swapped;
+  HOST_WIDE_INT mask;
+  int nunits, elt;
+
+  op0 = XEXP (op, 0);
+  op1 = XEXP (op, 1);
+
+  /* Sanity check.  */
+  if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
+    swapped = 0;
+  else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
+    swapped = 1;
+  else
+    gcc_unreachable ();
+
+  mask = INTVAL (XEXP (op, 2));
+  nunits = GET_MODE_NUNITS (mode);
+
+  for (elt = 0; elt < nunits; elt++)
+    {
+      /* bit clear: take from op0, set: take from op1  */
+      int bit = !(mask & (HOST_WIDE_INT_1U << elt));
+
+      if (bit != ((elt & 1) ^ swapped))
+        return false;
+    }
+
+  return true;
+})
+
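Editorial illustration (not part of the patch): with RTL vec_merge, a set
mask bit selects the element from the first operand.  For a 4-element mode
the predicate therefore accepts exactly mask 5 (0b0101) when the minus RTX
comes first and mask 10 (0b1010) when the plus RTX comes first.  A plain-C
mirror of the loop above (addsub_mask_ok is a hypothetical name):

#include <stdbool.h>
#include <stdio.h>

/* SWAPPED is 0 when the minus RTX is the first vec_merge operand,
   1 when the plus RTX is first, as in the predicate.  */
static bool
addsub_mask_ok (long mask, int nunits, int swapped)
{
  for (int elt = 0; elt < nunits; elt++)
    {
      int bit = !(mask & (1L << elt));  /* clear bit: element from op1 */

      if (bit != ((elt & 1) ^ swapped))
        return false;
    }
  return true;
}

int
main (void)
{
  printf ("%d %d\n", addsub_mask_ok (5, 4, 0), addsub_mask_ok (10, 4, 1)); /* 1 1 */
  printf ("%d %d\n", addsub_mask_ok (10, 4, 0), addsub_mask_ok (6, 4, 0)); /* 0 0 */
  return 0;
}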
+;; Return true if OP is an addsub vec_select/vec_concat operation
+(define_predicate "addsub_vs_operator"
+  (and (match_code "vec_select")
+       (match_code "vec_concat" "0"))
+{
+  rtx op0, op1;
+  bool swapped;
+  int nunits, elt;
+
+  op0 = XEXP (XEXP (op, 0), 0);
+  op1 = XEXP (XEXP (op, 0), 1);
+
+  /* Sanity check.  */
+  if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
+    swapped = false;
+  else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
+    swapped = true;
+  else
+    gcc_unreachable ();
+
+  nunits = GET_MODE_NUNITS (mode);
+  if (XVECLEN (XEXP (op, 1), 0) != nunits)
+    return false;
+
+  /* We already checked that permutation is suitable for addsub,
+     so only look at the first element of the parallel.  */
+  elt = INTVAL (XVECEXP (XEXP (op, 1), 0, 0));
+  return elt == (swapped ? nunits : 0);
+})
+
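Editorial illustration (not part of the patch): the vec_select/vec_concat
form glues the subtraction and addition results into one double-width
vector and picks alternating lanes from the two halves.  A plain-C sketch
for a 4-lane mode with the minus result first:

#include <stdio.h>

int
main (void)
{
  float minus[4] = { -9, -18, -27, -36 };   /* x - y */
  float plus[4]  = { 11,  22,  33,  44 };   /* x + y */
  float concat[8], result[4];
  int sel[4] = { 0, 5, 2, 7 };              /* selection accepted above */

  for (int i = 0; i < 4; i++)
    {
      concat[i] = minus[i];                 /* vec_concat, minus half first */
      concat[i + 4] = plus[i];
    }
  for (int i = 0; i < 4; i++)
    result[i] = concat[sel[i]];             /* vec_select */

  printf ("%g %g %g %g\n", result[0], result[1], result[2], result[3]);
  /* -9 22 -27 44: exactly the addsub pattern.  */
  return 0;
}

With the plus result first, a suitable selection instead starts at index
nunits (e.g. { 4, 1, 6, 3 }), which is the swapped case the predicate's
final test distinguishes.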
+;; Return true if OP is a parallel for an addsub vec_select.
+(define_predicate "addsub_vs_parallel"
+  (and (match_code "parallel")
+       (match_code "const_int" "a"))
+{
+  int nelt = XVECLEN (op, 0);
+  int elt, i;
+
+  if (nelt < 2)
+    return false;
+
+  /* Check that the permutation is suitable for addsub.
+     For example, { 0 9 2 11 4 13 6 15 } or { 8 1 10 3 12 5 14 7 }.  */
+  elt = INTVAL (XVECEXP (op, 0, 0));
+  if (elt == 0)
+    {
+      for (i = 1; i < nelt; ++i)
+        if (INTVAL (XVECEXP (op, 0, i)) != (i + (i & 1) * nelt))
+          return false;
+    }
+  else if (elt == nelt)
+    {
+      for (i = 1; i < nelt; ++i)
+        if (INTVAL (XVECEXP (op, 0, i)) != (elt + i - (i & 1) * nelt))
+          return false;
+    }
+  else
+    return false;
+
+  return true;
+})
+
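Editorial illustration (not part of the patch): the two index formulas
generate exactly the permutations quoted in the comment, as a quick
plain-C check shows:

#include <stdio.h>

int
main (void)
{
  int nelt = 8;

  for (int i = 0; i < nelt; i++)
    printf ("%d ", i + (i & 1) * nelt);        /* 0 9 2 11 4 13 6 15 */
  printf ("\n");
  for (int i = 0; i < nelt; i++)
    printf ("%d ", nelt + i - (i & 1) * nelt); /* 8 1 10 3 12 5 14 7 */
  printf ("\n");
  return 0;
}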
 ;; Return true if OP is a parallel for a vbroadcast permute.
 (define_predicate "avx_vbroadcast_operand"
   (and (match_code "parallel")
        (match_code "const_int" "a"))
gcc/config/i386/sse.md:
@@ -487,10 +487,12 @@
(V4SI "v4di") (V8SI "v8di") (V16SI "v16di")])
(define_mode_attr ssedoublemode
[(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
(V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
(V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
(V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
[(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
(V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
(V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
(V4HI "V4SI") (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
(V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
(V4DI "V8DI") (V8DI "V16DI")])
(define_mode_attr ssebytemode
[(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
@@ -2021,43 +2023,11 @@
(define_insn "avx_addsubv4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_merge:V4DF
(plus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
(minus:V4DF (match_dup 1) (match_dup 2))
(const_int 10)))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_insn "*avx_addsubv4df3_1"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_select:V4DF
(vec_concat:V8DF
(minus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
(plus:V4DF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_insn "*avx_addsubv4df3_1s"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_select:V4DF
(vec_concat:V8DF
(minus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
(plus:V4DF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
(plus:V4DF (match_dup 1) (match_dup 2))
(const_int 5)))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
@@ -2067,49 +2037,11 @@
(define_insn "sse3_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(plus:V2DF
(match_operand:V2DF 1 "register_operand" "0,x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
(minus:V2DF (match_dup 1) (match_dup 2))
(const_int 2)))]
"TARGET_SSE3"
"@
addsubpd\t{%2, %0|%0, %2}
vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
(define_insn "*sse3_addsubv2df3_1"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_select:V2DF
(vec_concat:V4DF
(minus:V2DF
(match_operand:V2DF 1 "register_operand" "0,x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
(plus:V2DF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 3)])))]
"TARGET_SSE3"
"@
addsubpd\t{%2, %0|%0, %2}
vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
(define_insn "*sse3_addsubv2df3_1s"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_select:V2DF
(vec_concat:V4DF
(minus:V2DF
(match_operand:V2DF 1 "register_operand" "0,x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
(plus:V2DF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 3)])))]
(plus:V2DF (match_dup 1) (match_dup 2))
(const_int 1)))]
"TARGET_SSE3"
"@
addsubpd\t{%2, %0|%0, %2}
@@ -2123,47 +2055,11 @@
(define_insn "avx_addsubv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_merge:V8SF
(plus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
(minus:V8SF (match_dup 1) (match_dup 2))
(const_int 170)))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "*avx_addsubv8sf3_1"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_select:V8SF
(vec_concat:V16SF
(minus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
(plus:V8SF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 9)
(const_int 2) (const_int 11)
(const_int 4) (const_int 13)
(const_int 6) (const_int 15)])))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "*avx_addsubv8sf3_1s"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_select:V8SF
(vec_concat:V16SF
(minus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
(plus:V8SF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 9)
(const_int 2) (const_int 11)
(const_int 4) (const_int 13)
(const_int 6) (const_int 15)])))]
(plus:V8SF (match_dup 1) (match_dup 2))
(const_int 85)))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
@@ -2173,31 +2069,11 @@
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
(plus:V4SF
(match_operand:V4SF 1 "register_operand" "0,x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
(minus:V4SF (match_dup 1) (match_dup 2))
(const_int 10)))]
"TARGET_SSE3"
"@
addsubps\t{%2, %0|%0, %2}
vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "prefix" "orig,vex")
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
(define_insn "*sse3_addsubv4sf3_1"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_select:V4SF
(vec_concat:V8SF
(minus:V4SF
(match_operand:V4SF 1 "register_operand" "0,x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
(plus:V4SF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
(plus:V4SF (match_dup 1) (match_dup 2))
(const_int 5)))]
"TARGET_SSE3"
"@
addsubps\t{%2, %0|%0, %2}
@@ -2208,25 +2084,123 @@
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
(define_insn "*sse3_addsubv4sf3_1s"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_select:V4SF
(vec_concat:V8SF
(minus:V4SF
(match_operand:V4SF 1 "register_operand" "0,x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
(plus:V4SF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
"TARGET_SSE3"
"@
addsubps\t{%2, %0|%0, %2}
vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "prefix" "orig,vex")
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+       (match_operator:VF_128_256 6 "addsub_vm_operator"
+         [(minus:VF_128_256
+            (match_operand:VF_128_256 1 "register_operand")
+            (match_operand:VF_128_256 2 "nonimmediate_operand"))
+          (plus:VF_128_256
+            (match_operand:VF_128_256 3 "nonimmediate_operand")
+            (match_operand:VF_128_256 4 "nonimmediate_operand"))
+          (match_operand 5 "const_int_operand")]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+          && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+       (vec_merge:VF_128_256
+         (minus:VF_128_256 (match_dup 1) (match_dup 2))
+         (plus:VF_128_256 (match_dup 1) (match_dup 2))
+         (match_dup 5)))])
+
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+       (match_operator:VF_128_256 6 "addsub_vm_operator"
+         [(plus:VF_128_256
+            (match_operand:VF_128_256 1 "nonimmediate_operand")
+            (match_operand:VF_128_256 2 "nonimmediate_operand"))
+          (minus:VF_128_256
+            (match_operand:VF_128_256 3 "register_operand")
+            (match_operand:VF_128_256 4 "nonimmediate_operand"))
+          (match_operand 5 "const_int_operand")]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+          && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+       (vec_merge:VF_128_256
+         (minus:VF_128_256 (match_dup 3) (match_dup 4))
+         (plus:VF_128_256 (match_dup 3) (match_dup 4))
+         (match_dup 5)))]
+{
+  /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
+  operands[5]
+    = GEN_INT (~INTVAL (operands[5])
+              & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
+})
+
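Editorial illustration (not part of the patch): complementing the mask
within the low nunits bits maps each plus-first mask to its minus-first
counterpart, e.g. 10 -> 5 for V4SF, 2 -> 1 for V2DF and 170 -> 85 for
V8SF (negate_mask is a hypothetical name):

#include <stdio.h>

static long
negate_mask (long mask, int nunits)
{
  return ~mask & ((1L << nunits) - 1);
}

int
main (void)
{
  printf ("%ld %ld %ld\n",
          negate_mask (10, 4),    /* 5 */
          negate_mask (2, 2),     /* 1 */
          negate_mask (170, 8));  /* 85 */
  return 0;
}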
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+       (match_operator:VF_128_256 7 "addsub_vs_operator"
+         [(vec_concat:<ssedoublemode>
+            (minus:VF_128_256
+              (match_operand:VF_128_256 1 "register_operand")
+              (match_operand:VF_128_256 2 "nonimmediate_operand"))
+            (plus:VF_128_256
+              (match_operand:VF_128_256 3 "nonimmediate_operand")
+              (match_operand:VF_128_256 4 "nonimmediate_operand")))
+          (match_parallel 5 "addsub_vs_parallel"
+            [(match_operand 6 "const_int_operand")])]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+          && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+       (vec_merge:VF_128_256
+         (minus:VF_128_256 (match_dup 1) (match_dup 2))
+         (plus:VF_128_256 (match_dup 1) (match_dup 2))
+         (match_dup 5)))]
+{
+  int i, nelt = XVECLEN (operands[5], 0);
+  HOST_WIDE_INT ival = 0;
+
+  for (i = 0; i < nelt; i++)
+    if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
+      ival |= HOST_WIDE_INT_1 << i;
+
+  operands[5] = GEN_INT (ival);
+})
+
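Editorial illustration (not part of the patch): the preparation statement
turns the selection parallel into a vec_merge mask by setting a bit for
every index that points into the minus half of the vec_concat, i.e. every
index below nunits; { 0, 5, 2, 7 } becomes 0b0101 = 5 (parallel_to_mask is
a hypothetical name):

#include <stdio.h>

static long
parallel_to_mask (const int *sel, int nelt, int nunits)
{
  long ival = 0;

  for (int i = 0; i < nelt; i++)
    if (sel[i] < nunits)       /* index refers to the minus half */
      ival |= 1L << i;
  return ival;
}

int
main (void)
{
  int sel[4] = { 0, 5, 2, 7 };

  printf ("%ld\n", parallel_to_mask (sel, 4, 4));  /* 5 */
  return 0;
}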
+(define_split
+  [(set (match_operand:VF_128_256 0 "register_operand")
+       (match_operator:VF_128_256 7 "addsub_vs_operator"
+         [(vec_concat:<ssedoublemode>
+            (plus:VF_128_256
+              (match_operand:VF_128_256 1 "nonimmediate_operand")
+              (match_operand:VF_128_256 2 "nonimmediate_operand"))
+            (minus:VF_128_256
+              (match_operand:VF_128_256 3 "register_operand")
+              (match_operand:VF_128_256 4 "nonimmediate_operand")))
+          (match_parallel 5 "addsub_vs_parallel"
+            [(match_operand 6 "const_int_operand")])]))]
+  "TARGET_SSE3
+   && can_create_pseudo_p ()
+   && ((rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+       || (rtx_equal_p (operands[1], operands[4])
+          && rtx_equal_p (operands[2], operands[3])))"
+  [(set (match_dup 0)
+       (vec_merge:VF_128_256
+         (minus:VF_128_256 (match_dup 3) (match_dup 4))
+         (plus:VF_128_256 (match_dup 3) (match_dup 4))
+         (match_dup 5)))]
+{
+  int i, nelt = XVECLEN (operands[5], 0);
+  HOST_WIDE_INT ival = 0;
+
+  for (i = 0; i < nelt; i++)
+    if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
+      ival |= HOST_WIDE_INT_1 << i;
+
+  operands[5] = GEN_INT (ival);
+})
+
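This is the symmetric case: the minus result is now the second half of the
vec_concat, so mask bits are set for indices greater than or equal to
nunits.  For the V4SF selector { 4, 1, 6, 3 } (exercised by foo3/foo4 in
the tests below), indices 4 and 6 point into the minus half, so ival again
becomes 0b0101 = 5 and the same canonical vec_merge form is emitted.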
(define_insn "avx_h<plusminus_insn>v4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
......
gcc/testsuite/gcc.target/i386/pr66560-1.c (new file):

/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -msse4" } */
typedef float v4sf __attribute__((vector_size(16)));
typedef int v4si __attribute__((vector_size(16)));
v4sf foo1 (v4sf x, v4sf y)
{
v4sf tem0 = x - y;
v4sf tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
}
v4sf foo2 (v4sf x, v4sf y)
{
v4sf tem0 = x - y;
v4sf tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
}
v4sf foo3 (v4sf x, v4sf y)
{
v4sf tem0 = x + y;
v4sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
}
v4sf foo4 (v4sf x, v4sf y)
{
v4sf tem0 = y + x;
v4sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
}
/* { dg-final { scan-assembler-times "addsubps" 4 } } */
gcc/testsuite/gcc.target/i386/pr66560-2.c (new file):

/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -msse4" } */
typedef double v2df __attribute__((vector_size(16)));
typedef long long v2di __attribute__((vector_size(16)));
v2df foo1 (v2df x, v2df y)
{
v2df tem0 = x - y;
v2df tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
}
v2df foo2 (v2df x, v2df y)
{
v2df tem0 = x - y;
v2df tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
}
v2df foo3 (v2df x, v2df y)
{
v2df tem0 = x + y;
v2df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
}
v2df foo4 (v2df x, v2df y)
{
v2df tem0 = y + x;
v2df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
}
/* { dg-final { scan-assembler-times "addsubpd" 4 } } */
gcc/testsuite/gcc.target/i386/pr66560-3.c (new file):

/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
typedef float v8sf __attribute__((vector_size(32)));
typedef int v8si __attribute__((vector_size(32)));
v8sf foo1 (v8sf x, v8sf y)
{
v8sf tem0 = x - y;
v8sf tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
}
v8sf foo2 (v8sf x, v8sf y)
{
v8sf tem0 = x - y;
v8sf tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
}
v8sf foo3 (v8sf x, v8sf y)
{
v8sf tem0 = x + y;
v8sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
}
v8sf foo4 (v8sf x, v8sf y)
{
v8sf tem0 = y + x;
v8sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
}
/* { dg-final { scan-assembler-times "vaddsubps" 4 } } */
gcc/testsuite/gcc.target/i386/pr66560-4.c (new file):

/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
typedef double v4df __attribute__((vector_size(32)));
typedef long long v4di __attribute__((vector_size(32)));
v4df foo1 (v4df x, v4df y)
{
v4df tem0 = x - y;
v4df tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
}
v4df foo2 (v4df x, v4df y)
{
v4df tem0 = x - y;
v4df tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
}
v4df foo3 (v4df x, v4df y)
{
v4df tem0 = x + y;
v4df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
}
v4df foo4 (v4df x, v4df y)
{
v4df tem0 = y + x;
v4df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
}
/* { dg-final { scan-assembler-times "vaddsubpd" 4 } } */
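As a usage note (assuming a compiler containing this patch): building any
of these tests with its stated options, e.g. "gcc -O2 -msse4 -S
pr66560-1.c", should leave exactly four addsub instructions in the
generated assembly, one per function, which is what the scan-assembler
directives verify.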