Commit 7121e32b by Uros Bizjak Committed by Uros Bizjak

re PR target/66560 (Fails to generate ADDSUBPS)

	PR target/66560
	* config/i386/predicates.md (addsub_vm_operator): New predicate.
	(addsub_vs_operator): Ditto.
	(addsub_vs_parallel): Ditto.
	* config/i386/sse.md (ssedoublemode): Add V4SF and V2DF modes.
	(avx_addsubv4df3, avx_addsubv8sf3, sse3_addsubv2df3, sse3_addsubv4sf3):
	Put minus RTX before plus and adjust vec_merge selector.
	(*avx_addsubv4df3_1, *avx_addsubv4df3_1s, *sse3_addsubv2df3_1)
	(*sse_addsubv2df3_1s, *avx_addsubv8sf3_1, *avx_addsubv8sf3_1s)
	(*sse3_addsubv4sf3_1, *sse_addsubv4sf3_1s): Remove insn patterns.
	(addsub vec_merge splitters): New combiner splitters.
	(addsub vec_select/vec_concat splitters): Ditto.

testsuite/ChangeLog:

	PR target/66560
	* gcc.target/i386/pr66560-1.c: New test.
	* gcc.target/i386/pr66560-2.c: Ditto.
	* gcc.target/i386/pr66560-3.c: Ditto.
	* gcc.target/i386/pr66560-4.c: Ditto.

From-SVN: r224824
parent 31575896
2015-06-23 Uros Bizjak <ubizjak@gmail.com>
PR target/66560
* config/i386/predicates.md (addsub_vm_operator): New predicate.
(addsub_vs_operator): Ditto.
(addsub_vs_parallel): Ditto.
* config/i386/sse.md (ssedoublemode): Add V4SF and V2DF modes.
(avx_addsubv4df3, avx_addsubv8sf3, sse3_addsubv2df3, sse3_addsubv4sf3):
Put minus RTX before plus and adjust vec_merge selector.
(*avx_addsubv4df3_1, *avx_addsubv4df3_1s, *sse3_addsubv2df3_1)
(*sse_addsubv2df3_1s, *avx_addsubv8sf3_1, *avx_addsubv8sf3_1s)
(*sse3_addsubv4sf3_1, *sse_addsubv4sf3_1s): Remove insn patterns.
(addsub vec_merge splitters): New combiner splitters.
(addsub vec_select/vec_concat splitters): Ditto.
2015-06-23 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/66449
......
...@@ -1426,8 +1426,105 @@ ...@@ -1426,8 +1426,105 @@
(and (match_code "unspec_volatile")
(match_test "XINT (op, 1) == UNSPECV_VZEROUPPER")))

;; Return true if OP is an addsub vec_merge operation
(define_predicate "addsub_vm_operator"
(match_code "vec_merge")
{
rtx op0, op1;
int swapped;
HOST_WIDE_INT mask;
int nunits, elt;
/* The two merged arms.  */
op0 = XEXP (op, 0);
op1 = XEXP (op, 1);
/* Sanity check.  One arm must be a MINUS and the other a PLUS;
   SWAPPED records in which order they appear.  */
if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
swapped = 0;
else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
swapped = 1;
else
gcc_unreachable ();
/* Verify that the merge selector strictly alternates between the
   two arms on even/odd lanes, in the orientation implied by
   SWAPPED — the shape an addsub instruction computes.  */
mask = INTVAL (XEXP (op, 2));
nunits = GET_MODE_NUNITS (mode);
for (elt = 0; elt < nunits; elt++)
{
/* bit clear: take from op0, set: take from op1 */
int bit = !(mask & (HOST_WIDE_INT_1U << elt));
if (bit != ((elt & 1) ^ swapped))
return false;
}
return true;
})
;; Return true if OP is an addsub vec_select/vec_concat operation
(define_predicate "addsub_vs_operator"
(and (match_code "vec_select")
(match_code "vec_concat" "0"))
{
rtx op0, op1;
bool swapped;
int nunits, elt;
/* The two vectors being concatenated.  */
op0 = XEXP (XEXP (op, 0), 0);
op1 = XEXP (XEXP (op, 0), 1);
/* Sanity check.  One half must be a MINUS and the other a PLUS;
   SWAPPED records which one comes first in the concatenation.  */
if (GET_CODE (op0) == MINUS && GET_CODE (op1) == PLUS)
swapped = false;
else if (GET_CODE (op0) == PLUS && GET_CODE (op1) == MINUS)
swapped = true;
else
gcc_unreachable ();
/* The selection parallel must pick exactly one full vector's worth
   of elements.  */
nunits = GET_MODE_NUNITS (mode);
if (XVECLEN (XEXP (op, 1), 0) != nunits)
return false;
/* We already checked that permutation is suitable for addsub,
   so only look at the first element of the parallel.  */
elt = INTVAL (XVECEXP (XEXP (op, 1), 0, 0));
return elt == (swapped ? nunits : 0);
})
;; Return true if OP is a parallel for an addsub vec_select.
(define_predicate "addsub_vs_parallel"
(and (match_code "parallel")
(match_code "const_int" "a"))
{
int nelt = XVECLEN (op, 0);
int elt, i;
/* An addsub permutation needs at least two lanes.  */
if (nelt < 2)
return false;
/* Check that the permutation is suitable for addsub.
   For example, { 0 9 2 11 4 13 6 15 } or { 8 1 10 3 12 5 14 7 }. */
elt = INTVAL (XVECEXP (op, 0, 0));
if (elt == 0)
{
/* Even positions select from the first vector (index i), odd
   positions from the second (index i + nelt).  */
for (i = 1; i < nelt; ++i)
if (INTVAL (XVECEXP (op, 0, i)) != (i + (i & 1) * nelt))
return false;
}
else if (elt == nelt)
{
/* Even positions select from the second vector (index nelt + i),
   odd positions from the first (index i).  */
for (i = 1; i < nelt; ++i)
if (INTVAL (XVECEXP (op, 0, i)) != (elt + i - (i & 1) * nelt))
return false;
}
else
return false;
return true;
})
;; Return true if OP is a parallel for a vbroadcast permute.
(define_predicate "avx_vbroadcast_operand"
(and (match_code "parallel")
(match_code "const_int" "a"))
......
2015-06-23 Uros Bizjak <ubizjak@gmail.com>
PR target/66560
* gcc.target/i386/pr66560-1.c: New test.
* gcc.target/i386/pr66560-2.c: Ditto.
* gcc.target/i386/pr66560-3.c: Ditto.
* gcc.target/i386/pr66560-4.c: Ditto.
2015-06-23 Thomas Schwinge <thomas@codesourcery.com>
* gcc.target/nvptx/nvptx.exp: New file.
......
/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -msse4" } */
typedef float v4sf __attribute__((vector_size(16)));
typedef int v4si __attribute__((vector_size(16)));
v4sf foo1 (v4sf x, v4sf y)
{
v4sf tem0 = x - y;
v4sf tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
}
v4sf foo2 (v4sf x, v4sf y)
{
v4sf tem0 = x - y;
v4sf tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v4si) { 0, 5, 2, 7 });
}
v4sf foo3 (v4sf x, v4sf y)
{
v4sf tem0 = x + y;
v4sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
}
v4sf foo4 (v4sf x, v4sf y)
{
v4sf tem0 = y + x;
v4sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4si) { 4, 1, 6, 3 });
}
/* { dg-final { scan-assembler-times "addsubps" 4 } } */
/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -msse4" } */
typedef double v2df __attribute__((vector_size(16)));
typedef long long v2di __attribute__((vector_size(16)));
v2df foo1 (v2df x, v2df y)
{
v2df tem0 = x - y;
v2df tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
}
v2df foo2 (v2df x, v2df y)
{
v2df tem0 = x - y;
v2df tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v2di) { 0, 3 });
}
v2df foo3 (v2df x, v2df y)
{
v2df tem0 = x + y;
v2df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
}
v2df foo4 (v2df x, v2df y)
{
v2df tem0 = y + x;
v2df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v2di) { 2, 1 });
}
/* { dg-final { scan-assembler-times "addsubpd" 4 } } */
/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
typedef float v8sf __attribute__((vector_size(32)));
typedef int v8si __attribute__((vector_size(32)));
v8sf foo1 (v8sf x, v8sf y)
{
v8sf tem0 = x - y;
v8sf tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
}
v8sf foo2 (v8sf x, v8sf y)
{
v8sf tem0 = x - y;
v8sf tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
}
v8sf foo3 (v8sf x, v8sf y)
{
v8sf tem0 = x + y;
v8sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
}
v8sf foo4 (v8sf x, v8sf y)
{
v8sf tem0 = y + x;
v8sf tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v8si) { 8, 1, 10, 3, 12, 5, 14, 7 });
}
/* { dg-final { scan-assembler-times "vaddsubps" 4 } } */
/* PR target/66560 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
typedef double v4df __attribute__((vector_size(32)));
typedef long long v4di __attribute__((vector_size(32)));
v4df foo1 (v4df x, v4df y)
{
v4df tem0 = x - y;
v4df tem1 = x + y;
return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
}
v4df foo2 (v4df x, v4df y)
{
v4df tem0 = x - y;
v4df tem1 = y + x;
return __builtin_shuffle (tem0, tem1, (v4di) { 0, 5, 2, 7 });
}
v4df foo3 (v4df x, v4df y)
{
v4df tem0 = x + y;
v4df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
}
v4df foo4 (v4df x, v4df y)
{
v4df tem0 = y + x;
v4df tem1 = x - y;
return __builtin_shuffle (tem0, tem1, (v4di) { 4, 1, 6, 3 });
}
/* { dg-final { scan-assembler-times "vaddsubpd" 4 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment