Commit 5fd816e6 by Uros Bizjak

re PR target/56766 (Fails to combine (vec_select (vec_concat ...)) to (vec_merge ...))

	PR target/56766
	* config/i386/sse.md (*avx_addsubv4df3_1): New insn pattern.
	(*avx_addsubv4df3_1s): Ditto.
	(*sse3_addsubv2df3_1): Ditto.
	(*sse3_addsubv2df3_1s): Ditto.
	(*avx_addsubv8sf3_1): Ditto.
	(*avx_addsubv8sf3_1s): Ditto.
	(*sse3_addsubv4sf3_1): Ditto.
	(*sse3_addsubv4sf3_1s): Ditto.

testsuite/ChangeLog:

	PR target/56766
	* gcc.target/i386/pr56766-1.c: New test.
	* gcc.target/i386/pr56766-2.c: Ditto.

From-SVN: r224527
parent 39e99359
......@@ -2032,6 +2032,38 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
;; V4DF add/sub written as a vec_select over a vec_concat.
;; The V8DF concatenation holds (op1 - op2) in elements 0-3 and
;; (op1 + op2) in elements 4-7, so selecting 0, 5, 2, 7 takes result
;; elements 0 and 2 from the difference and 1 and 3 from the sum.
;; Enabled under TARGET_AVX; emits one three-operand vaddsubpd.
(define_insn "*avx_addsubv4df3_1"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_select:V4DF
(vec_concat:V8DF
(minus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
(plus:V4DF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
;; V4DF add/sub as a vec_select over a vec_concat, with the operands of
;; the plus listed in swapped order: (plus op2 op1) instead of
;; (plus op1 op2).  The minus keeps (op1 - op2).
;; Elements 0-3 of the V8DF concatenation are the difference and 4-7 the
;; sum, so selecting 0, 5, 2, 7 takes result elements 0 and 2 from the
;; difference and 1 and 3 from the sum.
;; NOTE(review): presumably needed because plus is commutative and the
;; RTL being matched may present its operands in either order -- confirm.
;; Enabled under TARGET_AVX; emits one three-operand vaddsubpd.
(define_insn "*avx_addsubv4df3_1s"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_select:V4DF
(vec_concat:V8DF
(minus:V4DF
(match_operand:V4DF 1 "register_operand" "x")
(match_operand:V4DF 2 "nonimmediate_operand" "xm"))
(plus:V4DF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
"TARGET_AVX"
"vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_insn "sse3_addsubv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
......@@ -2050,6 +2082,44 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
;; V2DF add/sub written as a vec_select over a vec_concat, as opposed to
;; the vec_merge form used by sse3_addsubv2df3.
;; The V4DF concatenation holds (op1 - op2) in elements 0-1 and
;; (op1 + op2) in elements 2-3, so selecting 0 and 3 takes result
;; element 0 from the difference and element 1 from the sum.
;; Enabled under TARGET_SSE3.  Alternative 0 (isa noavx) ties operand 1
;; to the destination through the "0" constraint and emits two-operand
;; addsubpd; alternative 1 (isa avx) emits three-operand vaddsubpd.
(define_insn "*sse3_addsubv2df3_1"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_select:V2DF
(vec_concat:V4DF
(minus:V2DF
(match_operand:V2DF 1 "register_operand" "0,x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
(plus:V2DF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 3)])))]
"TARGET_SSE3"
"@
addsubpd\t{%2, %0|%0, %2}
vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
;; V2DF add/sub as a vec_select over a vec_concat, with the operands of
;; the plus listed in swapped order: (plus op2 op1) instead of
;; (plus op1 op2).  The minus keeps (op1 - op2).
;; Elements 0-1 of the V4DF concatenation are the difference and 2-3 the
;; sum, so selecting 0 and 3 takes result element 0 from the difference
;; and element 1 from the sum.
;; NOTE(review): presumably needed because plus is commutative and the
;; RTL being matched may present its operands in either order -- confirm.
;; Enabled under TARGET_SSE3.  Alternative 0 (isa noavx) ties operand 1
;; to the destination through the "0" constraint and emits two-operand
;; addsubpd; alternative 1 (isa avx) emits three-operand vaddsubpd.
(define_insn "*sse3_addsubv2df3_1s"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_select:V2DF
(vec_concat:V4DF
(minus:V2DF
(match_operand:V2DF 1 "register_operand" "0,x")
(match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
(plus:V2DF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 3)])))]
"TARGET_SSE3"
"@
addsubpd\t{%2, %0|%0, %2}
vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V2DF")])
(define_insn "avx_addsubv8sf3"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_merge:V8SF
......@@ -2064,6 +2134,42 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
;; V8SF add/sub written as a vec_select over a vec_concat, as opposed to
;; the vec_merge form used by avx_addsubv8sf3.
;; The V16SF concatenation holds (op1 - op2) in elements 0-7 and
;; (op1 + op2) in elements 8-15, so selecting 0, 9, 2, 11, 4, 13, 6, 15
;; takes the even result elements from the difference and the odd ones
;; from the sum.
;; Enabled under TARGET_AVX; emits one three-operand vaddsubps.
(define_insn "*avx_addsubv8sf3_1"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_select:V8SF
(vec_concat:V16SF
(minus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
(plus:V8SF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 9)
(const_int 2) (const_int 11)
(const_int 4) (const_int 13)
(const_int 6) (const_int 15)])))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
;; V8SF add/sub as a vec_select over a vec_concat, with the operands of
;; the plus listed in swapped order: (plus op2 op1) instead of
;; (plus op1 op2).  The minus keeps (op1 - op2).
;; Elements 0-7 of the V16SF concatenation are the difference and 8-15
;; the sum, so selecting 0, 9, 2, 11, 4, 13, 6, 15 takes the even result
;; elements from the difference and the odd ones from the sum.
;; NOTE(review): presumably needed because plus is commutative and the
;; RTL being matched may present its operands in either order -- confirm.
;; Enabled under TARGET_AVX; emits one three-operand vaddsubps.
(define_insn "*avx_addsubv8sf3_1s"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_select:V8SF
(vec_concat:V16SF
(minus:V8SF
(match_operand:V8SF 1 "register_operand" "x")
(match_operand:V8SF 2 "nonimmediate_operand" "xm"))
(plus:V8SF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 9)
(const_int 2) (const_int 11)
(const_int 4) (const_int 13)
(const_int 6) (const_int 15)])))]
"TARGET_AVX"
"vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseadd")
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
......@@ -2082,6 +2188,46 @@
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
;; V4SF add/sub written as a vec_select over a vec_concat, as opposed to
;; the vec_merge form used by sse3_addsubv4sf3.
;; The V8SF concatenation holds (op1 - op2) in elements 0-3 and
;; (op1 + op2) in elements 4-7, so selecting 0, 5, 2, 7 takes result
;; elements 0 and 2 from the difference and 1 and 3 from the sum.
;; Enabled under TARGET_SSE3.  Alternative 0 (isa noavx) ties operand 1
;; to the destination through the "0" constraint and emits two-operand
;; addsubps; alternative 1 (isa avx) emits three-operand vaddsubps.
(define_insn "*sse3_addsubv4sf3_1"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_select:V4SF
(vec_concat:V8SF
(minus:V4SF
(match_operand:V4SF 1 "register_operand" "0,x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
(plus:V4SF (match_dup 1) (match_dup 2)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
"TARGET_SSE3"
"@
addsubps\t{%2, %0|%0, %2}
vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "prefix" "orig,vex")
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
;; V4SF add/sub as a vec_select over a vec_concat, with the operands of
;; the plus listed in swapped order: (plus op2 op1) instead of
;; (plus op1 op2).  The minus keeps (op1 - op2).
;; Elements 0-3 of the V8SF concatenation are the difference and 4-7 the
;; sum, so selecting 0, 5, 2, 7 takes result elements 0 and 2 from the
;; difference and 1 and 3 from the sum.
;; NOTE(review): presumably needed because plus is commutative and the
;; RTL being matched may present its operands in either order -- confirm.
;; Enabled under TARGET_SSE3.  Alternative 0 (isa noavx) ties operand 1
;; to the destination through the "0" constraint and emits two-operand
;; addsubps; alternative 1 (isa avx) emits three-operand vaddsubps.
(define_insn "*sse3_addsubv4sf3_1s"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_select:V4SF
(vec_concat:V8SF
(minus:V4SF
(match_operand:V4SF 1 "register_operand" "0,x")
(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
(plus:V4SF (match_dup 2) (match_dup 1)))
(parallel [(const_int 0) (const_int 5)
(const_int 2) (const_int 7)])))]
"TARGET_SSE3"
"@
addsubps\t{%2, %0|%0, %2}
vaddsubps\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd")
(set_attr "prefix" "orig,vex")
(set_attr "prefix_rep" "1,*")
(set_attr "mode" "V4SF")])
(define_insn "avx_h<plusminus_insn>v4df3"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_concat:V4DF
......
/* PR target/56766 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
typedef float v4sf __attribute__((vector_size(16)));
typedef int v4si __attribute__((vector_size(16)));

/* Build a 4 x float vector whose elements 0 and 2 are x - y and whose
   elements 1 and 3 are x + y.  Shuffle indices 0-3 pick from the first
   shuffle operand (the difference), 4-7 from the second (the sum).  */
v4sf
foo_v4sf (v4sf x, v4sf y)
{
  v4sf diff = x - y;
  v4sf sum = x + y;

  return __builtin_shuffle (diff, sum, (v4si) { 0, 5, 2, 7 });
}
typedef float v8sf __attribute__((vector_size(32)));
typedef int v8si __attribute__((vector_size(32)));

/* Build an 8 x float vector whose even elements are x - y and whose odd
   elements are x + y.  Shuffle indices 0-7 pick from the first shuffle
   operand (the difference), 8-15 from the second (the sum).  */
v8sf
foo_v8sf (v8sf x, v8sf y)
{
  v8sf diff = x - y;
  v8sf sum = x + y;

  return __builtin_shuffle (diff, sum,
                            (v8si) { 0, 9, 2, 11, 4, 13, 6, 15 });
}
typedef double v2df __attribute__((vector_size(16)));
typedef long long v2di __attribute__((vector_size(16)));

/* Build a 2 x double vector whose element 0 is x - y and whose element 1
   is x + y.  Shuffle indices 0-1 pick from the first shuffle operand
   (the difference), 2-3 from the second (the sum).  */
v2df
foo_v2df (v2df x, v2df y)
{
  v2df diff = x - y;
  v2df sum = x + y;

  return __builtin_shuffle (diff, sum, (v2di) { 0, 3 });
}
typedef double v4df __attribute__((vector_size(32)));
typedef long long v4di __attribute__((vector_size(32)));

/* Build a 4 x double vector whose elements 0 and 2 are x - y and whose
   elements 1 and 3 are x + y.  Shuffle indices 0-3 pick from the first
   shuffle operand (the difference), 4-7 from the second (the sum).  */
v4df
foo_v4df (v4df x, v4df y)
{
  v4df diff = x - y;
  v4df sum = x + y;

  return __builtin_shuffle (diff, sum, (v4di) { 0, 5, 2, 7 });
}
/* { dg-final { scan-assembler-times "vaddsubps" 2 } } */
/* { dg-final { scan-assembler-times "vaddsubpd" 2 } } */
/* PR target/56766 */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -mavx" } */
/* Update four floats in place with alternating operations: even indices
   become p[i] - q[i], odd indices become p[i] + q[i].  Kept as
   straight-line statements rather than a loop.  */
void
test_v4sf (float * __restrict__ p, float * __restrict q)
{
  p[0] -= q[0];
  p[1] += q[1];
  p[2] -= q[2];
  p[3] += q[3];
}
/* Update eight floats in place with alternating operations: even indices
   become p[i] - q[i], odd indices become p[i] + q[i].  Kept as
   straight-line statements rather than a loop.  */
void
test_v8sf (float * __restrict__ p, float * __restrict q)
{
  p[0] -= q[0];
  p[1] += q[1];
  p[2] -= q[2];
  p[3] += q[3];
  p[4] -= q[4];
  p[5] += q[5];
  p[6] -= q[6];
  p[7] += q[7];
}
/* Update two doubles in place: p[0] becomes p[0] - q[0] and p[1] becomes
   p[1] + q[1].  */
void
test_v2df (double * __restrict__ p, double * __restrict q)
{
  p[0] -= q[0];
  p[1] += q[1];
}
/* Update four doubles in place with alternating operations: even indices
   become p[i] - q[i], odd indices become p[i] + q[i].  Kept as
   straight-line statements rather than a loop.  */
void
test_v4df (double * __restrict__ p, double * __restrict q)
{
  p[0] -= q[0];
  p[1] += q[1];
  p[2] -= q[2];
  p[3] += q[3];
}
/* { dg-final { scan-assembler-times "vaddsubps" 2 } } */
/* { dg-final { scan-assembler-times "vaddsubpd" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment