Commit c7501e00 by Jakub Jelinek Committed by Jakub Jelinek

* config/i386/sse.md

	(<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>): Rename
	to ...
	(avx512vl_shuf_<shuffletype>32x4_1<mask_name>): ... this.
	(*avx_vperm_broadcast_v4sf): Use v constraint instead of x.  Use
	maybe_evex prefix instead of vex.
	(*avx_vperm_broadcast_<mode>): Use v constraint instead of x.  Handle
	EXT_REX_SSE_REG_P (op0) case in the splitter.

	* gcc.target/i386/avx512vl-vbroadcast-3.c: New test.

From-SVN: r236763
parent ca449d26
2016-05-26 Jakub Jelinek <jakub@redhat.com>
* config/i386/sse.md
(<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>): Rename
to ...
(avx512vl_shuf_<shuffletype>32x4_1<mask_name>): ... this.
(*avx_vperm_broadcast_v4sf): Use v constraint instead of x. Use
maybe_evex prefix instead of vex.
(*avx_vperm_broadcast_<mode>): Use v constraint instead of x. Handle
EXT_REX_SSE_REG_P (op0) case in the splitter.
2016-05-25 Jeff Law <law@redhat.com>
PR tree-optimization/71272
......
......@@ -12399,7 +12399,7 @@
DONE;
})
(define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
(define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
[(set (match_operand:VI4F_256 0 "register_operand" "=v")
(vec_select:VI4F_256
(vec_concat:<ssedoublemode>
......@@ -17283,9 +17283,9 @@
;; If it so happens that the input is in memory, use vbroadcast.
;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
(define_insn "*avx_vperm_broadcast_v4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
[(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
(vec_select:V4SF
(match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
(match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
(match_parallel 2 "avx_vbroadcast_operand"
[(match_operand 3 "const_int_operand" "C,n,n")])))]
"TARGET_AVX"
......@@ -17307,13 +17307,13 @@
[(set_attr "type" "ssemov,ssemov,sselog1")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "0,0,1")
(set_attr "prefix" "vex")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "SF,SF,V4SF")])
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
[(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
[(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
(vec_select:VF_256
(match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
(match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
(match_parallel 2 "avx_vbroadcast_operand"
[(match_operand 3 "const_int_operand" "C,n,n")])))]
"TARGET_AVX"
......@@ -17345,6 +17345,23 @@
/* Shuffle the lane we care about into both lanes of the dest. */
mask = (elt / (<ssescalarnum> / 2)) * 0x11;
if (EXT_REX_SSE_REG_P (op0))
{
/* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
or VSHUFF32X4. */
gcc_assert (<MODE>mode == V8SFmode);
if ((mask & 1) == 0)
emit_insn (gen_avx2_vec_dupv8sf (op0,
gen_lowpart (V4SFmode, op0)));
else
emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
GEN_INT (4), GEN_INT (5),
GEN_INT (6), GEN_INT (7),
GEN_INT (12), GEN_INT (13),
GEN_INT (14), GEN_INT (15)));
DONE;
}
emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
DONE;
}
......
2016-05-26 Jakub Jelinek <jakub@redhat.com>
* gcc.target/i386/avx512vl-vbroadcast-3.c: New test.
2016-05-26 Jiong Wang <jiong.wang@arm.com>
* gcc.target/aarch64/simd/vmul_elem_1.c: Force result variables to be
......
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -mavx512vl -masm=att" } */
/* Vector types used by the functions below.  V1/V2 are the float
   vectors being shuffled; V4/V5 are the int vectors of the same byte
   size that are passed as the index argument of __builtin_shuffle.  */
/* 16-byte vector of float.  */
typedef float V1 __attribute__((vector_size (16)));
/* 32-byte vector of float.  */
typedef float V2 __attribute__((vector_size (32)));
/* 16-byte vector of int (shuffle indices for V1).  */
typedef int V4 __attribute__((vector_size (16)));
/* 32-byte vector of int (shuffle indices for V2).  */
typedef int V5 __attribute__((vector_size (32)));
/* Register-source broadcast of element 0 of a 16-byte float vector.
   X is copied into a variable tied to xmm16, then shuffled with an
   all-zero index vector so every result element is element 0.
   The empty asm statements take A as a "+v" read-write operand;
   presumably they keep the value live in xmm16 on both sides of the
   shuffle so the emitted instruction can be matched by the
   scan-assembler patterns -- confirm against the dg-final lines.  */
void
f1 (V1 x)
{
register V1 a __asm ("xmm16");
a = x;
/* Opaque read-modify-write of A before the shuffle.  */
asm volatile ("" : "+v" (a));
a = __builtin_shuffle (a, (V4) { 0, 0, 0, 0 });
/* Opaque use of the shuffled result.  */
asm volatile ("" : "+v" (a));
}
/* Same shape as a register-source broadcast test, selecting element 1:
   X is copied into a variable tied to xmm16 and shuffled with the index
   vector { 1, 1, 1, 1 }.  The empty "+v" asm statements bracket the
   shuffle (presumably to pin the value in xmm16 -- TODO confirm).  */
void
f2 (V1 x)
{
register V1 a __asm ("xmm16");
a = x;
asm volatile ("" : "+v" (a));
/* Replicate element 1 into all four elements.  */
a = __builtin_shuffle (a, (V4) { 1, 1, 1, 1 });
asm volatile ("" : "+v" (a));
}
/* Register-source broadcast of element 2 of a 16-byte float vector:
   X is copied into a variable tied to xmm16 and shuffled with the index
   vector { 2, 2, 2, 2 }.  The empty "+v" asm statements bracket the
   shuffle (presumably to pin the value in xmm16 -- TODO confirm).  */
void
f3 (V1 x)
{
register V1 a __asm ("xmm16");
a = x;
asm volatile ("" : "+v" (a));
/* Replicate element 2 into all four elements.  */
a = __builtin_shuffle (a, (V4) { 2, 2, 2, 2 });
asm volatile ("" : "+v" (a));
}
/* Register-source broadcast of element 3 of a 16-byte float vector:
   X is copied into a variable tied to xmm16 and shuffled with the index
   vector { 3, 3, 3, 3 }.  The empty "+v" asm statements bracket the
   shuffle (presumably to pin the value in xmm16 -- TODO confirm).  */
void
f4 (V1 x)
{
register V1 a __asm ("xmm16");
a = x;
asm volatile ("" : "+v" (a));
/* Replicate element 3 into all four elements.  */
a = __builtin_shuffle (a, (V4) { 3, 3, 3, 3 });
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 0 of a 16-byte float vector.
   The vector is read through the pointer X and shuffled with an
   all-zero index vector; the result is stored in a variable tied to
   xmm16.  Unlike the by-value variants there is no asm statement before
   the shuffle, so the shuffle input is *X itself rather than a value
   already placed in xmm16.  */
void
f5 (V1 *x)
{
register V1 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V4) { 0, 0, 0, 0 });
/* Opaque "+v" use of the result.  */
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 1 of a 16-byte float vector:
   *X is shuffled with the index vector { 1, 1, 1, 1 } and the result is
   stored in a variable tied to xmm16, followed by an empty asm that
   takes it as a "+v" read-write operand.  */
void
f6 (V1 *x)
{
register V1 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V4) { 1, 1, 1, 1 });
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 2 of a 16-byte float vector:
   *X is shuffled with the index vector { 2, 2, 2, 2 } and the result is
   stored in a variable tied to xmm16, followed by an empty asm that
   takes it as a "+v" read-write operand.  */
void
f7 (V1 *x)
{
register V1 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V4) { 2, 2, 2, 2 });
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 3 of a 16-byte float vector:
   *X is shuffled with the index vector { 3, 3, 3, 3 } and the result is
   stored in a variable tied to xmm16, followed by an empty asm that
   takes it as a "+v" read-write operand.  */
void
f8 (V1 *x)
{
register V1 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V4) { 3, 3, 3, 3 });
asm volatile ("" : "+v" (a));
}
/* Register-source broadcast of element 0 of a 32-byte float vector.
   X is copied into a V2 variable tied to the register named "xmm16",
   then shuffled with an all-zero index vector.
   NOTE(review): the variable is 32 bytes wide although the register is
   spelled xmm16; the scan-assembler patterns in this file look for
   %ymm16 for the 32-byte cases.
   The empty "+v" asm statements bracket the shuffle (presumably to pin
   the value in that register -- TODO confirm).  */
void
f9 (V2 x)
{
register V2 a __asm ("xmm16");
a = x;
asm volatile ("" : "+v" (a));
/* Replicate element 0 into all eight elements.  */
a = __builtin_shuffle (a, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
asm volatile ("" : "+v" (a));
}
/* Register-source broadcast of element 1 of a 32-byte float vector:
   X is copied into a V2 variable tied to the register named "xmm16" and
   shuffled with an index vector of eight 1s.  Index 1 lies in the first
   half of the eight-element vector.  The empty "+v" asm statements
   bracket the shuffle (presumably to pin the value -- TODO confirm).  */
void
f10 (V2 x)
{
register V2 a __asm ("xmm16");
a = x;
asm volatile ("" : "+v" (a));
/* Replicate element 1 into all eight elements.  */
a = __builtin_shuffle (a, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
asm volatile ("" : "+v" (a));
}
/* Register-source broadcast of element 4 of a 32-byte float vector:
   X is copied into a V2 variable tied to the register named "xmm16" and
   shuffled with an index vector of eight 4s.  Index 4 is the first
   element of the second half of the eight-element vector.  The empty
   "+v" asm statements bracket the shuffle (presumably to pin the value
   -- TODO confirm).  */
void
f11 (V2 x)
{
register V2 a __asm ("xmm16");
a = x;
asm volatile ("" : "+v" (a));
/* Replicate element 4 into all eight elements.  */
a = __builtin_shuffle (a, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
asm volatile ("" : "+v" (a));
}
/* Register-source broadcast of element 5 of a 32-byte float vector:
   X is copied into a V2 variable tied to the register named "xmm16" and
   shuffled with an index vector of eight 5s.  Index 5 is the second
   element of the second half of the eight-element vector.  The empty
   "+v" asm statements bracket the shuffle (presumably to pin the value
   -- TODO confirm).  */
void
f12 (V2 x)
{
register V2 a __asm ("xmm16");
a = x;
asm volatile ("" : "+v" (a));
/* Replicate element 5 into all eight elements.  */
a = __builtin_shuffle (a, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 0 of a 32-byte float vector.
   The vector is read through the pointer X and shuffled with an
   all-zero index vector; the result is stored in a V2 variable tied to
   the register named "xmm16".  There is no asm statement before the
   shuffle, so the shuffle input is *X itself.  */
void
f13 (V2 *x)
{
register V2 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
/* Opaque "+v" use of the result.  */
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 1 of a 32-byte float vector:
   *X is shuffled with an index vector of eight 1s and the result is
   stored in a V2 variable tied to the register named "xmm16", followed
   by an empty asm that takes it as a "+v" read-write operand.  */
void
f14 (V2 *x)
{
register V2 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 4 of a 32-byte float vector:
   *X is shuffled with an index vector of eight 4s (first element of the
   second half) and the result is stored in a V2 variable tied to the
   register named "xmm16", followed by an empty asm that takes it as a
   "+v" read-write operand.  */
void
f15 (V2 *x)
{
register V2 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
asm volatile ("" : "+v" (a));
}
/* Memory-source broadcast of element 5 of a 32-byte float vector:
   *X is shuffled with an index vector of eight 5s (second element of
   the second half) and the result is stored in a V2 variable tied to
   the register named "xmm16", followed by an empty asm that takes it as
   a "+v" read-write operand.  */
void
f16 (V2 *x)
{
register V2 a __asm ("xmm16");
a = __builtin_shuffle (*x, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
asm volatile ("" : "+v" (a));
}
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%rdi\[^\n\r]*%xmm16" 4 } } */
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 3 } } */
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%rdi\[^\n\r]*%ymm16" 3 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$170\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$255\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
/* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$3\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment