Commit 1f138b75 by Jakub Jelinek Committed by Jakub Jelinek

re PR target/79299 (Operand size mismatch for `vpgatherqd' w/ -O3 -masm=intel -mavx512bw)

	PR target/79299
	* config/i386/sse.md (xtg_mode, gatherq_mode): New mode attrs.
	(*avx512f_gathersi<mode>, *avx512f_gathersi<mode>_2,
	*avx512f_gatherdi<mode>, *avx512f_gatherdi<mode>_2): Use them,
	fix -masm=intel patterns.

	* gcc.target/i386/avx512vl-pr79299-1.c: New test.
	* gcc.target/i386/avx512vl-pr79299-2.c: New test.

From-SVN: r245248
parent a4cf4b64
2017-02-07 Jakub Jelinek <jakub@redhat.com>
PR target/79299
* config/i386/sse.md (xtg_mode, gatherq_mode): New mode attrs.
(*avx512f_gathersi<mode>, *avx512f_gathersi<mode>_2,
*avx512f_gatherdi<mode>, *avx512f_gatherdi<mode>_2): Use them,
fix -masm=intel patterns.
2017-02-07 Richard Biener <rguenther@suse.de>
PR tree-optimization/79256
......
......@@ -811,6 +811,12 @@
[(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
(V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Tie mode of assembler operand to mode iterator
(define_mode_attr xtg_mode
[(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
(V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
;; Half mask mode for unpacks
(define_mode_attr HALFMASKMODE
[(DI "SI") (SI "HI")])
......@@ -19034,6 +19040,12 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
;; Memory operand override for -masm=intel of the v*gatherq* patterns.
(define_mode_attr gatherq_mode
[(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
(V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
(V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
(define_expand "<avx512>_gathersi<mode>"
[(parallel [(set (match_operand:VI48F 0 "register_operand")
(unspec:VI48F
......@@ -19067,7 +19079,7 @@
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
"TARGET_AVX512F"
"v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
"v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
......@@ -19086,7 +19098,7 @@
UNSPEC_GATHER))
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
"TARGET_AVX512F"
"v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
"v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
......@@ -19126,9 +19138,7 @@
(clobber (match_scratch:QI 2 "=&Yk"))]
"TARGET_AVX512F"
{
if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %t6}";
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}";
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
......@@ -19152,11 +19162,11 @@
if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
{
if (<MODE_SIZE> != 64)
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%t0%{%1%}, %g5}";
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
else
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
}
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
}
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
......
2017-02-07 Jakub Jelinek <jakub@redhat.com>
PR target/79299
* gcc.target/i386/avx512vl-pr79299-1.c: New test.
* gcc.target/i386/avx512vl-pr79299-2.c: New test.
2017-02-07 Richard Biener <rguenther@suse.de>
* gcc.dg/gimplefe-23.c: New testcase.
......
/* PR target/79299 */
/* { dg-do assemble { target avx512vl } } */
/* { dg-require-effective-target masm_intel } */
/* { dg-options "-Ofast -mavx512vl -masm=intel" } */
#define N 1024
unsigned long long a[N];
unsigned int b[N], c[N], d[N], e[N], f[N];
unsigned long long g[N], h[N], j[N], k[N];
float l[N], m[N], n[N], o[N];
double p[N], q[N], r[N], s[N];
void
f1 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = c[a[i]];
for (i = 0; i < N; i++)
e[i] = f[i] ? f[i] : c[a[i]];
}
void
f2 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = c[b[i]];
for (i = 0; i < N; i++)
e[i] = f[i] ? f[i] : c[b[i]];
}
void
f3 (void)
{
int i;
for (i = 0; i < N; i++)
h[i] = g[a[i]];
for (i = 0; i < N; i++)
j[i] = k[i] != 0.0 ? k[i] : g[a[i]];
}
void
f4 (void)
{
int i;
for (i = 0; i < N; i++)
h[i] = g[b[i]];
for (i = 0; i < N; i++)
j[i] = k[i] != 0.0 ? k[i] : g[b[i]];
}
void
f5 (void)
{
int i;
for (i = 0; i < N; i++)
m[i] = l[a[i]];
for (i = 0; i < N; i++)
n[i] = o[i] ? o[i] : l[a[i]];
}
void
f6 (void)
{
int i;
for (i = 0; i < N; i++)
m[i] = c[b[i]];
for (i = 0; i < N; i++)
n[i] = o[i] ? o[i] : c[b[i]];
}
void
f7 (void)
{
int i;
for (i = 0; i < N; i++)
q[i] = p[a[i]];
for (i = 0; i < N; i++)
r[i] = s[i] != 0.0 ? s[i] : p[a[i]];
}
void
f8 (void)
{
int i;
for (i = 0; i < N; i++)
q[i] = p[b[i]];
for (i = 0; i < N; i++)
r[i] = s[i] != 0.0 ? s[i] : p[b[i]];
}
/* PR target/79299 */
/* { dg-do assemble { target avx512vl } } */
/* { dg-require-effective-target masm_intel } */
/* { dg-options "-Ofast -mavx512vl -masm=intel" } */
#include <immintrin.h>
__m512
f1 (__m512i x, void const *y)
{
return _mm512_i32gather_ps (x, y, 1);
}
__m512
f2 (__m512 x, __mmask16 y, __m512i z, void const *w)
{
return _mm512_mask_i32gather_ps (x, y, z, w, 1);
}
__m512d
f3 (__m256i x, void const *y)
{
return _mm512_i32gather_pd (x, y, 1);
}
__m512d
f4 (__m512d x, __mmask8 y, __m256i z, void const *w)
{
return _mm512_mask_i32gather_pd (x, y, z, w, 1);
}
__m256
f5 (__m512i x, void const *y)
{
return _mm512_i64gather_ps (x, y, 1);
}
__m256
f6 (__m256 x, __mmask16 y, __m512i z, void const *w)
{
return _mm512_mask_i64gather_ps (x, y, z, w, 1);
}
__m512d
f7 (__m512i x, void const *y)
{
return _mm512_i64gather_pd (x, y, 1);
}
__m512d
f8 (__m512d x, __mmask8 y, __m512i z, void const *w)
{
return _mm512_mask_i64gather_pd (x, y, z, w, 1);
}
__m512i
f9 (__m512i x, void const *y)
{
return _mm512_i32gather_epi32 (x, y, 1);
}
__m512i
f10 (__m512i x, __mmask16 y, __m512i z, void const *w)
{
return _mm512_mask_i32gather_epi32 (x, y, z, w, 1);
}
__m512i
f11 (__m256i x, void const *y)
{
return _mm512_i32gather_epi64 (x, y, 1);
}
__m512i
f12 (__m512i x, __mmask8 y, __m256i z, void const *w)
{
return _mm512_mask_i32gather_epi64 (x, y, z, w, 1);
}
__m256i
f13 (__m512i x, void const *y)
{
return _mm512_i64gather_epi32 (x, y, 1);
}
__m256i
f14 (__m256i x, __mmask16 y, __m512i z, void const *w)
{
return _mm512_mask_i64gather_epi32 (x, y, z, w, 1);
}
__m512i
f15 (__m512i x, void const *y)
{
return _mm512_i64gather_epi64 (x, y, 1);
}
__m512i
f16 (__m512i x, __mmask8 y, __m512i z, void const *w)
{
return _mm512_mask_i64gather_epi64 (x, y, z, w, 1);
}
__m256
f17 (__m256 x, __mmask8 y, __m256i z, void const *w)
{
return _mm256_mmask_i32gather_ps (x, y, z, w, 1);
}
__m128
f18 (__m128 x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i32gather_ps (x, y, z, w, 1);
}
__m256d
f19 (__m256d x, __mmask8 y, __m128i z, void const *w)
{
return _mm256_mmask_i32gather_pd (x, y, z, w, 1);
}
__m128d
f20 (__m128d x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i32gather_pd (x, y, z, w, 1);
}
__m128
f21 (__m128 x, __mmask8 y, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_ps (x, y, z, w, 1);
}
__m128
f22 (__m128 x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i64gather_ps (x, y, z, w, 1);
}
__m256d
f23 (__m256d x, __mmask8 y, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_pd (x, y, z, w, 1);
}
__m128d
f24 (__m128d x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i64gather_pd (x, y, z, w, 1);
}
__m256i
f25 (__m256i x, __mmask8 y, __m256i z, void const *w)
{
return _mm256_mmask_i32gather_epi32 (x, y, z, w, 1);
}
__m128i
f26 (__m128i x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i32gather_epi32 (x, y, z, w, 1);
}
__m256i
f27 (__m256i x, __mmask8 y, __m128i z, void const *w)
{
return _mm256_mmask_i32gather_epi64 (x, y, z, w, 1);
}
__m128i
f28 (__m128i x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i32gather_epi64 (x, y, z, w, 1);
}
__m128i
f29 (__m128i x, __mmask8 y, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_epi32 (x, y, z, w, 1);
}
__m128i
f30 (__m128i x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i64gather_epi32 (x, y, z, w, 1);
}
__m256i
f31 (__m256i x, __mmask8 y, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_epi64 (x, y, z, w, 1);
}
__m128i
f32 (__m128i x, __mmask8 y, __m128i z, void const *w)
{
return _mm_mmask_i64gather_epi64 (x, y, z, w, 1);
}
__m256
f33 (__m256 x, __m256i z, void const *w)
{
return _mm256_mmask_i32gather_ps (x, -1, z, w, 1);
}
__m128
f34 (__m128 x, __m128i z, void const *w)
{
return _mm_mmask_i32gather_ps (x, -1, z, w, 1);
}
__m256d
f35 (__m256d x, __m128i z, void const *w)
{
return _mm256_mmask_i32gather_pd (x, -1, z, w, 1);
}
__m128d
f36 (__m128d x, __m128i z, void const *w)
{
return _mm_mmask_i32gather_pd (x, -1, z, w, 1);
}
__m128
f37 (__m128 x, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_ps (x, -1, z, w, 1);
}
__m128
f38 (__m128 x, __m128i z, void const *w)
{
return _mm_mmask_i64gather_ps (x, -1, z, w, 1);
}
__m256d
f39 (__m256d x, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_pd (x, -1, z, w, 1);
}
__m128d
f40 (__m128d x, __m128i z, void const *w)
{
return _mm_mmask_i64gather_pd (x, -1, z, w, 1);
}
__m256i
f41 (__m256i x, __m256i z, void const *w)
{
return _mm256_mmask_i32gather_epi32 (x, -1, z, w, 1);
}
__m128i
f42 (__m128i x, __m128i z, void const *w)
{
return _mm_mmask_i32gather_epi32 (x, -1, z, w, 1);
}
__m256i
f43 (__m256i x, __m128i z, void const *w)
{
return _mm256_mmask_i32gather_epi64 (x, -1, z, w, 1);
}
__m128i
f44 (__m128i x, __m128i z, void const *w)
{
return _mm_mmask_i32gather_epi64 (x, -1, z, w, 1);
}
__m128i
f45 (__m128i x, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_epi32 (x, -1, z, w, 1);
}
__m128i
f46 (__m128i x, __m128i z, void const *w)
{
return _mm_mmask_i64gather_epi32 (x, -1, z, w, 1);
}
__m256i
f47 (__m256i x, __m256i z, void const *w)
{
return _mm256_mmask_i64gather_epi64 (x, -1, z, w, 1);
}
__m128i
f48 (__m128i x, __m128i z, void const *w)
{
return _mm_mmask_i64gather_epi64 (x, -1, z, w, 1);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment