Commit 395a191d by Sebastian Peryt Committed by Uros Bizjak

	* config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64)
	(_mm256_permutexvar_epi32, _mm256_permutex_epi64): New intrinsics.

testsuite/ChangeLog:

	* gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32):
	Test new intrinsic.
	* gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64):
	Ditto.
	* gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64):
	Ditto.
	* gcc.target/i386/avx512f-vpermd-2.c: Do not check for AVX512F_LEN.
	* gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
	* gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.

From-SVN: r249759
parent 38a79c5a
2017-06-28 Sebastian Peryt <sebastian.peryt@intel.com>
* config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64)
(_mm256_permutexvar_epi32, _mm256_permutex_epi64): New intrinsics.
2017-06-28 Szabolcs Nagy <szabolcs.nagy@arm.com> 2017-06-28 Szabolcs Nagy <szabolcs.nagy@arm.com>
* config.gcc (*-linux-musl*): Add t-musl tmake_file. * config.gcc (*-linux-musl*): Add t-musl tmake_file.
......
...@@ -9099,6 +9099,17 @@ _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) ...@@ -9099,6 +9099,17 @@ _mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y)
/* Unmasked variant of the 256-bit permutexvar_epi64 family.  Forwards to
   __builtin_ia32_permvardi256_mask with __Y as the first operand and __X
   as the second; the third operand is a zero vector and the mask operand
   is (__mmask8) -1.
   NOTE(review): presumably __X supplies the element indices and __Y the
   data, and the all-ones mask means the zero vector is never selected --
   confirm against the Intel intrinsics reference.  */
extern __inline __m256i extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_epi64 (__m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y,
(__v4di) __X,
(__v4di)
_mm256_setzero_si256 (),
(__mmask8) -1);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y) __m256i __Y)
{ {
...@@ -9163,6 +9174,17 @@ _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) ...@@ -9163,6 +9174,17 @@ _mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y)
/* Unmasked variant of the 256-bit permutexvar_epi32 family.  Forwards to
   __builtin_ia32_permvarsi256_mask with __Y as the first operand and __X
   as the second, both viewed as __v8si; the third operand is a zero
   vector and the mask operand is (__mmask8) -1.
   NOTE(review): presumably __X supplies the element indices and __Y the
   data, and the all-ones mask means the zero vector is never selected --
   confirm against the Intel intrinsics reference.  */
extern __inline __m256i extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
{
return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y,
(__v8si) __X,
(__v8si)
_mm256_setzero_si256 (),
(__mmask8) -1);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, _mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
__m256i __Y) __m256i __Y)
{ {
...@@ -9751,6 +9773,17 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y) ...@@ -9751,6 +9773,17 @@ _mm_cmple_epi64_mask (__m128i __X, __m128i __Y)
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
/* Unmasked, immediate-controlled variant of the 256-bit permutex_epi64
   family; this inline-function form follows the #ifdef __OPTIMIZE__ line.
   Forwards __X (viewed as __v4di) and __I to
   __builtin_ia32_permdi256_mask, with a zero vector as the third operand
   and (__mmask8) -1 as the mask operand.
   NOTE(review): __I presumably has to be a compile-time constant for the
   builtin, which would explain why a macro of the same name also exists
   -- confirm.  */
extern __inline __m256i extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permutex_epi64 (__m256i __X, const int __I)
{
return (__m256i) __builtin_ia32_permdi256_mask ((__v4di) __X,
__I,
(__v4di)
_mm256_setzero_si256(),
(__mmask8) -1);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M, _mm256_mask_permutex_epi64 (__m256i __W, __mmask8 __M,
__m256i __X, const int __I) __m256i __X, const int __I)
{ {
...@@ -12367,6 +12400,13 @@ _mm256_permutex_pd (__m256d __X, const int __M) ...@@ -12367,6 +12400,13 @@ _mm256_permutex_pd (__m256d __X, const int __M)
_mm256_undefined_pd (), \ _mm256_undefined_pd (), \
(__mmask8)-1)) (__mmask8)-1))
/* Macro form of _mm256_permutex_epi64.  Expands to a
   __builtin_ia32_permdi256_mask call with X cast to __v4di, I cast to
   int, a zero vector as the third operand and (__mmask8) -1 as the mask
   operand.
   NOTE(review): presumably the counterpart used when __OPTIMIZE__ is not
   defined; the enclosing preprocessor conditional is not visible here --
   confirm.  */
#define _mm256_permutex_epi64(X, I) \
((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
(int)(I), \
(__v4di)(__m256i) \
(_mm256_setzero_si256 ()),\
(__mmask8) -1))
#define _mm256_maskz_permutex_epi64(M, X, I) \ #define _mm256_maskz_permutex_epi64(M, X, I) \
((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \ ((__m256i) __builtin_ia32_permdi256_mask ((__v4di)(__m256i)(X), \
(int)(I), \ (int)(I), \
......
2017-06-28 Sebastian Peryt <sebastian.peryt@intel.com>
* gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32):
Test new intrinsic.
* gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64):
Ditto.
* gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64):
Ditto.
* gcc.target/i386/avx512f-vpermd-2.c: Do not check for AVX512F_LEN.
* gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
* gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
2017-06-28 Thomas Preud'homme <thomas.preudhomme@arm.com> 2017-06-28 Thomas Preud'homme <thomas.preudhomme@arm.com>
* lib/target-supports.exp (check_effective_target_vect_int): Replace * lib/target-supports.exp (check_effective_target_vect_int): Replace
......
...@@ -41,18 +41,14 @@ TEST (void) ...@@ -41,18 +41,14 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE;
} }
#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutexvar_epi32) (src1.x, src2.x); res1.x = INTRINSIC (_permutexvar_epi32) (src1.x, src2.x);
#endif
res2.x = INTRINSIC (_maskz_permutexvar_epi32) (mask, src1.x, src2.x); res2.x = INTRINSIC (_maskz_permutexvar_epi32) (mask, src1.x, src2.x);
res3.x = INTRINSIC (_mask_permutexvar_epi32) (res3.x, mask, src1.x, src2.x); res3.x = INTRINSIC (_mask_permutexvar_epi32) (res3.x, mask, src1.x, src2.x);
CALC (src1.a, src2.a, res_ref); CALC (src1.a, src2.a, res_ref);
#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref)) if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort (); abort ();
#endif
MASK_ZERO (i_d) (res_ref, mask, SIZE); MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref)) if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
......
...@@ -40,18 +40,14 @@ TEST (void) ...@@ -40,18 +40,14 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE;
} }
#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutex_epi64) (src1.x, IMM_MASK); res1.x = INTRINSIC (_permutex_epi64) (src1.x, IMM_MASK);
#endif
res2.x = INTRINSIC (_maskz_permutex_epi64) (mask, src1.x, IMM_MASK); res2.x = INTRINSIC (_maskz_permutex_epi64) (mask, src1.x, IMM_MASK);
res3.x = INTRINSIC (_mask_permutex_epi64) (res3.x, mask, src1.x, IMM_MASK); res3.x = INTRINSIC (_mask_permutex_epi64) (res3.x, mask, src1.x, IMM_MASK);
CALC (src1.a, IMM_MASK, res_ref); CALC (src1.a, IMM_MASK, res_ref);
#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref)) if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort (); abort ();
#endif
MASK_ZERO (i_q) (res_ref, mask, SIZE); MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref)) if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
......
...@@ -41,18 +41,14 @@ TEST (void) ...@@ -41,18 +41,14 @@ TEST (void)
res3.a[i] = DEFAULT_VALUE; res3.a[i] = DEFAULT_VALUE;
} }
#if AVX512F_LEN == 512
res1.x = INTRINSIC (_permutexvar_epi64) (src1.x, src2.x); res1.x = INTRINSIC (_permutexvar_epi64) (src1.x, src2.x);
#endif
res2.x = INTRINSIC (_maskz_permutexvar_epi64) (mask, src1.x, src2.x); res2.x = INTRINSIC (_maskz_permutexvar_epi64) (mask, src1.x, src2.x);
res3.x = INTRINSIC (_mask_permutexvar_epi64) (res3.x, mask, src1.x, src2.x); res3.x = INTRINSIC (_mask_permutexvar_epi64) (res3.x, mask, src1.x, src2.x);
CALC (src1.a, src2.a, res_ref); CALC (src1.a, src2.a, res_ref);
#if AVX512F_LEN == 512
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref)) if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort (); abort ();
#endif
MASK_ZERO (i_q) (res_ref, mask, SIZE); MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref)) if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */ /* { dg-options "-mavx512vl -O2" } */
/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
...@@ -11,6 +12,7 @@ volatile __mmask8 m; ...@@ -11,6 +12,7 @@ volatile __mmask8 m;
/* Compile-only driver (the file is marked dg-do compile): calls the
   unmasked, zero-masked and merge-masked _mm256_*permutexvar_epi32
   intrinsics once each, matching the three scan-assembler-times
   directives above, each of which expects a count of 1.  */
void extern void extern
avx512vl_test (void) avx512vl_test (void)
{ {
x = _mm256_permutexvar_epi32 (x, x);
x = _mm256_maskz_permutexvar_epi32 (m, x, x); x = _mm256_maskz_permutexvar_epi32 (m, x, x);
x = _mm256_mask_permutexvar_epi32 (x, m, x, x); x = _mm256_mask_permutexvar_epi32 (x, m, x, x);
} }
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */ /* { dg-options "-mavx512vl -O2" } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
...@@ -11,6 +12,7 @@ volatile __mmask8 m; ...@@ -11,6 +12,7 @@ volatile __mmask8 m;
/* Compile-only driver (the file is marked dg-do compile): calls the
   unmasked, merge-masked and zero-masked _mm256_*permutex_epi64
   intrinsics once each with the immediate 13, matching the three
   scan-assembler-times directives above, each of which expects a count
   of 1.  */
void extern void extern
avx512vl_test (void) avx512vl_test (void)
{ {
x = _mm256_permutex_epi64 (x, 13);
x = _mm256_mask_permutex_epi64 (x, m, x, 13); x = _mm256_mask_permutex_epi64 (x, m, x, 13);
x = _mm256_maskz_permutex_epi64 (m, x, 13); x = _mm256_maskz_permutex_epi64 (m, x, 13);
} }
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-mavx512vl -O2" } */ /* { dg-options "-mavx512vl -O2" } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpermq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
...@@ -11,6 +12,7 @@ volatile __mmask8 m; ...@@ -11,6 +12,7 @@ volatile __mmask8 m;
/* Compile-only driver (the file is marked dg-do compile): calls the
   unmasked, zero-masked and merge-masked _mm256_*permutexvar_epi64
   intrinsics once each, matching the three scan-assembler-times
   directives above, each of which expects a count of 1.  */
void extern void extern
avx512vl_test (void) avx512vl_test (void)
{ {
x = _mm256_permutexvar_epi64 (x, x);
x = _mm256_maskz_permutexvar_epi64 (m, x, x); x = _mm256_maskz_permutexvar_epi64 (m, x, x);
x = _mm256_mask_permutexvar_epi64 (x, m, x, x); x = _mm256_mask_permutexvar_epi64 (x, m, x, x);
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment