Commit c46f9051 by Julia Koval Committed by Kirill Yukhin

Add mov[us]wb store intrinsics.

gcc/
	* config/i386/avx512bwintrin.h (_mm512_mask_cvtepi16_storeu_epi8,
	_mm512_mask_cvtsepi16_storeu_epi8,
	_mm512_mask_cvtusepi16_storeu_epi8): New intrinsics.
	* config/i386/avx512vlbwintrin.h (_mm256_mask_cvtepi16_storeu_epi8,
	_mm_mask_cvtsepi16_storeu_epi8, _mm256_mask_cvtsepi16_storeu_epi8,
	_mm_mask_cvtusepi16_storeu_epi8, _mm256_mask_cvtusepi16_storeu_epi8,
	_mm_mask_cvtepi16_storeu_epi8): New intrinsics.
	* config/i386/i386-builtin-types.def (PV8Q, V8QI): New pointer type.
	(VOID_FTYPE_PV32QI_V32HI_USI, VOID_FTYPE_PV8QI_V8HI_UQI,
	VOID_FTYPE_PV16QI_V16HI_UHI): New function types.
	* config/i386/i386-builtin.def (__builtin_ia32_pmovwb128mem_mask,
	__builtin_ia32_pmovwb256mem_mask, __builtin_ia32_pmovswb128mem_mask,
	__builtin_ia32_pmovswb256mem_mask, __builtin_ia32_pmovuswb128mem_mask,
	__builtin_ia32_pmovuswb256mem_mask,
	__builtin_ia32_pmovuswb512mem_mask, __builtin_ia32_pmovswb512mem_mask)
	__builtin_ia32_pmovwb512mem_mask): New builtins.

gcc/testsuite/
	* gcc.target/i386/avx512bw-vpmovswb-1.c: Add new intrinsics to test.
	* gcc.target/i386/avx512bw-vpmovswb-2.c: Ditto.
	* gcc.target/i386/avx512bw-vpmovuswb-1.c: Ditto.
	* gcc.target/i386/avx512bw-vpmovuswb-2.c: Ditto.
	* gcc.target/i386/avx512bw-vpmovwb-1.c: Ditto.
	* gcc.target/i386/avx512bw-vpmovwb-2.c: Ditto.

From-SVN: r249012
parent 5ed41889
2017-08-08 Julia Koval <julia.koval@intel.com>
* config/i386/avx512bwintrin.h (_mm512_mask_cvtepi16_storeu_epi8,
_mm512_mask_cvtsepi16_storeu_epi8,
_mm512_mask_cvtusepi16_storeu_epi8): New intrinsics.
* config/i386/avx512vlbwintrin.h (_mm256_mask_cvtepi16_storeu_epi8,
_mm_mask_cvtsepi16_storeu_epi8, _mm256_mask_cvtsepi16_storeu_epi8,
_mm_mask_cvtusepi16_storeu_epi8, _mm256_mask_cvtusepi16_storeu_epi8,
_mm_mask_cvtepi16_storeu_epi8): New intrinsics.
* config/i386/i386-builtin-types.def (PV8Q, V8QI): New pointer type.
(VOID_FTYPE_PV32QI_V32HI_USI, VOID_FTYPE_PV8QI_V8HI_UQI,
VOID_FTYPE_PV16QI_V16HI_UHI): New function types.
* config/i386/i386-builtin.def (__builtin_ia32_pmovwb128mem_mask,
__builtin_ia32_pmovwb256mem_mask, __builtin_ia32_pmovswb128mem_mask,
__builtin_ia32_pmovswb256mem_mask, __builtin_ia32_pmovuswb128mem_mask,
__builtin_ia32_pmovuswb256mem_mask,
__builtin_ia32_pmovuswb512mem_mask, __builtin_ia32_pmovswb512mem_mask)
__builtin_ia32_pmovwb512mem_mask): New builtins.
2017-08-08 Julia Koval <julia.koval@intel.com>
PR target/73350,80862
* config/i386/subst.md (round): Fix round pattern.
* config/i386/i386.c (ix86_erase_embedded_rounding):
......
......@@ -425,6 +425,13 @@ _mm512_cvtepi16_epi8 (__m512i __A)
(__mmask32) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
{
__builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
......@@ -452,6 +459,13 @@ _mm512_cvtsepi16_epi8 (__m512i __A)
(__mmask32) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
{
__builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
......@@ -489,6 +503,13 @@ _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A)
__M);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
{
__builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
}
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A)
......
......@@ -216,6 +216,13 @@ _mm256_cvtepi16_epi8 (__m256i __A)
(__mmask16) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
{
__builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
......@@ -244,6 +251,13 @@ _mm_cvtsepi16_epi8 (__m128i __A)
(__mmask8) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
{
__builtin_ia32_pmovswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
......@@ -272,6 +286,13 @@ _mm256_cvtsepi16_epi8 (__m256i __A)
(__mmask16) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
{
__builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
......@@ -300,6 +321,13 @@ _mm_cvtusepi16_epi8 (__m128i __A)
(__mmask8) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
{
__builtin_ia32_pmovuswb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
......@@ -329,6 +357,13 @@ _mm256_cvtusepi16_epi8 (__m256i __A)
(__mmask16) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M,__m256i __A)
{
__builtin_ia32_pmovuswb256mem_mask ((__v16qi *) __P , (__v16hi) __A, __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A)
......@@ -4009,6 +4044,13 @@ _mm_cvtepi16_epi8 (__m128i __A)
(__mmask8) -1);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M,__m128i __A)
{
__builtin_ia32_pmovwb128mem_mask ((__v8qi *) __P , (__v8hi) __A, __M);
}
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
......
......@@ -155,6 +155,7 @@ DEF_POINTER_TYPE (PV4SF, V4SF)
DEF_POINTER_TYPE (PV8DF, V8DF)
DEF_POINTER_TYPE (PV8SF, V8SF)
DEF_POINTER_TYPE (PV4SI, V4SI)
DEF_POINTER_TYPE (PV8QI, V8QI)
DEF_POINTER_TYPE (PV8HI, V8HI)
DEF_POINTER_TYPE (PV8SI, V8SI)
DEF_POINTER_TYPE (PV8DI, V8DI)
......@@ -964,6 +965,7 @@ DEF_FUNCTION_TYPE (QI, V2DF, INT, UQI)
DEF_FUNCTION_TYPE (HI, V16SF, INT, UHI)
DEF_FUNCTION_TYPE (QI, V8SF, INT, UQI)
DEF_FUNCTION_TYPE (QI, V4SF, INT, UQI)
DEF_FUNCTION_TYPE (VOID, PV32QI, V32HI, USI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, UINT, UINT)
DEF_FUNCTION_TYPE (V4HI, HI, HI, HI, HI)
......@@ -1106,6 +1108,8 @@ DEF_FUNCTION_TYPE (VOID, PVOID, QI, V2DI, V2DI, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8SI, PCVOID, INT, INT)
DEF_FUNCTION_TYPE (VOID, HI, V16SI, PCVOID, INT, INT)
DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCVOID, INT, INT)
DEF_FUNCTION_TYPE (VOID, PV8QI, V8HI, UQI)
DEF_FUNCTION_TYPE (VOID, PV16QI, V16HI, UHI)
DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND)
......
......@@ -378,6 +378,15 @@ BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_sto
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovwb128mem_mask", IX86_BUILTIN_PMOVWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovwb256mem_mask", IX86_BUILTIN_PMOVWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovswb128mem_mask", IX86_BUILTIN_PMOVSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8QI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
/* RDPKRU and WRPKRU. */
BDESC (OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
......
......@@ -37033,6 +37033,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
case VOID_FTYPE_PFLOAT_V16SF_UHI:
case VOID_FTYPE_PFLOAT_V8SF_UQI:
case VOID_FTYPE_PFLOAT_V4SF_UQI:
case VOID_FTYPE_PV32QI_V32HI_USI:
case VOID_FTYPE_PV16QI_V16HI_UHI:
case VOID_FTYPE_PV8QI_V8HI_UQI:
nargs = 2;
klass = store;
/* Reserve memory operand for target. */
2017-08-08 Julia Koval <julia.koval@intel.com>
* gcc.target/i386/avx512bw-vpmovswb-1.c: Add new intrinsics to test.
* gcc.target/i386/avx512bw-vpmovswb-2.c: Ditto.
* gcc.target/i386/avx512bw-vpmovuswb-1.c: Ditto.
* gcc.target/i386/avx512bw-vpmovuswb-2.c: Ditto.
* gcc.target/i386/avx512bw-vpmovwb-1.c: Ditto.
* gcc.target/i386/avx512bw-vpmovwb-2.c: Ditto.
2017-06-08 Marek Polacek <polacek@redhat.com>
PR sanitize/80932
......
......@@ -3,18 +3,21 @@
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m128i x, z;
volatile __m256i y;
volatile __m512i u;
volatile __m128i x, z, res1;
volatile __m256i y, res2;
volatile __m512i u, res3;
volatile __mmask8 m1;
volatile __mmask16 m2;
volatile __mmask32 m3;
......@@ -25,10 +28,13 @@ avx512bw_test (void)
z = _mm_cvtsepi16_epi8 (x);
z = _mm_mask_cvtsepi16_epi8 (z, m1, x);
z = _mm_maskz_cvtsepi16_epi8 (m1, x);
_mm_mask_cvtsepi16_storeu_epi8 ((void *) &res1, m1, x);
z = _mm256_cvtsepi16_epi8 (y);
z = _mm256_mask_cvtsepi16_epi8 (z, m2, y);
z = _mm256_maskz_cvtsepi16_epi8 (m2, y);
_mm256_mask_cvtsepi16_storeu_epi8 ((void *) &res2, m2, y);
y = _mm512_cvtsepi16_epi8 (u);
y = _mm512_mask_cvtsepi16_epi8 (y, m3, u);
y = _mm512_maskz_cvtsepi16_epi8 (m3, u);
_mm512_mask_cvtsepi16_storeu_epi8 ((void *) &res3, m3, u);
}
......@@ -31,9 +31,11 @@ TEST (void)
{
int i, sign;
UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
char res4[SIZE];
UNION_TYPE (AVX512F_LEN, i_w) src;
MASK_TYPE mask = MASK_VALUE;
char res_ref[32];
char res_ref2[SIZE_HALF];
sign = -1;
for (i = 0; i < SIZE; i++)
......@@ -41,6 +43,7 @@ TEST (void)
src.a[i] = 1 + 34 * i * sign;
sign = sign * -1;
res2.a[i] = DEFAULT_VALUE;
res4[i] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_cvtsepi16_epi8) (src.x);
......@@ -59,4 +62,11 @@ TEST (void)
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
INTRINSIC (_mask_cvtsepi16_storeu_epi8) (res4, mask, src.x);
CALC (res_ref2, src.a);
MASK_MERGE (i_b) (res_ref2, mask, SIZE);
if (checkVc (res4, res_ref2, SIZE))
abort ();
}
......@@ -3,18 +3,21 @@
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovuswb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m128i x, z;
volatile __m256i y;
volatile __m512i u;
volatile __m128i x, z, res1;
volatile __m256i y, res2;
volatile __m512i u, res3;
volatile __mmask8 m1;
volatile __mmask16 m2;
volatile __mmask32 m3;
......@@ -25,10 +28,13 @@ avx512bw_test (void)
z = _mm_cvtusepi16_epi8 (x);
z = _mm_mask_cvtusepi16_epi8 (z, m1, x);
z = _mm_maskz_cvtusepi16_epi8 (m1, x);
_mm_mask_cvtusepi16_storeu_epi8 ((void *) &res1, m1, x);
z = _mm256_cvtusepi16_epi8 (y);
z = _mm256_mask_cvtusepi16_epi8 (z, m2, y);
z = _mm256_maskz_cvtusepi16_epi8 (m2, y);
_mm256_mask_cvtusepi16_storeu_epi8 ((void *) &res2, m2, y);
y = _mm512_cvtusepi16_epi8 (u);
y = _mm512_mask_cvtusepi16_epi8 (y, m3, u);
y = _mm512_maskz_cvtusepi16_epi8 (m3, u);
_mm512_mask_cvtusepi16_storeu_epi8 ((void *) &res3, m3, u);
}
......@@ -23,14 +23,17 @@ TEST (void)
{
int i;
UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
unsigned char res4[SIZE];
UNION_TYPE (AVX512F_LEN, i_w) src;
MASK_TYPE mask = MASK_VALUE;
unsigned char res_ref[32];
unsigned char res_ref2[SIZE];
for (i = 0; i < SIZE; i++)
{
src.a[i] = 1 + 34 * i;
res2.a[i] = DEFAULT_VALUE;
res4[i] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_cvtusepi16_epi8) (src.x);
......@@ -49,4 +52,11 @@ TEST (void)
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
INTRINSIC (_mask_cvtusepi16_storeu_epi8) (res4, mask, src.x);
CALC (res_ref2, src.a);
MASK_MERGE (i_b) (res_ref2, mask, SIZE);
if (checkVc (res4, res_ref2, SIZE))
abort ();
}
......@@ -3,18 +3,21 @@
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*res1\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*res2\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}{z}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpmovwb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*res3\[^\n\]*\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m128i x, z;
volatile __m256i y;
volatile __m512i u;
volatile __m128i x, z, res1;
volatile __m256i y, res2;
volatile __m512i u, res3;
volatile __mmask8 m1;
volatile __mmask16 m2;
volatile __mmask32 m3;
......@@ -25,10 +28,13 @@ avx512bw_test (void)
z = _mm_cvtepi16_epi8 (x);
z = _mm_mask_cvtepi16_epi8 (z, m1, x);
z = _mm_maskz_cvtepi16_epi8 (m1, x);
_mm_mask_cvtepi16_storeu_epi8 ((void *) &res1, m1, x);
z = _mm256_cvtepi16_epi8 (y);
z = _mm256_mask_cvtepi16_epi8 (z, m2, y);
z = _mm256_maskz_cvtepi16_epi8 (m2, y);
_mm256_mask_cvtepi16_storeu_epi8 ((void *) &res2, m2, y);
y = _mm512_cvtepi16_epi8 (u);
y = _mm512_mask_cvtepi16_epi8 (y, m3, u);
y = _mm512_maskz_cvtepi16_epi8 (m3, u);
_mm512_mask_cvtepi16_storeu_epi8 ((void *) &res3, m3, u);
}
......@@ -24,9 +24,11 @@ TEST (void)
{
int i, sign;
UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3;
char res4[SIZE];
UNION_TYPE (AVX512F_LEN, i_w) src;
MASK_TYPE mask = MASK_VALUE;
char res_ref[32];
char res_ref2[SIZE];
sign = -1;
for (i = 0; i < SIZE; i++)
......@@ -34,6 +36,7 @@ TEST (void)
src.a[i] = 1 + 34 * i * sign;
sign = sign * -1;
res2.a[i] = DEFAULT_VALUE;
res4[i] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_cvtepi16_epi8) (src.x);
......@@ -52,4 +55,11 @@ TEST (void)
MASK_ZERO (i_b) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN_HALF, i_b) (res3, res_ref))
abort ();
INTRINSIC (_mask_cvtepi16_storeu_epi8) (res4, mask, src.x);
CALC (res_ref2, src.a);
MASK_MERGE (i_b) (res_ref2, mask, SIZE);
if (checkVc (res4, res_ref2, SIZE))
abort ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment