Commit 18379eea by Jakub Jelinek

avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask): Use __mmask64 type instead of __mmask8 for __M argument.
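
All of these fixes share one failure mode: when an intrinsic wrapper declares its mask parameter (or return value) narrower than the instruction's lane count, the excess mask bits are silently dropped by the integer conversion. A minimal stand-alone sketch of that bug class (not part of the commit; plain integer types stand in for the __mmask typedefs):

    #include <stdio.h>

    int
    main (void)
    {
      /* A 64-lane compare result needs 64 mask bits...  */
      unsigned long long full = 0xa5a5a5a5a5a5a5a5ULL;  /* like __mmask64 */
      /* ...but passing it through a parameter declared with the 8-bit
         mask type keeps only bits 0..7, exactly the truncation the old
         _mm512_mask_bitshuffle_epi64_mask prototype caused.  */
      unsigned char truncated = full;                   /* like __mmask8 */
      printf ("%#llx -> %#x\n", full, (unsigned) truncated);
      return 0;
    }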

	* config/i386/avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask):
	Use __mmask64 type instead of __mmask8 for __M argument.
	* config/i386/avx512fintrin.h (_mm512_mask_xor_epi64,
	_mm512_maskz_xor_epi64): Use __mmask8 type instead of __mmask16 for
	__U argument.
	(_mm512_mask_cmpneq_epi64_mask): Use __mmask8 type instead of
	__mmask16 for __M argument.
	(_mm512_maskz_insertf32x4, _mm512_maskz_inserti32x4,
	_mm512_mask_insertf32x4, _mm512_mask_inserti32x4): Cast last argument
	to __mmask16 instead of __mmask8.
	* config/i386/avx512vlintrin.h (_mm_mask_add_ps, _mm_maskz_add_ps,
	_mm256_mask_add_ps, _mm256_maskz_add_ps, _mm_mask_sub_ps,
	_mm_maskz_sub_ps, _mm256_mask_sub_ps, _mm256_maskz_sub_ps,
	_mm256_maskz_cvtepi32_ps, _mm_maskz_cvtepi32_ps): Use __mmask8 type
	instead of __mmask16 for __U argument.
	* config/i386/avx512vlbwintrin.h (_mm_mask_cmp_epi8_mask): Use
	__mmask16 instead of __mmask8 for __U argument.
	(_mm256_mask_cmp_epi8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epi8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epu8_mask): Use __mmask16 instead of __mmask8 for __U
	argument.
	(_mm256_mask_cmp_epu8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epu8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epi16_mask): Cast last argument to __mmask8 instead
	of __mmask16.
	(_mm256_mask_cvtepi8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepi8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cvtepu8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepu8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cmpneq_epu8_mask, _mm256_mask_cmplt_epu8_mask,
	_mm256_mask_cmpge_epu8_mask, _mm256_mask_cmple_epu8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epu16_mask, _mm256_mask_cmplt_epu16_mask,
	_mm256_mask_cmpge_epu16_mask, _mm256_mask_cmple_epu16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	(_mm256_mask_cmpneq_epi8_mask, _mm256_mask_cmplt_epi8_mask,
	_mm256_mask_cmpge_epi8_mask, _mm256_mask_cmple_epi8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epi16_mask, _mm256_mask_cmplt_epi16_mask,
	_mm256_mask_cmpge_epi16_mask, _mm256_mask_cmple_epi16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	* config/i386/avx512vbmi2vlintrin.h (_mm_mask_shrdi_epi32,
	_mm_mask_shldi_epi32): Cast last argument to __mmask8 instead of
	__mmask16.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (CMP): Use SIZE macro instead
	of hardcoding size.  Cast (rel) to MASK_TYPE.
	* gcc.target/i386/avx512bw-vpcmpub-2.c (CMP): Likewise.
	* gcc.target/i386/avx512f-vinserti32x4-3.c: New test.
	* gcc.target/i386/avx512f-vinsertf32x4-3.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmplew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqw-2.c: New test.

2018-07-11  Grazvydas Ignotas  <notasas@gmail.com>

	* config/i386/avx512bwintrin.h: (_mm512_mask_cmp_epi8_mask,
	_mm512_mask_cmp_epu8_mask): Use __mmask64 type instead of __mmask32
	for __U argument.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (SIZE): Define to
	(AVX512F_LEN / 8) instead of (AVX512F_LEN / 16).
	* gcc.target/i386/avx512bw-vpcmpub-2.c (SIZE): Likewise.
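
Two separate problems are being fixed in these tests: SIZE was derived for 16-bit elements although the tests compare bytes, and the reference mask was accumulated by shifting (rel) as a plain int, which loses bits from i = 31 on and is undefined behaviour once i reaches 32. A stand-alone sketch of the corrected idiom, with unsigned long long standing in for the harness's MASK_TYPE macro:

    #include <stdio.h>

    int
    main (void)
    {
      int rel = 1;                       /* one comparison result bit */
      unsigned long long dst_ref = 0;    /* MASK_TYPE for 64 byte lanes */
      int i;

      /* Widen before shifting; ((rel) << i) alone is an int shift.  */
      for (i = 0; i < 64; i++)
        dst_ref = ((unsigned long long) rel << i) | dst_ref;

      printf ("%#llx\n", dst_ref);       /* all 64 bits set */
      return 0;
    }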

From-SVN: r262566
...
@@ -107,7 +107,7 @@ _mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
 
 extern __inline __mmask64
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
							   (__v64qi) __B,
...
@@ -3043,7 +3043,7 @@ _mm512_cmp_epi16_mask (__m512i __X, __m512i __Y, const int __P)
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
			   const int __P)
 {
   return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
@@ -3081,7 +3081,7 @@ _mm512_cmp_epu16_mask (__m512i __X, __m512i __Y, const int __P)
 
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
			   const int __P)
 {
   return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
...
@@ -7377,7 +7377,7 @@ _mm512_xor_epi64 (__m512i __A, __m512i __B)
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
						  (__v8di) __B,
@@ -7387,7 +7387,7 @@ _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
						  (__v8di) __B,
@@ -9615,7 +9615,7 @@ _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
 
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
+_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 4,
@@ -10877,22 +10877,22 @@ _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
 #define _mm512_maskz_insertf32x4(A, X, Y, C) \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_maskz_inserti32x4(A, X, Y, C) \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 
 #define _mm512_mask_insertf32x4(A, B, X, Y, C) \
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \
-    (__mmask8)(B)))
+    (__mmask16)(B)))
 
 #define _mm512_mask_inserti32x4(A, B, X, Y, C) \
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \
-    (__mmask8)(B)))
+    (__mmask16)(B)))
 #endif
 
 extern __inline __m512i
...
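
The -O0 macro forms above merge a 16-element result, so the mask operand has 16 significant bits; the old (__mmask8) cast dropped the high byte before the builtin ever saw it. The new avx512f-vinsertf32x4-3.c / avx512f-vinserti32x4-3.c tests further down use the mask 0xa55a precisely because its upper byte is nonzero. A stand-alone sketch (not from the commit) of what the old cast lost:

    #include <stdio.h>

    int
    main (void)
    {
      unsigned short mask = 0xa55a;   /* intended 16-lane merge mask */
      unsigned char old_cast = mask;  /* what (__mmask8) kept */
      printf ("%#x -> %#x\n", mask, old_cast);  /* 0xa55a -> 0x5a */
      return 0;
    }
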
@@ -541,7 +541,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
					       (__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shrdi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C), \
-    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shrdi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C),(int)(D), \
@@ -601,7 +601,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
					       (__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shldi_epi32(A, B, C, D, E) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C), \
-    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shldi_epi32(A, B, C, D) \
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B), \
     (__v4si)(__m128i)(C),(int)(D), \
...
@@ -1467,7 +1467,7 @@ _mm256_cmp_epi16_mask (__m256i __X, __m256i __Y, const int __P)
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epi8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epi8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
			const int __P)
 {
   return (__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi) __X,
@@ -1486,7 +1486,7 @@ _mm_cmp_epi8_mask (__m128i __X, __m128i __Y, const int __P)
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epi8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
			   const int __P)
 {
   return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
@@ -1494,7 +1494,7 @@ _mm256_mask_cmp_epi8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
 						  (__mmask32) __U);
 }
 
-extern __inline __mmask16
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_epi8_mask (__m256i __X, __m256i __Y, const int __P)
 {
@@ -1543,7 +1543,7 @@ _mm256_cmp_epu16_mask (__m256i __X, __m256i __Y, const int __P)
 
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cmp_epu8_mask (__mmask8 __U, __m128i __X, __m128i __Y,
+_mm_mask_cmp_epu8_mask (__mmask16 __U, __m128i __X, __m128i __Y,
			const int __P)
 {
   return (__mmask16) __builtin_ia32_ucmpb128_mask ((__v16qi) __X,
@@ -1562,7 +1562,7 @@ _mm_cmp_epu8_mask (__m128i __X, __m128i __Y, const int __P)
 
 extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
+_mm256_mask_cmp_epu8_mask (__mmask32 __U, __m256i __X, __m256i __Y,
			   const int __P)
 {
   return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
@@ -1570,7 +1570,7 @@ _mm256_mask_cmp_epu8_mask (__mmask16 __U, __m256i __X, __m256i __Y,
 						  (__mmask32) __U);
 }
 
-extern __inline __mmask16
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_cmp_epu8_mask (__m256i __X, __m256i __Y, const int __P)
 {
@@ -1998,7 +1998,7 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
 #define _mm_mask_cmp_epi16_mask(M, X, Y, P) \
   ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \
					    (__v8hi)(__m128i)(Y), (int)(P),\
-					    (__mmask16)(M)))
+					    (__mmask8)(M)))
 
 #define _mm_mask_cmp_epi8_mask(M, X, Y, P) \
   ((__mmask16) __builtin_ia32_cmpb128_mask ((__v16qi)(__m128i)(X), \
@@ -2430,7 +2430,7 @@ _mm_maskz_mullo_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepi8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovsxbw256_mask ((__v16qi) __A,
						     (__v16hi) __W,
@@ -2449,7 +2449,7 @@ _mm256_maskz_cvtepi8_epi16 (__mmask16 __U, __m128i __A)
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepi8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsxbw128_mask ((__v16qi) __A,
						     (__v8hi) __W,
@@ -2468,7 +2468,7 @@ _mm_maskz_cvtepi8_epi16 (__mmask8 __U, __m128i __A)
 
 extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask32 __U, __m128i __A)
+_mm256_mask_cvtepu8_epi16 (__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i) __builtin_ia32_pmovzxbw256_mask ((__v16qi) __A,
						     (__v16hi) __W,
@@ -2487,7 +2487,7 @@ _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
 
 extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask32 __U, __m128i __A)
+_mm_mask_cvtepu8_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovzxbw128_mask ((__v16qi) __A,
						     (__v8hi) __W,
@@ -4541,148 +4541,148 @@ _mm_mask_cmple_epi16_mask (__mmask8 __M, __m128i __X, __m128i __Y)
 						  (__mmask8) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 4,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 4,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 1,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 1,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 5,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 5,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
-						  (__v32qi) __Y, 2,
-						  (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_ucmpb256_mask ((__v32qi) __X,
+						   (__v32qi) __Y, 2,
+						   (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 4,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 4,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 1,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 1,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 5,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
						   (__v16hi) __Y, 5,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epu16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epu16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
-						  (__v16hi) __Y, 2,
-						  (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_ucmpw256_mask ((__v16hi) __X,
+						   (__v16hi) __Y, 2,
+						   (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 4,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 4,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 1,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 1,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 5,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 5,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask32
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi8_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi8_mask (__mmask32 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
-						 (__v32qi) __Y, 2,
-						 (__mmask8) __M);
+  return (__mmask32) __builtin_ia32_cmpb256_mask ((__v32qi) __X,
+						  (__v32qi) __Y, 2,
+						  (__mmask32) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpneq_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpneq_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 4,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
+						  (__v16hi) __Y, 4,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmplt_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmplt_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 1,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
						  (__v16hi) __Y, 1,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmpge_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmpge_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 5,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
						  (__v16hi) __Y, 5,
+						  (__mmask16) __M);
 }
 
-extern __inline __mmask8
+extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cmple_epi16_mask (__mmask8 __M, __m256i __X, __m256i __Y)
+_mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y)
 {
-  return (__mmask8) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
-						 (__v16hi) __Y, 2,
-						 (__mmask8) __M);
+  return (__mmask16) __builtin_ia32_cmpw256_mask ((__v16hi) __X,
						  (__v16hi) __Y, 2,
+						  (__mmask16) __M);
 }
 
 #ifdef __DISABLE_AVX512VLBW__
...
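
A 256-bit vector holds 32 bytes or 16 words, so the compares above produce 32 and 16 result bits respectively; with the old __mmask8 prototypes, everything past lane 7 was lost on both the mask input and the return value. A hedged sketch of the fixed interface in use (assumes -mavx512bw -mavx512vl; the function name is illustrative, not from the commit):

    #include <immintrin.h>

    /* One result bit per byte lane that differs; with the old __mmask8
       return type only lanes 0..7 could ever be reported.  */
    __mmask32
    bytes_not_equal (__m256i x, __m256i y)
    {
      return _mm256_mask_cmpneq_epu8_mask ((__mmask32) -1, x, y);
    }
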
@@ -466,7 +466,7 @@ _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -476,7 +476,7 @@ _mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -487,7 +487,7 @@ _mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -497,7 +497,7 @@ _mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -551,7 +551,7 @@ _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -561,7 +561,7 @@ _mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -572,7 +572,7 @@ _mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -582,7 +582,7 @@ _mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -1320,7 +1320,7 @@ _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
 
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
 {
   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
						    (__v8sf)
@@ -1339,7 +1339,7 @@ _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
						    (__v4sf)
...
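
Here the fix goes the other way: the 128-bit and 256-bit single-precision operations merge only 4 or 8 lanes, so __mmask8 is the correct width and the old __mmask16 prototypes promised mask bits the instructions cannot use. A small sketch against the corrected prototype (assumes -mavx512vl; the function name is illustrative, not from the commit):

    #include <immintrin.h>

    /* Only mask bits 0..7 select lanes of a 256-bit float add;
       __mmask8 now matches that exactly.  */
    __m256
    masked_add (__m256 w, __mmask8 u, __m256 a, __m256 b)
    {
      return _mm256_mask_add_ps (w, u, a, b);
    }
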
@@ -6,17 +6,15 @@
 #include "avx512f-helper.h"
 #include <math.h>
 
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel) \
   dst_ref = 0; \
-  for (i = 0; i < 64; i++) \
-    { \
-      dst_ref = ((rel) << i) | dst_ref; \
-    } \
+  for (i = 0; i < SIZE; i++) \
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
   source1.x = _mm512_loadu_si512 (s1); \
   source2.x = _mm512_loadu_si512 (s2); \
   dst1 = _mm512_cmp_epi8_mask (source1.x, source2.x, imm);\
@@ -29,10 +27,8 @@
 #undef CMP
 #define CMP(imm, rel) \
   dst_ref = 0; \
-  for (i = 0; i < 32; i++) \
-    { \
-      dst_ref = ((rel) << i) | dst_ref; \
-    } \
+  for (i = 0; i < SIZE; i++) \
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
   source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
   source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
   dst1 = _mm256_cmp_epi8_mask (source1.x, source2.x, imm);\
@@ -45,10 +41,8 @@
 #undef CMP
 #define CMP(imm, rel) \
   dst_ref = 0; \
-  for (i = 0; i < 16; i++) \
-    { \
-      dst_ref = ((rel) << i) | dst_ref; \
-    } \
+  for (i = 0; i < SIZE; i++) \
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
   source1.x = _mm_loadu_si128 ((__m128i*)s1); \
   source2.x = _mm_loadu_si128 ((__m128i*)s2); \
   dst1 = _mm_cmp_epi8_mask (source1.x, source2.x, imm);\
...
@@ -6,17 +6,15 @@
 #include "avx512f-helper.h"
 #include <math.h>
 
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel) \
   dst_ref = 0; \
-  for (i = 0; i < 64; i++) \
-    { \
-      dst_ref = ((rel) << i) | dst_ref; \
-    } \
+  for (i = 0; i < SIZE; i++) \
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
   source1.x = _mm512_loadu_si512 (s1); \
   source2.x = _mm512_loadu_si512 (s2); \
   dst1 = _mm512_cmp_epu8_mask (source1.x, source2.x, imm);\
@@ -29,10 +27,8 @@
 #undef CMP
 #define CMP(imm, rel) \
   dst_ref = 0; \
-  for (i = 0; i < 32; i++) \
-    { \
-      dst_ref = ((rel) << i) | dst_ref; \
-    } \
+  for (i = 0; i < SIZE; i++) \
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
   source1.x = _mm256_loadu_si256 ((__m256i*)s1); \
   source2.x = _mm256_loadu_si256 ((__m256i*)s2); \
   dst1 = _mm256_cmp_epu8_mask (source1.x, source2.x, imm);\
@@ -45,10 +41,8 @@
 #undef CMP
 #define CMP(imm, rel) \
   dst_ref = 0; \
-  for (i = 0; i < 16; i++) \
-    { \
-      dst_ref = ((rel) << i) | dst_ref; \
-    } \
+  for (i = 0; i < SIZE; i++) \
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref; \
   source1.x = _mm_loadu_si128 ((__m128i*)s1); \
   source2.x = _mm_loadu_si128 ((__m128i*)s2); \
   dst1 = _mm_cmp_epu8_mask (source1.x, source2.x, imm);\
...
/* { dg-do run } */
/* { dg-options "-O0 -mavx512f" } */
/* { dg-require-effective-target avx512f } */

#define AVX512F

#include "avx512f-helper.h"

#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#include "string.h"

void static
CALC (UNION_TYPE (AVX512F_LEN,) s1, union128 s2, float *res_ref, int imm)
{
  memcpy (res_ref, s1.a, SIZE * sizeof (float));
  memcpy (res_ref + imm * 4, s2.a, 16);
}

void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
  union128 s2;
  float res_ref[SIZE];
  int j;
  MASK_TYPE mask = (MASK_TYPE) 0xa55a;

  for (j = 0; j < SIZE; j++)
    {
      s1.a[j] = j * j / 10.2;
      res1.a[j] = DEFAULT_VALUE;
      res2.a[j] = DEFAULT_VALUE;
      res3.a[j] = DEFAULT_VALUE;
    }

  for (j = 0; j < 4; j++)
    s2.a[j] = j * j * j / 2.03;

  res1.x = INTRINSIC (_insertf32x4) (s1.x, s2.x, 1);
  res2.x = INTRINSIC (_mask_insertf32x4) (res2.x, mask, s1.x, s2.x, 1);
  res3.x = INTRINSIC (_maskz_insertf32x4) (mask, s1.x, s2.x, 1);

  CALC (s1, s2, res_ref, 1);

  if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
    abort ();

  MASK_MERGE () (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
    abort ();

  MASK_ZERO () (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O0 -mavx512f" } */
/* { dg-require-effective-target avx512f } */

#define AVX512F

#include "avx512f-helper.h"

#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#include "string.h"

void static
CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, union128i_d s2, int *res_ref, int imm)
{
  memcpy (res_ref, s1.a, SIZE * sizeof (int));
  memcpy (res_ref + imm * 4, s2.a, 16);
}

void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
  union128i_d s2;
  int res_ref[SIZE];
  int j;
  MASK_TYPE mask = (MASK_TYPE) 0xa55a;

  for (j = 0; j < SIZE; j++)
    {
      s1.a[j] = j * j;
      res1.a[j] = DEFAULT_VALUE;
      res2.a[j] = DEFAULT_VALUE;
      res3.a[j] = DEFAULT_VALUE;
    }

  for (j = 0; j < 4; j++)
    s2.a[j] = j * j * j;

  res1.x = INTRINSIC (_inserti32x4) (s1.x, s2.x, 1);
  res2.x = INTRINSIC (_mask_inserti32x4) (res2.x, mask, s1.x, s2.x, 1);
  res3.x = INTRINSIC (_maskz_inserti32x4) (mask, s1.x, s2.x, 1);

  CALC (s1, s2, res_ref, 1);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeb-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeub-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeub-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeuw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeuw-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgew-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgew-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleb-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleub-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleb-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleuw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmplew-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmplew-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmplew-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltb-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltub-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltub-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltuw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltuw-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltw-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqb-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequb-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequw-2.c"
/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqw-2.c"