Commit 18379eea by Jakub Jelinek

avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask): Use __mmask64 type instead of __mmask8 for __M argument.

2018-07-11  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/avx512bitalgintrin.h (_mm512_mask_bitshuffle_epi64_mask):
	Use __mmask64 type instead of __mmask8 for __M argument.
	* config/i386/avx512fintrin.h (_mm512_mask_xor_epi64,
	_mm512_maskz_xor_epi64): Use __mmask8 type instead of __mmask16 for
	__U argument.
	(_mm512_mask_cmpneq_epi64_mask): Use __mmask8 type instead of
	__mmask16 for __M argument.
	(_mm512_maskz_insertf32x4, _mm512_maskz_inserti32x4,
	_mm512_mask_insertf32x4, _mm512_mask_inserti32x4): Cast last argument
	to __mmask16 instead of __mmask8.
	* config/i386/avx512vlintrin.h (_mm_mask_add_ps, _mm_maskz_add_ps,
	_mm256_mask_add_ps, _mm256_maskz_add_ps, _mm_mask_sub_ps,
	_mm_maskz_sub_ps, _mm256_mask_sub_ps, _mm256_maskz_sub_ps,
	_mm256_maskz_cvtepi32_ps, _mm_maskz_cvtepi32_ps): Use __mmask8 type
	instead of __mmask16 for __U argument.
	* config/i386/avx512vlbwintrin.h (_mm_mask_cmp_epi8_mask): Use
	__mmask16 instead of __mmask8 for __U argument.
	(_mm256_mask_cmp_epi8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epi8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epu8_mask): Use __mmask16 instead of __mmask8 for __U
	argument.
	(_mm256_mask_cmp_epu8_mask): Use __mmask32 instead of __mmask16 for
	__U argument.
	(_mm256_cmp_epu8_mask): Use __mmask32 return type instead of
	__mmask16.
	(_mm_mask_cmp_epi16_mask): Cast last argument to __mmask8 instead
	of __mmask16.
	(_mm256_mask_cvtepi8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepi8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cvtepu8_epi16): Use __mmask16 instead of __mmask32 for
	__U argument.
	(_mm_mask_cvtepu8_epi16): Use __mmask8 instead of __mmask32 for
	__U argument.
	(_mm256_mask_cmpneq_epu8_mask, _mm256_mask_cmplt_epu8_mask,
	_mm256_mask_cmpge_epu8_mask, _mm256_mask_cmple_epu8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epu16_mask, _mm256_mask_cmplt_epu16_mask,
	_mm256_mask_cmpge_epu16_mask, _mm256_mask_cmple_epu16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	(_mm256_mask_cmpneq_epi8_mask, _mm256_mask_cmplt_epi8_mask,
	_mm256_mask_cmpge_epi8_mask, _mm256_mask_cmple_epi8_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask32.
	(_mm256_mask_cmpneq_epi16_mask, _mm256_mask_cmplt_epi16_mask,
	_mm256_mask_cmpge_epi16_mask, _mm256_mask_cmple_epi16_mask): Change
	return type as well as __M argument type and all casts from __mmask8
	to __mmask16.
	* config/i386/avx512vbmi2vlintrin.h (_mm_mask_shrdi_epi32,
	_mm_mask_shldi_epi32): Cast last argument to __mmask8 instead of
	__mmask16.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (CMP): Use SIZE macro instead
	of hardcoding size.  Cast (rel) to MASK_TYPE.
	* gcc.target/i386/avx512bw-vpcmpub-2.c (CMP): Likewise.
	* gcc.target/i386/avx512f-vinserti32x4-3.c: New test.
	* gcc.target/i386/avx512f-vinsertf32x4-3.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleub-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqb-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpnequw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgeuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmplew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpgew-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpltuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpleuw-2.c: New test.
	* gcc.target/i386/avx512vl-vpcmpneqw-2.c: New test.

2018-07-11  Grazvydas Ignotas  <notasas@gmail.com>

	* config/i386/avx512bwintrin.h (_mm512_mask_cmp_epi8_mask,
	_mm512_mask_cmp_epu8_mask): Use __mmask64 type instead of __mmask32
	for __U argument.

	* gcc.target/i386/avx512bw-vpcmpb-2.c (SIZE): Define to
	(AVX512F_LEN / 8) instead of (AVX512F_LEN / 16).
	* gcc.target/i386/avx512bw-vpcmpub-2.c (SIZE): Likewise.

From-SVN: r262566
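The common thread in the patch: each intrinsic's mask parameter must be
exactly as wide as the vector has lanes, or caller-supplied mask bits are
silently dropped by the implicit integer conversion.  A minimal sketch of
the failure mode (illustrative only; the function name is not from the
patch, compile with -mavx512bw):

#include <immintrin.h>

/* Select only the upper 32 of the 64 byte lanes.  */
__mmask64
upper_bytes_equal (__m512i a, __m512i b)
{
  __mmask64 m = 0xffffffff00000000ULL;
  /* With the old __mmask32 __U parameter, m was truncated to 32 bits,
     i.e. to 0, so every result bit was masked off.  With __mmask64,
     all 64 lanes participate as intended.  */
  return _mm512_mask_cmp_epi8_mask (m, a, b, _MM_CMPINT_EQ);
}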
gcc/config/i386/avx512bitalgintrin.h

@@ -107,7 +107,7 @@ _mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
 extern __inline __mmask64
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+_mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B)
 {
   return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v64qi) __A,
							   (__v64qi) __B,
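For context (my gloss, not from the patch): VPSHUFBITQMB extracts one bit
per byte lane, so a 512-bit operand produces 64 mask bits, and the merge
mask must be __mmask64 as well.  Under the old __mmask8 prototype a call
such as the following (hypothetical, requires -mavx512bitalg) kept only
the low 8 bits of the mask:

#include <immintrin.h>

__mmask64
bitshuffle_even_lanes (__m512i a, __m512i idx)
{
  /* Even byte lanes only; bits 8..63 of this constant were lost
     when the parameter was declared __mmask8.  */
  return _mm512_mask_bitshuffle_epi64_mask (0x5555555555555555ULL, a, idx);
}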
gcc/config/i386/avx512bwintrin.h

@@ -3043,7 +3043,7 @@ _mm512_cmp_epi16_mask (__m512i __X, __m512i __Y, const int __P)
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epi8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epi8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
			   const int __P)
 {
   return (__mmask64) __builtin_ia32_cmpb512_mask ((__v64qi) __X,
@@ -3081,7 +3081,7 @@ _mm512_cmp_epu16_mask (__m512i __X, __m512i __Y, const int __P)
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_epu8_mask (__mmask32 __U, __m512i __X, __m512i __Y,
+_mm512_mask_cmp_epu8_mask (__mmask64 __U, __m512i __X, __m512i __Y,
			   const int __P)
 {
   return (__mmask64) __builtin_ia32_ucmpb512_mask ((__v64qi) __X,
gcc/config/i386/avx512fintrin.h

@@ -7377,7 +7377,7 @@ _mm512_xor_epi64 (__m512i __A, __m512i __B)
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
+_mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
						  (__v8di) __B,
@@ -7387,7 +7387,7 @@ _mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B)
+_mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
						  (__v8di) __B,
@@ -9615,7 +9615,7 @@ _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
 extern __inline __mmask8
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y)
+_mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
						  (__v8di) __Y, 4,
@@ -10877,22 +10877,22 @@ _mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
 #define _mm512_maskz_insertf32x4(A, X, Y, C)				\
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),	\
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),	\
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 #define _mm512_maskz_inserti32x4(A, X, Y, C)				\
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),	\
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (),\
-    (__mmask8)(A)))
+    (__mmask16)(A)))
 #define _mm512_mask_insertf32x4(A, B, X, Y, C)				\
   ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),	\
     (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),		\
-    (__mmask8)(B)))
+    (__mmask16)(B)))
 #define _mm512_mask_inserti32x4(A, B, X, Y, C)				\
   ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),	\
     (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),		\
-    (__mmask8)(B)))
+    (__mmask16)(B)))
 #endif
 extern __inline __m512i
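The insert macros above are the -O0 expansion of the 512-bit insert
intrinsics, which merge 16 float or int lanes; casting the mask operand
to __mmask8 therefore discarded mask bits 8-15.  A sketch of a call the
old cast broke (illustrative only, compile with -mavx512f):

#include <immintrin.h>

__m512
merge_insert (__m512 dst, __m512 a, __m128 b)
{
  __mmask16 m = 0xa55a;		/* bits 8-15 are significant */
  /* The old (__mmask8) cast merged with 0x5a instead of 0xa55a.  */
  return _mm512_mask_insertf32x4 (dst, m, a, b, 1);
}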
gcc/config/i386/avx512vbmi2vlintrin.h

@@ -541,7 +541,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
     (__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shrdi_epi32(A, B, C, D, E)				\
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(C),	\
-    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shrdi_epi32(A, B, C, D)				\
   ((__m128i) __builtin_ia32_vpshrd_v4si_mask ((__v4si)(__m128i)(B),	\
     (__v4si)(__m128i)(C),(int)(D),					\
@@ -601,7 +601,7 @@ _mm_shldi_epi64 (__m128i __A, __m128i __B, int __C)
     (__v4si)(__m128i)(B),(int)(C))
 #define _mm_mask_shldi_epi32(A, B, C, D, E)				\
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(C),	\
-    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask16)(B))
+    (__v4si)(__m128i)(D), (int)(E), (__v4si)(__m128i)(A),(__mmask8)(B))
 #define _mm_maskz_shldi_epi32(A, B, C, D)				\
   ((__m128i) __builtin_ia32_vpshld_v4si_mask ((__v4si)(__m128i)(B),	\
     (__v4si)(__m128i)(C),(int)(D),					\
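Here the fix goes the other way (again my gloss): the 128-bit double-shift
works on 4 dword lanes, so its merge mask is a __mmask8 and the old
(__mmask16) cast passed the wrong type to the builtin.  Hypothetical
usage, requires -mavx512vbmi2 -mavx512vl:

#include <immintrin.h>

__m128i
shrd_low_lanes (__m128i dst, __m128i a, __m128i b)
{
  /* Concatenate b:a per lane, shift right by 7, keep lanes 0 and 2.  */
  return _mm_mask_shrdi_epi32 (dst, (__mmask8) 0x5, a, b, 7);
}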
gcc/config/i386/avx512vlintrin.h

@@ -466,7 +466,7 @@ _mm256_maskz_add_pd (__mmask8 __U, __m256d __A, __m256d __B)
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_add_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -476,7 +476,7 @@ _mm_mask_add_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_add_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_addps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -487,7 +487,7 @@ _mm_maskz_add_ps (__mmask16 __U, __m128 __A, __m128 __B)
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_add_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -497,7 +497,7 @@ _mm256_mask_add_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_add_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_add_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_addps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -551,7 +551,7 @@ _mm256_maskz_sub_pd (__mmask8 __U, __m256d __A, __m256d __B)
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
+_mm_mask_sub_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -561,7 +561,7 @@ _mm_mask_sub_ps (__m128 __W, __mmask16 __U, __m128 __A, __m128 __B)
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
+_mm_maskz_sub_ps (__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_subps128_mask ((__v4sf) __A,
						 (__v4sf) __B,
@@ -572,7 +572,7 @@ _mm_maskz_sub_ps (__mmask16 __U, __m128 __A, __m128 __B)
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
+_mm256_mask_sub_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -582,7 +582,7 @@ _mm256_mask_sub_ps (__m256 __W, __mmask16 __U, __m256 __A, __m256 __B)
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_sub_ps (__mmask16 __U, __m256 __A, __m256 __B)
+_mm256_maskz_sub_ps (__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256) __builtin_ia32_subps256_mask ((__v8sf) __A,
						 (__v8sf) __B,
@@ -1320,7 +1320,7 @@ _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A)
 extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A)
+_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A)
 {
   return (__m256) __builtin_ia32_cvtdq2ps256_mask ((__v8si) __A,
						    (__v8sf)
@@ -1339,7 +1339,7 @@ _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A)
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A)
+_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_cvtdq2ps128_mask ((__v4si) __A,
						    (__v4sf)
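Likewise for the VL intrinsics above (my summary): the 128- and 256-bit
float operations have at most 8 lanes, so __mmask8 is the correct
parameter type, and the old __mmask16 parameter only suggested a width
the instruction does not have.  Hypothetical usage, requires -mavx512vl:

#include <immintrin.h>

__m128
add_low_two (__m128 a, __m128 b)
{
  /* Zero-masking add of lanes 0 and 1 only.  */
  return _mm_maskz_add_ps ((__mmask8) 0x3, a, b);
}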
gcc/testsuite/gcc.target/i386/avx512bw-vpcmpb-2.c

@@ -6,17 +6,15 @@
 #include "avx512f-helper.h"
 #include <math.h>
 
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel)						\
   dst_ref = 0;							\
-  for (i = 0; i < 64; i++)					\
-    {								\
-      dst_ref = ((rel) << i) | dst_ref;				\
-    }								\
+  for (i = 0; i < SIZE; i++)					\
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;		\
   source1.x = _mm512_loadu_si512 (s1);				\
   source2.x = _mm512_loadu_si512 (s2);				\
   dst1 = _mm512_cmp_epi8_mask (source1.x, source2.x, imm);	\
@@ -29,10 +27,8 @@
 #undef CMP
 #define CMP(imm, rel)						\
   dst_ref = 0;							\
-  for (i = 0; i < 32; i++)					\
-    {								\
-      dst_ref = ((rel) << i) | dst_ref;				\
-    }								\
+  for (i = 0; i < SIZE; i++)					\
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;		\
   source1.x = _mm256_loadu_si256 ((__m256i*)s1);		\
   source2.x = _mm256_loadu_si256 ((__m256i*)s2);		\
   dst1 = _mm256_cmp_epi8_mask (source1.x, source2.x, imm);	\
@@ -45,10 +41,8 @@
 #undef CMP
 #define CMP(imm, rel)						\
   dst_ref = 0;							\
-  for (i = 0; i < 16; i++)					\
-    {								\
-      dst_ref = ((rel) << i) | dst_ref;				\
-    }								\
+  for (i = 0; i < SIZE; i++)					\
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;		\
   source1.x = _mm_loadu_si128 ((__m128i*)s1);			\
   source2.x = _mm_loadu_si128 ((__m128i*)s2);			\
   dst1 = _mm_cmp_epi8_mask (source1.x, source2.x, imm);		\
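The (MASK_TYPE) cast in the rewritten loops is not cosmetic: (rel) is an
int, and shifting a 32-bit int by 32 or more positions is undefined
behaviour in C, so the old loop could not build a 64-bit reference mask
once SIZE became 64.  The pattern, extracted into plain C purely for
illustration:

/* Build an n-bit reference mask from a 0/1 flag.  */
unsigned long long
ref_mask (int rel, int n)
{
  unsigned long long dst_ref = 0;
  for (int i = 0; i < n; i++)
    dst_ref |= (unsigned long long) rel << i;	/* widen before shifting */
  return dst_ref;
}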
gcc/testsuite/gcc.target/i386/avx512bw-vpcmpub-2.c

@@ -6,17 +6,15 @@
 #include "avx512f-helper.h"
 #include <math.h>
 
-#define SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 8)
 #include "avx512f-mask-type.h"
 
 #if AVX512F_LEN == 512
 #undef CMP
 #define CMP(imm, rel)						\
   dst_ref = 0;							\
-  for (i = 0; i < 64; i++)					\
-    {								\
-      dst_ref = ((rel) << i) | dst_ref;				\
-    }								\
+  for (i = 0; i < SIZE; i++)					\
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;		\
   source1.x = _mm512_loadu_si512 (s1);				\
   source2.x = _mm512_loadu_si512 (s2);				\
   dst1 = _mm512_cmp_epu8_mask (source1.x, source2.x, imm);	\
@@ -29,10 +27,8 @@
 #undef CMP
 #define CMP(imm, rel)						\
   dst_ref = 0;							\
-  for (i = 0; i < 32; i++)					\
-    {								\
-      dst_ref = ((rel) << i) | dst_ref;				\
-    }								\
+  for (i = 0; i < SIZE; i++)					\
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;		\
   source1.x = _mm256_loadu_si256 ((__m256i*)s1);		\
   source2.x = _mm256_loadu_si256 ((__m256i*)s2);		\
   dst1 = _mm256_cmp_epu8_mask (source1.x, source2.x, imm);	\
@@ -45,10 +41,8 @@
 #undef CMP
 #define CMP(imm, rel)						\
   dst_ref = 0;							\
-  for (i = 0; i < 16; i++)					\
-    {								\
-      dst_ref = ((rel) << i) | dst_ref;				\
-    }								\
+  for (i = 0; i < SIZE; i++)					\
+    dst_ref = ((MASK_TYPE) (rel) << i) | dst_ref;		\
   source1.x = _mm_loadu_si128 ((__m128i*)s1);			\
   source2.x = _mm_loadu_si128 ((__m128i*)s2);			\
   dst1 = _mm_cmp_epu8_mask (source1.x, source2.x, imm);		\
gcc/testsuite/gcc.target/i386/avx512f-vinsertf32x4-3.c

/* { dg-do run } */
/* { dg-options "-O0 -mavx512f" } */
/* { dg-require-effective-target avx512f } */

#define AVX512F

#include "avx512f-helper.h"

#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#include "string.h"

void static
CALC (UNION_TYPE (AVX512F_LEN,) s1, union128 s2, float *res_ref, int imm)
{
  memcpy (res_ref, s1.a, SIZE * sizeof (float));
  memcpy (res_ref + imm * 4, s2.a, 16);
}

void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN,) s1, res1, res2, res3;
  union128 s2;
  float res_ref[SIZE];
  int j;
  MASK_TYPE mask = (MASK_TYPE) 0xa55a;

  for (j = 0; j < SIZE; j++)
    {
      s1.a[j] = j * j / 10.2;
      res1.a[j] = DEFAULT_VALUE;
      res2.a[j] = DEFAULT_VALUE;
      res3.a[j] = DEFAULT_VALUE;
    }

  for (j = 0; j < 4; j++)
    s2.a[j] = j * j * j / 2.03;

  res1.x = INTRINSIC (_insertf32x4) (s1.x, s2.x, 1);
  res2.x = INTRINSIC (_mask_insertf32x4) (res2.x, mask, s1.x, s2.x, 1);
  res3.x = INTRINSIC (_maskz_insertf32x4) (mask, s1.x, s2.x, 1);
  CALC (s1, s2, res_ref, 1);

  if (UNION_CHECK (AVX512F_LEN,) (res1, res_ref))
    abort ();

  MASK_MERGE () (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN,) (res2, res_ref))
    abort ();

  MASK_ZERO () (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN,) (res3, res_ref))
    abort ();
}
gcc/testsuite/gcc.target/i386/avx512f-vinserti32x4-3.c

/* { dg-do run } */
/* { dg-options "-O0 -mavx512f" } */
/* { dg-require-effective-target avx512f } */

#define AVX512F

#include "avx512f-helper.h"

#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#include "string.h"

void static
CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, union128i_d s2, int *res_ref, int imm)
{
  memcpy (res_ref, s1.a, SIZE * sizeof (int));
  memcpy (res_ref + imm * 4, s2.a, 16);
}

void
TEST (void)
{
  UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3;
  union128i_d s2;
  int res_ref[SIZE];
  int j;
  MASK_TYPE mask = (MASK_TYPE) 0xa55a;

  for (j = 0; j < SIZE; j++)
    {
      s1.a[j] = j * j;
      res1.a[j] = DEFAULT_VALUE;
      res2.a[j] = DEFAULT_VALUE;
      res3.a[j] = DEFAULT_VALUE;
    }

  for (j = 0; j < 4; j++)
    s2.a[j] = j * j * j;

  res1.x = INTRINSIC (_inserti32x4) (s1.x, s2.x, 1);
  res2.x = INTRINSIC (_mask_inserti32x4) (res2.x, mask, s1.x, s2.x, 1);
  res3.x = INTRINSIC (_maskz_inserti32x4) (mask, s1.x, s2.x, 1);
  CALC (s1, s2, res_ref, 1);

  if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
    abort ();

  MASK_MERGE (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
    abort ();

  MASK_ZERO (i_d) (res_ref, mask, SIZE);
  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
    abort ();
}
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeb-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeb-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeub-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeub-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeub-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgeuw-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeuw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgeuw-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpgew-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgew-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpgew-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleb-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleb-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleub-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleub-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleb-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpleuw-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpleuw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmplew-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmplew-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmplew-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmplew-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltb-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltb-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltub-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltub-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltub-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltuw-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltuw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltuw-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpltw-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpltw-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqb-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqb-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequb-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequb-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequb-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpnequw-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpnequw-2.c"
gcc/testsuite/gcc.target/i386/avx512vl-vpcmpneqw-2.c

/* { dg-do run } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512bw } */
#define AVX512VL
#define AVX512F_LEN 256
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqw-2.c"
#undef AVX512F_LEN
#undef AVX512F_LEN_HALF
#define AVX512F_LEN 128
#define AVX512F_LEN_HALF 128
#include "avx512bw-vpcmpneqw-2.c"