Commit 383321ec by Uros Bizjak Committed by Uros Bizjak

re PR target/72805 (AVX512: invalid code generation involving masks)

	PR target/72805
	* config/i386/avx512fintrin.h (_mm512_cmp_epi32_mask) [!__OPTIMIZE__]:
	Cast builtin function result to __mmask16 instead of __mmask8.
	(_mm512_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
	(_mm512_mask_cmp_epi32_mask) [!__OPTIMIZE__]: Ditto.
	(_mm512_mask_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.

From-SVN: r239152
parent d4f7837c
2016-08-04  Uros Bizjak  <ubizjak@gmail.com>

	PR target/72805
	* config/i386/avx512fintrin.h (_mm512_cmp_epi32_mask) [!__OPTIMIZE__]:
	Cast builtin function result to __mmask16 instead of __mmask8.
	(_mm512_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
	(_mm512_mask_cmp_epi32_mask) [!__OPTIMIZE__]: Ditto.
	(_mm512_mask_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.

2016-08-04 David Malcolm <dmalcolm@redhat.com>
* selftest.h (ASSERT_TRUE): Reimplement in terms of...
......
...@@ -9130,9 +9130,9 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, ...@@ -9130,9 +9130,9 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
(__mmask8)-1)) (__mmask8)-1))
#define _mm512_cmp_epi32_mask(X, Y, P) \ #define _mm512_cmp_epi32_mask(X, Y, P) \
((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
(__v16si)(__m512i)(Y), (int)(P),\ (__v16si)(__m512i)(Y), (int)(P), \
(__mmask16)-1)) (__mmask16)-1))
#define _mm512_cmp_epu64_mask(X, Y, P) \ #define _mm512_cmp_epu64_mask(X, Y, P) \
((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
...@@ -9140,66 +9140,66 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, ...@@ -9140,66 +9140,66 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
(__mmask8)-1)) (__mmask8)-1))
#define _mm512_cmp_epu32_mask(X, Y, P) \ #define _mm512_cmp_epu32_mask(X, Y, P) \
((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
(__v16si)(__m512i)(Y), (int)(P),\ (__v16si)(__m512i)(Y), (int)(P), \
(__mmask16)-1)) (__mmask16)-1))
#define _mm512_cmp_round_pd_mask(X, Y, P, R) \ #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(P),\ (__v8df)(__m512d)(Y), (int)(P),\
(__mmask8)-1, R)) (__mmask8)-1, R))
#define _mm512_cmp_round_ps_mask(X, Y, P, R) \ #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(P),\ (__v16sf)(__m512)(Y), (int)(P),\
(__mmask16)-1, R)) (__mmask16)-1, R))
#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(P),\ (__v8di)(__m512i)(Y), (int)(P),\
(__mmask8)M)) (__mmask8)M))
#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
(__v16si)(__m512i)(Y), (int)(P),\ (__v16si)(__m512i)(Y), (int)(P), \
(__mmask16)M)) (__mmask16)M))
#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
(__v8di)(__m512i)(Y), (int)(P),\ (__v8di)(__m512i)(Y), (int)(P),\
(__mmask8)M)) (__mmask8)M))
#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
(__v16si)(__m512i)(Y), (int)(P),\ (__v16si)(__m512i)(Y), (int)(P), \
(__mmask16)M)) (__mmask16)M))
#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
(__v8df)(__m512d)(Y), (int)(P),\ (__v8df)(__m512d)(Y), (int)(P),\
(__mmask8)M, R)) (__mmask8)M, R))
#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
(__v16sf)(__m512)(Y), (int)(P),\ (__v16sf)(__m512)(Y), (int)(P),\
(__mmask16)M, R)) (__mmask16)M, R))
#define _mm_cmp_round_sd_mask(X, Y, P, R) \ #define _mm_cmp_round_sd_mask(X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (int)(P),\ (__v2df)(__m128d)(Y), (int)(P),\
(__mmask8)-1, R)) (__mmask8)-1, R))
#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), (int)(P),\ (__v2df)(__m128d)(Y), (int)(P),\
(M), R)) (M), R))
#define _mm_cmp_round_ss_mask(X, Y, P, R) \ #define _mm_cmp_round_ss_mask(X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (int)(P), \ (__v4sf)(__m128)(Y), (int)(P), \
(__mmask8)-1, R)) (__mmask8)-1, R))
#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), (int)(P), \ (__v4sf)(__m128)(Y), (int)(P), \
(M), R)) (M), R))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment