Commit fc9cf6da by H.J. Lu

Remove UNSPEC_LOADU and UNSPEC_STOREU

Since the *mov<mode>_internal and <avx512>_(load|store)<mode>_mask
patterns can handle unaligned loads and stores, we can remove
UNSPEC_LOADU and UNSPEC_STOREU.  The unaligned load/store builtin
functions now use prototypes with pointer-to-scalar arguments, so the
memory operand passed to *mov<mode>_internal carries only the scalar
type's alignment and is therefore treated as unaligned.
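
As an illustration (a stand-alone sketch, not code from this patch):
GCC derives the alignment of a memory access from the pointee type, so
a builtin that takes `const short *' only promises 16-bit alignment and
must be expanded as an unaligned vector move.  The same effect can be
reproduced with a reduced-alignment vector typedef:

    /* Hypothetical example; __v32hi_u and loadu_epi16_sketch are not
       part of the patch.  */
    typedef short __v32hi_u __attribute__ ((__vector_size__ (64),
                                            __may_alias__,
                                            __aligned__ (2)));

    __v32hi_u
    loadu_epi16_sketch (const short *p)
    {
      /* Only 2-byte alignment may be assumed here, so this load must
         be emitted as an unaligned move (vmovdqu), never as an
         aligned vmovdqa.  */
      return *(const __v32hi_u *) p;
    }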

gcc/

	PR target/69201
	* config/i386/avx512bwintrin.h (_mm512_mask_loadu_epi16): Pass
	const short * to __builtin_ia32_loaddquhi512_mask.
	(_mm512_maskz_loadu_epi16): Likewise.
	(_mm512_mask_storeu_epi16): Pass short * to
	__builtin_ia32_storedquhi512_mask.
	(_mm512_mask_loadu_epi8): Pass const char * to
	__builtin_ia32_loaddquqi512_mask.
	(_mm512_maskz_loadu_epi8): Likewise.
	(_mm512_mask_storeu_epi8): Pass char * to
	__builtin_ia32_storedquqi512_mask.
	* config/i386/avx512fintrin.h (_mm512_loadu_pd): Pass
	const double * to __builtin_ia32_loadupd512_mask.
	(_mm512_mask_loadu_pd): Likewise.
	(_mm512_maskz_loadu_pd): Likewise.
	(_mm512_storeu_pd): Pass double * to
	__builtin_ia32_storeupd512_mask.
	(_mm512_mask_storeu_pd): Likewise.
	(_mm512_loadu_ps): Pass const float * to
	__builtin_ia32_loadups512_mask.
	(_mm512_mask_loadu_ps): Likewise.
	(_mm512_maskz_loadu_ps): Likewise.
	(_mm512_storeu_ps): Pass float * to
	__builtin_ia32_storeups512_mask.
	(_mm512_mask_storeu_ps): Likewise.
	(_mm512_mask_loadu_epi64): Pass const long long * to
	__builtin_ia32_loaddqudi512_mask.
	(_mm512_maskz_loadu_epi64): Likewise.
	(_mm512_mask_storeu_epi64): Pass long long *
	to __builtin_ia32_storedqudi512_mask.
	(_mm512_loadu_si512): Pass const int * to
	__builtin_ia32_loaddqusi512_mask.
	(_mm512_mask_loadu_epi32): Likewise.
	(_mm512_maskz_loadu_epi32): Likewise.
	(_mm512_storeu_si512): Pass int * to
	__builtin_ia32_storedqusi512_mask.
	(_mm512_mask_storeu_epi32): Likewise.
	* config/i386/avx512vlbwintrin.h (_mm256_mask_storeu_epi8): Pass
	char * to __builtin_ia32_storedquqi256_mask.
	(_mm_mask_storeu_epi8): Likewise.
	(_mm256_mask_loadu_epi16): Pass const short * to
	__builtin_ia32_loaddquhi256_mask.
	(_mm256_maskz_loadu_epi16): Likewise.
	(_mm_mask_loadu_epi16): Pass const short * to
	__builtin_ia32_loaddquhi128_mask.
	(_mm_maskz_loadu_epi16): Likewise.
	(_mm256_mask_loadu_epi8): Pass const char * to
	__builtin_ia32_loaddquqi256_mask.
	(_mm256_maskz_loadu_epi8): Likewise.
	(_mm_mask_loadu_epi8): Pass const char * to
	__builtin_ia32_loaddquqi128_mask.
	(_mm_maskz_loadu_epi8): Likewise.
	(_mm256_mask_storeu_epi16): Pass short * to
	__builtin_ia32_storedquhi256_mask.
	(_mm_mask_storeu_epi16): Pass short * to
	__builtin_ia32_storedquhi128_mask.
	* config/i386/avx512vlintrin.h (_mm256_mask_loadu_pd): Pass
	const double * to __builtin_ia32_loadupd256_mask.
	(_mm256_maskz_loadu_pd): Likewise.
	(_mm_mask_loadu_pd): Pass const double * to
	__builtin_ia32_loadupd128_mask.
	(_mm_maskz_loadu_pd): Likewise.
	(_mm256_mask_storeu_pd): Pass double * to
	__builtin_ia32_storeupd256_mask.
	(_mm_mask_storeu_pd): Pass double * to
	__builtin_ia32_storeupd128_mask.
	(_mm256_mask_loadu_ps): Pass const float * to
	__builtin_ia32_loadups256_mask.
	(_mm256_maskz_loadu_ps): Likewise.
	(_mm_mask_loadu_ps): Pass const float * to
	__builtin_ia32_loadups128_mask.
	(_mm_maskz_loadu_ps): Likewise.
	(_mm256_mask_storeu_ps): Pass float * to
	__builtin_ia32_storeups256_mask.
	(_mm_mask_storeu_ps): Pass float * to
	__builtin_ia32_storeups128_mask.
	(_mm256_mask_loadu_epi64): Pass const long long * to
	__builtin_ia32_loaddqudi256_mask.
	(_mm256_maskz_loadu_epi64): Likewise.
	(_mm_mask_loadu_epi64): Pass const long long * to
	__builtin_ia32_loaddqudi128_mask.
	(_mm_maskz_loadu_epi64): Likewise.
	(_mm256_mask_storeu_epi64): Pass long long * to
	__builtin_ia32_storedqudi256_mask.
	(_mm_mask_storeu_epi64): Pass long long * to
	__builtin_ia32_storedqudi128_mask.
	(_mm256_mask_loadu_epi32): Pass const int * to
	__builtin_ia32_loaddqusi256_mask.
	(_mm256_maskz_loadu_epi32): Likewise.
	(_mm_mask_loadu_epi32): Pass const int * to
	__builtin_ia32_loaddqusi128_mask.
	(_mm_maskz_loadu_epi32): Likewise.
	(_mm256_mask_storeu_epi32): Pass int * to
	__builtin_ia32_storedqusi256_mask.
	(_mm_mask_storeu_epi32): Pass int * to
	__builtin_ia32_storedqusi128_mask.
	* config/i386/i386-builtin-types.def (PCSHORT): New.
	(PINT64): Likewise.
	(V64QI_FTYPE_PCCHAR_V64QI_UDI): Likewise.
	(V32HI_FTYPE_PCSHORT_V32HI_USI): Likewise.
	(V32QI_FTYPE_PCCHAR_V32QI_USI): Likewise.
	(V16SF_FTYPE_PCFLOAT_V16SF_UHI): Likewise.
	(V8DF_FTYPE_PCDOUBLE_V8DF_UQI): Likewise.
	(V16SI_FTYPE_PCINT_V16SI_UHI): Likewise.
	(V16HI_FTYPE_PCSHORT_V16HI_UHI): Likewise.
	(V16QI_FTYPE_PCCHAR_V16QI_UHI): Likewise.
	(V8SF_FTYPE_PCFLOAT_V8SF_UQI): Likewise.
	(V8DI_FTYPE_PCINT64_V8DI_UQI): Likewise.
	(V8SI_FTYPE_PCINT_V8SI_UQI): Likewise.
	(V8HI_FTYPE_PCSHORT_V8HI_UQI): Likewise.
	(V4DF_FTYPE_PCDOUBLE_V4DF_UQI): Likewise.
	(V4SF_FTYPE_PCFLOAT_V4SF_UQI): Likewise.
	(V4DI_FTYPE_PCINT64_V4DI_UQI): Likewise.
	(V4SI_FTYPE_PCINT_V4SI_UQI): Likewise.
	(V2DF_FTYPE_PCDOUBLE_V2DF_UQI): Likewise.
	(V2DI_FTYPE_PCINT64_V2DI_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V8DF_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V4DF_UQI): Likewise.
	(VOID_FTYPE_PDOUBLE_V2DF_UQI): Likewise.
	(VOID_FTYPE_PFLOAT_V16SF_UHI): Likewise.
	(VOID_FTYPE_PFLOAT_V8SF_UQI): Likewise.
	(VOID_FTYPE_PFLOAT_V4SF_UQI): Likewise.
	(VOID_FTYPE_PINT64_V8DI_UQI): Likewise.
	(VOID_FTYPE_PINT64_V4DI_UQI): Likewise.
	(VOID_FTYPE_PINT64_V2DI_UQI): Likewise.
	(VOID_FTYPE_PINT_V16SI_UHI): Likewise.
	(VOID_FTYPE_PINT_V8SI_UQI): Likewise.
	(VOID_FTYPE_PINT_V4SI_UQI): Likewise.
	(VOID_FTYPE_PSHORT_V32HI_USI): Likewise.
	(VOID_FTYPE_PSHORT_V16HI_UHI): Likewise.
	(VOID_FTYPE_PSHORT_V8HI_UQI): Likewise.
	(VOID_FTYPE_PCHAR_V64QI_UDI): Likewise.
	(VOID_FTYPE_PCHAR_V32QI_USI): Likewise.
	(VOID_FTYPE_PCHAR_V16QI_UHI): Likewise.
	(V64QI_FTYPE_PCV64QI_V64QI_UDI): Removed.
	(V32HI_FTYPE_PCV32HI_V32HI_USI): Likewise.
	(V32QI_FTYPE_PCV32QI_V32QI_USI): Likewise.
	(V16HI_FTYPE_PCV16HI_V16HI_UHI): Likewise.
	(V16QI_FTYPE_PCV16QI_V16QI_UHI): Likewise.
	(V8HI_FTYPE_PCV8HI_V8HI_UQI): Likewise.
	(VOID_FTYPE_PV32HI_V32HI_USI): Likewise.
	(VOID_FTYPE_PV16HI_V16HI_UHI): Likewise.
	(VOID_FTYPE_PV8HI_V8HI_UQI): Likewise.
	(VOID_FTYPE_PV64QI_V64QI_UDI): Likewise.
	(VOID_FTYPE_PV32QI_V32QI_USI): Likewise.
	(VOID_FTYPE_PV16QI_V16QI_UHI): Likewise.
	* config/i386/i386.c (ix86_emit_save_reg_using_mov): Don't
	use UNSPEC_STOREU.
	(ix86_emit_restore_sse_regs_using_mov): Don't use UNSPEC_LOADU.
	(ix86_avx256_split_vector_move_misalign): Don't use unaligned
	load nor store.
	(ix86_expand_vector_move_misalign): Likewise.
	(bdesc_special_args): Use CODE_FOR_movvNXY_internal and pointer
	to scalar function prototype for unaligned load/store builtins.
	(ix86_expand_special_args_builtin): Updated.
	* config/i386/sse.md (UNSPEC_LOADU): Removed.
	(UNSPEC_STOREU): Likewise.
	(VI_ULOADSTORE_BW_AVX512VL): Likewise.
	(VI_ULOADSTORE_F_AVX512VL): Likewise.
	(ssescalarsize): Handle V4TI, V2TI and V1TI.
	(<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>): Likewise.
	(*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>): Likewise.
	(<sse>_storeu<ssemodesuffix><avxsizesuffix>): Likewise.
	(<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask): Likewise.
	(<sse2_avx_avx512f>_loaddqu<mode><mask_name>): Likewise.
	(*<sse2_avx_avx512f>_loaddqu<mode><mask_name>): Likewise.
	(<sse2_avx_avx512f>_storedqu<mode>): Likewise.
	(<avx512>_storedqu<mode>_mask): Likewise.
	(*sse4_2_pcmpestr_unaligned): Likewise.
	(*sse4_2_pcmpistr_unaligned): Likewise.
	(*mov<mode>_internal): Renamed to ...
	(mov<mode>_internal): This.  Remove check of AVX and IAMCU on
	misaligned operand.  Replace vmovdqu64 with vmovdqu<ssescalarsize>.
	(movsd/movhpd to movupd peephole): Don't use UNSPEC_LOADU.
	(movlpd/movhpd to movupd peephole): Don't use UNSPEC_STOREU.

gcc/testsuite/

	PR target/69201
	* gcc.target/i386/avx256-unaligned-store-1.c (a): Make it
	extern to force it misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.
	Check vmovups.*movv8sf_internal/3 instead of avx_storeups256.
	Don't check `*' before movv4sf_internal.
	* gcc.target/i386/avx256-unaligned-store-2.c: Check
	vmovups.*movv32qi_internal/3 instead of avx_storeups256.
	Don't check `*' before movv16qi_internal.
	* gcc.target/i386/avx256-unaligned-store-3.c (a): Make it
	extern to force it misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.
	Check vmovups.*movv4df_internal/3 instead of avx_storeupd256.
	Don't check `*' before movv2df_internal.
	* gcc.target/i386/avx256-unaligned-store-4.c (a): Make it
	extern to force it misaligned.
	(b): Likewise.
	(c): Likewise.
	(d): Likewise.
	Check vmovups.*movv8sf_internal/3 instead of avx_storeups256.
	Check movups.*movv4sf_internal/3 instead of sse_storeups.
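
The `extern' change above matters because GCC may raise the alignment
of an array it defines in the same translation unit and then prove how
the accesses are aligned; a declaration-only array keeps its alignment
unknown beyond the ABI minimum, so the stores to b[i+3] below really
must use the unaligned-store path the tests want to exercise.  The
patched avx256-unaligned-store-4.c reduces to this shape:

    #define N 1024

    extern float a[N], b[N+3], c[N];

    void
    avx_test (void)
    {
      int i;

      for (i = 0; i < N; i++)
        b[i+3] = a[i] * c[i];   /* vectorized as unaligned 256-bit
                                   stores (vmovups)  */
    }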

From-SVN: r235209

--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -87,7 +87,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P,
 						     (__v32hi) __W,
 						     (__mmask32) __U);
 }
@@ -96,7 +96,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P,
+  return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P,
 						     (__v32hi)
 						     _mm512_setzero_hi (),
 						     (__mmask32) __U);
@@ -106,7 +106,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A)
 {
-  __builtin_ia32_storedquhi512_mask ((__v32hi *) __P,
+  __builtin_ia32_storedquhi512_mask ((short *) __P,
 				     (__v32hi) __A,
 				     (__mmask32) __U);
 }
@@ -150,7 +150,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P,
 						     (__v64qi) __W,
 						     (__mmask64) __U);
 }
@@ -159,7 +159,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P,
+  return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P,
 						     (__v64qi)
 						     _mm512_setzero_hi (),
 						     (__mmask64) __U);
@@ -169,7 +169,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A)
 {
-  __builtin_ia32_storedquqi512_mask ((__v64qi *) __P,
+  __builtin_ia32_storedquqi512_mask ((char *) __P,
 				     (__v64qi) __A,
 				     (__mmask64) __U);
 }
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -5671,7 +5671,7 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_loadu_pd (void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
 						   (__v8df)
 						   _mm512_undefined_pd (),
 						   (__mmask8) -1);
@@ -5681,7 +5681,7 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
 						   (__v8df) __W,
 						   (__mmask8) __U);
 }
@@ -5690,7 +5690,7 @@ extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
-  return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P,
+  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
 						   (__v8df)
 						   _mm512_setzero_pd (),
 						   (__mmask8) __U);
@@ -5700,7 +5700,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_storeu_pd (void *__P, __m512d __A)
 {
-  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
+  __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
 				   (__mmask8) -1);
 }
@@ -5708,7 +5708,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
 {
-  __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A,
+  __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
 				   (__mmask8) __U);
 }
@@ -5716,7 +5716,7 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_loadu_ps (void const *__P)
 {
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
 						  (__v16sf)
 						  _mm512_undefined_ps (),
 						  (__mmask16) -1);
@@ -5726,7 +5726,7 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
 {
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
 						  (__v16sf) __W,
 						  (__mmask16) __U);
 }
@@ -5735,7 +5735,7 @@ extern __inline __m512
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
 {
-  return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P,
+  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
 						  (__v16sf)
 						  _mm512_setzero_ps (),
 						  (__mmask16) __U);
@@ -5745,7 +5745,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_storeu_ps (void *__P, __m512 __A)
 {
-  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
+  __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
 				   (__mmask16) -1);
 }
@@ -5753,7 +5753,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
 {
-  __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A,
+  __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
 				   (__mmask16) __U);
 }
@@ -5761,7 +5761,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
+  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
 						     (__v8di) __W,
 						     (__mmask8) __U);
 }
@@ -5770,7 +5770,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P,
+  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
 						     (__v8di)
 						     _mm512_setzero_si512 (),
 						     (__mmask8) __U);
@@ -5780,7 +5780,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
 {
-  __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A,
+  __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
 				     (__mmask8) __U);
 }
@@ -5788,7 +5788,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_loadu_si512 (void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
 						     (__v16si)
 						     _mm512_setzero_si512 (),
 						     (__mmask16) -1);
@@ -5798,7 +5798,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
 						     (__v16si) __W,
 						     (__mmask16) __U);
 }
@@ -5807,7 +5807,7 @@ extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
 {
-  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P,
+  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
 						     (__v16si)
 						     _mm512_setzero_si512 (),
 						     (__mmask16) __U);
@@ -5817,7 +5817,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_storeu_si512 (void *__P, __m512i __A)
 {
-  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
+  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
 				     (__mmask16) -1);
 }
@@ -5825,7 +5825,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
 {
-  __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A,
+  __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
 				     (__mmask16) __U);
 }
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -77,7 +77,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
 {
-  __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
+  __builtin_ia32_storedquqi256_mask ((char *) __P,
 				     (__v32qi) __A,
 				     (__mmask32) __U);
 }
@@ -86,7 +86,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
 {
-  __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
+  __builtin_ia32_storedquqi128_mask ((char *) __P,
 				     (__v16qi) __A,
 				     (__mmask16) __U);
 }
@@ -95,7 +95,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
 						     (__v16hi) __W,
 						     (__mmask16) __U);
 }
@@ -104,7 +104,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
+  return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P,
 						     (__v16hi)
 						     _mm256_setzero_si256 (),
 						     (__mmask16) __U);
@@ -114,7 +114,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
 						     (__v8hi) __W,
 						     (__mmask8) __U);
 }
@@ -123,7 +123,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
+  return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P,
 						     (__v8hi)
 						     _mm_setzero_hi (),
 						     (__mmask8) __U);
@@ -172,7 +172,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
 						     (__v32qi) __W,
 						     (__mmask32) __U);
 }
@@ -181,7 +181,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
+  return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P,
 						     (__v32qi)
 						     _mm256_setzero_si256 (),
 						     (__mmask32) __U);
@@ -191,7 +191,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
 						     (__v16qi) __W,
 						     (__mmask16) __U);
 }
@@ -200,7 +200,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
+  return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P,
 						     (__v16qi)
 						     _mm_setzero_hi (),
 						     (__mmask16) __U);
@@ -3679,7 +3679,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
 {
-  __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
+  __builtin_ia32_storedquhi256_mask ((short *) __P,
 				     (__v16hi) __A,
 				     (__mmask16) __U);
 }
@@ -3688,7 +3688,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
 {
-  __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
+  __builtin_ia32_storedquhi128_mask ((short *) __P,
 				     (__v8hi) __A,
 				     (__mmask8) __U);
 }
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -626,7 +626,7 @@ extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
 {
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
 						   (__v4df) __W,
 						   (__mmask8) __U);
 }
@@ -635,7 +635,7 @@ extern __inline __m256d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
-  return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
+  return (__m256d) __builtin_ia32_loadupd256_mask ((const double *) __P,
 						   (__v4df)
 						   _mm256_setzero_pd (),
 						   (__mmask8) __U);
@@ -645,7 +645,7 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
 {
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
 						   (__v2df) __W,
 						   (__mmask8) __U);
 }
@@ -654,7 +654,7 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
-  return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
+  return (__m128d) __builtin_ia32_loadupd128_mask ((const double *) __P,
 						   (__v2df)
 						   _mm_setzero_pd (),
 						   (__mmask8) __U);
@@ -664,7 +664,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
 {
-  __builtin_ia32_storeupd256_mask ((__v4df *) __P,
+  __builtin_ia32_storeupd256_mask ((double *) __P,
 				   (__v4df) __A,
 				   (__mmask8) __U);
 }
@@ -673,7 +673,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
 {
-  __builtin_ia32_storeupd128_mask ((__v2df *) __P,
+  __builtin_ia32_storeupd128_mask ((double *) __P,
 				   (__v2df) __A,
 				   (__mmask8) __U);
 }
@@ -682,7 +682,7 @@ extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
 {
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
 						  (__v8sf) __W,
 						  (__mmask8) __U);
 }
@@ -691,7 +691,7 @@ extern __inline __m256
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
-  return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
+  return (__m256) __builtin_ia32_loadups256_mask ((const float *) __P,
 						  (__v8sf)
 						  _mm256_setzero_ps (),
 						  (__mmask8) __U);
@@ -701,7 +701,7 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
 {
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
 						  (__v4sf) __W,
 						  (__mmask8) __U);
 }
@@ -710,7 +710,7 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
-  return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
+  return (__m128) __builtin_ia32_loadups128_mask ((const float *) __P,
 						  (__v4sf)
 						  _mm_setzero_ps (),
 						  (__mmask8) __U);
@@ -720,7 +720,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
 {
-  __builtin_ia32_storeups256_mask ((__v8sf *) __P,
+  __builtin_ia32_storeups256_mask ((float *) __P,
 				   (__v8sf) __A,
 				   (__mmask8) __U);
 }
@@ -729,7 +729,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
 {
-  __builtin_ia32_storeups128_mask ((__v4sf *) __P,
+  __builtin_ia32_storeups128_mask ((float *) __P,
 				   (__v4sf) __A,
 				   (__mmask8) __U);
 }
@@ -738,7 +738,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
 						     (__v4di) __W,
 						     (__mmask8) __U);
 }
@@ -747,7 +747,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
+  return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P,
 						     (__v4di)
 						     _mm256_setzero_si256 (),
 						     (__mmask8) __U);
@@ -757,7 +757,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
 						     (__v2di) __W,
 						     (__mmask8) __U);
 }
@@ -766,7 +766,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
+  return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P,
 						     (__v2di)
 						     _mm_setzero_di (),
 						     (__mmask8) __U);
@@ -776,7 +776,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
 {
-  __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
+  __builtin_ia32_storedqudi256_mask ((long long *) __P,
 				     (__v4di) __A,
 				     (__mmask8) __U);
 }
@@ -785,7 +785,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
 {
-  __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
+  __builtin_ia32_storedqudi128_mask ((long long *) __P,
 				     (__v2di) __A,
 				     (__mmask8) __U);
 }
@@ -794,7 +794,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
 						     (__v8si) __W,
 						     (__mmask8) __U);
 }
@@ -803,7 +803,7 @@ extern __inline __m256i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
-  return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
+  return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P,
 						     (__v8si)
 						     _mm256_setzero_si256 (),
 						     (__mmask8) __U);
@@ -813,7 +813,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
 						     (__v4si) __W,
 						     (__mmask8) __U);
 }
@@ -822,7 +822,7 @@ extern __inline __m128i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
+  return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P,
 						     (__v4si)
 						     _mm_setzero_si128 (),
 						     (__mmask8) __U);
@@ -832,7 +832,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
 {
-  __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
+  __builtin_ia32_storedqusi256_mask ((int *) __P,
 				     (__v8si) __A,
 				     (__mmask8) __U);
 }
@@ -841,7 +841,7 @@ extern __inline void
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
 {
-  __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
+  __builtin_ia32_storedqusi128_mask ((int *) __P,
 				     (__v4si) __A,
 				     (__mmask8) __U);
 }
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -124,6 +124,7 @@ DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
 DEF_POINTER_TYPE (PCFLOAT, FLOAT, CONST)
 DEF_POINTER_TYPE (PCINT, INT, CONST)
 DEF_POINTER_TYPE (PCINT64, INT64, CONST)
+DEF_POINTER_TYPE (PCSHORT, SHORT, CONST)
 DEF_POINTER_TYPE (PCHAR, CHAR)
 DEF_POINTER_TYPE (PCVOID, VOID, CONST)
 DEF_POINTER_TYPE (PVOID, VOID)
@@ -132,6 +133,7 @@ DEF_POINTER_TYPE (PFLOAT, FLOAT)
 DEF_POINTER_TYPE (PSHORT, SHORT)
 DEF_POINTER_TYPE (PUSHORT, USHORT)
 DEF_POINTER_TYPE (PINT, INT)
+DEF_POINTER_TYPE (PINT64, INT64)
 DEF_POINTER_TYPE (PLONGLONG, LONGLONG)
 DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
 DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
@@ -754,24 +756,36 @@ DEF_FUNCTION_TYPE (V16HI, V8HI, V16HI, UHI)
 DEF_FUNCTION_TYPE (V16HI, HI, V16HI, UHI)
 DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, UQI)
 DEF_FUNCTION_TYPE (V8HI, HI, V8HI, UQI)
-DEF_FUNCTION_TYPE (V64QI, PCV64QI, V64QI, UDI)
-DEF_FUNCTION_TYPE (V32HI, PCV32HI, V32HI, USI)
-DEF_FUNCTION_TYPE (V32QI, PCV32QI, V32QI, USI)
 DEF_FUNCTION_TYPE (V16SF, PCV16SF, V16SF, UHI)
 DEF_FUNCTION_TYPE (V8DF, PCV8DF, V8DF, UQI)
 DEF_FUNCTION_TYPE (V16SI, PCV16SI, V16SI, UHI)
-DEF_FUNCTION_TYPE (V16HI, PCV16HI, V16HI, UHI)
-DEF_FUNCTION_TYPE (V16QI, PCV16QI, V16QI, UHI)
 DEF_FUNCTION_TYPE (V8SF, PCV8SF, V8SF, UQI)
 DEF_FUNCTION_TYPE (V8DI, PCV8DI, V8DI, UQI)
 DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI, UQI)
-DEF_FUNCTION_TYPE (V8HI, PCV8HI, V8HI, UQI)
 DEF_FUNCTION_TYPE (V4DF, PCV4DF, V4DF, UQI)
 DEF_FUNCTION_TYPE (V4SF, PCV4SF, V4SF, UQI)
 DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI, UQI)
 DEF_FUNCTION_TYPE (V4SI, PCV4SI, V4SI, UQI)
 DEF_FUNCTION_TYPE (V2DF, PCV2DF, V2DF, UQI)
 DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI, UQI)
+DEF_FUNCTION_TYPE (V64QI, PCCHAR, V64QI, UDI)
+DEF_FUNCTION_TYPE (V32HI, PCSHORT, V32HI, USI)
+DEF_FUNCTION_TYPE (V32QI, PCCHAR, V32QI, USI)
+DEF_FUNCTION_TYPE (V16SF, PCFLOAT, V16SF, UHI)
+DEF_FUNCTION_TYPE (V8DF, PCDOUBLE, V8DF, UQI)
+DEF_FUNCTION_TYPE (V16SI, PCINT, V16SI, UHI)
+DEF_FUNCTION_TYPE (V16HI, PCSHORT, V16HI, UHI)
+DEF_FUNCTION_TYPE (V16QI, PCCHAR, V16QI, UHI)
+DEF_FUNCTION_TYPE (V8SF, PCFLOAT, V8SF, UQI)
+DEF_FUNCTION_TYPE (V8DI, PCINT64, V8DI, UQI)
+DEF_FUNCTION_TYPE (V8SI, PCINT, V8SI, UQI)
+DEF_FUNCTION_TYPE (V8HI, PCSHORT, V8HI, UQI)
+DEF_FUNCTION_TYPE (V4DF, PCDOUBLE, V4DF, UQI)
+DEF_FUNCTION_TYPE (V4SF, PCFLOAT, V4SF, UQI)
+DEF_FUNCTION_TYPE (V4DI, PCINT64, V4DI, UQI)
+DEF_FUNCTION_TYPE (V4SI, PCINT, V4SI, UQI)
+DEF_FUNCTION_TYPE (V2DF, PCDOUBLE, V2DF, UQI)
+DEF_FUNCTION_TYPE (V2DI, PCINT64, V2DI, UQI)
 DEF_FUNCTION_TYPE (V16HI, V16SI, V16HI, UHI)
 DEF_FUNCTION_TYPE (V8SI, V8DI, V8SI, UQI)
 DEF_FUNCTION_TYPE (V8HI, V8DI, V8HI, UQI)
@@ -823,12 +837,24 @@ DEF_FUNCTION_TYPE (VOID, PV16QI, V4DI, UQI)
 DEF_FUNCTION_TYPE (VOID, PV16QI, V2DI, UQI)
 DEF_FUNCTION_TYPE (VOID, PV8SI, V8SI, UQI)
 DEF_FUNCTION_TYPE (VOID, PV4SI, V4SI, UQI)
-DEF_FUNCTION_TYPE (VOID, PV32HI, V32HI, USI)
-DEF_FUNCTION_TYPE (VOID, PV16HI, V16HI, UHI)
-DEF_FUNCTION_TYPE (VOID, PV8HI, V8HI, UQI)
-DEF_FUNCTION_TYPE (VOID, PV64QI, V64QI, UDI)
-DEF_FUNCTION_TYPE (VOID, PV32QI, V32QI, USI)
-DEF_FUNCTION_TYPE (VOID, PV16QI, V16QI, UHI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V8DF, UQI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V4DF, UQI)
+DEF_FUNCTION_TYPE (VOID, PDOUBLE, V2DF, UQI)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V16SF, UHI)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V8SF, UQI)
+DEF_FUNCTION_TYPE (VOID, PFLOAT, V4SF, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT64, V8DI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT64, V4DI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT64, V2DI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT, V16SI, UHI)
+DEF_FUNCTION_TYPE (VOID, PINT, V8SI, UQI)
+DEF_FUNCTION_TYPE (VOID, PINT, V4SI, UQI)
+DEF_FUNCTION_TYPE (VOID, PSHORT, V32HI, USI)
+DEF_FUNCTION_TYPE (VOID, PSHORT, V16HI, UHI)
+DEF_FUNCTION_TYPE (VOID, PSHORT, V8HI, UQI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V64QI, UDI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V32QI, USI)
+DEF_FUNCTION_TYPE (VOID, PCHAR, V16QI, UHI)
 DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT, UQI)
 DEF_FUNCTION_TYPE (V8SI, V8SF, V8SI, UQI)
 DEF_FUNCTION_TYPE (V4SI, V4SF, V4SI, UQI)
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-1.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-float a[N], b[N+3], c[N], d[N];
+extern float a[N], b[N+3], c[N], d[N];
 
 void
 avx_test (void)
@@ -17,6 +17,6 @@ avx_test (void)
       d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_storeups256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv8sf_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-2.c
@@ -23,6 +23,6 @@ avx_test (void)
     }
 }
 
-/* { dg-final { scan-assembler-not "avx_storedqu256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv16qi_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv32qi_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv16qi_internal/3" } } */
 /* { dg-final { scan-assembler "vextract.128" } } */
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-3.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-double a[N], b[N+3], c[N], d[N];
+extern double a[N], b[N+3], c[N], d[N];
 
 void
 avx_test (void)
@@ -17,6 +17,6 @@ avx_test (void)
      d[i] = c[i] * 20.0;
 }
 
-/* { dg-final { scan-assembler-not "avx_storeupd256" } } */
-/* { dg-final { scan-assembler "vmovups.*\\*movv2df_internal/3" } } */
+/* { dg-final { scan-assembler-not "vmovups.*movv4df_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv2df_internal/3" } } */
 /* { dg-final { scan-assembler "vextractf128" } } */
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-store-4.c
@@ -3,7 +3,7 @@
 
 #define N 1024
 
-float a[N], b[N+3], c[N];
+extern float a[N], b[N+3], c[N];
 
 void
 avx_test (void)
@@ -14,7 +14,6 @@ avx_test (void)
       b[i+3] = a[i] * c[i];
 }
 
-/* { dg-final { scan-assembler "avx_storeups256" } } */
-/* { dg-final { scan-assembler-not "sse_storeups" } } */
-/* { dg-final { scan-assembler-not "\\*avx_movv4sf_internal/3" } } */
+/* { dg-final { scan-assembler "vmovups.*movv8sf_internal/3" } } */
+/* { dg-final { scan-assembler-not "movups.*movv4sf_internal/3" } } */
 /* { dg-final { scan-assembler-not "vextractf128" } } */