Commit 075691af by Alexander Ivchenko Committed by Kirill Yukhin

avx512fintrin.h (_mm_add_round_sd): New.

gcc/
	* config/i386/avx512fintrin.h (_mm_add_round_sd): New.
	(_mm_add_round_sd): Ditto.
	(_mm_add_round_ss): Ditto.
	(_mm_sub_round_sd): Ditto.
	(_mm_sub_round_ss): Ditto.
	(_mm_rcp14_sd): Ditto.
	(_mm_rcp14_ss): Ditto.
	(_mm_sqrt_round_sd): Ditto.
	(_mm_sqrt_round_ss): Ditto.
	(_mm_mul_round_sd): Ditto.
	(_mm_mul_round_ss): Ditto.
	(_mm_div_round_sd): Ditto.
	(_mm_div_round_ss): Ditto.
	(_mm_scalef_round_sd): Ditto.
	(_mm_scalef_round_ss): Ditto.
	(_mm_scalef_round_sd): Ditto.
	(_mm_scalef_round_ss): Ditto.
	(_mm_cvt_roundsd_ss): Ditto.
	(_mm_cvt_roundss_sd): Ditto.
	(_mm_getexp_round_ss): Ditto.
	(_mm_getexp_round_sd): Ditto.
	(_mm_getmant_round_sd): Ditto.
	(_mm_getmant_round_ss): Ditto.
	(_mm_roundscale_round_ss): Ditto.
	(_mm_roundscale_round_sd): Ditto.
	(_mm_max_round_sd): Ditto.
	(_mm_max_round_ss): Ditto.
	(_mm_min_round_sd): Ditto.
	(_mm_min_round_ss): Ditto.
	(_mm_fmadd_round_sd): Ditto.
	(_mm_fmadd_round_ss): Ditto.
	(_mm_fmsub_round_sd): Ditto.
	(_mm_fmsub_round_ss): Ditto.
	(_mm_fnmadd_round_sd): Ditto.
	(_mm_fnmadd_round_ss): Ditto.
	(_mm_fnmsub_round_sd): Ditto.
	(_mm_fnmsub_round_ss): Ditto.
	(_mm_scalef_sd): Ditto.
	(_mm_scalef_ss): Ditto.
	(_mm_getexp_ss): Ditto.
	(_mm_getexp_sd): Ditto.
	(_mm_getmant_sd): Ditto.
	(_mm_getmant_ss): Ditto.
	(_mm_roundscale_ss): Ditto.
	(_mm_roundscale_sd): Ditto.
	* config/i386/i386-builtin-types.def: New types to support
	new built-ins: <V2DF, V2DF, V2DF, INT, INT>, <V4SF, V4SF, V4SF, INT, INT>,
	<V4SF, V4SF, V2DF, INT>, <V2DF, V2DF, V4SF, INT>,
	<V4SF, V4SF, V4SF, V4SF, INT>.
	* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_ADDSD_ROUND,
	IX86_BUILTIN_ADDSS_ROUND, IX86_BUILTIN_CVTSD2SS_ROUND,
	IX86_BUILTIN_CVTSS2SD_ROUND, IX86_BUILTIN_DIVSD_ROUND,
	IX86_BUILTIN_GETEXPSD128, IX86_BUILTIN_DIVSS_ROUND,
	IX86_BUILTIN_GETEXPSS128, IX86_BUILTIN_GETMANTSD128,
	IX86_BUILTIN_GETMANTSS128, IX86_BUILTIN_MAXSD_ROUND,
	IX86_BUILTIN_MAXSS_ROUND, IX86_BUILTIN_MINSD_ROUND,
	IX86_BUILTIN_MINSS_ROUND, IX86_BUILTIN_MULSD_ROUND,
	IX86_BUILTIN_MULSS_ROUND, IX86_BUILTIN_RCP14SD,
	IX86_BUILTIN_RCP14SS, IX86_BUILTIN_RNDSCALESD,
	IX86_BUILTIN_RNDSCALESS, IX86_BUILTIN_RSQRT14SD,
	IX86_BUILTIN_RSQRT14SS, IX86_BUILTIN_SCALEFSD,
	IX86_BUILTIN_SCALEFSS, IX86_BUILTIN_SQRTSD_ROUND,
	IX86_BUILTIN_SQRTSS_ROUND, IX86_BUILTIN_SUBSD_ROUND,
	IX86_BUILTIN_SUBSS_ROUND, IX86_BUILTIN_VFMADDSD3_ROUND,
	IX86_BUILTIN_VFMADDSS3_ROUND, IX86_BUILTIN_VFMSUBSD3_MASK3,
	IX86_BUILTIN_VFMSUBSS3_MASK3.
	(builtin_description bdesc_args[]): Add
	__builtin_ia32_rcp14sd, __builtin_ia32_rcp14ss,
	__builtin_ia32_rsqrt14pd512_mask, __builtin_ia32_rsqrt14ps512_mask,
	__builtin_ia32_rsqrt14sd, __builtin_ia32_rsqrt14ss,
	__builtin_ia32_addsd_round, __builtin_ia32_addss_round,
	__builtin_ia32_cvtsd2ss_round, __builtin_ia32_cvtss2sd_round,
	__builtin_ia32_divsd_round, __builtin_ia32_divss_round,
	__builtin_ia32_getexpsd128_round, __builtin_ia32_getexpss128_round,
	__builtin_ia32_getmantsd_round, __builtin_ia32_getmantss_round,
	__builtin_ia32_maxsd_round, __builtin_ia32_maxss_round,
	__builtin_ia32_minsd_round, __builtin_ia32_minss_round,
	__builtin_ia32_mulsd_round, __builtin_ia32_mulss_round,
	__builtin_ia32_rndscalesd_round, __builtin_ia32_rndscaless_round,
	__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round,
	__builtin_ia32_sqrtsd_round, __builtin_ia32_sqrtss_round,
	__builtin_ia32_subsd_round, __builtin_ia32_subss_round,
	__builtin_ia32_vfmaddsd3_round, __builtin_ia32_vfmaddss3_round.
	(ix86_expand_round_builtin): Expand new FTYPEs.
	* config/i386/sse.md (<sse>_vm<plusminus_insn><mode>3): Support
	EVEX's embedded rounding.
	(<sse>_vm<multdiv_mnemonic><mode>3): Ditto.
	(<sse>_vmsqrt<mode>2): Ditto.
	(<sse>_vm<code><mode>3): Ditto.
	(sse2_cvtsd2ss): Ditto.
	(sse2_cvtss2sd): Ditto.
	(*avx512f_vmscalef<mode>): Ditto.
	(avx512f_sgetexp<mode>): Ditto.
	(*avx512f_rndscale<mode>): Ditto.
	(avx512f_getmant<mode>): Ditto.
	(*srcp14<mode>): Make visible.
	(*rsqrt14<mode>): Ditto.
	* config/i386/subst.md (mask_mode512bit_condition): Fix
	mode calculation.
	(sd_mask_mode512bit_condition): Ditto.
	(round_mode512bit_condition): Ditto.
	(round_modev4sf_condition): Ditto.
	(round_mask_scalar_operand3): Remove.
	(round_prefix): New.
	(round_saeonly_op3): Ditto.
	(round_saeonly_prefix): Ditto.

testsuite/
	* gcc.target/i386/avx-1.c: Update for AVX-512 scalar insns.
	* gcc.target/i386/avx512f-vaddsd-1.c: New.
	* gcc.target/i386/avx512f-vaddss-1.c: Ditto.
	* gcc.target/i386/avx512f-vcvtsd2ss-1.c: Ditto.
	* gcc.target/i386/avx512f-vcvtss2sd-1.c: Ditto.
	* gcc.target/i386/avx512f-vdivsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vdivss-1.c: Ditto.
	* gcc.target/i386/avx512f-vextractf32x4-2.c: Ditto.
	* gcc.target/i386/avx512f-vextracti32x4-2.c: Ditto.
	* gcc.target/i386/avx512f-vfmaddXXXsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vfmaddXXXss-1.c: Ditto.
	* gcc.target/i386/avx512f-vfmsubXXXsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vfmsubXXXss-1.c: Ditto.
	* gcc.target/i386/avx512f-vfnmaddXXXsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vfnmaddXXXss-1.c: Ditto.
	* gcc.target/i386/avx512f-vfnmsubXXXsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vfnmsubXXXss-1.c: Ditto.
	* gcc.target/i386/avx512f-vgetexpsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vgetexpsd-2.c: Ditto.
	* gcc.target/i386/avx512f-vgetexpss-1.c: Ditto.
	* gcc.target/i386/avx512f-vgetexpss-2.c: Ditto.
	* gcc.target/i386/avx512f-vgetmantsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vgetmantsd-2.c: Ditto.
	* gcc.target/i386/avx512f-vgetmantss-1.c: Ditto.
	* gcc.target/i386/avx512f-vgetmantss-2.c: Ditto.
	* gcc.target/i386/avx512f-vmaxsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vmaxss-1.c: Ditto.
	* gcc.target/i386/avx512f-vminsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vminss-1.c: Ditto.
	* gcc.target/i386/avx512f-vmulsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vmulss-1.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14sd-1.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14ss-1.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.
	* gcc.target/i386/avx512f-vrndscalesd-1.c: Ditto.
	* gcc.target/i386/avx512f-vrndscalesd-2.c: Ditto.
	* gcc.target/i386/avx512f-vrndscaless-1.c: Ditto.
	* gcc.target/i386/avx512f-vrndscaless-2.c: Ditto.
	* gcc.target/i386/avx512f-vrsqrt14sd-1.c: Ditto.
	* gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto.
	* gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto.
	* gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto.
	* gcc.target/i386/avx512f-vscalefsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vscalefsd-2.c: Ditto.
	* gcc.target/i386/avx512f-vscalefss-1.c: Ditto.
	* gcc.target/i386/avx512f-vscalefss-2.c: Ditto.
	* gcc.target/i386/avx512f-vsqrtsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vsqrtss-1.c: Ditto.
	* gcc.target/i386/avx512f-vsubsd-1.c: Ditto.
	* gcc.target/i386/avx512f-vsubss-1.c: Ditto.
	* gcc.target/i386/sse-14.c: Update for AVX-512 scalar insns.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/testimm-10.c: Ditto.


Co-Authored-By: Andrey Turetskiy <andrey.turetskiy@intel.com>
Co-Authored-By: Anna Tikhonova <anna.tikhonova@intel.com>
Co-Authored-By: Ilya Tocar <ilya.tocar@intel.com>
Co-Authored-By: Ilya Verbin <ilya.verbin@intel.com>
Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>
Co-Authored-By: Maxim Kuznetsov <maxim.kuznetsov@intel.com>
Co-Authored-By: Michael Zolotukhin <michael.v.zolotukhin@intel.com>
Co-Authored-By: Sergey Lega <sergey.s.lega@intel.com>

From-SVN: r206265
parent cf3e5a89
...@@ -16,6 +16,123 @@ ...@@ -16,6 +16,123 @@
Kirill Yukhin <kirill.yukhin@intel.com> Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com> Michael Zolotukhin <michael.v.zolotukhin@intel.com>
* config/i386/avx512fintrin.h (_mm_add_round_sd): New.
(_mm_add_round_sd): Ditto.
(_mm_add_round_ss): Ditto.
(_mm_sub_round_sd): Ditto.
(_mm_sub_round_ss): Ditto.
(_mm_rcp14_sd): Ditto.
(_mm_rcp14_ss): Ditto.
(_mm_sqrt_round_sd): Ditto.
(_mm_sqrt_round_ss): Ditto.
(_mm_mul_round_sd): Ditto.
(_mm_mul_round_ss): Ditto.
(_mm_div_round_sd): Ditto.
(_mm_div_round_ss): Ditto.
(_mm_scalef_round_sd): Ditto.
(_mm_scalef_round_ss): Ditto.
(_mm_scalef_round_sd): Ditto.
(_mm_scalef_round_ss): Ditto.
(_mm_cvt_roundsd_ss): Ditto.
(_mm_cvt_roundss_sd): Ditto.
(_mm_getexp_round_ss): Ditto.
(_mm_getexp_round_sd): Ditto.
(_mm_getmant_round_sd): Ditto.
(_mm_getmant_round_ss): Ditto.
(_mm_roundscale_round_ss): Ditto.
(_mm_roundscale_round_sd): Ditto.
(_mm_max_round_sd): Ditto.
(_mm_max_round_ss): Ditto.
(_mm_min_round_sd): Ditto.
(_mm_min_round_ss): Ditto.
(_mm_fmadd_round_sd): Ditto.
(_mm_fmadd_round_ss): Ditto.
(_mm_fmsub_round_sd): Ditto.
(_mm_fmsub_round_ss): Ditto.
(_mm_fnmadd_round_sd): Ditto.
(_mm_fnmadd_round_ss): Ditto.
(_mm_fnmsub_round_sd): Ditto.
(_mm_fnmsub_round_ss): Ditto.
(_mm_scalef_sd): Ditto.
(_mm_scalef_ss): Ditto.
(_mm_getexp_ss): Ditto.
(_mm_getexp_sd): Ditto.
(_mm_getmant_sd): Ditto.
(_mm_getmant_ss): Ditto.
(_mm_roundscale_ss): Ditto.
(_mm_roundscale_sd): Ditto.
* config/i386/i386-builtin-types.def: New types to support
new built-ins: <V2DF, V2DF, V2DF, INT, INT>, <V4SF, V4SF, V4SF, INT, INT>,
<V4SF, V4SF, V2DF, INT>, <V2DF, V2DF, V4SF, INT>,
<V4SF, V4SF, V4SF, V4SF, INT>.
* config/i386/i386.c (enum ix86_builtins): Add IX86_BUILTIN_ADDSD_ROUND,
IX86_BUILTIN_ADDSS_ROUND, IX86_BUILTIN_CVTSD2SS_ROUND,
IX86_BUILTIN_CVTSS2SD_ROUND, IX86_BUILTIN_DIVSD_ROUND,
IX86_BUILTIN_GETEXPSD128, IX86_BUILTIN_DIVSS_ROUND,
IX86_BUILTIN_GETEXPSS128, IX86_BUILTIN_GETMANTSD128,
IX86_BUILTIN_GETMANTSS128, IX86_BUILTIN_MAXSD_ROUND,
IX86_BUILTIN_MAXSS_ROUND, IX86_BUILTIN_MINSD_ROUND,
IX86_BUILTIN_MINSS_ROUND, IX86_BUILTIN_MULSD_ROUND,
IX86_BUILTIN_MULSS_ROUND, IX86_BUILTIN_RCP14SD,
IX86_BUILTIN_RCP14SS, IX86_BUILTIN_RNDSCALESD,
IX86_BUILTIN_RNDSCALESS, IX86_BUILTIN_RSQRT14SD,
IX86_BUILTIN_RSQRT14SS, IX86_BUILTIN_SCALEFSD,
IX86_BUILTIN_SCALEFSS, IX86_BUILTIN_SQRTSD_ROUND,
IX86_BUILTIN_SQRTSS_ROUND, IX86_BUILTIN_SUBSD_ROUND,
IX86_BUILTIN_SUBSS_ROUND, IX86_BUILTIN_VFMADDSD3_ROUND,
IX86_BUILTIN_VFMADDSS3_ROUND, IX86_BUILTIN_VFMSUBSD3_MASK3,
IX86_BUILTIN_VFMSUBSS3_MASK3.
(builtin_description bdesc_args[]): Add
__builtin_ia32_rcp14sd, __builtin_ia32_rcp14ss,
__builtin_ia32_rsqrt14pd512_mask, __builtin_ia32_rsqrt14ps512_mask,
__builtin_ia32_rsqrt14sd, __builtin_ia32_rsqrt14ss,
__builtin_ia32_addsd_round, __builtin_ia32_addss_round,
__builtin_ia32_cvtsd2ss_round, __builtin_ia32_cvtss2sd_round,
__builtin_ia32_divsd_round, __builtin_ia32_divss_round,
__builtin_ia32_getexpsd128_round, __builtin_ia32_getexpss128_round,
__builtin_ia32_getmantsd_round, __builtin_ia32_getmantss_round,
__builtin_ia32_maxsd_round, __builtin_ia32_maxss_round,
__builtin_ia32_minsd_round, __builtin_ia32_minss_round,
__builtin_ia32_mulsd_round, __builtin_ia32_mulss_round,
__builtin_ia32_rndscalesd_round, __builtin_ia32_rndscaless_round,
__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round,
__builtin_ia32_sqrtsd_round, __builtin_ia32_sqrtss_round,
__builtin_ia32_subsd_round, __builtin_ia32_subss_round,
__builtin_ia32_vfmaddsd3_round, __builtin_ia32_vfmaddss3_round.
(ix86_expand_round_builtin): Expand new FTYPEs.
* config/i386/sse.md (<sse>_vm<plusminus_insn><mode>3): Support
EVEX's embedded rounding.
(<sse>_vm<multdiv_mnemonic><mode>3): Ditto.
(<sse>_vmsqrt<mode>2): Ditto.
(<sse>_vm<code><mode>3): Ditto.
(sse2_cvtsd2ss): Ditto.
(sse2_cvtss2sd): Ditto.
(*avx512f_vmscalef<mode>): Ditto.
(avx512f_sgetexp<mode>): Ditto.
(*avx512f_rndscale<mode>): Ditto.
(avx512f_getmant<mode>): Ditto.
(*srcp14<mode>): Make visible.
(*rsqrt14<mode>): Ditto.
* config/i386/subst.md (mask_mode512bit_condition): Fix
mode calculation.
(sd_mask_mode512bit_condition): Ditto.
(round_mode512bit_condition): Ditto.
(round_modev4sf_condition): Ditto.
(round_mask_scalar_operand3): Remove.
(round_prefix): New.
(round_saeonly_op3): Ditto.
(round_saeonly_prefix): Ditto.
2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
Maxim Kuznetsov <maxim.kuznetsov@intel.com>
Sergey Lega <sergey.s.lega@intel.com>
Anna Tikhonova <anna.tikhonova@intel.com>
Ilya Tocar <ilya.tocar@intel.com>
Andrey Turetskiy <andrey.turetskiy@intel.com>
Ilya Verbin <ilya.verbin@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
* common/config/i386/i386-common.c (OPTION_MASK_ISA_SHA_SET): New. * common/config/i386/i386-common.c (OPTION_MASK_ISA_SHA_SET): New.
(OPTION_MASK_ISA_SHA_UNSET): Ditto. (OPTION_MASK_ISA_SHA_UNSET): Ditto.
(ix86_handle_option): Handle OPT_msha. (ix86_handle_option): Handle OPT_msha.
...@@ -516,6 +516,7 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, INT) ...@@ -516,6 +516,7 @@ DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, INT)
DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI) DEF_FUNCTION_TYPE (V16QI, V16QI, V16QI, V16QI)
DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT) DEF_FUNCTION_TYPE (V1DI, V1DI, V1DI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, INT, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT) DEF_FUNCTION_TYPE (V2DI, V2DI, DI, INT)
...@@ -531,6 +532,9 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI) ...@@ -531,6 +532,9 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT) DEF_FUNCTION_TYPE (V4HI, V4HI, HI, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, FLOAT, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, INT, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SI, INT)
DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT) DEF_FUNCTION_TYPE (V4SI, V4SI, SI, INT)
...@@ -678,6 +682,7 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT) ...@@ -678,6 +682,7 @@ DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI, INT)
DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, INT)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, HI, INT)
DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI, INT) DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, QI, INT)
......
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
(define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18") (define_subst_attr "mask_operand18" "mask" "" "%{%19%}%N18")
(define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19") (define_subst_attr "mask_operand19" "mask" "" "%{%20%}%N19")
(define_subst_attr "mask_codefor" "mask" "*" "") (define_subst_attr "mask_codefor" "mask" "*" "")
(define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)") (define_subst_attr "mask_mode512bit_condition" "mask" "1" "(GET_MODE_SIZE (<MODE>mode) == 64)")
(define_subst_attr "store_mask_constraint" "mask" "vm" "v") (define_subst_attr "store_mask_constraint" "mask" "vm" "v")
(define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand") (define_subst_attr "store_mask_predicate" "mask" "nonimmediate_operand" "register_operand")
(define_subst_attr "mask_prefix" "mask" "vex" "evex") (define_subst_attr "mask_prefix" "mask" "vex" "evex")
...@@ -85,7 +85,7 @@ ...@@ -85,7 +85,7 @@
(define_subst_attr "sd_mask_op4" "sd" "" "%{%5%}%N4") (define_subst_attr "sd_mask_op4" "sd" "" "%{%5%}%N4")
(define_subst_attr "sd_mask_op5" "sd" "" "%{%6%}%N5") (define_subst_attr "sd_mask_op5" "sd" "" "%{%6%}%N5")
(define_subst_attr "sd_mask_codefor" "sd" "*" "") (define_subst_attr "sd_mask_codefor" "sd" "*" "")
(define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(GET_MODE_SIZE (GET_MODE (operands[0])) == 64)") (define_subst_attr "sd_mask_mode512bit_condition" "sd" "1" "(GET_MODE_SIZE (<MODE>mode) == 64)")
(define_subst "sd" (define_subst "sd"
[(set (match_operand:SUBST_V 0) [(set (match_operand:SUBST_V 0)
...@@ -101,7 +101,6 @@ ...@@ -101,7 +101,6 @@
(define_subst_attr "round_name" "round" "" "_round") (define_subst_attr "round_name" "round" "" "_round")
(define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4") (define_subst_attr "round_mask_operand2" "mask" "%R2" "%R4")
(define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5") (define_subst_attr "round_mask_operand3" "mask" "%R3" "%R5")
(define_subst_attr "round_mask_scalar_operand3" "mask_scalar" "%R3" "%R5")
(define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6") (define_subst_attr "round_sd_mask_operand4" "sd" "%R4" "%R6")
(define_subst_attr "round_op2" "round" "" "%R2") (define_subst_attr "round_op2" "round" "" "%R2")
(define_subst_attr "round_op3" "round" "" "%R3") (define_subst_attr "round_op3" "round" "" "%R3")
...@@ -116,8 +115,9 @@ ...@@ -116,8 +115,9 @@
(define_subst_attr "round_constraint2" "round" "m" "v") (define_subst_attr "round_constraint2" "round" "m" "v")
(define_subst_attr "round_constraint3" "round" "rm" "r") (define_subst_attr "round_constraint3" "round" "rm" "r")
(define_subst_attr "round_nimm_predicate" "round" "nonimmediate_operand" "register_operand") (define_subst_attr "round_nimm_predicate" "round" "nonimmediate_operand" "register_operand")
(define_subst_attr "round_mode512bit_condition" "round" "1" "(GET_MODE (operands[0]) == V16SFmode || GET_MODE (operands[0]) == V8DFmode)") (define_subst_attr "round_prefix" "round" "vex" "evex")
(define_subst_attr "round_modev4sf_condition" "round" "1" "(GET_MODE (operands[0]) == V4SFmode)") (define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode || <MODE>mode == V8DFmode)")
(define_subst_attr "round_modev4sf_condition" "round" "1" "(<MODE>mode == V4SFmode)")
(define_subst_attr "round_codefor" "round" "*" "") (define_subst_attr "round_codefor" "round" "*" "")
(define_subst_attr "round_opnum" "round" "5" "6") (define_subst_attr "round_opnum" "round" "5" "6")
...@@ -138,9 +138,11 @@ ...@@ -138,9 +138,11 @@
(define_subst_attr "round_saeonly_mask_scalar_merge_operand4" "mask_scalar_merge" "%R4" "%R5") (define_subst_attr "round_saeonly_mask_scalar_merge_operand4" "mask_scalar_merge" "%R4" "%R5")
(define_subst_attr "round_saeonly_sd_mask_operand5" "sd" "%R5" "%R7") (define_subst_attr "round_saeonly_sd_mask_operand5" "sd" "%R5" "%R7")
(define_subst_attr "round_saeonly_op2" "round_saeonly" "" "%R2") (define_subst_attr "round_saeonly_op2" "round_saeonly" "" "%R2")
(define_subst_attr "round_saeonly_op3" "round_saeonly" "" "%R3")
(define_subst_attr "round_saeonly_op4" "round_saeonly" "" "%R4") (define_subst_attr "round_saeonly_op4" "round_saeonly" "" "%R4")
(define_subst_attr "round_saeonly_op5" "round_saeonly" "" "%R5") (define_subst_attr "round_saeonly_op5" "round_saeonly" "" "%R5")
(define_subst_attr "round_saeonly_op6" "round_saeonly" "" "%R6") (define_subst_attr "round_saeonly_op6" "round_saeonly" "" "%R6")
(define_subst_attr "round_saeonly_prefix" "round_saeonly" "vex" "evex")
(define_subst_attr "round_saeonly_mask_op2" "round_saeonly" "" "<round_saeonly_mask_operand2>") (define_subst_attr "round_saeonly_mask_op2" "round_saeonly" "" "<round_saeonly_mask_operand2>")
(define_subst_attr "round_saeonly_mask_op3" "round_saeonly" "" "<round_saeonly_mask_operand3>") (define_subst_attr "round_saeonly_mask_op3" "round_saeonly" "" "<round_saeonly_mask_operand3>")
(define_subst_attr "round_saeonly_mask_scalar_op3" "round_saeonly" "" "<round_saeonly_mask_scalar_operand3>") (define_subst_attr "round_saeonly_mask_scalar_op3" "round_saeonly" "" "<round_saeonly_mask_scalar_operand3>")
......
...@@ -13,6 +13,71 @@ ...@@ -13,6 +13,71 @@
Kirill Yukhin <kirill.yukhin@intel.com> Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com> Michael Zolotukhin <michael.v.zolotukhin@intel.com>
* gcc.target/i386/avx-1.c: Update for AVX-512 scalar insns.
* gcc.target/i386/avx512f-vaddsd-1.c: New.
* gcc.target/i386/avx512f-vaddss-1.c: Ditto.
* gcc.target/i386/avx512f-vcvtsd2ss-1.c: Ditto.
* gcc.target/i386/avx512f-vcvtss2sd-1.c: Ditto.
* gcc.target/i386/avx512f-vdivsd-1.c: Ditto.
* gcc.target/i386/avx512f-vdivss-1.c: Ditto.
* gcc.target/i386/avx512f-vextractf32x4-2.c: Ditto.
* gcc.target/i386/avx512f-vextracti32x4-2.c: Ditto.
* gcc.target/i386/avx512f-vfmaddXXXsd-1.c: Ditto.
* gcc.target/i386/avx512f-vfmaddXXXss-1.c: Ditto.
* gcc.target/i386/avx512f-vfmsubXXXsd-1.c: Ditto.
* gcc.target/i386/avx512f-vfmsubXXXss-1.c: Ditto.
* gcc.target/i386/avx512f-vfnmaddXXXsd-1.c: Ditto.
* gcc.target/i386/avx512f-vfnmaddXXXss-1.c: Ditto.
* gcc.target/i386/avx512f-vfnmsubXXXsd-1.c: Ditto.
* gcc.target/i386/avx512f-vfnmsubXXXss-1.c: Ditto.
* gcc.target/i386/avx512f-vgetexpsd-1.c: Ditto.
* gcc.target/i386/avx512f-vgetexpsd-2.c: Ditto.
* gcc.target/i386/avx512f-vgetexpss-1.c: Ditto.
* gcc.target/i386/avx512f-vgetexpss-2.c: Ditto.
* gcc.target/i386/avx512f-vgetmantsd-1.c: Ditto.
* gcc.target/i386/avx512f-vgetmantsd-2.c: Ditto.
* gcc.target/i386/avx512f-vgetmantss-1.c: Ditto.
* gcc.target/i386/avx512f-vgetmantss-2.c: Ditto.
* gcc.target/i386/avx512f-vmaxsd-1.c: Ditto.
* gcc.target/i386/avx512f-vmaxss-1.c: Ditto.
* gcc.target/i386/avx512f-vminsd-1.c: Ditto.
* gcc.target/i386/avx512f-vminss-1.c: Ditto.
* gcc.target/i386/avx512f-vmulsd-1.c: Ditto.
* gcc.target/i386/avx512f-vmulss-1.c: Ditto.
* gcc.target/i386/avx512f-vrcp14sd-1.c: Ditto.
* gcc.target/i386/avx512f-vrcp14sd-2.c: Ditto.
* gcc.target/i386/avx512f-vrcp14ss-1.c: Ditto.
* gcc.target/i386/avx512f-vrcp14ss-2.c: Ditto.
* gcc.target/i386/avx512f-vrndscalesd-1.c: Ditto.
* gcc.target/i386/avx512f-vrndscalesd-2.c: Ditto.
* gcc.target/i386/avx512f-vrndscaless-1.c: Ditto.
* gcc.target/i386/avx512f-vrndscaless-2.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14sd-1.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto.
* gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto.
* gcc.target/i386/avx512f-vscalefsd-1.c: Ditto.
* gcc.target/i386/avx512f-vscalefsd-2.c: Ditto.
* gcc.target/i386/avx512f-vscalefss-1.c: Ditto.
* gcc.target/i386/avx512f-vscalefss-2.c: Ditto.
* gcc.target/i386/avx512f-vsqrtsd-1.c: Ditto.
* gcc.target/i386/avx512f-vsqrtss-1.c: Ditto.
* gcc.target/i386/avx512f-vsubsd-1.c: Ditto.
* gcc.target/i386/avx512f-vsubss-1.c: Ditto.
* gcc.target/i386/sse-14.c: Update for AVX-512 scalar insns.
* gcc.target/i386/sse-23.c: Ditto.
* gcc.target/i386/testimm-10.c: Ditto.
2013-12-31 Alexander Ivchenko <alexander.ivchenko@intel.com>
Maxim Kuznetsov <maxim.kuznetsov@intel.com>
Sergey Lega <sergey.s.lega@intel.com>
Anna Tikhonova <anna.tikhonova@intel.com>
Ilya Tocar <ilya.tocar@intel.com>
Andrey Turetskiy <andrey.turetskiy@intel.com>
Ilya Verbin <ilya.verbin@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
Michael Zolotukhin <michael.v.zolotukhin@intel.com>
* gcc.target/i386/avx-1.c: Add define for __builtin_ia32_sha1rnds4. * gcc.target/i386/avx-1.c: Add define for __builtin_ia32_sha1rnds4.
* gcc.target/i386/i386.exp (check_effective_target_sha): New. * gcc.target/i386/i386.exp (check_effective_target_sha): New.
* gcc.target/i386/sha-check.h: New file. * gcc.target/i386/sha-check.h: New file.
......
...@@ -169,6 +169,8 @@ ...@@ -169,6 +169,8 @@
/* avx512fintrin.h */ /* avx512fintrin.h */
#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1) #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1) #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1)
#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 1)
#define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 1)
#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E) #define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E) #define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D) #define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
...@@ -184,11 +186,11 @@ ...@@ -184,11 +186,11 @@
#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1) #define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1)
#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5) #define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5)
#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1) #define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1)
#define __builtin_ia32_cvtsd2ss_mask(A, B, C, D, E) __builtin_ia32_cvtsd2ss_mask(A, B, C, D, 1) #define __builtin_ia32_cvtsd2ss_round(A, B, C) __builtin_ia32_cvtsd2ss_round(A, B, 1)
#define __builtin_ia32_cvtss2sd_round(A, B, C) __builtin_ia32_cvtss2sd_round(A, B, 4)
#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1) #define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1)
#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1) #define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1)
#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1) #define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1)
#define __builtin_ia32_cvtss2sd_mask(A, B, C, D, E) __builtin_ia32_cvtss2sd_mask(A, B, C, D, 5)
#define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5) #define __builtin_ia32_cvttpd2dq512_mask(A, B, C, D) __builtin_ia32_cvttpd2dq512_mask(A, B, C, 5)
#define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5) #define __builtin_ia32_cvttpd2udq512_mask(A, B, C, D) __builtin_ia32_cvttpd2udq512_mask(A, B, C, 5)
#define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5) #define __builtin_ia32_cvttps2dq512_mask(A, B, C, D) __builtin_ia32_cvttps2dq512_mask(A, B, C, 5)
...@@ -199,6 +201,8 @@ ...@@ -199,6 +201,8 @@
#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1) #define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1)
#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1) #define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1) #define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1)
#define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 1)
#define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 1)
#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D) #define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D) #define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D) #define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
...@@ -221,18 +225,28 @@ ...@@ -221,18 +225,28 @@
#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1) #define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1)
#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5) #define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5)
#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5) #define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5)
#define __builtin_ia32_getexpsd128_round(A, B, C) __builtin_ia32_getexpsd128_round(A, B, 4)
#define __builtin_ia32_getexpss128_round(A, B, C) __builtin_ia32_getexpss128_round(A, B, 4)
#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5) #define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5)
#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5) #define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5)
#define __builtin_ia32_getmantsd_round(A, B, C, D) __builtin_ia32_getmantsd_round(A, B, 1, 4)
#define __builtin_ia32_getmantss_round(A, B, C, D) __builtin_ia32_getmantss_round(A, B, 1, 4)
#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E) #define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E) #define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5)
#define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
#define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5)
#define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
#define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1)
#define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 1)
#define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 1)
#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
...@@ -252,10 +266,12 @@ ...@@ -252,10 +266,12 @@
#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5)
#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5)
#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4)
#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1)
#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 1)
#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 1)
#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1)
...@@ -272,10 +288,12 @@ ...@@ -272,10 +288,12 @@
#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1)
#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1)
#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 1)
#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 1)
#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1)
#define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 1)
#define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 1)
#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5)
...@@ -304,12 +322,8 @@ ...@@ -304,12 +322,8 @@
#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsd3_round(A, B, C, D) __builtin_ia32_vfmaddsd3_round(A, B, C, 1)
#define __builtin_ia32_vfmaddss3_round(A, B, C, D) __builtin_ia32_vfmaddss3_round(A, B, C, 1)
#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1)
#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1)
......
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vaddsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vaddsd with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_add_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vaddss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vaddss with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_add_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vcvtsd2ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 s1, r;
volatile __m128d s2;
void extern
avx512f_test (void)
{
/* Scalar double->single conversion with explicit {rn-sae} rounding
   (vcvtsd2ss); the upper lanes of the result come from s1.  */
r = _mm_cvt_roundsd_ss (s1, s2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vcvtss2sd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d s1, r;
volatile __m128 s2;
void extern
avx512f_test (void)
{
/* Scalar single->double conversion with {sae} (suppress-all-exceptions);
   widening is exact, so only SAE — not a rounding mode — applies.  */
r = _mm_cvt_roundss_sd (s1, s2, _MM_FROUND_NO_EXC);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vdivsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
volatile __mmask8 m;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vdivsd with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_div_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vdivss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vdivss with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_div_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#define AVX512F
#include "avx512f-helper.h"
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#include "string.h"
/* Reference computation for the float-lane extract: copy the MASK-th
   128-bit (four-float) lane of S1 into RES_REF.  */
void
CALC (UNION_TYPE (AVX512F_LEN,) s1, float *res_ref, int mask)
{
/* 16 bytes == one 128-bit lane == 4 floats.  */
memset (res_ref, 0, 16);
memcpy (res_ref, s1.a + mask * 4, 16);
}
/* Exercise the plain, write-masked, and zero-masked forms of
   _mm*_extractf32x4_ps against the CALC reference.  */
void static
TEST (void)
{
UNION_TYPE (AVX512F_LEN,) s1;
union128 res1, res2, res3;
float res_ref[4];
MASK_TYPE mask = MASK_VALUE;
int j;
/* Fill the source vector with distinct values.  */
for (j = 0; j < SIZE; j++)
{
s1.a[j] = j * j / 4.56;
}
/* Seed the destinations so masked-off lanes are detectable.  */
for (j = 0; j < 4; j++)
{
res1.a[j] = DEFAULT_VALUE;
res2.a[j] = DEFAULT_VALUE;
res3.a[j] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_extractf32x4_ps) (s1.x, 1);
res2.x = INTRINSIC (_mask_extractf32x4_ps) (res2.x, mask, s1.x, 1);
res3.x = INTRINSIC (_maskz_extractf32x4_ps) (mask, s1.x, 1);
CALC (s1, res_ref, 1);
/* Unmasked result must match the reference exactly.  */
if (check_union128 (res1, res_ref))
abort ();
/* Merge-masking keeps DEFAULT_VALUE in masked-off lanes.  */
MASK_MERGE ()(res_ref, mask, 4);
if (check_union128 (res2, res_ref))
abort ();
/* Zero-masking clears masked-off lanes.  */
MASK_ZERO ()(res_ref, mask, 4);
if (check_union128 (res3, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#define AVX512F
#include "avx512f-helper.h"
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#include "string.h"
/* Reference computation for the int-lane extract: copy the MASK-th
   128-bit (four-int) lane of S1 into RES_REF.  */
void
CALC (UNION_TYPE (AVX512F_LEN, i_d) s1, int *res_ref, int mask)
{
/* 16 bytes == one 128-bit lane == 4 ints.  */
memset (res_ref, 0, 16);
memcpy (res_ref, s1.a + mask * 4, 16);
}
/* Exercise the plain, write-masked, and zero-masked forms of
   _mm*_extracti32x4_epi32 against the CALC reference.  */
void static
TEST (void)
{
UNION_TYPE (AVX512F_LEN, i_d) s1;
union128i_d res1, res2, res3;
int res_ref[4];
MASK_TYPE mask = MASK_VALUE;
int j;
/* Fill the source vector with distinct values (truncated to int).  */
for (j = 0; j < SIZE; j++)
{
s1.a[j] = j * j / 4.56;
}
/* Seed the destinations so masked-off lanes are detectable.  */
for (j = 0; j < 4; j++)
{
res1.a[j] = DEFAULT_VALUE;
res2.a[j] = DEFAULT_VALUE;
res3.a[j] = DEFAULT_VALUE;
}
res1.x = INTRINSIC (_extracti32x4_epi32) (s1.x, 1);
res2.x =
INTRINSIC (_mask_extracti32x4_epi32) (res2.x, mask, s1.x, 1);
res3.x = INTRINSIC (_maskz_extracti32x4_epi32) (mask, s1.x, 1);
CALC (s1, res_ref, 1);
if (check_union128i_d (res1, res_ref))
abort ();
MASK_MERGE (i_d) (res_ref, mask, 4);
if (check_union128i_d (res2, res_ref))
abort ();
MASK_ZERO (i_d) (res_ref, mask, 4);
if (check_union128i_d (res3, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d a, b, c;
void extern
avx512f_test (void)
{
/* Scalar FMA with explicit {rn-sae} rounding (vfmadd...sd).  */
a = _mm_fmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 a, b, c;
void extern
avx512f_test (void)
{
/* Scalar FMA with explicit {rn-sae} rounding (vfmadd...ss).  */
a = _mm_fmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d a, b, c;
void extern
avx512f_test (void)
{
/* Scalar fused multiply-subtract with explicit {rn-sae} rounding.  */
a = _mm_fmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 a, b, c;
void extern
avx512f_test (void)
{
/* Scalar fused multiply-subtract with explicit {rn-sae} rounding.  */
a = _mm_fmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfnmadd...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d a, b, c;
void extern
avx512f_test (void)
{
/* Scalar negated FMA with explicit {rn-sae} rounding.  */
a = _mm_fnmadd_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfnmadd...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 a, b, c;
void extern
avx512f_test (void)
{
/* Scalar negated FMA with explicit {rn-sae} rounding.  */
a = _mm_fnmadd_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfnmsub...sd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d a, b, c;
void extern
avx512f_test (void)
{
/* Scalar negated fused multiply-subtract with explicit {rn-sae} rounding.  */
a = _mm_fnmsub_round_sd (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vfnmsub...ss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 a, b, c;
void extern
avx512f_test (void)
{
/* Scalar negated fused multiply-subtract with explicit {rn-sae} rounding.  */
a = _mm_fnmsub_round_ss (a, b, c, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 2 } } */
/* { dg-final { scan-assembler-times "vgetexpsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d x;
void extern
avx512f_test (void)
{
/* First the plain form, then the explicit-rounding form with {sae};
   the scan-assembler directives count both vgetexpsd encodings.  */
x = _mm_getexp_sd (x, x);
x = _mm_getexp_round_sd (x, x, _MM_FROUND_NO_EXC);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#define SIZE (128 / 64)
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
static void
compute_vgetexpsd (double *s, double *r)
{
r[0] = floor (log (s[0]) / log (2));
}
/* Check the non-masked scalar VGETEXPSD intrinsic against the
   compute_vgetexpsd reference.  */
void static
avx512f_test (void)
{
int i;
union128d res1, s1;
double res_ref[SIZE];
/* Seed the reference with pass-through values; compute_vgetexpsd
   only overwrites element 0.  */
for (i = 0; i < SIZE; i++)
{
s1.a[i] = 5.0 - i;
res_ref[i] = s1.a[i];
}
res1.x = _mm_getexp_sd (s1.x, s1.x);
compute_vgetexpsd (s1.a, res_ref);
if (check_fp_union128d (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 2 } } */
/* { dg-final { scan-assembler-times "vgetexpss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\, %xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 x;
void extern
avx512f_test (void)
{
/* First the plain form, then the explicit-rounding form with {sae};
   the scan-assembler directives count both vgetexpss encodings.  */
x = _mm_getexp_ss (x, x);
x = _mm_getexp_round_ss (x, x, _MM_FROUND_NO_EXC);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#define SIZE (128 / 32)
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
static void
compute_vgetexpss (float *s, float *r)
{
r[0] = floor (log (s[0]) / log (2));
}
/* Check the non-masked scalar VGETEXPSS intrinsic against the
   compute_vgetexpss reference.  */
void static
avx512f_test (void)
{
int i;
union128 res1, s1;
float res_ref[SIZE];
/* Seed the reference with pass-through values; compute_vgetexpss
   only overwrites element 0.  */
for (i = 0; i < SIZE; i++)
{
s1.a[i] = 5.0 - i;
res_ref[i] = s1.a[i];
}
res1.x = _mm_getexp_ss (s1.x, s1.x);
compute_vgetexpss (s1.a, res_ref);
if (check_fp_union128 (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 2 } } */
/* { dg-final { scan-assembler-times "vgetmantsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
#include <immintrin.h>
volatile __m128d x, y, z;
void extern
avx512f_test (void)
{
/* Plain form, then the explicit-rounding form with {sae}; the
   scan-assembler directives count both vgetmantsd encodings.  */
x = _mm_getmant_sd (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
x = _mm_getmant_round_sd (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
_MM_FROUND_NO_EXC);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#include <math.h>
/* Overlay used to type-pun between a double and its 64-bit IEEE-754
   bit pattern (well-defined via union in C).  */
union fp_int_t
{
long long int int_val;
double fp_val;
};
/* Scalar reference model for VGETMANTSD: return the normalized mantissa
   of SOURCE, placed in the interval selected by INTERV (0: [1,2),
   1: [1/2,2), 2: [1/2,1), 3: [3/4,3/2)), with sign handling selected by
   SIGNCTRL (bit 0: force positive; bit 1: NaN for negative input).

   Fix vs. the original: SIGN was extracted as `src >> 63`, which is -1
   for negative inputs, and was later left-shifted by 63 — left-shifting
   a negative value is undefined behavior in C (C11 6.5.7).  The sign is
   now masked to a single bit and the result is reassembled with
   unsigned arithmetic.  */
double
get_norm_mant (double source, int signctrl, int interv)
{
  long long src, sign, exp, fraction;
  /* Local overlay for type-punning between double and its bit pattern.  */
  union { long long int int_val; double fp_val; } bin_conv;

  bin_conv.fp_val = source;
  src = bin_conv.int_val;
  /* Keep only the sign BIT, not an arithmetically-shifted value.  */
  sign = (signctrl & 0x1) ? 0 : ((src >> 63) & 1);
  exp = (src & 0x7ff0000000000000LL) >> 52;
  fraction = (src & 0xfffffffffffffLL);

  if (isnan (source))
    return signbit (source) ? -NAN : NAN;
  if (source == 0.0 || source == -0.0 || isinf (source))
    return sign ? -1.0 : 1.0;
  if (signbit (source) && (signctrl & 0x2))
    return -NAN;

  if (!isnormal (source))
    {
      /* Renormalize a denormal input by hand.  */
      src = (src & 0xfff7ffffffffffffLL);
      exp = 0x3ff;
      while (!(src & 0x8000000000000LL))
	{
	  src += fraction & 0x8000000000000LL;
	  fraction = fraction << 1;
	  exp--;
	}
    }

  /* Pick the biased exponent that maps the mantissa into the
     requested interval.  */
  switch (interv)
    {
    case 0:
      exp = 0x3ff;
      break;
    case 1:
      exp = ((exp - 0x3ff) & 0x1) ? 0x3fe : 0x3ff;
      break;
    case 2:
      exp = 0x3fe;
      break;
    case 3:
      exp = (fraction & 0x8000000000000LL) ? 0x3fe : 0x3ff;
      break;
    default:
      abort ();
    }

  /* Reassemble via unsigned arithmetic: shifting a 1 into the sign bit
     of a signed integer is undefined behavior.  */
  bin_conv.int_val
    = (long long) (((unsigned long long) sign << 63)
		   | ((unsigned long long) exp << 52)
		   | (unsigned long long) fraction);
  return bin_conv.fp_val;
}
/* Reference for the scalar VGETMANTSD intrinsic: low element is the
   normalized mantissa of S2's low element; high element passes through
   from S1.  */
static void
compute_vgetmantsd (double *r, double *s1, double *s2, int interv,
int signctrl)
{
r[0] = get_norm_mant (s2[0], signctrl, interv);
r[1] = s1[1];
}
/* Check the non-masked VGETMANTSD intrinsic against the scalar model.  */
static void
avx512f_test (void)
{
/* NOTE(review): i and sign are declared but unused here.  */
int i, sign;
union128d res1, src1, src2;
double res_ref[2];
int interv = _MM_MANT_NORM_p5_1;
int signctrl = _MM_MANT_SIGN_src;
src1.x = _mm_set_pd (-3.0, 111.111);
src2.x = _mm_set_pd (222.222, -2.0);
res1.x = _mm_getmant_sd (src1.x, src2.x, interv, signctrl);
compute_vgetmantsd (res_ref, src1.a, src2.a, interv, signctrl);
if (check_union128d (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[\\n\]" 2 } } */
/* { dg-final { scan-assembler-times "vgetmantss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[\\n\]" 1 } } */
#include <immintrin.h>
volatile __m128 x, y, z;
void extern
avx512f_test (void)
{
/* Plain form, then the explicit-rounding form with {sae}; the
   scan-assembler directives count both vgetmantss encodings.  */
x = _mm_getmant_ss (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src);
x = _mm_getmant_round_ss (y, z, _MM_MANT_NORM_p75_1p5, _MM_MANT_SIGN_src,
_MM_FROUND_NO_EXC);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#include <math.h>
/* Overlay used to type-pun between a float and its 32-bit IEEE-754
   bit pattern (well-defined via union in C).  */
union fp_int_t
{
int int_val;
float fp_val;
};
/* Scalar reference model for VGETMANTSS: return the normalized mantissa
   of SOURCE, placed in the interval selected by INTERV (0: [1,2),
   1: [1/2,2), 2: [1/2,1), 3: [3/4,3/2)), with sign handling selected by
   SIGNCTRL (bit 0: force positive; bit 1: NaN for negative input).

   Fix vs. the original: SIGN was extracted as `src >> 31`, which is -1
   for negative inputs, and was later left-shifted by 31 — left-shifting
   a negative value is undefined behavior in C (C11 6.5.7).  The sign is
   now masked to a single bit and the result is reassembled with
   unsigned arithmetic.  */
float
get_norm_mant (float source, int signctrl, int interv)
{
  int src, sign, exp, fraction;
  /* Local overlay for type-punning between float and its bit pattern.  */
  union { int int_val; float fp_val; } bin_conv;

  bin_conv.fp_val = source;
  src = bin_conv.int_val;
  /* Keep only the sign BIT, not an arithmetically-shifted value.  */
  sign = (signctrl & 0x1) ? 0 : ((src >> 31) & 1);
  exp = (src & 0x7f800000) >> 23;
  fraction = (src & 0x7fffff);

  if (isnan (source))
    return signbit (source) ? -NAN : NAN;
  if (source == 0.0 || source == -0.0 || isinf (source))
    return sign ? -1.0 : 1.0;
  if (signbit (source) && (signctrl & 0x2))
    return -NAN;

  if (!isnormal (source))
    {
      /* Renormalize a denormal input by hand.  */
      src = (src & 0xffbfffff);
      exp = 0x7f;
      while (!(src & 0x400000))
	{
	  src += fraction & 0x400000;
	  fraction = fraction << 1;
	  exp--;
	}
    }

  /* Pick the biased exponent that maps the mantissa into the
     requested interval.  */
  switch (interv)
    {
    case 0:
      exp = 0x7f;
      break;
    case 1:
      exp = ((exp - 0x7f) & 0x1) ? 0x7e : 0x7f;
      break;
    case 2:
      exp = 0x7e;
      break;
    case 3:
      exp = (fraction & 0x400000) ? 0x7e : 0x7f;
      break;
    default:
      abort ();
    }

  /* Reassemble via unsigned arithmetic: shifting a 1 into the sign bit
     of a signed integer is undefined behavior.  */
  bin_conv.int_val = (int) (((unsigned) sign << 31)
			    | ((unsigned) exp << 23)
			    | (unsigned) fraction);
  return bin_conv.fp_val;
}
/* Reference for the scalar VGETMANTSS intrinsic: low element is the
   normalized mantissa of S2's low element; the other three elements
   pass through from S1.  */
static void
compute_vgetmantss (float *r, float *s1, float *s2, int interv,
int signctrl)
{
int i;
r[0] = get_norm_mant (s2[0], signctrl, interv);
for (i = 1; i < 4; i++)
{
r[i] = s1[i];
}
}
/* Check the non-masked VGETMANTSS intrinsic against the scalar model.  */
static void
avx512f_test (void)
{
/* NOTE(review): i and sign are declared but unused here.  */
int i, sign;
union128 res1, src1, src2;
float res_ref[4];
int interv = _MM_MANT_NORM_p5_1;
int signctrl = _MM_MANT_SIGN_src;
src1.x = _mm_set_ps (-24.043, 68.346, -43.35, 546.46);
src2.x = _mm_set_ps (222.222, 333.333, 444.444, -2.0);
res1.x = _mm_getmant_ss (src1.x, src2.x, interv, signctrl);
compute_vgetmantss (res_ref, src1.a, src2.a, interv, signctrl);
if (check_union128 (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vmaxsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* vmaxsd takes only {sae} (max has no rounding step).  */
x1 = _mm_max_round_sd (x1, x2, _MM_FROUND_NO_EXC);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vmaxss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* vmaxss takes only {sae} (max has no rounding step).  */
x1 = _mm_max_round_ss (x1, x2, _MM_FROUND_NO_EXC);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vminsd\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* vminsd takes only {sae} (min has no rounding step).  */
x1 = _mm_min_round_sd (x1, x2, _MM_FROUND_NO_EXC);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vminss\[ \\t\]+\[^\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* vminss takes only {sae} (min has no rounding step).  */
x1 = _mm_min_round_ss (x1, x2, _MM_FROUND_NO_EXC);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vmulsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vmulsd with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_mul_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vmulss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vmulss with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_mul_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrcp14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* Approximate-reciprocal scalar form; no rounding operand exists.  */
x1 = _mm_rcp14_sd (x1, x2);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
/* Reference result for VRCP14SD: the low element is the reciprocal of
   S2's low element; the high element is copied from S1.  */
static void
compute_vrcp14sd (double *s1, double *s2, double *r)
{
  r[1] = s1[1];
  r[0] = 1.0 / s2[0];
}
/* Check the non-masked scalar VRCP14SD intrinsic against the
   compute_vrcp14sd reference.  Cleanup vs. original: res2/res3 and the
   stray `res2.a[0] = DEFAULT_VALUE;` were unused and are removed.  */
static void
avx512f_test (void)
{
  union128d s1, s2, res1;
  double res_ref[2];

  s1.x = _mm_set_pd (-3.0, 111.111);
  s2.x = _mm_set_pd (222.222, -2.0);

  res1.x = _mm_rcp14_sd (s1.x, s2.x);
  compute_vrcp14sd (s1.a, s2.a, res_ref);

  if (check_union128d (res1, res_ref))
    abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrcp14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* Approximate-reciprocal scalar form; no rounding operand exists.  */
x1 = _mm_rcp14_ss (x1, x2);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
/* Reference result for VRCP14SS: the low element is the reciprocal of
   S2's low element; the upper three elements are copied from S1.  */
static void
compute_vrcp14ss (float *s1, float *s2, float *r)
{
  int i;

  r[0] = 1.0 / s2[0];
  for (i = 1; i < 4; i++)
    r[i] = s1[i];
}
/* Check the non-masked scalar VRCP14SS intrinsic against the
   compute_vrcp14ss reference.  Cleanup vs. original: res2/res3 and the
   stray `res2.a[0] = DEFAULT_VALUE;` were unused and are removed.  */
static void
avx512f_test (void)
{
  union128 s1, s2, res1;
  float res_ref[4];

  s1.x = _mm_set_ps (-24.043, 68.346, -43.35, 546.46);
  s2.x = _mm_set_ps (222.222, 333.333, 444.444, -2.0);

  res1.x = _mm_rcp14_ss (s1.x, s2.x);
  compute_vrcp14ss (s1.a, s2.a, res_ref);

  if (check_union128 (res1, res_ref))
    abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
/* { dg-final { scan-assembler-times "vrndscalesd\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* Plain form, then the explicit-rounding form with {sae}; the
   scan-assembler directives count both vrndscalesd encodings.  */
x1 = _mm_roundscale_sd (x1, x2, 0x42);
x1 = _mm_roundscale_round_sd (x1, x2, 0x42, _MM_FROUND_NO_EXC);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#define SIZE (128 / 64)
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
static void
compute_rndscalesd (double *s1, double *s2, double *r, int imm)
{
int rc, m;
rc = imm & 0xf;
m = imm >> 4;
switch (rc)
{
case _MM_FROUND_FLOOR:
r[0] = floor (s2[0] * pow (2, m)) / pow (2, m);
break;
case _MM_FROUND_CEIL:
r[0] = ceil (s2[0] * pow (2, m)) / pow (2, m);
break;
default:
abort ();
break;
}
r[1] = s1[1];
}
/* Check the non-masked VRNDSCALESD intrinsic (floor mode, scale 2^7)
   against the compute_rndscalesd reference.  */
static void
avx512f_test (void)
{
int imm = _MM_FROUND_FLOOR | (7 << 4);
union128d s1, s2, res1;
double res_ref[SIZE];
s1.x = _mm_set_pd (4.05084, -1.23162);
s2.x = _mm_set_pd (-3.53222, 7.33527);
res1.x = _mm_roundscale_sd (s1.x, s2.x, imm);
compute_rndscalesd (s1.a, s2.a, res_ref, imm);
if (check_union128d (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
/* { dg-final { scan-assembler-times "vrndscaless\[ \\t\]+\\S*,\[ \\t\]+\{sae\}\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* Plain form, then the explicit-rounding form with {sae}; the
   scan-assembler directives count both vrndscaless encodings.  */
x1 = _mm_roundscale_ss (x1, x2, 0x42);
x1 = _mm_roundscale_round_ss (x1, x2, 0x42, _MM_FROUND_NO_EXC);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#define SIZE (128 / 32)
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
static void
compute_rndscaless (float *s1, float *s2, float *r, int imm)
{
int rc, m;
rc = imm & 0xf;
m = imm >> 4;
switch (rc)
{
case _MM_FROUND_FLOOR:
r[0] = floorf (s2[0] * pow (2, m)) / pow (2, m);
break;
case _MM_FROUND_CEIL:
r[0] = ceilf (s2[0] * pow (2, m)) / pow (2, m);
break;
default:
abort ();
break;
}
r[1] = s1[1];
r[2] = s1[2];
r[3] = s1[3];
}
/* Check the non-masked VRNDSCALESS intrinsic (floor mode, scale 2^7)
   against the compute_rndscaless reference.  */
static void
avx512f_test (void)
{
int imm = _MM_FROUND_FLOOR | (7 << 4);
union128 s1, s2, res1;
float res_ref[SIZE];
s1.x = _mm_set_ps (4.05084, -1.23162, 2.00231, -6.22103);
s2.x = _mm_set_ps (-4.19319, -3.53222, 7.33527, 5.57655);
res1.x = _mm_roundscale_ss (s1.x, s2.x, imm);
compute_rndscaless (s1.a, s2.a, res_ref, imm);
if (check_union128 (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
volatile __mmask8 m;
void extern
avx512f_test (void)
{
/* Approximate reciprocal square root, scalar form; no rounding operand.  */
x1 = _mm_rsqrt14_sd (x1, x2);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
static void
compute_vrsqrt14sd (double *s1, double *s2, double *r)
{
r[0] = 1.0 / sqrt (s2[0]);
r[1] = s1[1];
}
/* Check the non-masked scalar VRSQRT14SD intrinsic against the
   compute_vrsqrt14sd reference.  Cleanup vs. original: res2/res3 and the
   stray `res2.a[0] = DEFAULT_VALUE;` were unused and are removed.  */
static void
avx512f_test (void)
{
  union128d s1, s2, res1;
  double res_ref[2];

  s1.x = _mm_set_pd (-3.0, 111.111);
  s2.x = _mm_set_pd (222.222, 4.0);

  res1.x = _mm_rsqrt14_sd (s1.x, s2.x);
  compute_vrsqrt14sd (s1.a, s2.a, res_ref);

  if (check_fp_union128d (res1, res_ref))
    abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* Approximate reciprocal square root, scalar form; no rounding operand.  */
x1 = _mm_rsqrt14_ss (x1, x2);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
static void
compute_vrsqrt14ss (float *s1, float *s2, float *r)
{
r[0] = 1.0 / sqrt (s2[0]);
r[1] = s1[1];
r[2] = s1[2];
r[3] = s1[3];
}
/* Check the non-masked scalar VRSQRT14SS intrinsic against the
   compute_vrsqrt14ss reference.  Cleanup vs. original: res2/res3 and the
   stray `res2.a[0] = DEFAULT_VALUE;` were unused and are removed.  */
static void
avx512f_test (void)
{
  union128 s1, s2, res1;
  float res_ref[4];

  s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
  s2.x = _mm_set_ps (222.222, 333.333, 444.444, 4.0);

  res1.x = _mm_rsqrt14_ss (s1.x, s2.x);
  compute_vrsqrt14ss (s1.a, s2.a, res_ref);

  if (check_fp_union128 (res1, res_ref))
    abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128d x;
void extern
avx512f_test (void)
{
/* Plain form, then the explicit-rounding form with {rn-sae}; the
   scan-assembler directives count both vscalefsd encodings.  */
x = _mm_scalef_sd (x, x);
x = _mm_scalef_round_sd (x, x, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 64)
static void
compute_scalefsd (double *s1, double *s2, double *r)
{
r[0] = s1[0] * pow (2, floor (s2[0]));
r[1] = s1[1];
}
/* Check the non-masked scalar VSCALEFSD intrinsic against the
   compute_scalefsd reference.  */
void static
avx512f_test (void)
{
union128d res1, s1, s2;
double res_ref[SIZE];
int i;
for (i = 0; i < SIZE; i++)
{
s1.a[i] = 11.5 * (i + 1);
s2.a[i] = 10.5 * (i + 1);
}
res1.x = _mm_scalef_sd (s1.x, s2.x);
compute_scalefsd (s1.a, s2.a, res_ref);
if (check_union128d (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\{\]" 2 } } */
/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128 x;
void extern
avx512f_test (void)
{
/* Plain form, then the explicit-rounding form with {rn-sae}; the
   scan-assembler directives count both vscalefss encodings.  */
x = _mm_scalef_ss (x, x);
x = _mm_scalef_round_ss (x, x, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do run } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-require-effective-target avx512f } */
#include <math.h>
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 32)
static void
compute_scalefss (float *s1, float *s2, float *r)
{
r[0] = s1[0] * (float) pow (2, floor (s2[0]));
r[1] = s1[1];
r[2] = s1[2];
r[3] = s1[3];
}
/* Check the non-masked scalar VSCALEFSS intrinsic against the
   compute_scalefss reference.  */
static void
avx512f_test (void)
{
union128 res1, s1, s2;
float res_ref[SIZE];
int i;
for (i = 0; i < SIZE; i++)
{
s1.a[i] = 11.5 * (i + 1);
s2.a[i] = 10.5 * (i + 1);
}
res1.x = _mm_scalef_ss (s1.x, s2.x);
compute_scalefss (s1.a, s2.a, res_ref);
if (check_union128 (res1, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vsqrtsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vsqrtsd with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_sqrt_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vsqrtss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vsqrtss with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_sqrt_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vsubsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128d x1, x2;
void extern
avx512f_test (void)
{
/* Per the scan-assembler directive, this must assemble to vsubsd with an
   explicit {rn-sae} round-to-nearest operand.  */
x1 = _mm_sub_round_sd (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vsubss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\n\]*%xmm\[0-9\]" 1 } } */
#include <immintrin.h>
volatile __m128 x1, x2;
/* Compile-only body: this single call must emit one vsubss carrying the
   {rn-sae} rounding modifier, counted by the scan-assembler directive
   above; _MM_FROUND_TO_NEAREST_INT selects that static-rounding form.  */
void extern
avx512f_test (void)
{
x1 = _mm_sub_round_ss (x1, x2, _MM_FROUND_TO_NEAREST_INT);
}
...@@ -199,6 +199,7 @@ test_1x (_mm512_getmant_pd, __m512d, __m512d, 1, 1) ...@@ -199,6 +199,7 @@ test_1x (_mm512_getmant_pd, __m512d, __m512d, 1, 1)
test_1x (_mm512_getmant_ps, __m512, __m512, 1, 1) test_1x (_mm512_getmant_ps, __m512, __m512, 1, 1)
test_1x (_mm512_roundscale_round_pd, __m512d, __m512d, 1, 5) test_1x (_mm512_roundscale_round_pd, __m512d, __m512d, 1, 5)
test_1x (_mm512_roundscale_round_ps, __m512, __m512, 1, 5) test_1x (_mm512_roundscale_round_ps, __m512, __m512, 1, 5)
test_1x (_mm_cvt_roundi32_ss, __m128, __m128, 1, 1)
test_2 (_mm512_add_round_pd, __m512d, __m512d, __m512d, 1) test_2 (_mm512_add_round_pd, __m512d, __m512d, __m512d, 1)
test_2 (_mm512_add_round_ps, __m512, __m512, __m512, 1) test_2 (_mm512_add_round_ps, __m512, __m512, __m512, 1)
test_2 (_mm512_alignr_epi32, __m512i, __m512i, __m512i, 1) test_2 (_mm512_alignr_epi32, __m512i, __m512i, __m512i, 1)
...@@ -278,16 +279,45 @@ test_2 (_mm512_shuffle_pd, __m512d, __m512d, __m512d, 1) ...@@ -278,16 +279,45 @@ test_2 (_mm512_shuffle_pd, __m512d, __m512d, __m512d, 1)
test_2 (_mm512_shuffle_ps, __m512, __m512, __m512, 1) test_2 (_mm512_shuffle_ps, __m512, __m512, __m512, 1)
test_2 (_mm512_sub_round_pd, __m512d, __m512d, __m512d, 1) test_2 (_mm512_sub_round_pd, __m512d, __m512d, __m512d, 1)
test_2 (_mm512_sub_round_ps, __m512, __m512, __m512, 1) test_2 (_mm512_sub_round_ps, __m512, __m512, __m512, 1)
test_2 (_mm_add_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_add_round_ss, __m128, __m128, __m128, 1)
test_2 (_mm_cmp_sd_mask, __mmask8, __m128d, __m128d, 1) test_2 (_mm_cmp_sd_mask, __mmask8, __m128d, __m128d, 1)
test_2 (_mm_cmp_ss_mask, __mmask8, __m128, __m128, 1) test_2 (_mm_cmp_ss_mask, __mmask8, __m128, __m128, 1)
#ifdef __x86_64__ #ifdef __x86_64__
test_2 (_mm_cvt_roundi64_sd, __m128d, __m128d, long long, 1)
test_2 (_mm_cvt_roundi64_ss, __m128, __m128, long long, 1)
#endif #endif
test_2 (_mm_cvt_roundsd_ss, __m128, __m128, __m128d, 1)
test_2 (_mm_cvt_roundss_sd, __m128d, __m128d, __m128, 5)
test_2 (_mm_cvt_roundu32_ss, __m128, __m128, unsigned, 1)
#ifdef __x86_64__ #ifdef __x86_64__
test_2 (_mm_cvt_roundu64_sd, __m128d, __m128d, unsigned long long, 1)
test_2 (_mm_cvt_roundu64_ss, __m128, __m128, unsigned long long, 1)
#endif #endif
test_2 (_mm_div_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_div_round_ss, __m128, __m128, __m128, 1)
test_2 (_mm_getexp_round_sd, __m128d, __m128d, __m128d, 5)
test_2 (_mm_getexp_round_ss, __m128, __m128, __m128, 5)
test_2y (_mm_getmant_round_sd, __m128d, __m128d, __m128d, 1, 1, 5)
test_2y (_mm_getmant_round_ss, __m128, __m128, __m128, 1, 1, 5)
test_2 (_mm_mul_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_mul_round_ss, __m128, __m128, __m128, 1)
test_2 (_mm_scalef_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_scalef_round_ss, __m128, __m128, __m128, 1)
test_2 (_mm_sqrt_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_sqrt_round_ss, __m128, __m128, __m128, 1)
test_2 (_mm_sub_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_sub_round_ss, __m128, __m128, __m128, 1)
test_2x (_mm512_cmp_round_pd_mask, __mmask8, __m512d, __m512d, 1, 5) test_2x (_mm512_cmp_round_pd_mask, __mmask8, __m512d, __m512d, 1, 5)
test_2x (_mm512_cmp_round_ps_mask, __mmask16, __m512, __m512, 1, 5) test_2x (_mm512_cmp_round_ps_mask, __mmask16, __m512, __m512, 1, 5)
test_2x (_mm512_maskz_roundscale_round_pd, __m512d, __mmask8, __m512d, 1, 5) test_2x (_mm512_maskz_roundscale_round_pd, __m512d, __mmask8, __m512d, 1, 5)
test_2x (_mm512_maskz_roundscale_round_ps, __m512, __mmask16, __m512, 1, 5) test_2x (_mm512_maskz_roundscale_round_ps, __m512, __mmask16, __m512, 1, 5)
test_2x (_mm_cmp_round_sd_mask, __mmask8, __m128d, __m128d, 1, 5)
test_2x (_mm_cmp_round_ss_mask, __mmask8, __m128, __m128, 1, 5)
test_2x (_mm_comi_round_sd, int, __m128d, __m128d, 1, 5)
test_2x (_mm_comi_round_ss, int, __m128, __m128, 1, 5)
test_2x (_mm_roundscale_round_sd, __m128d, __m128d, __m128d, 1, 5)
test_2x (_mm_roundscale_round_ss, __m128, __m128, __m128, 1, 5)
test_3 (_mm512_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1) test_3 (_mm512_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
test_3 (_mm512_fmadd_round_ps, __m512, __m512, __m512, __m512, 1) test_3 (_mm512_fmadd_round_ps, __m512, __m512, __m512, __m512, 1)
test_3 (_mm512_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1) test_3 (_mm512_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, 1)
...@@ -373,6 +403,14 @@ test_3 (_mm512_maskz_sub_round_pd, __m512d, __mmask8, __m512d, __m512d, 1) ...@@ -373,6 +403,14 @@ test_3 (_mm512_maskz_sub_round_pd, __m512d, __mmask8, __m512d, __m512d, 1)
test_3 (_mm512_maskz_sub_round_ps, __m512, __mmask16, __m512, __m512, 1) test_3 (_mm512_maskz_sub_round_ps, __m512, __mmask16, __m512, __m512, 1)
test_3 (_mm512_ternarylogic_epi32, __m512i, __m512i, __m512i, __m512i, 1) test_3 (_mm512_ternarylogic_epi32, __m512i, __m512i, __m512i, __m512i, 1)
test_3 (_mm512_ternarylogic_epi64, __m512i, __m512i, __m512i, __m512i, 1) test_3 (_mm512_ternarylogic_epi64, __m512i, __m512i, __m512i, __m512i, 1)
test_3 (_mm_fmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
test_3 (_mm_fmadd_round_ss, __m128, __m128, __m128, __m128, 1)
test_3 (_mm_fmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
test_3 (_mm_fmsub_round_ss, __m128, __m128, __m128, __m128, 1)
test_3 (_mm_fnmadd_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
test_3 (_mm_fnmadd_round_ss, __m128, __m128, __m128, __m128, 1)
test_3 (_mm_fnmsub_round_sd, __m128d, __m128d, __m128d, __m128d, 1)
test_3 (_mm_fnmsub_round_ss, __m128, __m128, __m128, __m128, 1)
test_3 (_mm_mask_cmp_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1) test_3 (_mm_mask_cmp_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1)
test_3 (_mm_mask_cmp_ss_mask, __mmask8, __mmask8, __m128, __m128, 1) test_3 (_mm_mask_cmp_ss_mask, __mmask8, __mmask8, __m128, __m128, 1)
test_3v (_mm512_i32scatter_epi32, void *, __m512i, __m512i, 1) test_3v (_mm512_i32scatter_epi32, void *, __m512i, __m512i, 1)
...@@ -385,6 +423,10 @@ test_3v (_mm512_i64scatter_pd, void *, __m512i, __m512d, 1) ...@@ -385,6 +423,10 @@ test_3v (_mm512_i64scatter_pd, void *, __m512i, __m512d, 1)
test_3v (_mm512_i64scatter_ps, void *, __m512i, __m256, 1) test_3v (_mm512_i64scatter_ps, void *, __m512i, __m256, 1)
test_3x (_mm512_mask_roundscale_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 5) test_3x (_mm512_mask_roundscale_round_pd, __m512d, __m512d, __mmask8, __m512d, 1, 5)
test_3x (_mm512_mask_roundscale_round_ps, __m512, __m512, __mmask16, __m512, 1, 5) test_3x (_mm512_mask_roundscale_round_ps, __m512, __m512, __mmask16, __m512, 1, 5)
test_3x (_mm_fixupimm_round_sd, __m128d, __m128d, __m128d, __m128i, 1, 5)
test_3x (_mm_fixupimm_round_ss, __m128, __m128, __m128, __m128i, 1, 5)
test_3x (_mm_mask_cmp_round_sd_mask, __mmask8, __mmask8, __m128d, __m128d, 1, 5)
test_3x (_mm_mask_cmp_round_ss_mask, __mmask8, __mmask8, __m128, __m128, 1, 5)
test_4 (_mm512_mask3_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1) test_4 (_mm512_mask3_fmadd_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
test_4 (_mm512_mask3_fmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1) test_4 (_mm512_mask3_fmadd_round_ps, __m512, __m512, __m512, __m512, __mmask16, 1)
test_4 (_mm512_mask3_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1) test_4 (_mm512_mask3_fmaddsub_round_pd, __m512d, __m512d, __m512d, __m512d, __mmask8, 1)
...@@ -471,6 +513,10 @@ test_4x (_mm512_mask_fixupimm_round_pd, __m512d, __m512d, __mmask8, __m512d, __m ...@@ -471,6 +513,10 @@ test_4x (_mm512_mask_fixupimm_round_pd, __m512d, __m512d, __mmask8, __m512d, __m
test_4x (_mm512_mask_fixupimm_round_ps, __m512, __m512, __mmask16, __m512, __m512i, 1, 5) test_4x (_mm512_mask_fixupimm_round_ps, __m512, __m512, __mmask16, __m512, __m512i, 1, 5)
test_4x (_mm512_maskz_fixupimm_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512i, 1, 5) test_4x (_mm512_maskz_fixupimm_round_pd, __m512d, __mmask8, __m512d, __m512d, __m512i, 1, 5)
test_4x (_mm512_maskz_fixupimm_round_ps, __m512, __mmask16, __m512, __m512, __m512i, 1, 5) test_4x (_mm512_maskz_fixupimm_round_ps, __m512, __mmask16, __m512, __m512, __m512i, 1, 5)
test_4x (_mm_mask_fixupimm_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128i, 1, 5)
test_4x (_mm_mask_fixupimm_round_ss, __m128, __m128, __mmask8, __m128, __m128i, 1, 5)
test_4x (_mm_maskz_fixupimm_round_sd, __m128d, __mmask8, __m128d, __m128d, __m128i, 1, 5)
test_4x (_mm_maskz_fixupimm_round_ss, __m128, __mmask8, __m128, __m128, __m128i, 1, 5)
/* avx512pfintrin.h */ /* avx512pfintrin.h */
test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, 1) test_3vx (_mm512_mask_prefetch_i32gather_ps, __m512i, __mmask16, void const *, 1, 1)
......
...@@ -186,6 +186,8 @@ ...@@ -186,6 +186,8 @@
/* avx512fintrin.h */ /* avx512fintrin.h */
#define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1) #define __builtin_ia32_addpd512_mask(A, B, C, D, E) __builtin_ia32_addpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1) #define __builtin_ia32_addps512_mask(A, B, C, D, E) __builtin_ia32_addps512_mask(A, B, C, D, 1)
#define __builtin_ia32_addsd_round(A, B, C) __builtin_ia32_addsd_round(A, B, 1)
#define __builtin_ia32_addss_round(A, B, C) __builtin_ia32_addss_round(A, B, 1)
#define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E) #define __builtin_ia32_alignd512_mask(A, B, F, D, E) __builtin_ia32_alignd512_mask(A, B, 1, D, E)
#define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E) #define __builtin_ia32_alignq512_mask(A, B, F, D, E) __builtin_ia32_alignq512_mask(A, B, 1, D, E)
#define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D) #define __builtin_ia32_cmpd512_mask(A, B, E, D) __builtin_ia32_cmpd512_mask(A, B, 1, D)
...@@ -201,6 +203,8 @@ ...@@ -201,6 +203,8 @@
#define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1) #define __builtin_ia32_cvtps2dq512_mask(A, B, C, D) __builtin_ia32_cvtps2dq512_mask(A, B, C, 1)
#define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5) #define __builtin_ia32_cvtps2pd512_mask(A, B, C, D) __builtin_ia32_cvtps2pd512_mask(A, B, C, 5)
#define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1) #define __builtin_ia32_cvtps2udq512_mask(A, B, C, D) __builtin_ia32_cvtps2udq512_mask(A, B, C, 1)
#define __builtin_ia32_cvtsd2ss_round(A, B, C) __builtin_ia32_cvtsd2ss_round(A, B, 1)
#define __builtin_ia32_cvtss2sd_round(A, B, C) __builtin_ia32_cvtss2sd_round(A, B, 4)
#define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1) #define __builtin_ia32_cvtsi2sd64(A, B, C) __builtin_ia32_cvtsi2sd64(A, B, 1)
#define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1) #define __builtin_ia32_cvtsi2ss32(A, B, C) __builtin_ia32_cvtsi2ss32(A, B, 1)
#define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1) #define __builtin_ia32_cvtsi2ss64(A, B, C) __builtin_ia32_cvtsi2ss64(A, B, 1)
...@@ -214,6 +218,8 @@ ...@@ -214,6 +218,8 @@
#define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1) #define __builtin_ia32_cvtusi2ss64(A, B, C) __builtin_ia32_cvtusi2ss64(A, B, 1)
#define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1) #define __builtin_ia32_divpd512_mask(A, B, C, D, E) __builtin_ia32_divpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1) #define __builtin_ia32_divps512_mask(A, B, C, D, E) __builtin_ia32_divps512_mask(A, B, C, D, 1)
#define __builtin_ia32_divsd_round(A, B, C) __builtin_ia32_divsd_round(A, B, 1)
#define __builtin_ia32_divss_round(A, B, C) __builtin_ia32_divss_round(A, B, 1)
#define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D) #define __builtin_ia32_extractf32x4_mask(A, E, C, D) __builtin_ia32_extractf32x4_mask(A, 1, C, D)
#define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D) #define __builtin_ia32_extractf64x4_mask(A, E, C, D) __builtin_ia32_extractf64x4_mask(A, 1, C, D)
#define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D) #define __builtin_ia32_extracti32x4_mask(A, E, C, D) __builtin_ia32_extracti32x4_mask(A, 1, C, D)
...@@ -236,18 +242,28 @@ ...@@ -236,18 +242,28 @@
#define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1) #define __builtin_ia32_gathersiv8di(A, B, C, D, F) __builtin_ia32_gathersiv8di(A, B, C, D, 1)
#define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5) #define __builtin_ia32_getexppd512_mask(A, B, C, D) __builtin_ia32_getexppd512_mask(A, B, C, 5)
#define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5) #define __builtin_ia32_getexpps512_mask(A, B, C, D) __builtin_ia32_getexpps512_mask(A, B, C, 5)
#define __builtin_ia32_getexpsd128_round(A, B, C) __builtin_ia32_getexpsd128_round(A, B, 4)
#define __builtin_ia32_getexpss128_round(A, B, C) __builtin_ia32_getexpss128_round(A, B, 4)
#define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5) #define __builtin_ia32_getmantpd512_mask(A, F, C, D, E) __builtin_ia32_getmantpd512_mask(A, 1, C, D, 5)
#define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5) #define __builtin_ia32_getmantps512_mask(A, F, C, D, E) __builtin_ia32_getmantps512_mask(A, 1, C, D, 5)
#define __builtin_ia32_getmantsd_round(A, B, C, D) __builtin_ia32_getmantsd_round(A, B, 1, 4)
#define __builtin_ia32_getmantss_round(A, B, C, D) __builtin_ia32_getmantss_round(A, B, 1, 4)
#define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E) #define __builtin_ia32_insertf32x4_mask(A, B, F, D, E) __builtin_ia32_insertf32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E) #define __builtin_ia32_insertf64x4_mask(A, B, F, D, E) __builtin_ia32_insertf64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E) #define __builtin_ia32_inserti32x4_mask(A, B, F, D, E) __builtin_ia32_inserti32x4_mask(A, B, 1, D, E)
#define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E) #define __builtin_ia32_inserti64x4_mask(A, B, F, D, E) __builtin_ia32_inserti64x4_mask(A, B, 1, D, E)
#define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5) #define __builtin_ia32_maxpd512_mask(A, B, C, D, E) __builtin_ia32_maxpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5) #define __builtin_ia32_maxps512_mask(A, B, C, D, E) __builtin_ia32_maxps512_mask(A, B, C, D, 5)
#define __builtin_ia32_maxsd_round(A, B, C) __builtin_ia32_maxsd_round(A, B, 4)
#define __builtin_ia32_maxss_round(A, B, C) __builtin_ia32_maxss_round(A, B, 4)
#define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5) #define __builtin_ia32_minpd512_mask(A, B, C, D, E) __builtin_ia32_minpd512_mask(A, B, C, D, 5)
#define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5) #define __builtin_ia32_minps512_mask(A, B, C, D, E) __builtin_ia32_minps512_mask(A, B, C, D, 5)
#define __builtin_ia32_minsd_round(A, B, C) __builtin_ia32_minsd_round(A, B, 4)
#define __builtin_ia32_minss_round(A, B, C) __builtin_ia32_minss_round(A, B, 4)
#define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1) #define __builtin_ia32_mulpd512_mask(A, B, C, D, E) __builtin_ia32_mulpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1) #define __builtin_ia32_mulps512_mask(A, B, C, D, E) __builtin_ia32_mulps512_mask(A, B, C, D, 1)
#define __builtin_ia32_mulsd_round(A, B, C) __builtin_ia32_mulsd_round(A, B, 1)
#define __builtin_ia32_mulss_round(A, B, C) __builtin_ia32_mulss_round(A, B, 1)
#define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D) #define __builtin_ia32_permdf512_mask(A, E, C, D) __builtin_ia32_permdf512_mask(A, 1, C, D)
#define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D) #define __builtin_ia32_permdi512_mask(A, E, C, D) __builtin_ia32_permdi512_mask(A, 1, C, D)
#define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D) #define __builtin_ia32_prold512_mask(A, E, C, D) __builtin_ia32_prold512_mask(A, 1, C, D)
...@@ -267,8 +283,12 @@ ...@@ -267,8 +283,12 @@
#define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E) #define __builtin_ia32_pternlogq512_maskz(A, B, C, F, E) __builtin_ia32_pternlogq512_maskz(A, B, C, 1, E)
#define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5) #define __builtin_ia32_rndscalepd_mask(A, F, C, D, E) __builtin_ia32_rndscalepd_mask(A, 1, C, D, 5)
#define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5) #define __builtin_ia32_rndscaleps_mask(A, F, C, D, E) __builtin_ia32_rndscaleps_mask(A, 1, C, D, 5)
#define __builtin_ia32_rndscalesd_round(A, B, C, D) __builtin_ia32_rndscalesd_round(A, B, 1, 4)
#define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4)
#define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1) #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1) #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 1)
#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 1)
#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 1)
#define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1) #define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1) #define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 1)
#define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1) #define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 1)
...@@ -285,8 +305,12 @@ ...@@ -285,8 +305,12 @@
#define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E) #define __builtin_ia32_shufps512_mask(A, B, F, D, E) __builtin_ia32_shufps512_mask(A, B, 1, D, E)
#define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1) #define __builtin_ia32_sqrtpd512_mask(A, B, C, D) __builtin_ia32_sqrtpd512_mask(A, B, C, 1)
#define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1) #define __builtin_ia32_sqrtps512_mask(A, B, C, D) __builtin_ia32_sqrtps512_mask(A, B, C, 1)
#define __builtin_ia32_sqrtss_round(A, B, C) __builtin_ia32_sqrtss_round(A, B, 1)
#define __builtin_ia32_sqrtsd_round(A, B, C) __builtin_ia32_sqrtsd_round(A, B, 1)
#define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1) #define __builtin_ia32_subpd512_mask(A, B, C, D, E) __builtin_ia32_subpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1) #define __builtin_ia32_subps512_mask(A, B, C, D, E) __builtin_ia32_subps512_mask(A, B, C, D, 1)
#define __builtin_ia32_subsd_round(A, B, C) __builtin_ia32_subsd_round(A, B, 1)
#define __builtin_ia32_subss_round(A, B, C) __builtin_ia32_subss_round(A, B, 1)
#define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D) #define __builtin_ia32_ucmpd512_mask(A, B, E, D) __builtin_ia32_ucmpd512_mask(A, B, 1, D)
#define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D) #define __builtin_ia32_ucmpq512_mask(A, B, E, D) __builtin_ia32_ucmpq512_mask(A, B, 1, D)
#define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5) #define __builtin_ia32_vcomisd(A, B, C, D) __builtin_ia32_vcomisd(A, B, 1, 5)
...@@ -315,12 +339,8 @@ ...@@ -315,12 +339,8 @@
#define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1) #define __builtin_ia32_vfmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1) #define __builtin_ia32_vfmaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddps512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1) #define __builtin_ia32_vfmaddps512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddps512_maskz(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsd3_mask(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask(A, B, C, D, 1) #define __builtin_ia32_vfmaddsd3_round(A, B, C, D) __builtin_ia32_vfmaddsd3_round(A, B, C, 1)
#define __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsd3_mask3(A, B, C, D, 1) #define __builtin_ia32_vfmaddss3_round(A, B, C, D) __builtin_ia32_vfmaddss3_round(A, B, C, 1)
#define __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsd3_maskz(A, B, C, D, 1)
#define __builtin_ia32_vfmaddss3_mask(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddss3_mask3(A, B, C, D, E) __builtin_ia32_vfmaddss3_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddss3_maskz(A, B, C, D, E) __builtin_ia32_vfmaddss3_maskz(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1) #define __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1) #define __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1) #define __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, E) __builtin_ia32_vfmaddsubpd512_maskz(A, B, C, D, 1)
...@@ -331,8 +351,6 @@ ...@@ -331,8 +351,6 @@
#define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1) #define __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubaddps512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1) #define __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubpd512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1) #define __builtin_ia32_vfmsubps512_mask3(A, B, C, D, E) __builtin_ia32_vfmsubps512_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubsd3_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfmsubss3_mask3(A, B, C, D, E) __builtin_ia32_vfmsubss3_mask3(A, B, C, D, 1)
#define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1) #define __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddpd512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1) #define __builtin_ia32_vfnmaddps512_mask(A, B, C, D, E) __builtin_ia32_vfnmaddps512_mask(A, B, C, D, 1)
#define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1) #define __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, E) __builtin_ia32_vfnmsubpd512_mask(A, B, C, D, 1)
......
...@@ -77,7 +77,13 @@ test8bit (void) ...@@ -77,7 +77,13 @@ test8bit (void)
m512 = _mm512_mask_fixupimm_ps (m512, mmask16, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_mask_fixupimm_ps (m512, mmask16, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512 = _mm512_maskz_fixupimm_ps (mmask16, m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_maskz_fixupimm_ps (mmask16, m512, m512, m512i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128d = _mm_fixupimm_sd (m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128d = _mm_mask_fixupimm_sd (m128d, mmask8, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128d = _mm_maskz_fixupimm_sd (mmask8, m128d, m128d, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128 = _mm_fixupimm_ss (m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128 = _mm_mask_fixupimm_ss (m128, mmask8, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128 = _mm_maskz_fixupimm_ss (mmask8, m128, m128, m128i, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512i = _mm512_rol_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_rol_epi32 (m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
m512i = _mm512_mask_rol_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_rol_epi32 (m512i, mmask16, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
...@@ -107,6 +113,8 @@ test8bit (void) ...@@ -107,6 +113,8 @@ test8bit (void)
m512 = _mm512_mask_roundscale_ps (m512, mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_mask_roundscale_ps (m512, mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512 = _mm512_maskz_roundscale_ps (mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */ m512 = _mm512_maskz_roundscale_ps (mmask16, m512, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128d = _mm_roundscale_sd (m128d, m128d, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m128 = _mm_roundscale_ss (m128, m128, 256); /* { dg-error "the immediate argument must be an 8-bit immediate" } */
m512i = _mm512_alignr_epi32 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_alignr_epi32 (m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
m512i = _mm512_mask_alignr_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */ m512i = _mm512_mask_alignr_epi32 (m512i, mmask16, m512i, m512i, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
...@@ -179,5 +187,6 @@ test4bit (void) { ...@@ -179,5 +187,6 @@ test4bit (void) {
m512 = _mm512_mask_getmant_ps (m512, mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */ m512 = _mm512_mask_getmant_ps (m512, mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
m512 = _mm512_maskz_getmant_ps (mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */ m512 = _mm512_maskz_getmant_ps (mmask16, m512, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
m128d = _mm_getmant_sd (m128d, m128d, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
m128 = _mm_getmant_ss (m128, m128, 1, 64); /* { dg-error "the immediate argument must be a 4-bit immediate" } */
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment