Commit 95879c72 by H.J. Lu

[multiple changes]

2008-08-28  H.J. Lu  <hongjiu.lu@intel.com>
	    Joey Ye  <joey.ye@intel.com>
	    Xuepeng Guo  <xuepeng.guo@intel.com>

	* config.gcc (extra_headers): Add gmmintrin.h for x86 and x86-64.

	* config/i386/cpuid.h (bit_FMA): New.
	(bit_XSAVE): Likewise.
	(bit_OSXSAVE): Likewise.
	(bit_AVX): Likewise.

	* config/i386/gas.h (ASM_OUTPUT_OPCODE): Undefine before
	define.  Use ASM_OUTPUT_AVX_PREFIX.

	* config/i386/gmmintrin.h: New.

	* config/i386/i386.c (x86_64_reg_class): Add X86_64_AVX_CLASS.
	(OPTION_MASK_ISA_AVX_SET): New.
	(OPTION_MASK_ISA_FMA_SET): Likewise.
	(OPTION_MASK_ISA_AVX_UNSET): Likewise.
	(OPTION_MASK_ISA_FMA_UNSET): Likewise.
	(OPTION_MASK_ISA_SSE4_2_UNSET): Updated.
	(ix86_handle_option): Handle OPT_mavx and OPT_mfma.
	(pta_flags): Add PTA_AVX and PTA_FMA.
	(override_options): Handle PTA_AVX and PTA_FMA.
	(init_cumulative_args): Handle warn_avx.
	(classify_argument): Return 0 for COImode and OImode.  Return
	1 and X86_64_AVX_CLASS for 256bit vector types.
	(examine_argument): Handle X86_64_AVX_CLASS.
	(construct_container): Likewise.
	(function_arg_advance_32): Pass OImode and 256bit vector types
	in AVX register.
	(function_arg_advance_64): Take a new argument to indicate if a
	parameter is named.  Handle 256bit vector types.  Return
	immediately for unnamed 256bit vector mode parameters.
	(function_arg_advance): Updated.
	(function_arg_32): Add comments for TImode.  Handle OImode
	and 256bit vector types.
	(function_arg_64): Take a new argument to indicate if a
	parameter is named.  Handle 256bit vector types.  Return NULL
	for unnamed 256bit vector mode parameters.
	(function_arg): Updated.
	(setup_incoming_varargs_64): Support
	AVX encoding for *sse_prologue_save_insn.
	(ix86_gimplify_va_arg): Handle 256bit vector mode parameters.
	(standard_sse_constant_p): Return -2 for all 1s if SSE2 isn't
	enabled.  For all 1s in 256bit vector modes, return 3 if AVX is
	enabled, otherwise return -3.
	(standard_sse_constant_opcode): Handle AVX and 256bit vector
	modes.
	(print_reg): Support AVX registers.  Handle 'x' and 't'.
	Handle 'd' to duplicate the operand.
	(print_operand): Likewise.  Also support AVX vector compare
	instructions.
	(output_387_binary_op): Support AVX.
	(output_fp_compare): Likewise.
	(ix86_expand_vector_move_misalign): Likewise.
	(ix86_attr_length_vex_default): New.
	(ix86_builtins): Add IX86_BUILTIN_ADDPD256,
	IX86_BUILTIN_ADDPS256, IX86_BUILTIN_ADDSUBPD256,
	IX86_BUILTIN_ADDSUBPS256, IX86_BUILTIN_ANDPD256,
	IX86_BUILTIN_ANDPS256, IX86_BUILTIN_ANDNPD256,
	IX86_BUILTIN_ANDNPS256, IX86_BUILTIN_BLENDPD256,
	IX86_BUILTIN_BLENDPS256, IX86_BUILTIN_BLENDVPD256,
	IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_DIVPD256,
	IX86_BUILTIN_DIVPS256, IX86_BUILTIN_DPPS256,
	IX86_BUILTIN_HADDPD256, IX86_BUILTIN_HADDPS256,
	IX86_BUILTIN_HSUBPD256, IX86_BUILTIN_HSUBPS256,
	IX86_BUILTIN_MAXPD256, IX86_BUILTIN_MAXPS256,
	IX86_BUILTIN_MINPD256, IX86_BUILTIN_MINPS256,
	IX86_BUILTIN_MULPD256, IX86_BUILTIN_MULPS256,
	IX86_BUILTIN_ORPD256, IX86_BUILTIN_ORPS256,
	IX86_BUILTIN_SHUFPD256, IX86_BUILTIN_SHUFPS256,
	IX86_BUILTIN_SUBPD256, IX86_BUILTIN_SUBPS256,
	IX86_BUILTIN_XORPD256, IX86_BUILTIN_XORPS256,
	IX86_BUILTIN_CMPSD, IX86_BUILTIN_CMPSS, IX86_BUILTIN_CMPPD,
	IX86_BUILTIN_CMPPS, IX86_BUILTIN_CMPPD256,
	IX86_BUILTIN_CMPPS256, IX86_BUILTIN_CVTDQ2PD256,
	IX86_BUILTIN_CVTDQ2PS256, IX86_BUILTIN_CVTPD2PS256,
	IX86_BUILTIN_CVTPS2DQ256, IX86_BUILTIN_CVTPS2PD256,
	IX86_BUILTIN_CVTTPD2DQ256, IX86_BUILTIN_CVTPD2DQ256,
	IX86_BUILTIN_CVTTPS2DQ256, IX86_BUILTIN_EXTRACTF128PD256,
	IX86_BUILTIN_EXTRACTF128PS256, IX86_BUILTIN_EXTRACTF128SI256,
	IX86_BUILTIN_VZEROALL, IX86_BUILTIN_VZEROUPPER,
	IX86_BUILTIN_VZEROUPPER_REX64, IX86_BUILTIN_VPERMILVARPD,
	IX86_BUILTIN_VPERMILVARPS, IX86_BUILTIN_VPERMILVARPD256,
	IX86_BUILTIN_VPERMILVARPS256, IX86_BUILTIN_VPERMILPD,
	IX86_BUILTIN_VPERMILPS, IX86_BUILTIN_VPERMILPD256,
	IX86_BUILTIN_VPERMILPS256, IX86_BUILTIN_VPERMIL2PD,
	IX86_BUILTIN_VPERMIL2PS, IX86_BUILTIN_VPERMIL2PD256,
	IX86_BUILTIN_VPERMIL2PS256, IX86_BUILTIN_VPERM2F128PD256,
	IX86_BUILTIN_VPERM2F128PS256, IX86_BUILTIN_VPERM2F128SI256,
	IX86_BUILTIN_VBROADCASTSS, IX86_BUILTIN_VBROADCASTSD256,
	IX86_BUILTIN_VBROADCASTSS256, IX86_BUILTIN_VBROADCASTPD256,
	IX86_BUILTIN_VBROADCASTPS256, IX86_BUILTIN_VINSERTF128PD256,
	IX86_BUILTIN_VINSERTF128PS256, IX86_BUILTIN_VINSERTF128SI256,
	IX86_BUILTIN_LOADUPD256, IX86_BUILTIN_LOADUPS256,
	IX86_BUILTIN_STOREUPD256, IX86_BUILTIN_STOREUPS256,
	IX86_BUILTIN_LDDQU256, IX86_BUILTIN_LOADDQU256,
	IX86_BUILTIN_STOREDQU256, IX86_BUILTIN_MASKLOADPD,
	IX86_BUILTIN_MASKLOADPS, IX86_BUILTIN_MASKSTOREPD,
	IX86_BUILTIN_MASKSTOREPS, IX86_BUILTIN_MASKLOADPD256,
	IX86_BUILTIN_MASKLOADPS256, IX86_BUILTIN_MASKSTOREPD256,
	IX86_BUILTIN_MASKSTOREPS256, IX86_BUILTIN_MOVSHDUP256,
	IX86_BUILTIN_MOVSLDUP256, IX86_BUILTIN_MOVDDUP256,
	IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS256,
	IX86_BUILTIN_SQRTPS_NR256, IX86_BUILTIN_RSQRTPS256,
	IX86_BUILTIN_RSQRTPS_NR256, IX86_BUILTIN_RCPPS256,
	IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256,
	IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256,
	IX86_BUILTIN_UNPCKHPS256, IX86_BUILTIN_UNPCKLPS256,
	IX86_BUILTIN_SI256_SI, IX86_BUILTIN_PS256_PS,
	IX86_BUILTIN_PD256_PD, IX86_BUILTIN_SI_SI256,
	IX86_BUILTIN_PS_PS256, IX86_BUILTIN_PD_PD256,
	IX86_BUILTIN_VTESTZPD, IX86_BUILTIN_VTESTCPD,
	IX86_BUILTIN_VTESTNZCPD, IX86_BUILTIN_VTESTZPS,
	IX86_BUILTIN_VTESTCPS, IX86_BUILTIN_VTESTNZCPS,
	IX86_BUILTIN_VTESTZPD256, IX86_BUILTIN_VTESTCPD256,
	IX86_BUILTIN_VTESTNZCPD256, IX86_BUILTIN_VTESTZPS256,
	IX86_BUILTIN_VTESTCPS256, IX86_BUILTIN_VTESTNZCPS256,
	IX86_BUILTIN_PTESTZ256, IX86_BUILTIN_PTESTC256,
	IX86_BUILTIN_PTESTNZC256, IX86_BUILTIN_MOVMSKPD256
	and IX86_BUILTIN_MOVMSKPS256.
	(ix86_special_builtin_type): Add V32QI_FTYPE_PCCHAR,
	V8SF_FTYPE_PCV4SF, V8SF_FTYPE_PCFLOAT, V4DF_FTYPE_PCV2DF,
	V4DF_FTYPE_PCDOUBLE, V8SF_FTYPE_PCV8SF_V8SF,
	V4DF_FTYPE_PCV4DF_V4DF, V4SF_FTYPE_PCV4SF_V4SF,
	V2DF_FTYPE_PCV2DF_V2DF, VOID_FTYPE_PCHAR_V32QI,
	VOID_FTYPE_PFLOAT_V8SF, VOID_FTYPE_PDOUBLE_V4DF,
	VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF,
	VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF.
	(ix86_builtin_type): Add INT_FTYPE_V8SF_V8SF_PTEST,
	INT_FTYPE_V4DI_V4DI_PTEST, INT_FTYPE_V4DF_V4DF_PTEST,
	INT_FTYPE_V4SF_V4SF_PTEST, INT_FTYPE_V2DF_V2DF_PTEST,
	INT_FTYPE_V8SF, INT_FTYPE_V4DF, V8SI_FTYPE_V8SF, V8SI_FTYPE_V4SI,
	V8SF_FTYPE_V8SF, V8SF_FTYPE_V8SI, V8SF_FTYPE_V4SF,
	V4SI_FTYPE_V8SI, V4SI_FTYPE_V4DF, V4DF_FTYPE_V4DF,
	V4DF_FTYPE_V4SI, V4DF_FTYPE_V4SF, V4DF_FTYPE_V2DF,
	V4SF_FTYPE_V4DF, V4SF_FTYPE_V8SF, V2DF_FTYPE_V4DF,
	V8SF_FTYPE_V8SF_V8SF, V8SF_FTYPE_V8SF_V8SI,
	V4DF_FTYPE_V4DF_V4DF, V4DF_FTYPE_V4DF_V4DI,
	V4SF_FTYPE_V4SF_V4SI, V2DF_FTYPE_V2DF_V2DI,
	V8SF_FTYPE_V8SF_INT, V4SI_FTYPE_V8SI_INT, V4SF_FTYPE_V8SF_INT,
	V2DF_FTYPE_V4DF_INT, V4DF_FTYPE_V4DF_INT,
	V8SF_FTYPE_V8SF_V8SF_V8SF, V4DF_FTYPE_V4DF_V4DF_V4DF,
	V8SI_FTYPE_V8SI_V8SI_INT, V8SF_FTYPE_V8SF_V8SF_INT,
	V4DF_FTYPE_V4DF_V4DF_INT, V4DF_FTYPE_V4DF_V2DF_INT,
	V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
	V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT.
	(bdesc_special_args): Add IX86_BUILTIN_VZEROALL,
	IX86_BUILTIN_VZEROUPPER, IX86_BUILTIN_VZEROUPPER_REX64,
	IX86_BUILTIN_VBROADCASTSS, IX86_BUILTIN_VBROADCASTSD256,
	IX86_BUILTIN_VBROADCASTSS256, IX86_BUILTIN_VBROADCASTPD256,
	IX86_BUILTIN_VBROADCASTPS256, IX86_BUILTIN_LOADUPD256,
	IX86_BUILTIN_LOADUPS256, IX86_BUILTIN_STOREUPD256,
	IX86_BUILTIN_STOREUPS256, IX86_BUILTIN_LOADDQU256,
	IX86_BUILTIN_STOREDQU256, IX86_BUILTIN_LDDQU256,
	IX86_BUILTIN_MASKLOADPD, IX86_BUILTIN_MASKLOADPS,
	IX86_BUILTIN_MASKLOADPD256, IX86_BUILTIN_MASKLOADPS256,
	IX86_BUILTIN_MASKSTOREPD, IX86_BUILTIN_MASKSTOREPS,
	IX86_BUILTIN_MASKSTOREPD256 and IX86_BUILTIN_MASKSTOREPS256.
	(ix86_builtins): Add IX86_BUILTIN_ADDPD256,
	IX86_BUILTIN_ADDPS256, IX86_BUILTIN_ADDSUBPD256,
	IX86_BUILTIN_ADDSUBPS256, IX86_BUILTIN_ANDPD256,
	IX86_BUILTIN_ANDPS256, IX86_BUILTIN_ANDNPD256,
	IX86_BUILTIN_ANDNPS256, IX86_BUILTIN_DIVPD256,
	IX86_BUILTIN_DIVPS256, IX86_BUILTIN_HADDPD256,
	IX86_BUILTIN_HSUBPS256, IX86_BUILTIN_HSUBPD256,
	IX86_BUILTIN_HADDPS256, IX86_BUILTIN_MAXPD256,
	IX86_BUILTIN_MAXPS256, IX86_BUILTIN_MINPD256,
	IX86_BUILTIN_MINPS256, IX86_BUILTIN_MULPD256,
	IX86_BUILTIN_MULPS256, IX86_BUILTIN_ORPD256,
	IX86_BUILTIN_ORPS256, IX86_BUILTIN_SUBPD256,
	IX86_BUILTIN_SUBPS256, IX86_BUILTIN_XORPD256,
	IX86_BUILTIN_XORPS256, IX86_BUILTIN_VPERMILVARPD,
	IX86_BUILTIN_VPERMILVARPS, IX86_BUILTIN_VPERMILVARPD256,
	IX86_BUILTIN_VPERMILVARPS256, IX86_BUILTIN_BLENDPD256,
	IX86_BUILTIN_BLENDPS256, IX86_BUILTIN_BLENDVPD256,
	IX86_BUILTIN_BLENDVPS256, IX86_BUILTIN_DPPS256,
	IX86_BUILTIN_SHUFPD256, IX86_BUILTIN_SHUFPS256,
	IX86_BUILTIN_CMPSD, IX86_BUILTIN_CMPSS, IX86_BUILTIN_CMPPD,
	IX86_BUILTIN_CMPPS,
	IX86_BUILTIN_CMPPD256, IX86_BUILTIN_CMPPS256,
	IX86_BUILTIN_EXTRACTF128PD256, IX86_BUILTIN_EXTRACTF128PS256,
	IX86_BUILTIN_EXTRACTF128SI256, IX86_BUILTIN_CVTDQ2PD256,
	IX86_BUILTIN_CVTDQ2PS256, IX86_BUILTIN_CVTPD2PS256,
	IX86_BUILTIN_CVTPS2DQ256, IX86_BUILTIN_CVTPS2PD256,
	IX86_BUILTIN_CVTTPD2DQ256, IX86_BUILTIN_CVTPD2DQ256,
	IX86_BUILTIN_CVTTPS2DQ256, IX86_BUILTIN_VPERM2F128PD256,
	IX86_BUILTIN_VPERM2F128PS256, IX86_BUILTIN_VPERM2F128SI256,
	IX86_BUILTIN_VPERMILPD, IX86_BUILTIN_VPERMILPS,
	IX86_BUILTIN_VPERMILPD256, IX86_BUILTIN_VPERMILPS256,
	IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS,
	IX86_BUILTIN_VPERMIL2PD256, IX86_BUILTIN_VPERMIL2PS256,
	IX86_BUILTIN_VINSERTF128PD256, IX86_BUILTIN_VINSERTF128PS256,
	IX86_BUILTIN_VINSERTF128SI256, IX86_BUILTIN_MOVSHDUP256,
	IX86_BUILTIN_MOVSLDUP256, IX86_BUILTIN_MOVDDUP256,
	IX86_BUILTIN_SQRTPD256, IX86_BUILTIN_SQRTPS256,
	IX86_BUILTIN_SQRTPS_NR256, IX86_BUILTIN_RSQRTPS256,
	IX86_BUILTIN_RSQRTPS_NR256, IX86_BUILTIN_RCPPS256,
	IX86_BUILTIN_ROUNDPD256, IX86_BUILTIN_ROUNDPS256,
	IX86_BUILTIN_UNPCKHPD256, IX86_BUILTIN_UNPCKLPD256,
	IX86_BUILTIN_UNPCKHPS256, IX86_BUILTIN_UNPCKLPS256,
	IX86_BUILTIN_SI256_SI, IX86_BUILTIN_PS256_PS,
	IX86_BUILTIN_PD256_PD, IX86_BUILTIN_SI_SI256,
	IX86_BUILTIN_PS_PS256, IX86_BUILTIN_PD_PD256,
	IX86_BUILTIN_VTESTZPD, IX86_BUILTIN_VTESTCPD,
	IX86_BUILTIN_VTESTNZCPD, IX86_BUILTIN_VTESTZPS,
	IX86_BUILTIN_VTESTCPS, IX86_BUILTIN_VTESTNZCPS,
	IX86_BUILTIN_VTESTZPD256, IX86_BUILTIN_VTESTCPD256,
	IX86_BUILTIN_VTESTNZCPD256, IX86_BUILTIN_VTESTZPS256,
	IX86_BUILTIN_VTESTCPS256, IX86_BUILTIN_VTESTNZCPS256,
	IX86_BUILTIN_PTESTZ256, IX86_BUILTIN_PTESTC256,
	IX86_BUILTIN_PTESTNZC256, IX86_BUILTIN_MOVMSKPD256 and
	IX86_BUILTIN_MOVMSKPS256.
	(ix86_init_mmx_sse_builtins): Support AVX builtins.
	(ix86_expand_args_builtin): Likewise.
	(ix86_expand_special_args_builtin): Likewise.
	(ix86_hard_regno_mode_ok): Handle AVX modes.
	(ix86_expand_vector_init_duplicate): Likewise.
	(ix86_expand_vector_init_one_nonzero): Likewise.
	(ix86_expand_vector_init_one_var): Likewise.
	(ix86_expand_vector_init_concat): Likewise.
	(ix86_expand_vector_init_general): Likewise.
	(ix86_expand_vector_set): Likewise.
	(ix86_vector_mode_supported_p): Likewise.
	(x86_extended_reg_mentioned_p): Check INSN_P before using
	PATTERN.

	* config/i386/i386-c.c (ix86_target_macros_internal): Handle
	OPTION_MASK_ISA_AVX and OPTION_MASK_ISA_FMA.

	* config/i386/i386.h (TARGET_AVX): New.
	(TARGET_FMA): Likewise.
	(TARGET_CPU_CPP_BUILTINS): Handle TARGET_AVX and TARGET_FMA.
	(BIGGEST_ALIGNMENT): Set to 256 for TARGET_AVX.
	(VALID_AVX256_REG_MODE): New.
	(AVX256_VEC_FLOAT_MODE_P): Likewise.
	(AVX_FLOAT_MODE_P): Likewise.
	(AVX128_VEC_FLOAT_MODE_P): Likewise.
	(AVX256_VEC_FLOAT_MODE_P): Likewise.
	(AVX_VEC_FLOAT_MODE_P): Likewise.
	(ASM_OUTPUT_AVX_PREFIX): Likewise.
	(ASM_OUTPUT_OPCODE): Likewise.
	(UNITS_PER_SIMD_WORD): Add a FIXME for 32byte vectorizer
	support.
	(SSE_REG_MODE_P): Allow 256bit vector modes.
	(ix86_args): Add a warn_avx field.

	* config/i386/i386.md (UNSPEC_PCMP): New.
	(UNSPEC_VPERMIL): Likewise.
	(UNSPEC_VPERMIL2): Likewise.
	(UNSPEC_VPERMIL2F128): Likewise.
	(UNSPEC_MASKLOAD): Likewise.
	(UNSPEC_MASKSTORE): Likewise.
	(UNSPEC_CAST): Likewise.
	(UNSPEC_VTESTP): Likewise.
	(UNSPECV_VZEROALL): Likewise.
	(UNSPECV_VZEROUPPER): Likewise.
	(XMM0_REG): Likewise.
	(XMM1_REG): Likewise.
	(XMM2_REG): Likewise.
	(XMM3_REG): Likewise.
	(XMM4_REG): Likewise.
	(XMM5_REG): Likewise.
	(XMM6_REG): Likewise.
	(XMM7_REG): Likewise.
	(XMM8_REG): Likewise.
	(XMM9_REG): Likewise.
	(XMM10_REG): Likewise.
	(XMM11_REG): Likewise.
	(XMM12_REG): Likewise.
	(XMM13_REG): Likewise.
	(XMM14_REG): Likewise.
	(XMM15_REG): Likewise.
	(prefix): Likewise.
	(prefix_vex_imm8): Likewise.
	(prefix_vex_w): Likewise.
	(length_vex): Likewise.
	(maxmin): Likewise.
	(movoi): Likewise.
	(*avx_ashlti3): Likewise.
	(*avx_lshrti3): Likewise.
	(*avx_setcc<mode>): Likewise.
	(*fop_<mode>_comm_mixed_avx): Likewise.
	(*fop_<mode>_comm_avx): Likewise.
	(*fop_<mode>_1_mixed_avx): Likewise.
	(*fop_<mode>_1_avx): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_ieee_smin<mode>3): Likewise.
	(*avx_ieee_smax<mode>3): Likewise.
	(mode): Add OI, V8SF and V4DF.
	(length): Support VEX prefix.
	(*cmpfp_i_mixed): Set prefix attribute.
	(*cmpfp_i_sse): Likewise.
	(*cmpfp_iu_mixed): Likewise.
	(*cmpfp_iu_sse): Likewise.
	(*movsi_1): Support AVX.
	(*movdi_2): Likewise.
	(*movdi_1_rex64): Likewise.
	(*movti_internal): Likewise.
	(*movti_rex64): Likewise.
	(*movsf_1): Likewise.
	(*movdf_nointeger): Likewise.
	(*movdf_integer_rex64): Likewise.
	(*movtf_internal): Likewise.
	(zero_extendsidi2_32): Likewise.
	(zero_extendsidi2_rex64): Likewise.
	(*extendsfdf2_mixed): Likewise.
	(*extendsfdf2_sse): Likewise.
	(*truncdfsf_fast_mixed): Likewise.
	(*truncdfsf_fast_sse): Likewise.
	(*truncdfsf_mixed): Likewise.
	(fix_trunc<mode>di_sse): Likewise.
	(fix_trunc<mode>si_sse): Likewise.
	(*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit): Likewise.
	(*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit): Likewise.
	(*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit): Likewise.
	(*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit): Likewise.
	(*rcpsf2_sse): Likewise.
	(*rsqrtsf2_sse): Likewise.
	(*sqrt<mode>2_sse): Likewise.
	(sse4_1_round<mode>2): Likewise.
	(*sse_prologue_save_insn): Disallow REX prefix for AVX.
	Support AVX.  Set length attribute properly for AVX.

	* config/i386/i386-modes.def (VECTOR_MODES (INT, 32)): New.
	(VECTOR_MODES (FLOAT, 32)): Likewise.
	(VECTOR_MODE (INT, DI, 8)): Likewise.
	(VECTOR_MODE (INT, HI, 32)): Likewise.
	(VECTOR_MODE (INT, QI, 64)): Likewise.
	(VECTOR_MODE (FLOAT, DF, 8)): Likewise.
	(VECTOR_MODE (FLOAT, SF, 16)): Likewise.
	(VECTOR_MODE (INT, DI, 4)): Removed.
	(VECTOR_MODE (INT, SI, 8)): Likewise.
	(VECTOR_MODE (INT, HI, 16)): Likewise.
	(VECTOR_MODE (INT, QI, 32)): Likewise.
	(VECTOR_MODE (FLOAT, SF, 8)): Likewise.
	(INT_MODE (OI, 32)): Likewise.

	* config/i386/i386.opt (mavx): New.
	(mfma): Likewise.

	* config/i386/i386-protos.h (ix86_attr_length_vex_default): New.

	* config/i386/mmx.md (*mov<mode>_internal_rex64): Support AVX.
	(*mov<mode>_internal_avx): New.
	(*movv2sf_internal_rex64_avx): Likewise.
	(*movv2sf_internal_avx): Likewise.

	* config/i386/predicates.md (const_4_to_5_operand): New.
	(const_6_to_7_operand): Likewise.
	(const_8_to_11_operand): Likewise.
	(const_12_to_15_operand): Likewise.
	(avx_comparison_float_operator): Likewise.

	* config/i386/sse.md (AVX256MODEI): New.
	(AVX256MODE): Likewise.
	(AVXMODEQI): Likewise.
	(AVXMODE): Likewise.
	(AVX256MODEF2P): Likewise.
	(AVX256MODE2P): Likewise.
	(AVX256MODE4P): Likewise.
	(AVX256MODE8P): Likewise.
	(AVXMODEF2P): Likewise.
	(AVXMODEF4P): Likewise.
	(AVXMODEDCVTDQ2PS): Likewise.
	(AVXMODEDCVTPS2DQ): Likewise.
	(avxvecmode): Likewise.
	(avxvecpsmode): Likewise.
	(avxhalfvecmode): Likewise.
	(avxscalarmode): Likewise.
	(avxcvtvecmode): Likewise.
	(avxpermvecmode): Likewise.
	(avxmodesuffixf2c): Likewise.
	(avxmodesuffixp): Likewise.
	(avxmodesuffixs): Likewise.
	(avxmodesuffix): Likewise.
	(vpermilbits): Likewise.
	(pinsrbits): Likewise.
	(mov<mode>): Likewise.
	(*mov<mode>_internal): Likewise.
	(push<mode>1): Likewise.
	(movmisalign<mode>): Likewise.
	(avx_movup<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(avx_movdqu<avxmodesuffix>): Likewise.
	(avx_lddqu<avxmodesuffix>): Likewise.
	(<plusminus_insn><mode>3): Likewise.
	(*avx_<plusminus_insn><mode>3): Likewise.
	(*avx_vm<plusminus_insn><mode>3): Likewise.
	(mul<mode>3): Likewise.
	(*avx_mul<mode>3): Likewise.
	(*avx_vmmul<mode>3): Likewise.
	(divv8sf3): Likewise.
	(divv4df3): Likewise.
	(avx_div<mode>3): Likewise.
	(*avx_div<mode>3): Likewise.
	(*avx_vmdiv<mode>3): Likewise.
	(avx_rcpv8sf2): Likewise.
	(*avx_vmrcpv4sf2): Likewise.
	(sqrtv8sf2): Likewise.
	(avx_sqrtv8sf2): Likewise.
	(*avx_vmsqrt<mode>2): Likewise.
	(rsqrtv8sf2): Likewise.
	(avx_rsqrtv8sf2): Likewise.
	(*avx_vmrsqrtv4sf2): Likewise.
	(<code><mode>3): Likewise.
	(*avx_<code><mode>3_finite): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_vm<code><mode>3): Likewise.
	(*avx_ieee_smin<mode>3): Likewise.
	(*avx_ieee_smax<mode>3): Likewise.
	(avx_addsubv8sf3): Likewise.
	(avx_addsubv4df3): Likewise.
	(*avx_addsubv4sf3): Likewise.
	(*avx_addsubv2df3): Likewise.
	(avx_h<plusminus_insn>v4df3): Likewise.
	(avx_h<plusminus_insn>v8sf3): Likewise.
	(*avx_h<plusminus_insn>v4sf3): Likewise.
	(*avx_h<plusminus_insn>v2df3): Likewise.
	(avx_cmpp<avxmodesuffixf2c><mode>3): Likewise.
	(avx_cmps<ssemodesuffixf2c><mode>3): Likewise.
	(*avx_maskcmp<mode>3): Likewise.
	(avx_nand<mode>3): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_nand<mode>3): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_cvtsi2ss): Likewise.
	(*avx_cvtsi2ssq): Likewise.
	(*avx_cvtsi2sd): Likewise.
	(*avx_cvtsi2sdq): Likewise.
	(*avx_cvtsd2ss): Likewise.
	(avx_cvtss2sd): Likewise.
	(avx_cvtdq2ps<avxmodesuffix>): Likewise.
	(avx_cvtps2dq<avxmodesuffix>): Likewise.
	(avx_cvttps2dq<avxmodesuffix>): Likewise.
	(*avx_cvtsi2sd): Likewise.
	(*avx_cvtsi2sdq): Likewise.
	(avx_cvtdq2pd256): Likewise.
	(avx_cvtpd2dq256): Likewise.
	(avx_cvttpd2dq256): Likewise.
	(*avx_cvtsd2ss): Likewise.
	(*avx_cvtss2sd): Likewise.
	(avx_cvtpd2ps256): Likewise.
	(avx_cvtps2pd256): Likewise.
	(*avx_movhlps): Likewise.
	(*avx_movlhps): Likewise.
	(avx_unpckhps256): Likewise.
	(*avx_unpckhps): Likewise.
	(avx_unpcklps256): Likewise.
	(*avx_unpcklps): Likewise.
	(avx_movshdup256): Likewise.
	(avx_movsldup256): Likewise.
	(avx_shufps256): Likewise.
	(avx_shufps256_1): Likewise.
	(*avx_shufps_<mode>): Likewise.
	(*avx_loadhps): Likewise.
	(*avx_storelps): Likewise.
	(*avx_loadlps): Likewise.
	(*avx_movss): Likewise.
	(*vec_dupv4sf_avx): Likewise.
	(*vec_concatv2sf_avx): Likewise.
	(*vec_concatv4sf_avx): Likewise.
	(*vec_setv4sf_0_avx): Likewise.
	(*vec_setv4sf_avx): Likewise.
	(*avx_insertps): Likewise.
	(avx_vextractf128<mode>): Likewise.
	(vec_extract_lo_<mode>): Likewise.
	(vec_extract_hi_<mode>): Likewise.
	(vec_extract_lo_<mode>): Likewise.
	(vec_extract_hi_<mode>): Likewise.
	(vec_extract_lo_v16hi): Likewise.
	(vec_extract_hi_v16hi): Likewise.
	(vec_extract_lo_v32qi): Likewise.
	(vec_extract_hi_v32qi): Likewise.
	(avx_unpckhpd256): Likewise.
	(*avx_unpckhpd): Likewise.
	(avx_movddup256): Likewise.
	(*avx_movddup): Likewise.
	(avx_unpcklpd256): Likewise.
	(*avx_unpcklpd): Likewise.
	(avx_shufpd256): Likewise.
	(avx_shufpd256_1): Likewise.
	(*avx_punpckhqdq): Likewise.
	(*avx_punpcklqdq): Likewise.
	(*avx_shufpd_<mode>): Likewise.
	(*avx_storehpd): Likewise.
	(*avx_loadhpd): Likewise.
	(*avx_loadlpd): Likewise.
	(*avx_movsd): Likewise.
	(*vec_concatv2df_avx): Likewise.
	(*avx_<plusminus_insn><mode>3): Likewise.
	(*avx_<plusminus_insn><mode>3): Likewise.
	(*avx_mulv8hi3): Likewise.
	(*avxv8hi3_highpart): Likewise.
	(*avx_umulv8hi3_highpart): Likewise.
	(*avx_umulv2siv2di3): Likewise.
	(*avx_mulv2siv2di3): Likewise.
	(*avx_pmaddwd): Likewise.
	(*avx_mulv4si3): Likewise.
	(*avx_ashr<mode>3): Likewise.
	(*avx_lshr<mode>3): Likewise.
	(*avx_ashl<mode>3): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_eq<mode>3): Likewise.
	(*avx_gt<mode>3): Likewise.
	(*avx_nand<mode>3): Likewise.
	(*avx_nand<mode>3): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_packsswb): Likewise.
	(*avx_packssdw): Likewise.
	(*avx_packuswb): Likewise.
	(*avx_punpckhbw): Likewise.
	(*avx_punpcklbw): Likewise.
	(*avx_punpckhwd): Likewise.
	(*avx_punpcklwd): Likewise.
	(*avx_punpckhdq): Likewise.
	(*avx_punpckldq): Likewise.
	(*avx_pinsr<avxmodesuffixs>): Likewise.
	(*avx_pinsrq): Likewise.
	(*avx_loadld): Likewise.
	(*vec_extractv2di_1_rex64_avx): Likewise.
	(*vec_extractv2di_1_avx): Likewise.
	(*vec_dupv2di_avx): Likewise.
	(*vec_concatv2si_avx): Likewise.
	(*vec_concatv4si_1_avx): Likewise.
	(*vec_concatv2di_avx): Likewise.
	(*vec_concatv2di_rex64_avx): Likewise.
	(*avx_uavgv16qi3): Likewise.
	(*avx_uavgv8hi3): Likewise.
	(*avx_psadbw): Likewise.
	(avx_movmskp<avxmodesuffixf2c>256): Likewise.
	(*avx_phaddwv8hi3): Likewise.
	(*avx_phadddv4si3): Likewise.
	(*avx_phaddswv8hi3): Likewise.
	(*avx_phsubwv8hi3): Likewise.
	(*avx_phsubdv4si3): Likewise.
	(*avx_phsubswv8hi3): Likewise.
	(*avx_pmaddubsw128): Likewise.
	(*avx_pmulhrswv8hi3): Likewise.
	(*avx_pshufbv16qi3): Likewise.
	(*avx_psign<mode>3): Likewise.
	(*avx_palignrti): Likewise.
	(avx_blendp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(avx_blendvp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(avx_dpp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(*avx_mpsadbw): Likewise.
	(*avx_packusdw): Likewise.
	(*avx_pblendvb): Likewise.
	(*avx_pblendw): Likewise.
	(avx_vtestp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(avx_ptest256): Likewise.
	(avx_roundp<avxmodesuffixf2c>256): Likewise.
	(*avx_rounds<ssemodesuffixf2c>): Likewise.
	(*avx_aesenc): Likewise.
	(*avx_aesenclast): Likewise.
	(*avx_aesdec): Likewise.
	(*avx_aesdeclast): Likewise.
	(avx_vzeroupper): Likewise.
	(avx_vzeroupper_rex64): Likewise.
	(avx_vpermil<mode>): Likewise.
	(avx_vpermilvar<mode>3): Likewise.
	(avx_vpermil2<mode>3): Likewise.
	(avx_vperm2f128<mode>3): Likewise.
	(avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(avx_vbroadcastss256): Likewise.
	(avx_vbroadcastf128_p<avxmodesuffixf2c>256): Likewise.
	(avx_vinsertf128<mode>): Likewise.
	(vec_set_lo_<mode>): Likewise.
	(vec_set_hi_<mode>): Likewise.
	(vec_set_lo_<mode>): Likewise.
	(vec_set_hi_<mode>): Likewise.
	(vec_set_lo_v16hi): Likewise.
	(vec_set_hi_v16hi): Likewise.
	(vec_set_lo_v32qi): Likewise.
	(vec_set_hi_v32qi): Likewise.
	(avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>): Likewise.
	(avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>): Likewise.
	(avx_<avxmodesuffixp>_<avxmodesuffixp><avxmodesuffix>): Likewise.
	(vec_init<mode>): Likewise.
	(*vec_concat<mode>_avx): Likewise.
	(blendbits): Support V8SF and V4DF.
	(sse2_movq128): Support AVX.
	(<sse>_movnt<mode>): Likewise.
	(sse2_movntv2di): Likewise.
	(sse_rcpv4sf2): Likewise.
	(sse_sqrtv4sf2): Likewise.
	(sse_rsqrtv4sf2): Likewise.
	(<sse>_comi): Likewise.
	(<sse>_ucomi): Likewise.
	(sse_cvtss2si): Likewise.
	(sse_cvtss2si_2): Likewise.
	(sse_cvtss2siq): Likewise.
	(sse_cvtss2siq_2): Likewise.
	(sse_cvttss2si): Likewise.
	(sse_cvttss2siq): Likewise.
	(sse2_cvtsd2si): Likewise.
	(sse2_cvtsd2si_2): Likewise.
	(sse2_cvtsd2siq): Likewise.
	(sse2_cvtsd2siq_2): Likewise.
	(sse2_cvttsd2si): Likewise.
	(sse2_cvttsd2siq): Likewise.
	(sse2_cvtdq2pd): Likewise.
	(*sse2_cvtpd2dq): Likewise.
	(*sse2_cvttpd2dq): Likewise.
	(*sse2_cvtpd2ps): Likewise.
	(sse2_cvtps2pd): Likewise.
	(sse3_movshdup): Likewise.
	(sse3_movsldup): Likewise.
	(sse_storehps): Likewise.
	(*sse4_1_extractps): Likewise.
	(sse2_storelpd): Likewise.
	(vec_dupv2df_sse3): Likewise.
	(*vec_concatv2df_sse3): Likewise.
	(*sse4_1_pextrb): Likewise.
	(*sse4_1_pextrb_memory): Likewise.
	(*sse2_pextrw): Likewise.
	(*sse4_1_pextrw_memory): Likewise.
	(*sse4_1_pextrd): Likewise.
	(*sse4_1_pextrq): Likewise.
	(sse2_pshufd_1): Likewise.
	(sse2_pshuflw_1): Likewise.
	(sse2_pshufhw_1): Likewise.
	(*sse2_storeq_rex64): Likewise.
	(*vec_dupv4si): Likewise.
	(<sse>_movmskp<ssemodesuffixf2c>): Likewise.
	(sse2_pmovmskb): Likewise.
	(*sse2_maskmovdqu): Likewise.
	(*sse2_maskmovdqu_rex64): Likewise.
	(sse_ldmxcsr): Likewise.
	(sse_stmxcsr): Likewise.
	(abs<mode>2): Likewise.
	(sse4_1_movntdqa): Likewise.
	(sse4_1_phminposuw): Likewise.
	(sse4_1_extendv8qiv8hi2): Likewise.
	(*sse4_1_extendv8qiv8hi2): Likewise.
	(sse4_1_extendv4qiv4si2): Likewise.
	(*sse4_1_extendv4qiv4si2): Likewise.
	(sse4_1_extendv2qiv2di2): Likewise.
	(*sse4_1_extendv2qiv2di2): Likewise.
	(sse4_1_extendv4hiv4si2): Likewise.
	(*sse4_1_extendv4hiv4si2): Likewise.
	(sse4_1_extendv2hiv2di2): Likewise.
	(*sse4_1_extendv2hiv2di2): Likewise.
	(sse4_1_extendv2siv2di2): Likewise.
	(*sse4_1_extendv2siv2di2): Likewise.
	(sse4_1_zero_extendv8qiv8hi2): Likewise.
	(*sse4_1_zero_extendv8qiv8hi2): Likewise.
	(sse4_1_zero_extendv4qiv4si2): Likewise.
	(*sse4_1_zero_extendv4qiv4si2): Likewise.
	(sse4_1_zero_extendv2qiv2di2): Likewise.
	(*sse4_1_zero_extendv2qiv2di2): Likewise.
	(sse4_1_zero_extendv4hiv4si2): Likewise.
	(*sse4_1_zero_extendv4hiv4si2): Likewise.
	(sse4_1_zero_extendv2hiv2di2): Likewise.
	(*sse4_1_zero_extendv2hiv2di2): Likewise.
	(sse4_1_zero_extendv2siv2di2): Likewise.
	(*sse4_1_zero_extendv2siv2di2): Likewise.
	(sse4_1_ptest): Likewise.
	(sse4_1_roundp<ssemodesuffixf2c>): Likewise.
	(sse4_2_pcmpestri): Likewise.
	(sse4_2_pcmpestrm): Likewise.
	(sse4_2_pcmpistri): Likewise.
	(sse4_2_pcmpistrm): Likewise.
	(aesimc): Likewise.
	(aeskeygenassist): Likewise.

2008-08-28  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/predicates.md (vzeroall_operation): New.

	* config/i386/sse.md (avx_vzeroall): New.
	(*avx_vzeroall): Likewise.

From-SVN: r139726
parent f67358da
......@@ -299,7 +299,7 @@ i[34567]86-*-*)
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h mmintrin-common.h
wmmintrin.h cross-stdarg.h"
wmmintrin.h gmmintrin.h cross-stdarg.h"
;;
x86_64-*-*)
cpu_type=i386
......@@ -308,7 +308,7 @@ x86_64-*-*)
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h mmintrin-common.h
wmmintrin.h cross-stdarg.h"
wmmintrin.h gmmintrin.h cross-stdarg.h"
need_64bit_hwint=yes
;;
ia64-*-*)
......
......@@ -35,11 +35,15 @@
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
#define bit_SSSE3 (1 << 9)
#define bit_FMA (1 << 12)
#define bit_CMPXCHG16B (1 << 13)
#define bit_SSE4_1 (1 << 19)
#define bit_SSE4_2 (1 << 20)
#define bit_POPCNT (1 << 23)
#define bit_AES (1 << 25)
#define bit_XSAVE (1 << 26)
#define bit_OSXSAVE (1 << 27)
#define bit_AVX (1 << 28)
/* %edx */
#define bit_CMPXCHG8B (1 << 8)
......
......@@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. If not see
GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
So use `repe' instead. */
#undef ASM_OUTPUT_OPCODE
#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
{ \
if ((PTR)[0] == 'r' \
......@@ -103,6 +104,8 @@ along with GCC; see the file COPYING3. If not see
(PTR) += 5; \
} \
} \
else \
ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR)); \
}
/* Define macro used to output shift-double opcodes when the shift
......
......@@ -217,6 +217,10 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__AES__");
if (isa_flag & OPTION_MASK_ISA_PCLMUL)
def_or_undef (parse_in, "__PCLMUL__");
if (isa_flag & OPTION_MASK_ISA_AVX)
def_or_undef (parse_in, "__AVX__");
if (isa_flag & OPTION_MASK_ISA_FMA)
def_or_undef (parse_in, "__FMA__");
if (isa_flag & OPTION_MASK_ISA_SSE4A)
def_or_undef (parse_in, "__SSE4A__");
if (isa_flag & OPTION_MASK_ISA_SSE5)
......
......@@ -73,17 +73,20 @@ CC_MODE (CCFPU);
VECTOR_MODES (INT, 4); /* V4QI V2HI */
VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, SI, 1); /* V1SI */
VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (INT, DI, 4); /* V4DI */
VECTOR_MODE (INT, SI, 8); /* V8SI */
VECTOR_MODE (INT, HI, 16); /* V16HI */
VECTOR_MODE (INT, QI, 32); /* V32QI */
VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
VECTOR_MODE (INT, DI, 8); /* V8DI */
VECTOR_MODE (INT, HI, 32); /* V32HI */
VECTOR_MODE (INT, QI, 64); /* V64QI */
VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
INT_MODE (OI, 32);
/* The symbol Pmode stands for one of the above machine modes (usually SImode).
The tm.h file specifies which one. It is not a distinct mode. */
......@@ -128,6 +128,7 @@ extern int ix86_check_movabs (rtx, int);
extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot);
extern int ix86_attr_length_immediate_default (rtx, int);
extern int ix86_attr_length_address_default (rtx);
extern int ix86_attr_length_vex_default (rtx, int, int);
extern enum machine_mode ix86_fp_compare_mode (enum rtx_code);
......
......@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see
#define TARGET_SSSE3 OPTION_ISA_SSSE3
#define TARGET_SSE4_1 OPTION_ISA_SSE4_1
#define TARGET_SSE4_2 OPTION_ISA_SSE4_2
#define TARGET_AVX OPTION_ISA_AVX
#define TARGET_FMA OPTION_ISA_FMA
#define TARGET_SSE4A OPTION_ISA_SSE4A
#define TARGET_SSE5 OPTION_ISA_SSE5
#define TARGET_ROUND OPTION_ISA_ROUND
......@@ -702,7 +704,7 @@ enum target_cpu_default
Pentium+ prefers DFmode values to be aligned to 64 bit boundary
and Pentium Pro XFmode values at 128 bit boundaries. */
#define BIGGEST_ALIGNMENT 128
#define BIGGEST_ALIGNMENT (TARGET_AVX ? 256: 128)
/* Maximum stack alignment. */
#define MAX_STACK_ALIGNMENT MAX_OFILE_ALIGNMENT
......@@ -996,6 +998,10 @@ do { \
#define HARD_REGNO_NREGS_WITH_PADDING(REGNO, MODE) ((MODE) == XFmode ? 4 : 8)
#define VALID_AVX256_REG_MODE(MODE) \
((MODE) == V32QImode || (MODE) == V16HImode || (MODE) == V8SImode \
|| (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode)
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V2DImode || (MODE) == DFmode)
......@@ -1013,8 +1019,14 @@ do { \
|| (MODE) == V4HImode || (MODE) == V8QImode)
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
place emms and femms instructions. */
#define UNITS_PER_SIMD_WORD(MODE) (TARGET_SSE ? 16 : UNITS_PER_WORD)
place emms and femms instructions.
FIXME: AVX has 32byte floating point vector operations and 16byte
integer vector operations. But vectorizer doesn't support
different sizes for integer and floating point vectors. We limit
vector size to 16byte. */
#define UNITS_PER_SIMD_WORD(MODE) \
(TARGET_AVX ? (((MODE) == DFmode || (MODE) == SFmode) ? 16 : 16) \
: (TARGET_SSE ? 16 : UNITS_PER_WORD))
#define VALID_DFP_MODE_P(MODE) \
((MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode)
......@@ -1035,7 +1047,9 @@ do { \
#define SSE_REG_MODE_P(MODE) \
((MODE) == TImode || (MODE) == V16QImode || (MODE) == TFmode \
|| (MODE) == V8HImode || (MODE) == V2DFmode || (MODE) == V2DImode \
|| (MODE) == V4SFmode || (MODE) == V4SImode)
|| (MODE) == V4SFmode || (MODE) == V4SImode || (MODE) == V32QImode \
|| (MODE) == V16HImode || (MODE) == V8SImode || (MODE) == V4DImode \
|| (MODE) == V8SFmode || (MODE) == V4DFmode)
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
......@@ -1339,6 +1353,19 @@ enum reg_class
#define SSE_VEC_FLOAT_MODE_P(MODE) \
((TARGET_SSE && (MODE) == V4SFmode) || (TARGET_SSE2 && (MODE) == V2DFmode))
/* True if MODE is a scalar float mode handled by AVX-encoded insns.  */
#define AVX_FLOAT_MODE_P(MODE) \
  (TARGET_AVX && ((MODE) == SFmode || (MODE) == DFmode))
/* True if MODE is a 128-bit AVX vector float mode.  */
#define AVX128_VEC_FLOAT_MODE_P(MODE) \
  (TARGET_AVX && ((MODE) == V4SFmode || (MODE) == V2DFmode))
/* True if MODE is a 256-bit AVX vector float mode.  */
#define AVX256_VEC_FLOAT_MODE_P(MODE) \
  (TARGET_AVX && ((MODE) == V8SFmode || (MODE) == V4DFmode))
/* True if MODE is any (128- or 256-bit) AVX vector float mode.
   Composed from the two macros above instead of repeating the
   four mode comparisons, so the lists cannot drift apart.  */
#define AVX_VEC_FLOAT_MODE_P(MODE) \
  (AVX128_VEC_FLOAT_MODE_P (MODE) || AVX256_VEC_FLOAT_MODE_P (MODE))
/* True if XOP is a hard register in the MMX register file.  */
#define MMX_REG_P(XOP) (REG_P (XOP) && MMX_REGNO_P (REGNO (XOP)))
/* True if register number N lies in the MMX register range.  */
#define MMX_REGNO_P(N) IN_RANGE ((N), FIRST_MMX_REG, LAST_MMX_REG)
......@@ -1559,6 +1586,7 @@ typedef struct ix86_args {
int fastcall; /* fastcall calling convention is used */
int sse_words; /* # sse words passed so far */
int sse_nregs; /* # sse registers available for passing */
int warn_avx; /* True when we want to warn about AVX ABI. */
int warn_sse; /* True when we want to warn about SSE ABI. */
int warn_mmx; /* True when we want to warn about MMX ABI. */
int sse_regno; /* next available sse register number */
......@@ -2133,6 +2161,29 @@ do { \
#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
/* Handle the "%v" escape in assembler templates: when PTR points at
   "%v", advance it past the '%' so the 'v' (VEX/AVX) mnemonic prefix
   is printed when TARGET_AVX is true, or past both characters so the
   prefix is dropped otherwise.  PTR is modified as a side effect.
   Wrapped in do/while (0) so the macro expands to a single statement
   and composes safely with unbraced if/else at the use site.  */
#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR)		\
  do							\
    {							\
      if ((PTR)[0] == '%' && (PTR)[1] == 'v')		\
	(PTR) += TARGET_AVX ? 1 : 2;			\
    }							\
  while (0)
/* A C statement or statements which output an assembler instruction
   opcode to the stdio stream STREAM.  The macro-operand PTR is a
   variable of type `char *' which points to the opcode name in
   its "internal" form--the form that is written in the machine
   description.  Here it only rewrites the "%v" AVX-prefix escape
   via ASM_OUTPUT_AVX_PREFIX, which may advance PTR.  */
#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
/* Under some conditions we need jump tables in the text section,
because the assembler cannot handle label differences between
sections. This is the case for x86_64 on Mach-O for example. */
......
......@@ -299,6 +299,14 @@ mno-sse4
Target RejectNegative Report InverseMask(ISA_SSE4_1) MaskExists Var(ix86_isa_flags) VarExists Save
Do not support SSE4.1 and SSE4.2 built-in functions and code generation
; -mavx: enable the AVX ISA mask (and, per the help text, every SSE
; level below it).
; NOTE(review): unlike msse4a below (and mno-sse4 above), the mavx and
; mfma records lack the `Save' keyword; confirm whether these ISA
; flags should also be saved/restored like the other ISA options.
mavx
Target Report Mask(ISA_AVX) Var(ix86_isa_flags) VarExists
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and AVX built-in functions and code generation
; -mfma: enable FMA, which implies AVX and the earlier SSE levels.
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) VarExists
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation
; -msse4a: enable AMD SSE4A (implies SSE3 and below).
msse4a
Target Report Mask(ISA_SSE4A) Var(ix86_isa_flags) VarExists Save
Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
......
......@@ -78,15 +78,45 @@
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}
pxor\t%0, %0
movq\t{%1, %0|%0, %1}
movq\t{%1, %0|%0, %1}
movd\t{%1, %0|%0, %1}
movd\t{%1, %0|%0, %1}"
%vpxor\t%0, %d0
%vmovq\t{%1, %0|%0, %1}
%vmovq\t{%1, %0|%0, %1}
%vmovq\t{%1, %0|%0, %1}
%vmovq\t{%1, %0|%0, %1}"
[(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
(set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
(set (attr "prefix")
(if_then_else (eq_attr "alternative" "7,8,9,10,11")
(const_string "maybe_vex")
(const_string "orig")))
(set_attr "mode" "DI")])
;; Moves of 64-bit MMX integer vector modes (MMXMODEI8) when AVX is
;; enabled.  The SSE-register alternatives (5-7) emit VEX-encoded
;; vpxor/vmovq (prefix "vex"); the GPR<->memory alternatives (8, 9)
;; emit "#" and are presumably split into plain moves later -- the
;; corresponding split is not visible in this fragment.
(define_insn "*mov<mode>_internal_avx"
[(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
"=!?y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,r ,m")
(match_operand:MMXMODEI8 1 "vector_move_operand"
"C ,!ym,!?y,*Y2,!y ,C ,*Y2m,*Y2,irm,r"))]
"TARGET_AVX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
pxor\t%0, %0
movq\t{%1, %0|%0, %1}
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}
vpxor\t%0, %0, %0
vmovq\t{%1, %0|%0, %1}
vmovq\t{%1, %0|%0, %1}
#
#"
[(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,*,*")
(set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*")
(set (attr "prefix")
(if_then_else (eq_attr "alternative" "5,6,7")
(const_string "vex")
(const_string "orig")))
(set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,DI,DI")])
(define_insn "*mov<mode>_internal"
[(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
"=!?y,!?y,m ,!y ,*Y2,*Y2,*Y2 ,m ,*x,*x,*x,m ,r ,m")
......@@ -122,6 +152,35 @@
DONE;
})
;; V2SF moves for 64-bit targets with AVX.  GPR alternatives (0, 1)
;; use plain mov{q}; MMX alternatives use the legacy encodings; the
;; SSE-register alternatives (7-12) use VEX-encoded vxorps/vmovaps/
;; vmovlps/vmovq, matching the "vex" prefix attribute below.
(define_insn "*movv2sf_internal_rex64_avx"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
"=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x")
(match_operand:V2SF 1 "vector_move_operand"
"Cr ,m ,C ,!?ym,!y,Y2,!y,C,x,m,x,x,r"))]
"TARGET_64BIT && TARGET_AVX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
mov{q}\t{%1, %0|%0, %1}
mov{q}\t{%1, %0|%0, %1}
pxor\t%0, %0
movq\t{%1, %0|%0, %1}
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}
vxorps\t%0, %0, %0
vmovaps\t{%1, %0|%0, %1}
vmovlps\t{%1, %0, %0|%0, %0, %1}
vmovlps\t{%1, %0|%0, %1}
vmovq\t{%1, %0|%0, %1}
vmovq\t{%1, %0|%0, %1}"
[(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
(set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
(set (attr "prefix")
(if_then_else (eq_attr "alternative" "7,8,9,10,11,12")
(const_string "vex")
(const_string "orig")))
(set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
(define_insn "*movv2sf_internal_rex64"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
"=rm,r ,!?y,!?y ,m ,!y,*Y2,x,x,x,m,r,Yi")
......@@ -147,6 +206,33 @@
(set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
(set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
;; V2SF moves with AVX on any target (no 64-bit requirement, so no
;; direct GPR mov{q} alternatives).  SSE alternatives (5-8) use
;; VEX-encoded vxorps/vmovaps/vmovlps; the GPR<->memory alternatives
;; (9, 10) emit "#" and are presumably handled by a later split --
;; not visible in this fragment.
(define_insn "*movv2sf_internal_avx"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
"=!?y,!?y ,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
(match_operand:V2SF 1 "vector_move_operand"
"C ,!?ym,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
"TARGET_AVX
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
pxor\t%0, %0
movq\t{%1, %0|%0, %1}
movq\t{%1, %0|%0, %1}
movdq2q\t{%1, %0|%0, %1}
movq2dq\t{%1, %0|%0, %1}
vxorps\t%0, %0, %0
vmovaps\t{%1, %0|%0, %1}
vmovlps\t{%1, %0, %0|%0, %0, %1}
vmovlps\t{%1, %0|%0, %1}
#
#"
[(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
(set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
(set (attr "prefix")
(if_then_else (eq_attr "alternative" "5,6,7,8")
(const_string "vex")
(const_string "orig")))
(set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
(define_insn "*movv2sf_internal"
[(set (match_operand:V2SF 0 "nonimmediate_operand"
"=!?y,!?y ,m ,!y ,*Y2,*x,*x,*x,m ,r ,m")
......
......@@ -640,11 +640,31 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
;; Match a const_int in the inclusive range [4, 5].
(define_predicate "const_4_to_5_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 4, 5)")))
;; Match a const_int in the inclusive range [4, 7].
(define_predicate "const_4_to_7_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 4, 7)")))
;; Match a const_int in the inclusive range [6, 7].
(define_predicate "const_6_to_7_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 6, 7)")))
;; Match a const_int in the inclusive range [8, 11].
(define_predicate "const_8_to_11_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 8, 11)")))
;; Match a const_int in the inclusive range [12, 15].
(define_predicate "const_12_to_15_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 12, 15)")))
;; Match exactly one bit in 2-bit mask.
(define_predicate "const_pow2_1_to_2_operand"
(and (match_code "const_int")
......@@ -914,6 +934,11 @@
;; Return 1 if OP is one of the comparison codes directly expressible
;; by the SSE compare instructions.
(define_special_predicate "sse_comparison_operator"
(match_code "eq,lt,le,unordered,ne,unge,ungt,ordered"))
;; Return 1 if OP is a comparison operator that can be issued by the
;; AVX predicate-generation instructions (a strictly larger code set
;; than sse_comparison_operator above).
(define_predicate "avx_comparison_float_operator"
(match_code "ne,eq,ge,gt,le,lt,unordered,ordered,uneq,unge,ungt,unle,unlt,ltgt"))
;; Return 1 if OP is a comparison operator that can be issued by sse predicate
;; generation instructions
(define_predicate "sse5_comparison_float_operator"
......@@ -1057,3 +1082,15 @@
;; Return 1 if OP is a memory operand whose recorded alignment is
;; smaller than the natural alignment of its mode.
(define_predicate "misaligned_operand"
(and (match_code "mem")
(match_test "MEM_ALIGN (op) < GET_MODE_ALIGNMENT (mode)")))
;; Return 1 if OP is a vzeroall operation, known to be a PARALLEL.
;; Only the vector length is checked here: nregs per-register
;; elements (16 SSE regs in 64-bit mode, 8 otherwise) plus one more
;; -- presumably the UNSPEC element of the pattern; confirm against
;; the vzeroall define_insn itself, which is not in this fragment.
(define_predicate "vzeroall_operation"
(match_code "parallel")
{
int nregs = TARGET_64BIT ? 16 : 8;
if (XVECLEN (op, 0) != nregs + 1)
return 0;
return 1;
})
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment