Commit 43a8b705 by Harsha Jagasia Committed by Dwarakanath Rajagopal

config.gcc (i[34567]86-*-*): Include xopintrin.h.

2009-11-04  Harsha Jagasia  <harsha.jagasia@amd.com>
            Dwarakanath Rajagopal  <dwarak.rajagopal@amd.com>
        
        * config.gcc (i[34567]86-*-*): Include xopintrin.h.
        (x86_64-*-*): Ditto.
        * config/i386/xopintrin.h: New file, provide common x86 compiler
        intrinsics for XOP.
        * config/i386/cpuid.h (bit_XOP): Define XOP bit.
        * config/i386/x86intrin.h: Add XOP check and xopintrin.h.
        * config/i386/i386-c.c (ix86_target_macros_internal): Check
        ISA_FLAG for XOP.
        * config/i386/i386.h (TARGET_XOP): New macro for XOP.
        * config/i386/i386.opt (-mxop): New switch for XOP support.
        * config/i386/i386.md (UNSPEC_XOP_UNSIGNED_CMP)
        (UNSPEC_XOP_TRUEFALSE)
        (UNSPEC_XOP_PERMUTE)
        (UNSPEC_FRCZ): Add new UNSPEC for XOP support.
        (PPERM_*): New constants for vpperm instruction.
        (xop_pcmov_<mode>): Add XOP conditional mov instructions.
        * config/i386/i386.c (OPTION_MASK_ISA_XOP_SET): New.
        (OPTION_MASK_ISA_XOP_UNSET): New.
        (OPTION_MASK_ISA_FMA4_UNSET): Change definition to
        depend on XOP.
        (ix86_handle_option): Handle -mxop.
        (isa_opts): Handle -mxop.
        (enum pta_flags): Add PTA_XOP.
        (override_options): Add XOP support.
        (print_operand): Add code for XOP compare instructions.
        (ix86_expand_sse_movcc): Extend for XOP conditional move
        instruction.
        (ix86_expand_int_vcond): Extend for XOP compare instruction.

        (IX86_BUILTIN_VPCMOV): New for XOP intrinsic.
        (IX86_BUILTIN_VPCMOV_V2DI): Ditto.
        (IX86_BUILTIN_VPCMOV_V4SI): Ditto.
        (IX86_BUILTIN_VPCMOV_V8HI): Ditto.
        (IX86_BUILTIN_VPCMOV_V16QI): Ditto.
        (IX86_BUILTIN_VPCMOV_V4SF): Ditto.
        (IX86_BUILTIN_VPCMOV_V2DF): Ditto.

        (IX86_BUILTIN_VPCMOV256): Ditto.
        (IX86_BUILTIN_VPCMOV_V4DI256): Ditto.
        (IX86_BUILTIN_VPCMOV_V8SI256): Ditto.
        (IX86_BUILTIN_VPCMOV_V16HI256): Ditto.
        (IX86_BUILTIN_VPCMOV_V32QI256): Ditto.
        (IX86_BUILTIN_VPCMOV_V8SF256): Ditto.
        (IX86_BUILTIN_VPCMOV_V4DF256): Ditto.

        (IX86_BUILTIN_VPPERM): Ditto.

        (IX86_BUILTIN_VPMACSSWW): Ditto.
        (IX86_BUILTIN_VPMACSWW): Ditto.
        (IX86_BUILTIN_VPMACSSWD): Ditto.
        (IX86_BUILTIN_VPMACSWD): Ditto.
        (IX86_BUILTIN_VPMACSSDD): Ditto.
        (IX86_BUILTIN_VPMACSDD): Ditto.
        (IX86_BUILTIN_VPMACSSDQL): Ditto.
        (IX86_BUILTIN_VPMACSSDQH): Ditto.
        (IX86_BUILTIN_VPMACSDQL): Ditto.
        (IX86_BUILTIN_VPMACSDQH): Ditto.
        (IX86_BUILTIN_VPMADCSSWD): Ditto.
        (IX86_BUILTIN_VPMADCSWD): Ditto.

        (IX86_BUILTIN_VPHADDBW): Ditto.
        (IX86_BUILTIN_VPHADDBD): Ditto.
        (IX86_BUILTIN_VPHADDBQ): Ditto.
        (IX86_BUILTIN_VPHADDWD): Ditto.
        (IX86_BUILTIN_VPHADDWQ): Ditto.
        (IX86_BUILTIN_VPHADDDQ): Ditto.
        (IX86_BUILTIN_VPHADDUBW): Ditto.
        (IX86_BUILTIN_VPHADDUBD): Ditto.
        (IX86_BUILTIN_VPHADDUBQ): Ditto.
        (IX86_BUILTIN_VPHADDUWD): Ditto.
        (IX86_BUILTIN_VPHADDUWQ): Ditto.
        (IX86_BUILTIN_VPHADDUDQ): Ditto.
        (IX86_BUILTIN_VPHSUBBW): Ditto.
        (IX86_BUILTIN_VPHSUBWD): Ditto.
        (IX86_BUILTIN_VPHSUBDQ): Ditto.

        (IX86_BUILTIN_VPROTB): Ditto.
        (IX86_BUILTIN_VPROTW): Ditto.
        (IX86_BUILTIN_VPROTD): Ditto.
        (IX86_BUILTIN_VPROTQ): Ditto.
        (IX86_BUILTIN_VPROTB_IMM): Ditto.
        (IX86_BUILTIN_VPROTW_IMM): Ditto.
        (IX86_BUILTIN_VPROTD_IMM): Ditto.
        (IX86_BUILTIN_VPROTQ_IMM): Ditto.

        (IX86_BUILTIN_VPSHLB): Ditto.
        (IX86_BUILTIN_VPSHLW): Ditto.
        (IX86_BUILTIN_VPSHLD): Ditto.
        (IX86_BUILTIN_VPSHLQ): Ditto.
        (IX86_BUILTIN_VPSHAB): Ditto.
        (IX86_BUILTIN_VPSHAW): Ditto.
        (IX86_BUILTIN_VPSHAD): Ditto.
        (IX86_BUILTIN_VPSHAQ): Ditto.

        (IX86_BUILTIN_VFRCZSS): Ditto.
        (IX86_BUILTIN_VFRCZSD): Ditto.
        (IX86_BUILTIN_VFRCZPS): Ditto.
        (IX86_BUILTIN_VFRCZPD): Ditto.
        (IX86_BUILTIN_VFRCZPS256): Ditto.
        (IX86_BUILTIN_VFRCZPD256): Ditto.

        (IX86_BUILTIN_VPCOMEQUB): Ditto.
        (IX86_BUILTIN_VPCOMNEUB): Ditto.
        (IX86_BUILTIN_VPCOMLTUB): Ditto.
        (IX86_BUILTIN_VPCOMLEUB): Ditto.
        (IX86_BUILTIN_VPCOMGTUB): Ditto.
        (IX86_BUILTIN_VPCOMGEUB): Ditto.
        (IX86_BUILTIN_VPCOMFALSEUB): Ditto.
        (IX86_BUILTIN_VPCOMTRUEUB): Ditto.

        (IX86_BUILTIN_VPCOMEQUW): Ditto.
        (IX86_BUILTIN_VPCOMNEUW): Ditto.
        (IX86_BUILTIN_VPCOMLTUW): Ditto.
        (IX86_BUILTIN_VPCOMLEUW): Ditto.
        (IX86_BUILTIN_VPCOMGTUW): Ditto.
        (IX86_BUILTIN_VPCOMGEUW): Ditto.
        (IX86_BUILTIN_VPCOMFALSEUW): Ditto.
        (IX86_BUILTIN_VPCOMTRUEUW): Ditto.

        (IX86_BUILTIN_VPCOMEQUD): Ditto.
        (IX86_BUILTIN_VPCOMNEUD): Ditto.
        (IX86_BUILTIN_VPCOMLTUD): Ditto.
        (IX86_BUILTIN_VPCOMLEUD): Ditto.
        (IX86_BUILTIN_VPCOMGTUD): Ditto.
        (IX86_BUILTIN_VPCOMGEUD): Ditto.
        (IX86_BUILTIN_VPCOMFALSEUD): Ditto.
        (IX86_BUILTIN_VPCOMTRUEUD): Ditto.

        (IX86_BUILTIN_VPCOMEQUQ): Ditto.
        (IX86_BUILTIN_VPCOMNEUQ): Ditto.
        (IX86_BUILTIN_VPCOMLTUQ): Ditto.
        (IX86_BUILTIN_VPCOMLEUQ): Ditto.
        (IX86_BUILTIN_VPCOMGTUQ): Ditto.
        (IX86_BUILTIN_VPCOMGEUQ): Ditto.
        (IX86_BUILTIN_VPCOMFALSEUQ): Ditto.
        (IX86_BUILTIN_VPCOMTRUEUQ): Ditto.

        (IX86_BUILTIN_VPCOMEQB): Ditto.
        (IX86_BUILTIN_VPCOMNEB): Ditto.
        (IX86_BUILTIN_VPCOMLTB): Ditto.
        (IX86_BUILTIN_VPCOMLEB): Ditto.
        (IX86_BUILTIN_VPCOMGTB): Ditto.
        (IX86_BUILTIN_VPCOMGEB): Ditto.
        (IX86_BUILTIN_VPCOMFALSEB): Ditto.
        (IX86_BUILTIN_VPCOMTRUEB): Ditto.

        (IX86_BUILTIN_VPCOMEQW): Ditto.
        (IX86_BUILTIN_VPCOMNEW): Ditto.
        (IX86_BUILTIN_VPCOMLTW): Ditto.
        (IX86_BUILTIN_VPCOMLEW): Ditto.
        (IX86_BUILTIN_VPCOMGTW): Ditto.
        (IX86_BUILTIN_VPCOMGEW): Ditto.
        (IX86_BUILTIN_VPCOMFALSEW): Ditto.
        (IX86_BUILTIN_VPCOMTRUEW): Ditto.

        (IX86_BUILTIN_VPCOMEQD): Ditto.
        (IX86_BUILTIN_VPCOMNED): Ditto.
        (IX86_BUILTIN_VPCOMLTD): Ditto.
        (IX86_BUILTIN_VPCOMLED): Ditto.
        (IX86_BUILTIN_VPCOMGTD): Ditto.
        (IX86_BUILTIN_VPCOMGED): Ditto.
        (IX86_BUILTIN_VPCOMFALSED): Ditto.
        (IX86_BUILTIN_VPCOMTRUED): Ditto.

        (IX86_BUILTIN_VPCOMEQQ): Ditto.
        (IX86_BUILTIN_VPCOMNEQ): Ditto.
        (IX86_BUILTIN_VPCOMLTQ): Ditto.
        (IX86_BUILTIN_VPCOMLEQ): Ditto.
        (IX86_BUILTIN_VPCOMGTQ): Ditto.
        (IX86_BUILTIN_VPCOMGEQ): Ditto.
        (IX86_BUILTIN_VPCOMFALSEQ): Ditto.
        (IX86_BUILTIN_VPCOMTRUEQ): Ditto.

        (enum multi_arg_type): New enum for describing the various XOP
        intrinsic argument types.
        (bdesc_multi_arg): New table for XOP intrinsics.
        (ix86_init_mmx_sse_builtins): Add XOP intrinsic support.
        (ix86_expand_multi_arg_builtin): New function for creating XOP
        intrinsics.

        * config/i386/sse.md (sserotatemax): New mode attribute for XOP.
        (xop_pmacsww): Ditto.
        (xop_pmacssww): Ditto.
        (xop_pmacsdd): Ditto.
        (xop_pmacssdd): Ditto.
        (xop_pmacssdql): Ditto.
        (xop_pmacssdqh): Ditto.
        (xop_pmacsdql): Ditto.
        (xop_pmacsdql_mem): Ditto.
        (xop_mulv2div2di3_low): Ditto.
        (xop_pmacsdqh): Ditto.
        (xop_pmacsdqh_mem): Ditto.
        (xop_mulv2div2di3_high): Ditto.
        (xop_pmacsswd): Ditto.
        (xop_pmacswd): Ditto.
        (xop_pmadcsswd): Ditto.
        (xop_pmadcswd): Ditto.
        (xop_pcmov_<mode>): Ditto.
        (xop_pcmov_<mode>256): Ditto.
        (xop_phaddbw): Ditto.
        (xop_phaddbd): Ditto.
        (xop_phaddbq): Ditto.
        (xop_phaddwd): Ditto.
        (xop_phaddwq): Ditto.
        (xop_phadddq): Ditto.
        (xop_phaddubw): Ditto.
        (xop_phaddubd): Ditto.
        (xop_phaddubq): Ditto.
        (xop_phadduwd): Ditto.
        (xop_phadduwq): Ditto.
        (xop_phaddudq): Ditto.
        (xop_phsubbw): Ditto.
        (xop_phsubwd): Ditto.
        (xop_phsubdq): Ditto.
        (xop_pperm): Ditto.
        (rotl<mode>3): Ditto.
        (rotr<mode>3): Ditto.
        (xop_rotl<mode>3): Ditto.
        (xop_rotr<mode>3): Ditto.
        (vrotr<mode>3): Ditto.
        (vrotl<mode>3): Ditto.
        (xop_vrotl<mode>3): Ditto.
        (vlshr<mode>3): Ditto.
        (vashr<mode>3): Ditto.
        (vashl<mode>3): Ditto.
        (xop_ashl<mode>3): Ditto.
        (xop_lshl<mode>3): Ditto.
        (ashlv16qi3): Ditto.
        (lshlv16qi3): Ditto.
        (ashrv16qi3): Ditto.
        (ashrv2di3): Ditto.
        (xop_frcz<mode>2): Ditto.
        (xop_vmfrcz<mode>2): Ditto.
        (xop_frcz<mode>2256): Ditto.    
        (xop_maskcmp<mode>3): Ditto.
        (xop_maskcmp_uns<mode>3): Ditto.
        (xop_maskcmp_uns2<mode>3): Ditto.
        (xop_pcom_tf<mode>3): Ditto.

        * doc/invoke.texi (-mxop): Add documentation.
        * doc/extend.texi (x86 intrinsics): Add XOP intrinsics.

        * gcc.target/i386/xop-check.h: New file.
        * gcc.target/i386/xop-hadduX.c: Ditto.
        * gcc.target/i386/xop-haddX.c: Ditto.
        * gcc.target/i386/xop-hsubX.c: Ditto.
        * gcc.target/i386/xop-imul32widen-vector.c: Ditto.
        * gcc.target/i386/xop-imul32widen-vector.c: Ditto.
        * gcc.target/i386/xop-pcmov2.c: Ditto.
        * gcc.target/i386/xop-pcmov.c: Ditto.
        * gcc.target/i386/xop-rotate1-vector.c: Ditto.
        * gcc.target/i386/xop-rotate2-vector.c: Ditto.
        * gcc.target/i386/xop-rotate3-vector.c: Ditto.
        * gcc.target/i386/xop-shift1-vector.c: Ditto.
        * gcc.target/i386/xop-shift2-vector.c: Ditto.
        * gcc.target/i386/xop-shift3-vector.c: Ditto.
        * gcc.target/i386/i386.exp:  Add check_effective_target_xop.
        * gcc.target/i386/sse-12.c: Update with new compile options to 
        activate and check xopintrin.h intrinsic file.
        * gcc.target/i386/sse-13.c: Ditto.
        * gcc.target/i386/sse-14.c: Ditto.
        * gcc.target/i386/sse-22.c: Ditto.
        * gcc.target/i386/sse-23.c: Ditto.
        * g++.dg/other/i386-2.C: Ditto.
        * g++.dg/other/i386-3.C: Ditto.
        * g++.dg/other/i386-5.C: Ditto.
        * g++.dg/other/i386-6.C: Ditto.
        


Co-Authored-By: Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>

From-SVN: r153901
parent 7d6ce94a
2009-11-04 Harsha Jagasia <harsha.jagasia@amd.com>
Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
* config.gcc (i[34567]86-*-*): Include xopintrin.h.
(x86_64-*-*): Ditto.
* config/i386/xopintrin.h: New file, provide common x86 compiler
intrinsics for XOP.
* config/i386/cpuid.h (bit_XOP): Define XOP bit.
* config/i386/x86intrin.h: Add XOP check and xopintrin.h.
* config/i386/i386-c.c (ix86_target_macros_internal): Check
ISA_FLAG for XOP.
* config/i386/i386.h (TARGET_XOP): New macro for XOP.
* config/i386/i386.opt (-mxop): New switch for XOP support.
* config/i386/i386.md (UNSPEC_XOP_UNSIGNED_CMP)
(UNSPEC_XOP_TRUEFALSE)
(UNSPEC_XOP_PERMUTE)
(UNSPEC_FRCZ): Add new UNSPEC for XOP support.
(PPERM_*): New constants for vpperm instruction.
(xop_pcmov_<mode>): Add XOP conditional mov instructions.
* config/i386/i386.c (OPTION_MASK_ISA_XOP_SET): New.
(OPTION_MASK_ISA_XOP_UNSET): New.
(OPTION_MASK_ISA_FMA4_UNSET): Change definition to
depend on XOP.
(ix86_handle_option): Handle -mxop.
(isa_opts): Handle -mxop.
(enum pta_flags): Add PTA_XOP.
(override_options): Add XOP support.
(print_operand): Add code for XOP compare instructions.
(ix86_expand_sse_movcc): Extend for XOP conditional move instruction.
(ix86_expand_int_vcond): Extend for XOP compare instruction.
(IX86_BUILTIN_VPCMOV): New for XOP intrinsic.
(IX86_BUILTIN_VPCMOV_V2DI): Ditto.
(IX86_BUILTIN_VPCMOV_V4SI): Ditto.
(IX86_BUILTIN_VPCMOV_V8HI): Ditto.
(IX86_BUILTIN_VPCMOV_V16QI): Ditto.
(IX86_BUILTIN_VPCMOV_V4SF): Ditto.
(IX86_BUILTIN_VPCMOV_V2DF): Ditto.
(IX86_BUILTIN_VPCMOV256): Ditto.
(IX86_BUILTIN_VPCMOV_V4DI256): Ditto.
(IX86_BUILTIN_VPCMOV_V8SI256): Ditto.
(IX86_BUILTIN_VPCMOV_V16HI256): Ditto.
(IX86_BUILTIN_VPCMOV_V32QI256): Ditto.
(IX86_BUILTIN_VPCMOV_V8SF256): Ditto.
(IX86_BUILTIN_VPCMOV_V4DF256): Ditto.
(IX86_BUILTIN_VPPERM): Ditto.
(IX86_BUILTIN_VPMACSSWW): Ditto.
(IX86_BUILTIN_VPMACSWW): Ditto.
(IX86_BUILTIN_VPMACSSWD): Ditto.
(IX86_BUILTIN_VPMACSWD): Ditto.
(IX86_BUILTIN_VPMACSSDD): Ditto.
(IX86_BUILTIN_VPMACSDD): Ditto.
(IX86_BUILTIN_VPMACSSDQL): Ditto.
(IX86_BUILTIN_VPMACSSDQH): Ditto.
(IX86_BUILTIN_VPMACSDQL): Ditto.
(IX86_BUILTIN_VPMACSDQH): Ditto.
(IX86_BUILTIN_VPMADCSSWD): Ditto.
(IX86_BUILTIN_VPMADCSWD): Ditto.
(IX86_BUILTIN_VPHADDBW): Ditto.
(IX86_BUILTIN_VPHADDBD): Ditto.
(IX86_BUILTIN_VPHADDBQ): Ditto.
(IX86_BUILTIN_VPHADDWD): Ditto.
(IX86_BUILTIN_VPHADDWQ): Ditto.
(IX86_BUILTIN_VPHADDDQ): Ditto.
(IX86_BUILTIN_VPHADDUBW): Ditto.
(IX86_BUILTIN_VPHADDUBD): Ditto.
(IX86_BUILTIN_VPHADDUBQ): Ditto.
(IX86_BUILTIN_VPHADDUWD): Ditto.
(IX86_BUILTIN_VPHADDUWQ): Ditto.
(IX86_BUILTIN_VPHADDUDQ): Ditto.
(IX86_BUILTIN_VPHSUBBW): Ditto.
(IX86_BUILTIN_VPHSUBWD): Ditto.
(IX86_BUILTIN_VPHSUBDQ): Ditto.
(IX86_BUILTIN_VPROTB): Ditto.
(IX86_BUILTIN_VPROTW): Ditto.
(IX86_BUILTIN_VPROTD): Ditto.
(IX86_BUILTIN_VPROTQ): Ditto.
(IX86_BUILTIN_VPROTB_IMM): Ditto.
(IX86_BUILTIN_VPROTW_IMM): Ditto.
(IX86_BUILTIN_VPROTD_IMM): Ditto.
(IX86_BUILTIN_VPROTQ_IMM): Ditto.
(IX86_BUILTIN_VPSHLB): Ditto.
(IX86_BUILTIN_VPSHLW): Ditto.
(IX86_BUILTIN_VPSHLD): Ditto.
(IX86_BUILTIN_VPSHLQ): Ditto.
(IX86_BUILTIN_VPSHAB): Ditto.
(IX86_BUILTIN_VPSHAW): Ditto.
(IX86_BUILTIN_VPSHAD): Ditto.
(IX86_BUILTIN_VPSHAQ): Ditto.
(IX86_BUILTIN_VFRCZSS): Ditto.
(IX86_BUILTIN_VFRCZSD): Ditto.
(IX86_BUILTIN_VFRCZPS): Ditto.
(IX86_BUILTIN_VFRCZPD): Ditto.
(IX86_BUILTIN_VFRCZPS256): Ditto.
(IX86_BUILTIN_VFRCZPD256): Ditto.
(IX86_BUILTIN_VPCOMEQUB): Ditto.
(IX86_BUILTIN_VPCOMNEUB): Ditto.
(IX86_BUILTIN_VPCOMLTUB): Ditto.
(IX86_BUILTIN_VPCOMLEUB): Ditto.
(IX86_BUILTIN_VPCOMGTUB): Ditto.
(IX86_BUILTIN_VPCOMGEUB): Ditto.
(IX86_BUILTIN_VPCOMFALSEUB): Ditto.
(IX86_BUILTIN_VPCOMTRUEUB): Ditto.
(IX86_BUILTIN_VPCOMEQUW): Ditto.
(IX86_BUILTIN_VPCOMNEUW): Ditto.
(IX86_BUILTIN_VPCOMLTUW): Ditto.
(IX86_BUILTIN_VPCOMLEUW): Ditto.
(IX86_BUILTIN_VPCOMGTUW): Ditto.
(IX86_BUILTIN_VPCOMGEUW): Ditto.
(IX86_BUILTIN_VPCOMFALSEUW): Ditto.
(IX86_BUILTIN_VPCOMTRUEUW): Ditto.
(IX86_BUILTIN_VPCOMEQUD): Ditto.
(IX86_BUILTIN_VPCOMNEUD): Ditto.
(IX86_BUILTIN_VPCOMLTUD): Ditto.
(IX86_BUILTIN_VPCOMLEUD): Ditto.
(IX86_BUILTIN_VPCOMGTUD): Ditto.
(IX86_BUILTIN_VPCOMGEUD): Ditto.
(IX86_BUILTIN_VPCOMFALSEUD): Ditto.
(IX86_BUILTIN_VPCOMTRUEUD): Ditto.
(IX86_BUILTIN_VPCOMEQUQ): Ditto.
(IX86_BUILTIN_VPCOMNEUQ): Ditto.
(IX86_BUILTIN_VPCOMLTUQ): Ditto.
(IX86_BUILTIN_VPCOMLEUQ): Ditto.
(IX86_BUILTIN_VPCOMGTUQ): Ditto.
(IX86_BUILTIN_VPCOMGEUQ): Ditto.
(IX86_BUILTIN_VPCOMFALSEUQ): Ditto.
(IX86_BUILTIN_VPCOMTRUEUQ): Ditto.
(IX86_BUILTIN_VPCOMEQB): Ditto.
(IX86_BUILTIN_VPCOMNEB): Ditto.
(IX86_BUILTIN_VPCOMLTB): Ditto.
(IX86_BUILTIN_VPCOMLEB): Ditto.
(IX86_BUILTIN_VPCOMGTB): Ditto.
(IX86_BUILTIN_VPCOMGEB): Ditto.
(IX86_BUILTIN_VPCOMFALSEB): Ditto.
(IX86_BUILTIN_VPCOMTRUEB): Ditto.
(IX86_BUILTIN_VPCOMEQW): Ditto.
(IX86_BUILTIN_VPCOMNEW): Ditto.
(IX86_BUILTIN_VPCOMLTW): Ditto.
(IX86_BUILTIN_VPCOMLEW): Ditto.
(IX86_BUILTIN_VPCOMGTW): Ditto.
(IX86_BUILTIN_VPCOMGEW): Ditto.
(IX86_BUILTIN_VPCOMFALSEW): Ditto.
(IX86_BUILTIN_VPCOMTRUEW): Ditto.
(IX86_BUILTIN_VPCOMEQD): Ditto.
(IX86_BUILTIN_VPCOMNED): Ditto.
(IX86_BUILTIN_VPCOMLTD): Ditto.
(IX86_BUILTIN_VPCOMLED): Ditto.
(IX86_BUILTIN_VPCOMGTD): Ditto.
(IX86_BUILTIN_VPCOMGED): Ditto.
(IX86_BUILTIN_VPCOMFALSED): Ditto.
(IX86_BUILTIN_VPCOMTRUED): Ditto.
(IX86_BUILTIN_VPCOMEQQ): Ditto.
(IX86_BUILTIN_VPCOMNEQ): Ditto.
(IX86_BUILTIN_VPCOMLTQ): Ditto.
(IX86_BUILTIN_VPCOMLEQ): Ditto.
(IX86_BUILTIN_VPCOMGTQ): Ditto.
(IX86_BUILTIN_VPCOMGEQ): Ditto.
(IX86_BUILTIN_VPCOMFALSEQ): Ditto.
(IX86_BUILTIN_VPCOMTRUEQ): Ditto.
(enum multi_arg_type): New enum for describing the various XOP
intrinsic argument types.
(bdesc_multi_arg): New table for XOP intrinsics.
(ix86_init_mmx_sse_builtins): Add XOP intrinsic support.
(ix86_expand_multi_arg_builtin): New function for creating XOP
intrinsics.
* config/i386/sse.md (sserotatemax): New mode attribute for XOP.
(xop_pmacsww): Ditto.
(xop_pmacssww): Ditto.
(xop_pmacsdd): Ditto.
(xop_pmacssdd): Ditto.
(xop_pmacssdql): Ditto.
(xop_pmacssdqh): Ditto.
(xop_pmacsdql): Ditto.
(xop_pmacsdql_mem): Ditto.
(xop_mulv2div2di3_low): Ditto.
(xop_pmacsdqh): Ditto.
(xop_pmacsdqh_mem): Ditto.
(xop_mulv2div2di3_high): Ditto.
(xop_pmacsswd): Ditto.
(xop_pmacswd): Ditto.
(xop_pmadcsswd): Ditto.
(xop_pmadcswd): Ditto.
(xop_pcmov_<mode>): Ditto.
(xop_pcmov_<mode>256): Ditto.
(xop_phaddbw): Ditto.
(xop_phaddbd): Ditto.
(xop_phaddbq): Ditto.
(xop_phaddwd): Ditto.
(xop_phaddwq): Ditto.
(xop_phadddq): Ditto.
(xop_phaddubw): Ditto.
(xop_phaddubd): Ditto.
(xop_phaddubq): Ditto.
(xop_phadduwd): Ditto.
(xop_phadduwq): Ditto.
(xop_phaddudq): Ditto.
(xop_phsubbw): Ditto.
(xop_phsubwd): Ditto.
(xop_phsubdq): Ditto.
(xop_pperm): Ditto.
(rotl<mode>3): Ditto.
(rotr<mode>3): Ditto.
(xop_rotl<mode>3): Ditto.
(xop_rotr<mode>3): Ditto.
(vrotr<mode>3): Ditto.
(vrotl<mode>3): Ditto.
(xop_vrotl<mode>3): Ditto.
(vlshr<mode>3): Ditto.
(vashr<mode>3): Ditto.
(vashl<mode>3): Ditto.
(xop_ashl<mode>3): Ditto.
(xop_lshl<mode>3): Ditto.
(ashlv16qi3): Ditto.
(lshlv16qi3): Ditto.
(ashrv16qi3): Ditto.
(ashrv2di3): Ditto.
(xop_frcz<mode>2): Ditto.
(xop_vmfrcz<mode>2): Ditto.
(xop_frcz<mode>2256): Ditto.
(xop_maskcmp<mode>3): Ditto.
(xop_maskcmp_uns<mode>3): Ditto.
(xop_maskcmp_uns2<mode>3): Ditto.
(xop_pcom_tf<mode>3): Ditto.
* doc/invoke.texi (-mxop): Add documentation.
* doc/extend.texi (x86 intrinsics): Add XOP intrinsics.
2009-11-03 Mark Mitchell <mark@codesourcery.com> 2009-11-03 Mark Mitchell <mark@codesourcery.com>
PR driver/11810 PR driver/11810
...@@ -287,7 +287,7 @@ i[34567]86-*-*) ...@@ -287,7 +287,7 @@ i[34567]86-*-*)
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h" ia32intrin.h cross-stdarg.h"
;; ;;
x86_64-*-*) x86_64-*-*)
...@@ -297,7 +297,7 @@ x86_64-*-*) ...@@ -297,7 +297,7 @@ x86_64-*-*)
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
immintrin.h x86intrin.h avxintrin.h immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h" ia32intrin.h cross-stdarg.h"
need_64bit_hwint=yes need_64bit_hwint=yes
;; ;;
......
...@@ -46,9 +46,10 @@ ...@@ -46,9 +46,10 @@
/* Extended Features */ /* Extended Features */
/* %ecx */ /* %ecx */
#define bit_FMA4 (1 << 16)
#define bit_LAHF_LM (1 << 0) #define bit_LAHF_LM (1 << 0)
#define bit_SSE4a (1 << 6) #define bit_SSE4a (1 << 6)
#define bit_FMA4 (1 << 16) #define bit_XOP (1 << 11)
/* %edx */ /* %edx */
#define bit_LM (1 << 29) #define bit_LM (1 << 29)
......
...@@ -232,6 +232,8 @@ ix86_target_macros_internal (int isa_flag, ...@@ -232,6 +232,8 @@ ix86_target_macros_internal (int isa_flag,
def_or_undef (parse_in, "__SSE4A__"); def_or_undef (parse_in, "__SSE4A__");
if (isa_flag & OPTION_MASK_ISA_FMA4) if (isa_flag & OPTION_MASK_ISA_FMA4)
def_or_undef (parse_in, "__FMA4__"); def_or_undef (parse_in, "__FMA4__");
if (isa_flag & OPTION_MASK_ISA_XOP)
def_or_undef (parse_in, "__XOP__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE)) if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE))
def_or_undef (parse_in, "__SSE_MATH__"); def_or_undef (parse_in, "__SSE_MATH__");
if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2)) if ((fpmath & FPMATH_SSE) && (isa_flag & OPTION_MASK_ISA_SSE2))
......
...@@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ...@@ -55,6 +55,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_FMA OPTION_ISA_FMA #define TARGET_FMA OPTION_ISA_FMA
#define TARGET_SSE4A OPTION_ISA_SSE4A #define TARGET_SSE4A OPTION_ISA_SSE4A
#define TARGET_FMA4 OPTION_ISA_FMA4 #define TARGET_FMA4 OPTION_ISA_FMA4
#define TARGET_XOP OPTION_ISA_XOP
#define TARGET_ROUND OPTION_ISA_ROUND #define TARGET_ROUND OPTION_ISA_ROUND
#define TARGET_ABM OPTION_ISA_ABM #define TARGET_ABM OPTION_ISA_ABM
#define TARGET_POPCNT OPTION_ISA_POPCNT #define TARGET_POPCNT OPTION_ISA_POPCNT
......
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
;; X -- don't print any sort of PIC '@' suffix for a symbol. ;; X -- don't print any sort of PIC '@' suffix for a symbol.
;; & -- print some in-use local-dynamic symbol name. ;; & -- print some in-use local-dynamic symbol name.
;; H -- print a memory address offset by 8; used for sse high-parts ;; H -- print a memory address offset by 8; used for sse high-parts
;; Y -- print condition for XOP pcom* instruction.
;; + -- print a branch hint as 'cs' or 'ds' prefix ;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas). ;; ; -- print a semicolon (after prefixes due to bug in older gas).
...@@ -199,6 +200,11 @@ ...@@ -199,6 +200,11 @@
(UNSPEC_FMA4_INTRINSIC 150) (UNSPEC_FMA4_INTRINSIC 150)
(UNSPEC_FMA4_FMADDSUB 151) (UNSPEC_FMA4_FMADDSUB 151)
(UNSPEC_FMA4_FMSUBADD 152) (UNSPEC_FMA4_FMSUBADD 152)
(UNSPEC_XOP_UNSIGNED_CMP 151)
(UNSPEC_XOP_TRUEFALSE 152)
(UNSPEC_XOP_PERMUTE 153)
(UNSPEC_FRCZ 154)
; For AES support ; For AES support
(UNSPEC_AESENC 159) (UNSPEC_AESENC 159)
(UNSPEC_AESENCLAST 160) (UNSPEC_AESENCLAST 160)
...@@ -254,6 +260,20 @@ ...@@ -254,6 +260,20 @@
(COM_TRUE_P 5) (COM_TRUE_P 5)
]) ])
;; Constants used in the XOP pperm instruction
;; The first eight entries (PPERM_SRC through PPERM_INV_SIGN) are the
;; multiples of 0x20 from 0x00 to 0xe0, i.e. they vary only in bits 5-7.
;; PPERM_SRC1 and PPERM_SRC2 differ only in bit 4 (0x10).
;; Note that PPERM_SRC and PPERM_SRC1 share the value 0x00.
;; NOTE(review): presumably one operation value is OR'ed with one
;; source selector to form a pperm selector byte -- confirm against the
;; code that uses these constants.
(define_constants
[(PPERM_SRC 0x00) /* copy source */
(PPERM_INVERT 0x20) /* invert source */
(PPERM_REVERSE 0x40) /* bit reverse source */
(PPERM_REV_INV 0x60) /* bit reverse & invert src */
(PPERM_ZERO 0x80) /* all 0's */
(PPERM_ONES 0xa0) /* all 1's */
(PPERM_SIGN 0xc0) /* propagate sign bit */
(PPERM_INV_SIGN 0xe0) /* invert & propagate sign */
(PPERM_SRC1 0x00) /* use first source byte */
(PPERM_SRC2 0x10) /* use second source byte */
])
;; Registers by name. ;; Registers by name.
(define_constants (define_constants
[(AX_REG 0) [(AX_REG 0)
...@@ -19676,6 +19696,20 @@ ...@@ -19676,6 +19696,20 @@
[(set_attr "type" "fcmov") [(set_attr "type" "fcmov")
(set_attr "mode" "XF")]) (set_attr "mode" "XF")])
;; All moves in XOP pcmov instructions are 128 bits and hence we restrict
;; the scalar versions to have only XMM registers as operands.
;; XOP conditional move
;; Operand 0 is set to an if_then_else over the MODEF modes whose
;; condition is operand 1 and whose two arms are operands 2 and 3.
;; All four operands must satisfy register_operand with constraint "x".
;; The pattern is enabled only when TARGET_XOP holds and
;; ix86_fma4_valid_op_p accepts the operands.
;; The output template gives two operand orderings that are mirror
;; images of each other: "%1, %3, %2, %0" and "%0, %2, %3, %1".
;; NOTE(review): presumably these are the AT&T and Intel dialect
;; alternatives respectively -- confirm.
(define_insn "*xop_pcmov_<mode>"
[(set (match_operand:MODEF 0 "register_operand" "=x")
(if_then_else:MODEF
(match_operand:MODEF 1 "register_operand" "x")
(match_operand:MODEF 2 "register_operand" "x")
(match_operand:MODEF 3 "register_operand" "x")))]
"TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)"
"vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
[(set_attr "type" "sse4arg")])
;; These versions of the min/max patterns are intentionally ignorant of ;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior wrt -0.0 and NaN (via the commutative operand mark). ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
......
...@@ -314,6 +314,10 @@ mfma4 ...@@ -314,6 +314,10 @@ mfma4
Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) VarExists Save Target Report Mask(ISA_FMA4) Var(ix86_isa_flags) VarExists Save
Support FMA4 built-in functions and code generation Support FMA4 built-in functions and code generation
mxop
Target Report Mask(ISA_XOP) Var(ix86_isa_flags) VarExists Save
Support XOP built-in functions and code generation
mabm mabm
Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save Target Report Mask(ISA_ABM) Var(ix86_isa_flags) VarExists Save
Support code generation of Advanced Bit Manipulation (ABM) instructions. Support code generation of Advanced Bit Manipulation (ABM) instructions.
......
...@@ -58,6 +58,10 @@ ...@@ -58,6 +58,10 @@
#include <fma4intrin.h> #include <fma4intrin.h>
#endif #endif
#ifdef __XOP__
#include <xopintrin.h>
#endif
#if defined (__AES__) || defined (__PCLMUL__) #if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h> #include <wmmintrin.h>
#endif #endif
......
...@@ -3207,6 +3207,11 @@ Enable/disable the generation of the SSE4A instructions. ...@@ -3207,6 +3207,11 @@ Enable/disable the generation of the SSE4A instructions.
@cindex @code{target("fma4")} attribute @cindex @code{target("fma4")} attribute
Enable/disable the generation of the FMA4 instructions. Enable/disable the generation of the FMA4 instructions.
@item xop
@itemx no-xop
@cindex @code{target("xop")} attribute
Enable/disable the generation of the XOP instructions.
@item ssse3 @item ssse3
@itemx no-ssse3 @itemx no-ssse3
@cindex @code{target("ssse3")} attribute @cindex @code{target("ssse3")} attribute
...@@ -8928,6 +8933,134 @@ v2di __builtin_ia32_insertq (v2di, v2di) ...@@ -8928,6 +8933,134 @@ v2di __builtin_ia32_insertq (v2di, v2di)
v2di __builtin_ia32_insertqi (v2di, v2di, const unsigned int, const unsigned int) v2di __builtin_ia32_insertqi (v2di, v2di, const unsigned int, const unsigned int)
@end smallexample @end smallexample
The following built-in functions are available when @option{-mxop} is used.
@smallexample
v2df __builtin_ia32_vfrczpd (v2df)
v4sf __builtin_ia32_vfrczps (v4sf)
v2df __builtin_ia32_vfrczsd (v2df, v2df)
v4sf __builtin_ia32_vfrczss (v4sf, v4sf)
v4df __builtin_ia32_vfrczpd256 (v4df)
v8sf __builtin_ia32_vfrczps256 (v8sf)
v2di __builtin_ia32_vpcmov (v2di, v2di, v2di)
v2di __builtin_ia32_vpcmov_v2di (v2di, v2di, v2di)
v4si __builtin_ia32_vpcmov_v4si (v4si, v4si, v4si)
v8hi __builtin_ia32_vpcmov_v8hi (v8hi, v8hi, v8hi)
v16qi __builtin_ia32_vpcmov_v16qi (v16qi, v16qi, v16qi)
v2df __builtin_ia32_vpcmov_v2df (v2df, v2df, v2df)
v4sf __builtin_ia32_vpcmov_v4sf (v4sf, v4sf, v4sf)
v4di __builtin_ia32_vpcmov_v4di256 (v4di, v4di, v4di)
v8si __builtin_ia32_vpcmov_v8si256 (v8si, v8si, v8si)
v16hi __builtin_ia32_vpcmov_v16hi256 (v16hi, v16hi, v16hi)
v32qi __builtin_ia32_vpcmov_v32qi256 (v32qi, v32qi, v32qi)
v4df __builtin_ia32_vpcmov_v4df256 (v4df, v4df, v4df)
v8sf __builtin_ia32_vpcmov_v8sf256 (v8sf, v8sf, v8sf)
v16qi __builtin_ia32_vpcomeqb (v16qi, v16qi)
v8hi __builtin_ia32_vpcomeqw (v8hi, v8hi)
v4si __builtin_ia32_vpcomeqd (v4si, v4si)
v2di __builtin_ia32_vpcomeqq (v2di, v2di)
v16qi __builtin_ia32_vpcomequb (v16qi, v16qi)
v4si __builtin_ia32_vpcomequd (v4si, v4si)
v2di __builtin_ia32_vpcomequq (v2di, v2di)
v8hi __builtin_ia32_vpcomequw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomeqw (v8hi, v8hi)
v16qi __builtin_ia32_vpcomfalseb (v16qi, v16qi)
v4si __builtin_ia32_vpcomfalsed (v4si, v4si)
v2di __builtin_ia32_vpcomfalseq (v2di, v2di)
v16qi __builtin_ia32_vpcomfalseub (v16qi, v16qi)
v4si __builtin_ia32_vpcomfalseud (v4si, v4si)
v2di __builtin_ia32_vpcomfalseuq (v2di, v2di)
v8hi __builtin_ia32_vpcomfalseuw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomfalsew (v8hi, v8hi)
v16qi __builtin_ia32_vpcomgeb (v16qi, v16qi)
v4si __builtin_ia32_vpcomged (v4si, v4si)
v2di __builtin_ia32_vpcomgeq (v2di, v2di)
v16qi __builtin_ia32_vpcomgeub (v16qi, v16qi)
v4si __builtin_ia32_vpcomgeud (v4si, v4si)
v2di __builtin_ia32_vpcomgeuq (v2di, v2di)
v8hi __builtin_ia32_vpcomgeuw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomgew (v8hi, v8hi)
v16qi __builtin_ia32_vpcomgtb (v16qi, v16qi)
v4si __builtin_ia32_vpcomgtd (v4si, v4si)
v2di __builtin_ia32_vpcomgtq (v2di, v2di)
v16qi __builtin_ia32_vpcomgtub (v16qi, v16qi)
v4si __builtin_ia32_vpcomgtud (v4si, v4si)
v2di __builtin_ia32_vpcomgtuq (v2di, v2di)
v8hi __builtin_ia32_vpcomgtuw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomgtw (v8hi, v8hi)
v16qi __builtin_ia32_vpcomleb (v16qi, v16qi)
v4si __builtin_ia32_vpcomled (v4si, v4si)
v2di __builtin_ia32_vpcomleq (v2di, v2di)
v16qi __builtin_ia32_vpcomleub (v16qi, v16qi)
v4si __builtin_ia32_vpcomleud (v4si, v4si)
v2di __builtin_ia32_vpcomleuq (v2di, v2di)
v8hi __builtin_ia32_vpcomleuw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomlew (v8hi, v8hi)
v16qi __builtin_ia32_vpcomltb (v16qi, v16qi)
v4si __builtin_ia32_vpcomltd (v4si, v4si)
v2di __builtin_ia32_vpcomltq (v2di, v2di)
v16qi __builtin_ia32_vpcomltub (v16qi, v16qi)
v4si __builtin_ia32_vpcomltud (v4si, v4si)
v2di __builtin_ia32_vpcomltuq (v2di, v2di)
v8hi __builtin_ia32_vpcomltuw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomltw (v8hi, v8hi)
v16qi __builtin_ia32_vpcomneb (v16qi, v16qi)
v4si __builtin_ia32_vpcomned (v4si, v4si)
v2di __builtin_ia32_vpcomneq (v2di, v2di)
v16qi __builtin_ia32_vpcomneub (v16qi, v16qi)
v4si __builtin_ia32_vpcomneud (v4si, v4si)
v2di __builtin_ia32_vpcomneuq (v2di, v2di)
v8hi __builtin_ia32_vpcomneuw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomnew (v8hi, v8hi)
v16qi __builtin_ia32_vpcomtrueb (v16qi, v16qi)
v4si __builtin_ia32_vpcomtrued (v4si, v4si)
v2di __builtin_ia32_vpcomtrueq (v2di, v2di)
v16qi __builtin_ia32_vpcomtrueub (v16qi, v16qi)
v4si __builtin_ia32_vpcomtrueud (v4si, v4si)
v2di __builtin_ia32_vpcomtrueuq (v2di, v2di)
v8hi __builtin_ia32_vpcomtrueuw (v8hi, v8hi)
v8hi __builtin_ia32_vpcomtruew (v8hi, v8hi)
v4si __builtin_ia32_vphaddbd (v16qi)
v2di __builtin_ia32_vphaddbq (v16qi)
v8hi __builtin_ia32_vphaddbw (v16qi)
v2di __builtin_ia32_vphadddq (v4si)
v4si __builtin_ia32_vphaddubd (v16qi)
v2di __builtin_ia32_vphaddubq (v16qi)
v8hi __builtin_ia32_vphaddubw (v16qi)
v2di __builtin_ia32_vphaddudq (v4si)
v4si __builtin_ia32_vphadduwd (v8hi)
v2di __builtin_ia32_vphadduwq (v8hi)
v4si __builtin_ia32_vphaddwd (v8hi)
v2di __builtin_ia32_vphaddwq (v8hi)
v8hi __builtin_ia32_vphsubbw (v16qi)
v2di __builtin_ia32_vphsubdq (v4si)
v4si __builtin_ia32_vphsubwd (v8hi)
v4si __builtin_ia32_vpmacsdd (v4si, v4si, v4si)
v2di __builtin_ia32_vpmacsdqh (v4si, v4si, v2di)
v2di __builtin_ia32_vpmacsdql (v4si, v4si, v2di)
v4si __builtin_ia32_vpmacssdd (v4si, v4si, v4si)
v2di __builtin_ia32_vpmacssdqh (v4si, v4si, v2di)
v2di __builtin_ia32_vpmacssdql (v4si, v4si, v2di)
v4si __builtin_ia32_vpmacsswd (v8hi, v8hi, v4si)
v8hi __builtin_ia32_vpmacssww (v8hi, v8hi, v8hi)
v4si __builtin_ia32_vpmacswd (v8hi, v8hi, v4si)
v8hi __builtin_ia32_vpmacsww (v8hi, v8hi, v8hi)
v4si __builtin_ia32_vpmadcsswd (v8hi, v8hi, v4si)
v4si __builtin_ia32_vpmadcswd (v8hi, v8hi, v4si)
v16qi __builtin_ia32_vpperm (v16qi, v16qi, v16qi)
v16qi __builtin_ia32_vprotb (v16qi, v16qi)
v4si __builtin_ia32_vprotd (v4si, v4si)
v2di __builtin_ia32_vprotq (v2di, v2di)
v8hi __builtin_ia32_vprotw (v8hi, v8hi)
v16qi __builtin_ia32_vpshab (v16qi, v16qi)
v4si __builtin_ia32_vpshad (v4si, v4si)
v2di __builtin_ia32_vpshaq (v2di, v2di)
v8hi __builtin_ia32_vpshaw (v8hi, v8hi)
v16qi __builtin_ia32_vpshlb (v16qi, v16qi)
v4si __builtin_ia32_vpshld (v4si, v4si)
v2di __builtin_ia32_vpshlq (v2di, v2di)
v8hi __builtin_ia32_vpshlw (v8hi, v8hi)
@end smallexample
The following built-in functions are available when @option{-mfma4} is used. The following built-in functions are available when @option{-mfma4} is used.
All of them generate the machine instruction that is part of the name All of them generate the machine instruction that is part of the name
with MMX registers. with MMX registers.
......
...@@ -594,7 +594,7 @@ Objective-C and Objective-C++ Dialects}. ...@@ -594,7 +594,7 @@ Objective-C and Objective-C++ Dialects}.
-mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol -mcld -mcx16 -msahf -mmovbe -mcrc32 -mrecip @gol
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol -mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
-maes -mpclmul @gol -maes -mpclmul @gol
-msse4a -m3dnow -mpopcnt -mabm -mfma4 @gol -msse4a -m3dnow -mpopcnt -mabm -mfma4 -mxop @gol
-mthreads -mno-align-stringops -minline-all-stringops @gol -mthreads -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol -mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
...@@ -12005,6 +12005,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. ...@@ -12005,6 +12005,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@itemx -mno-sse4a @itemx -mno-sse4a
@itemx -mfma4 @itemx -mfma4
@itemx -mno-fma4 @itemx -mno-fma4
@itemx -mxop
@itemx -mno-xop
@itemx -m3dnow @itemx -m3dnow
@itemx -mno-3dnow @itemx -mno-3dnow
@itemx -mpopcnt @itemx -mpopcnt
...@@ -12018,8 +12020,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. ...@@ -12018,8 +12020,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@opindex m3dnow @opindex m3dnow
@opindex mno-3dnow @opindex mno-3dnow
These switches enable or disable the use of instructions in the MMX, These switches enable or disable the use of instructions in the MMX,
SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, FMA4, ABM or SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AES, PCLMUL, SSE4A, FMA4, XOP,
3DNow!@: extended instruction sets. ABM or 3DNow!@: extended instruction sets.
These extensions are also available as built-in functions: see These extensions are also available as built-in functions: see
@ref{X86 Built-in Functions}, for details of the functions enabled and @ref{X86 Built-in Functions}, for details of the functions enabled and
disabled by these switches. disabled by these switches.
......
2009-11-04 Harsha Jagasia <harsha.jagasia@amd.com>
Dwarakanath Rajagopal <dwarak.rajagopal@amd.com>
* gcc.target/i386/xop-check.h: New file.
* gcc.target/i386/xop-hadduX.c: Ditto.
* gcc.target/i386/xop-haddX.c: Ditto.
* gcc.target/i386/xop-hsubX.c: Ditto.
* gcc.target/i386/xop-imul32widen-vector.c: Ditto.
* gcc.target/i386/xop-imul64-vector.c: Ditto.
* gcc.target/i386/xop-pcmov2.c: Ditto.
* gcc.target/i386/xop-pcmov.c: Ditto.
* gcc.target/i386/xop-rotate1-vector.c: Ditto.
* gcc.target/i386/xop-rotate2-vector.c: Ditto.
* gcc.target/i386/xop-rotate3-vector.c: Ditto.
* gcc.target/i386/xop-shift1-vector.c: Ditto.
* gcc.target/i386/xop-shift2-vector.c: Ditto.
* gcc.target/i386/xop-shift3-vector.c: Ditto.
* gcc.target/i386/i386.exp: Add check_effective_target_xop.
* gcc.target/i386/sse-12.c: Update with new compile options to
activate and check xopintrin.h intrinsic file.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* g++.dg/other/i386-2.C: Ditto.
* g++.dg/other/i386-3.C: Ditto.
* g++.dg/other/i386-5.C: Ditto.
* g++.dg/other/i386-6.C: Ditto.
2009-11-04 Wei Guozhi <carrot@google.com> 2009-11-04 Wei Guozhi <carrot@google.com>
PR target/40835 PR target/40835
......
/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and /* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, xopintrin.h, mm3dnow.h and
mm_malloc.h are usable with -O -pedantic-errors. */ mm_malloc.h are usable with -O -pedantic-errors. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ /* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */
#include <x86intrin.h> #include <x86intrin.h>
int dummy; int dummy;
/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and /* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h, xopintrin.h and
mm_malloc.h are usable with -O -fkeep-inline-functions. */ mm_malloc.h are usable with -O -fkeep-inline-functions. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ /* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */
#include <x86intrin.h> #include <x86intrin.h>
/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and /* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, xopintrin.h, mm3dnow.h and
mm_malloc.h are usable with -O -fkeep-inline-functions. */ mm_malloc.h are usable with -O -fkeep-inline-functions. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ /* { dg-options "-O -fkeep-inline-functions -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */
#include <x86intrin.h> #include <x86intrin.h>
/* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, mm3dnow.h and /* Test that {,x,e,p,t,s,w,a,i}mmintrin.h, fma4intrin.h, xopintrin.h, mm3dnow.h and
mm_malloc.h are usable with -O -pedantic-errors. */ mm_malloc.h are usable with -O -pedantic-errors. */
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -maes -mpclmul" } */ /* { dg-options "-O -pedantic-errors -march=k8 -m3dnow -mavx -msse4a -mfma4 -mxop -maes -mpclmul" } */
#include <x86intrin.h> #include <x86intrin.h>
......
...@@ -146,6 +146,20 @@ proc check_effective_target_fma4 { } { ...@@ -146,6 +146,20 @@ proc check_effective_target_fma4 { } {
} "-O2 -mfma4" ] } "-O2 -mfma4" ]
} }
# Return 1 if xop instructions can be compiled.
# The probe compiles a small function that calls an XOP builtin with
# "-O2 -mxop".  It uses the v-prefixed builtin name
# (__builtin_ia32_vpmacssww); the unprefixed name is not an XOP builtin,
# so probing with it would draw a compiler message.
proc check_effective_target_xop { } {
    return [check_no_compiler_messages xop object {
        typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
        typedef short __v8hi __attribute__ ((__vector_size__ (16)));

        __m128i _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
        {
            return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,
                                                       (__v8hi)__B,
                                                       (__v8hi)__C);
        }
    } "-O2 -mxop" ]
}
# If a testcase doesn't have special options, use these. # If a testcase doesn't have special options, use these.
global DEFAULT_CFLAGS global DEFAULT_CFLAGS
if ![info exists DEFAULT_CFLAGS] then { if ![info exists DEFAULT_CFLAGS] then {
......
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h and mm_malloc.h are /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h, mm3dnow.h and mm_malloc.h are
usable with -O -std=c89 -pedantic-errors. */ usable with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -maes -mpclmul" } */ /* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -m3dnow -mavx -mfma4 -mxop -maes -mpclmul" } */
#include <x86intrin.h> #include <x86intrin.h>
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -maes -mpclmul" } */ /* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -maes -mpclmul" } */
#include <mm_malloc.h> #include <mm_malloc.h>
/* Test that the intrinsics compile with optimization. All of them are /* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h and mm3dnow.h defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, xopintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "extern" and that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */ "__inline" results in all of them being compiled as proper functions. */
...@@ -125,4 +125,10 @@ ...@@ -125,4 +125,10 @@
#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) #define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0)
#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) #define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0)
/* xopintrin.h */
#define __builtin_ia32_vprotbi(A, N) __builtin_ia32_vprotbi (A,1)
#define __builtin_ia32_vprotwi(A, N) __builtin_ia32_vprotwi (A,1)
#define __builtin_ia32_vprotdi(A, N) __builtin_ia32_vprotdi (A,1)
#define __builtin_ia32_vprotqi(A, N) __builtin_ia32_vprotqi (A,1)
#include <x86intrin.h> #include <x86intrin.h>
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -msse4a -maes -mpclmul" } */ /* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -m3dnow -mavx -mxop -msse4a -maes -mpclmul" } */
#include <mm_malloc.h> #include <mm_malloc.h>
/* Test that the intrinsics compile without optimization. All of them are /* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h and mm3dnow.h defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, xopintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "extern" and that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */ "__inline" results in all of them being compiled as proper functions. */
...@@ -155,3 +155,10 @@ test_2 (_m_pinsrw, __m64, __m64, int, 1) ...@@ -155,3 +155,10 @@ test_2 (_m_pinsrw, __m64, __m64, int, 1)
test_1 (_mm_shuffle_pi16, __m64, __m64, 1) test_1 (_mm_shuffle_pi16, __m64, __m64, 1)
test_1 (_m_pshufw, __m64, __m64, 1) test_1 (_m_pshufw, __m64, __m64, 1)
test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA) test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA)
/* xopintrin.h */
test_1 ( _mm_roti_epi8, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi16, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi32, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi64, __m128i, __m128i, 1)
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <mm_malloc.h> #include <mm_malloc.h>
/* Test that the intrinsics compile without optimization. All of them are /* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h and mm3dnow.h defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, xopintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "extern" and that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */ "__inline" results in all of them being compiled as proper functions. */
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
#ifndef DIFFERENT_PRAGMAS #ifndef DIFFERENT_PRAGMAS
#pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul") #pragma GCC target ("mmx,3dnow,sse,sse2,sse3,ssse3,sse4.1,sse4.2,sse4a,aes,pclmul,xop")
#endif #endif
/* Following intrinsics require immediate arguments. They /* Following intrinsics require immediate arguments. They
...@@ -159,3 +159,13 @@ test_1 (_mm_round_pd, __m128d, __m128d, 1) ...@@ -159,3 +159,13 @@ test_1 (_mm_round_pd, __m128d, __m128d, 1)
test_1 (_mm_round_ps, __m128, __m128, 1) test_1 (_mm_round_ps, __m128, __m128, 1)
test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1) test_2 (_mm_round_sd, __m128d, __m128d, __m128d, 1)
test_2 (_mm_round_ss, __m128, __m128, __m128, 1) test_2 (_mm_round_ss, __m128, __m128, __m128, 1)
/* xopintrin.h (XOP). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("xop")
#endif
#include <x86intrin.h>
test_1 ( _mm_roti_epi8, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi16, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi32, __m128i, __m128i, 1)
test_1 ( _mm_roti_epi64, __m128i, __m128i, 1)
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include <mm_malloc.h> #include <mm_malloc.h>
/* Test that the intrinsics compile with optimization. All of them are /* Test that the intrinsics compile with optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a,b}mmintrin.h and mm3dnow.h defined as inline functions in {,x,e,p,t,s,w,a}mmintrin.h, xopintrin.h and mm3dnow.h
that reference the proper builtin functions. Defining away "extern" and that reference the proper builtin functions. Defining away "extern" and
"__inline" results in all of them being compiled as proper functions. */ "__inline" results in all of them being compiled as proper functions. */
...@@ -93,14 +93,13 @@ ...@@ -93,14 +93,13 @@
#define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0) #define __builtin_ia32_vec_ext_v4hi(A, N) __builtin_ia32_vec_ext_v4hi(A, 0)
#define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0) #define __builtin_ia32_shufps(A, B, N) __builtin_ia32_shufps(A, B, 0)
/* bmmintrin.h */ /* xopintrin.h */
#define __builtin_ia32_protbi(A, B) __builtin_ia32_protbi(A,1) #define __builtin_ia32_vprotbi(A, B) __builtin_ia32_vprotbi(A,1)
#define __builtin_ia32_protwi(A, B) __builtin_ia32_protwi(A,1) #define __builtin_ia32_vprotwi(A, B) __builtin_ia32_vprotwi(A,1)
#define __builtin_ia32_protdi(A, B) __builtin_ia32_protdi(A,1) #define __builtin_ia32_vprotdi(A, B) __builtin_ia32_vprotdi(A,1)
#define __builtin_ia32_protqi(A, B) __builtin_ia32_protqi(A,1) #define __builtin_ia32_vprotqi(A, B) __builtin_ia32_vprotqi(A,1)
#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul,xop")
#pragma GCC target ("3dnow,sse4,sse4a,aes,pclmul")
#include <wmmintrin.h> #include <wmmintrin.h>
#include <smmintrin.h> #include <smmintrin.h>
#include <mm3dnow.h> #include <mm3dnow.h>
#include <stdlib.h>
#include "cpuid.h"
static void xop_test (void);
/* Test driver: query extended CPUID leaf 0x80000001 and call xop_test
   only when the query succeeds and the XOP bit is set in ECX.  The
   program then finishes with status 0.  */
int
main ()
{
  unsigned int eax, ebx, ecx, edx;
  int have_leaf = __get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);

  /* Run XOP test only if host has XOP support.  */
  if (have_leaf && (ecx & bit_XOP) != 0)
    xop_test ();

  return 0;
}
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#include <x86intrin.h>
#include <string.h>
#define NUM 10
/* Test buffers, each viewable either as NUM __m128i vectors or as flat
   arrays of signed char, short, int or long long elements.
   src1 is filled by the init_* functions, dst receives the intrinsic
   results in xop_test, and res receives the reference values computed
   by the check_* functions.  */
union
{
__m128i x[NUM];
signed char ssi[NUM * 16];
short si[NUM * 8];
int li[NUM * 4];
long long lli[NUM * 2];
} dst, res, src1;
/* Fill every signed-char element of src1 with its own index (converted
   to signed char on assignment).  */
static void
init_sbyte ()
{
  int lane = 0;

  while (lane < NUM * 16)
    {
      src1.ssi[lane] = lane;
      lane++;
    }
}
/* Fill every short element of src1 with its own index.  */
static void
init_sword ()
{
  int lane = 0;

  while (lane < NUM * 8)
    {
      src1.si[lane] = lane;
      lane++;
    }
}
/* Fill every int element of src1 with its own index.  */
static void
init_sdword ()
{
  int lane = 0;

  while (lane < NUM * 4)
    {
      src1.li[lane] = lane;
      lane++;
    }
}
/* Reference check for the signed byte -> word horizontal add.
   For every word slot, store the sum of the two adjacent source bytes
   of src1 in res.si and compare it with dst.si.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_sbyte2word ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < NUM * 16; i = i + 16)
    {
      for (j = 0; j < 8; j++)
        {
          t = i + (2 * j);   /* First source byte of the pair.  */
          s = (i / 2) + j;   /* Destination word slot.  */
          res.si[s] = src1.ssi[t] + src1.ssi[t + 1];
          if (res.si[s] != dst.si[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Reference check for the signed byte -> doubleword horizontal add.
   For every doubleword slot, store the sum of its four source bytes of
   src1 in res.li and compare it with dst.li.
   Returns the number of mismatching slots.  */
static int
check_sbyte2dword ()
{
  int slot, mismatches = 0;

  /* Slot K is built from bytes 4*K .. 4*K+3, so one ascending pass over
     the slots visits them in the same order as a per-vector walk.  */
  for (slot = 0; slot < NUM * 4; slot++)
    {
      int base = 4 * slot;
      int lo = src1.ssi[base] + src1.ssi[base + 1];
      int hi = src1.ssi[base + 2] + src1.ssi[base + 3];

      res.li[slot] = lo + hi;
      if (res.li[slot] != dst.li[slot])
        mismatches++;
    }

  return mismatches;
}
/* Reference check for the signed byte -> quadword horizontal add.
   For every quadword slot, store the sum of its eight source bytes of
   src1 in res.lli and compare it with dst.lli.
   Returns the number of mismatching slots.  */
static int
check_sbyte2qword ()
{
  int slot, k, mismatches = 0;

  /* Slot K is built from bytes 8*K .. 8*K+7.  */
  for (slot = 0; slot < NUM * 2; slot++)
    {
      int base = 8 * slot;
      int sum = 0;

      for (k = 0; k < 8; k++)
        sum += src1.ssi[base + k];

      res.lli[slot] = sum;
      if (res.lli[slot] != dst.lli[slot])
        mismatches++;
    }

  return mismatches;
}
/* Reference check for the signed word -> doubleword horizontal add.
   For every doubleword slot, store the sum of the two adjacent source
   words of src1 in res.li and compare it with dst.li.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_sword2dword ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < (NUM * 8); i = i + 8)
    {
      for (j = 0; j < 4; j++)
        {
          t = i + (2 * j);   /* First source word of the pair.  */
          s = (i / 2) + j;   /* Destination doubleword slot.  */
          res.li[s] = src1.si[t] + src1.si[t + 1];
          if (res.li[s] != dst.li[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Reference check for the signed word -> quadword horizontal add.
   For every quadword slot, store the sum of its four source words of
   src1 in res.lli and compare it with dst.lli.
   Returns the number of mismatching slots.  */
static int
check_sword2qword ()
{
  int slot, mismatches = 0;

  /* Slot K is built from words 4*K .. 4*K+3.  */
  for (slot = 0; slot < NUM * 2; slot++)
    {
      int base = 4 * slot;
      int lo = src1.si[base] + src1.si[base + 1];
      int hi = src1.si[base + 2] + src1.si[base + 3];

      res.lli[slot] = lo + hi;
      if (res.lli[slot] != dst.lli[slot])
        mismatches++;
    }

  return mismatches;
}
/* Reference check for the signed doubleword -> quadword horizontal add.
   For every quadword slot, store the sum of the two adjacent source
   doublewords of src1 in res.lli and compare it with dst.lli.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_dword2qword ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < (NUM * 4); i = i + 4)
    {
      for (j = 0; j < 2; j++)
        {
          t = i + (2 * j);   /* First source doubleword of the pair.  */
          s = (i / 2) + j;   /* Destination quadword slot.  */
          /* Widen before adding so the reference sum is formed in 64
             bits, like the result slot it is stored into.  */
          res.lli[s] = (long long) src1.li[t] + src1.li[t + 1];
          if (res.lli[s] != dst.lli[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Run each signed horizontal-add intrinsic over all NUM vectors of src1,
   storing the results in dst, and abort as soon as the matching
   reference check reports a non-zero mismatch count.  */
static void
xop_test (void)
{
  int v;

  /* Byte-sized sources.  */
  init_sbyte ();

  /* Check haddbw */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddw_epi8 (src1.x[v]);
    }
  if (check_sbyte2word () != 0)
    abort ();

  /* Check haddbd */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddd_epi8 (src1.x[v]);
    }
  if (check_sbyte2dword () != 0)
    abort ();

  /* Check haddbq */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddq_epi8 (src1.x[v]);
    }
  if (check_sbyte2qword () != 0)
    abort ();

  /* Word-sized sources.  */
  init_sword ();

  /* Check haddwd */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddd_epi16 (src1.x[v]);
    }
  if (check_sword2dword () != 0)
    abort ();

  /* Check haddwq */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddq_epi16 (src1.x[v]);
    }
  if (check_sword2qword () != 0)
    abort ();

  /* Doubleword-sized sources.  */
  init_sdword ();

  /* Check hadddq */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddq_epi32 (src1.x[v]);
    }
  if (check_dword2qword () != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#include <x86intrin.h>
#include <string.h>
#define NUM 10
/* Test buffers, each viewable either as NUM __m128i vectors or as flat
   arrays of unsigned char, unsigned short, unsigned int or unsigned
   long long elements.
   src1 is filled by the init_* functions, dst receives the intrinsic
   results in xop_test, and res receives the reference values computed
   by the check_* functions.  */
union
{
__m128i x[NUM];
unsigned char ssi[NUM * 16];
unsigned short si[NUM * 8];
unsigned int li[NUM * 4];
unsigned long long lli[NUM * 2];
} dst, res, src1;
/* Fill every unsigned-char element of src1 with its own index
   (converted to unsigned char on assignment).  */
static void
init_byte ()
{
  int lane = 0;

  while (lane < NUM * 16)
    {
      src1.ssi[lane] = lane;
      lane++;
    }
}
/* Fill every unsigned-short element of src1 with its own index.  */
static void
init_word ()
{
  int lane = 0;

  while (lane < NUM * 8)
    {
      src1.si[lane] = lane;
      lane++;
    }
}
/* Fill every unsigned-int element of src1 with its own index.  */
static void
init_dword ()
{
  int lane = 0;

  while (lane < NUM * 4)
    {
      src1.li[lane] = lane;
      lane++;
    }
}
/* Reference check for the unsigned byte -> word horizontal add.
   For every word slot, store the sum of the two adjacent source bytes
   of src1 in res.si and compare it with dst.si.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_byte2word ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < NUM * 16; i = i + 16)
    {
      for (j = 0; j < 8; j++)
        {
          t = i + (2 * j);   /* First source byte of the pair.  */
          s = (i / 2) + j;   /* Destination word slot.  */
          res.si[s] = src1.ssi[t] + src1.ssi[t + 1];
          if (res.si[s] != dst.si[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Reference check for the unsigned byte -> doubleword horizontal add.
   For every doubleword slot, store the sum of its four source bytes of
   src1 in res.li and compare it with dst.li.
   Returns the number of mismatching slots.  */
static int
check_byte2dword ()
{
  int slot, mismatches = 0;

  /* Slot K is built from bytes 4*K .. 4*K+3, so one ascending pass over
     the slots visits them in the same order as a per-vector walk.  */
  for (slot = 0; slot < NUM * 4; slot++)
    {
      int base = 4 * slot;
      int lo = src1.ssi[base] + src1.ssi[base + 1];
      int hi = src1.ssi[base + 2] + src1.ssi[base + 3];

      res.li[slot] = lo + hi;
      if (res.li[slot] != dst.li[slot])
        mismatches++;
    }

  return mismatches;
}
/* Reference check for the unsigned byte -> quadword horizontal add.
   For every quadword slot, store the sum of its eight source bytes of
   src1 in res.lli and compare it with dst.lli.
   Returns the number of mismatching slots.  */
static int
check_byte2qword ()
{
  int slot, k, mismatches = 0;

  /* Slot K is built from bytes 8*K .. 8*K+7.  */
  for (slot = 0; slot < NUM * 2; slot++)
    {
      int base = 8 * slot;
      int sum = 0;

      for (k = 0; k < 8; k++)
        sum += src1.ssi[base + k];

      res.lli[slot] = sum;
      if (res.lli[slot] != dst.lli[slot])
        mismatches++;
    }

  return mismatches;
}
/* Reference check for the unsigned word -> doubleword horizontal add.
   For every doubleword slot, store the sum of the two adjacent source
   words of src1 in res.li and compare it with dst.li.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_word2dword ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < (NUM * 8); i = i + 8)
    {
      for (j = 0; j < 4; j++)
        {
          t = i + (2 * j);   /* First source word of the pair.  */
          s = (i / 2) + j;   /* Destination doubleword slot.  */
          res.li[s] = src1.si[t] + src1.si[t + 1];
          if (res.li[s] != dst.li[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Reference check for the unsigned word -> quadword horizontal add.
   For every quadword slot, store the sum of its four source words of
   src1 in res.lli and compare it with dst.lli.
   Returns the number of mismatching slots.  */
static int
check_word2qword ()
{
  int slot, mismatches = 0;

  /* Slot K is built from words 4*K .. 4*K+3.  */
  for (slot = 0; slot < NUM * 2; slot++)
    {
      int base = 4 * slot;
      int lo = src1.si[base] + src1.si[base + 1];
      int hi = src1.si[base + 2] + src1.si[base + 3];

      res.lli[slot] = lo + hi;
      if (res.lli[slot] != dst.lli[slot])
        mismatches++;
    }

  return mismatches;
}
/* Reference check for the unsigned doubleword -> quadword horizontal add.
   For every quadword slot, store the sum of the two adjacent source
   doublewords of src1 in res.lli and compare it with dst.lli.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_dword2qword ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < (NUM * 4); i = i + 4)
    {
      for (j = 0; j < 2; j++)
        {
          t = i + (2 * j);   /* First source doubleword of the pair.  */
          s = (i / 2) + j;   /* Destination quadword slot.  */
          /* Widen before adding so the reference sum is formed in 64
             bits, like the result slot it is stored into.  */
          res.lli[s] = (unsigned long long) src1.li[t] + src1.li[t + 1];
          if (res.lli[s] != dst.lli[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Run each unsigned horizontal-add intrinsic over all NUM vectors of
   src1, storing the results in dst, and abort as soon as the matching
   reference check reports a non-zero mismatch count.  */
static void
xop_test (void)
{
  int v;

  /* Byte-sized sources.  */
  init_byte ();

  /* Check haddubw */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddw_epu8 (src1.x[v]);
    }
  if (check_byte2word () != 0)
    abort ();

  /* Check haddubd */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddd_epu8 (src1.x[v]);
    }
  if (check_byte2dword () != 0)
    abort ();

  /* Check haddubq */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddq_epu8 (src1.x[v]);
    }
  if (check_byte2qword () != 0)
    abort ();

  /* Word-sized sources.  */
  init_word ();

  /* Check hadduwd */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddd_epu16 (src1.x[v]);
    }
  if (check_word2dword () != 0)
    abort ();

  /* Check hadduwq */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddq_epu16 (src1.x[v]);
    }
  if (check_word2qword () != 0)
    abort ();

  /* Doubleword-sized sources.  */
  init_dword ();

  /* Check haddudq */
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_haddq_epu32 (src1.x[v]);
    }
  if (check_dword2qword () != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#include <x86intrin.h>
#include <string.h>
#define NUM 10
/* Test buffers, each viewable either as NUM __m128i vectors or as flat
   arrays of signed char, short, int or long long elements.
   src1 is filled by the init_* functions, dst receives the intrinsic
   results in xop_test, and res receives the reference values computed
   by the check_* functions.  */
union
{
__m128i x[NUM];
signed char ssi[NUM * 16];
short si[NUM * 8];
int li[NUM * 4];
long long lli[NUM * 2];
} dst, res, src1;
/* Fill every signed-char element of src1 with its own index (converted
   to signed char on assignment).  */
static void
init_sbyte ()
{
  int lane = 0;

  while (lane < NUM * 16)
    {
      src1.ssi[lane] = lane;
      lane++;
    }
}
/* Fill every short element of src1 with its own index.  */
static void
init_sword ()
{
  int lane = 0;

  while (lane < NUM * 8)
    {
      src1.si[lane] = lane;
      lane++;
    }
}
/* Fill every int element of src1 with its own index.  */
static void
init_sdword ()
{
  int lane = 0;

  while (lane < NUM * 4)
    {
      src1.li[lane] = lane;
      lane++;
    }
}
/* Reference check for the signed byte -> word horizontal subtract.
   For every word slot, store (first byte - second byte) of the adjacent
   source pair of src1 in res.si and compare it with dst.si.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_sbyte2word ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < NUM * 16; i = i + 16)
    {
      for (j = 0; j < 8; j++)
        {
          t = i + (2 * j);   /* First source byte of the pair.  */
          s = (i / 2) + j;   /* Destination word slot.  */
          res.si[s] = src1.ssi[t] - src1.ssi[t + 1];
          if (res.si[s] != dst.si[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Reference check for the signed word -> doubleword horizontal subtract.
   For every doubleword slot, store (first word - second word) of the
   adjacent source pair of src1 in res.li and compare it with dst.li.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_sword2dword ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < (NUM * 8); i = i + 8)
    {
      for (j = 0; j < 4; j++)
        {
          t = i + (2 * j);   /* First source word of the pair.  */
          s = (i / 2) + j;   /* Destination doubleword slot.  */
          res.li[s] = src1.si[t] - src1.si[t + 1];
          if (res.li[s] != dst.li[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Reference check for the signed doubleword -> quadword horizontal
   subtract.  For every quadword slot, store (first doubleword - second
   doubleword) of the adjacent source pair of src1 in res.lli and
   compare it with dst.lli.
   Returns the number of mismatching slots (0 when everything matches).  */
static int
check_dword2qword ()
{
  int i, j, s, t, check_fails = 0;

  for (i = 0; i < (NUM * 4); i = i + 4)
    {
      for (j = 0; j < 2; j++)
        {
          t = i + (2 * j);   /* First source doubleword of the pair.  */
          s = (i / 2) + j;   /* Destination quadword slot.  */
          /* Widen before subtracting so the reference difference is
             formed in 64 bits, like the slot it is stored into.  */
          res.lli[s] = (long long) src1.li[t] - src1.li[t + 1];
          if (res.lli[s] != dst.lli[s])
            check_fails++;
        }
    }

  /* Report the count explicitly: the caller tests the result, and
     falling off the end of a non-void function leaves it undefined.  */
  return check_fails;
}
/* Run each horizontal-subtract intrinsic over all NUM vectors of src1,
   storing the results in dst, and abort as soon as the matching
   reference check reports a non-zero mismatch count.  */
static void
xop_test (void)
{
  int v;

  /* Check hsubbw */
  init_sbyte ();
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_hsubw_epi8 (src1.x[v]);
    }
  if (check_sbyte2word () != 0)
    abort ();

  /* Check hsubwd */
  init_sword ();
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_hsubd_epi16 (src1.x[v]);
    }
  if (check_sword2dword () != 0)
    abort ();

  /* Check hsubdq */
  init_sdword ();
  for (v = 0; v < NUM; ++v)
    {
      dst.x[v] = _mm_hsubq_epi32 (src1.x[v]);
    }
  if (check_dword2qword () != 0)
    abort ();
}
/* Test that the compiler properly vectorizes widening 32-bit integer
multiplies into pmacsdql/pmacsdqh on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers.  Each can be viewed as SIZE int or SIZE long
   elements; the i_align member is never referenced in this test
   (presumably it is there to give the union vector alignment -- TODO
   confirm).  Only a, b and c are used below.  */
union {
__m128i i_align;
int i32[SIZE];
long i64[SIZE];
} a, b, c, d;
/* Kernel under test: for each index, widen b.i32[i] and c.i32[i] to long
   and store their product in a.i64[i].  The dg-final directives of this
   test expect the generated assembly to contain vpmacsdql and
   vpmacsdqh.  */
void
imul32_to_64 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i64[i] = ((long)b.i32[i]) * ((long)c.i32[i]);
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  imul32_to_64 ();
  exit (0);
}
/* { dg-final { scan-assembler "vpmacsdql" } } */
/* { dg-final { scan-assembler "vpmacsdqh" } } */
/* Test that the compiler properly vectorizes 64-bit integer multiplies
using pmacsdd/phadddq/pmacsdql on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers of SIZE long elements each; the i_align member is
   never referenced in this test (presumably it is there to give the
   union vector alignment -- TODO confirm).  Only a, b and c are used
   below.  */
union {
__m128i i_align;
long i64[SIZE];
} a, b, c, d;
/* Kernel under test: element-wise product of b.i64 and c.i64 stored in
   a.i64.  The dg-final directives of this test expect the generated
   assembly to contain vpmacsdd, vphadddq and vpmacsdql.  */
void
imul64 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i64[i] = b.i64[i] * c.i64[i];
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  imul64 ();
  exit (0);
}
/* { dg-final { scan-assembler "vpmacsdd" } } */
/* { dg-final { scan-assembler "vphadddq" } } */
/* { dg-final { scan-assembler "vpmacsdql" } } */
/* Test that the compiler properly optimizes conditional floating point moves
into the pcmov instruction on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop" } */
extern void exit (int);
/* Return C when A compares greater than B, otherwise return D.  */
double
dbl_test (double a, double b, double c, double d)
{
  if (a > b)
    return c;

  return d;
}
/* Inputs for dbl_test and the slot that receives its result.  */
double dbl_a = 1;
double dbl_b = 2;
double dbl_c = 3;
double dbl_d = 4;
double dbl_e;

/* Entry point: call dbl_test on the global inputs, store the result in
   dbl_e, then terminate with status 0.  */
int
main ()
{
  dbl_e = dbl_test (dbl_a, dbl_b, dbl_c, dbl_d);
  exit (0);
}
/* { dg-final { scan-assembler "vpcmov" } } */
/* Test that the compiler properly optimizes conditional floating point moves
into the pcmov instruction on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop" } */
extern void exit (int);
/* Return C when A compares greater than B, otherwise return D.  */
float
flt_test (float a, float b, float c, float d)
{
  if (a > b)
    return c;

  return d;
}
/* Inputs for flt_test and the slot that receives its result.  */
float flt_a = 1;
float flt_b = 2;
float flt_c = 3;
float flt_d = 4;
float flt_e;

/* Entry point: call flt_test on the global inputs, store the result in
   flt_e, then terminate with status 0.  */
int
main ()
{
  flt_e = flt_test (flt_a, flt_b, flt_c, flt_d);
  exit (0);
}
/* { dg-final { scan-assembler "vpcmov" } } */
/* Test that the compiler properly optimizes vector rotate operations
into prot on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers of SIZE unsigned elements each; the i_align member is
   never referenced in this test (presumably it is there to give the
   union vector alignment -- TODO confirm).  Only a and b are used
   below.  */
union {
__m128i i_align;
unsigned u32[SIZE];
} a, b, c;
/* Kernel under test: for each index, OR together b.u32[i] shifted left
   by (sizeof (int) * 8) - 4 bits and the same value shifted right by 4
   bits, storing the result in a.u32[i].  The dg-final directive of this
   test expects the generated assembly to contain vprotd.  */
void
left_rotate32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = (b.u32[i] << ((sizeof (int) * 8) - 4)) | (b.u32[i] >> 4);
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  /* Single invocation; the test only inspects the generated code.  */
  left_rotate32 ();

  exit (0);
}
/* { dg-final { scan-assembler "vprotd" } } */
/* Test that the compiler properly optimizes vector rotate operations
into prot on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers of SIZE unsigned elements each; the i_align member is
   never referenced in this test (presumably it is there to give the
   union vector alignment -- TODO confirm).  Only a and b are used
   below.  */
union {
__m128i i_align;
unsigned u32[SIZE];
} a, b, c;
/* Kernel under test: for each index, OR together b.u32[i] shifted right
   by (sizeof (int) * 8) - 4 bits and the same value shifted left by 4
   bits, storing the result in a.u32[i].  The dg-final directive of this
   test expects the generated assembly to contain vprot.  */
void
right_rotate32_b (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = (b.u32[i] >> ((sizeof (int) * 8) - 4)) | (b.u32[i] << 4);
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  /* Call the kernel this file actually defines; right_rotate is not
     declared or defined anywhere in this test.  */
  right_rotate32_b ();
  exit (0);
}
/* { dg-final { scan-assembler "vprot" } } */
/* Test that the compiler properly optimizes vector rotate operations
into prot on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers of SIZE unsigned elements each; the i_align member is
   never referenced in this test (presumably it is there to give the
   union vector alignment -- TODO confirm).  */
union {
__m128i i_align;
unsigned u32[SIZE];
} a, b, c;
/* Kernel under test: for each index, OR together b.u32[i] shifted right
   by (sizeof (int) * 8) - c.u32[i] bits and the same value shifted left
   by c.u32[i] bits, storing the result in a.u32[i]; the per-element
   count comes from c.  The dg-final directive of this test expects the
   generated assembly to contain vprotd.
   NOTE(review): when c.u32[i] is 0 the right-shift count equals
   sizeof (int) * 8; this test is compile-only (dg-do compile), so the
   loop is not executed by the testsuite.  */
void
vector_rotate32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = (b.u32[i] >> ((sizeof (int) * 8) - c.u32[i])) | (b.u32[i] << c.u32[i]);
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  vector_rotate32 ();
  exit (0);
}
/* { dg-final { scan-assembler "vprotd" } } */
/* Test that the compiler properly optimizes vector shift instructions into
psha/pshl on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers, each viewable as SIZE int or SIZE unsigned elements;
   the i_align member is never referenced in this test (presumably it is
   there to give the union vector alignment -- TODO confirm).  */
union {
__m128i i_align;
int i32[SIZE];
unsigned u32[SIZE];
} a, b, c;
/* Kernel under test: for each index, shift the signed value b.i32[i]
   left by c.i32[i] bits and store it in a.i32[i].  The dg-final
   directive of this test expects the generated assembly to contain
   vpshad.  */
void
left_shift32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i32[i] = b.i32[i] << c.i32[i];
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  /* Call the kernel this file actually defines; the previous spelling
     "left_shfit32" named a function that does not exist.  */
  left_shift32 ();
  exit (0);
}
/* { dg-final { scan-assembler "vpshad" } } */
/* Test that the compiler properly optimizes vector shift instructions into
psha/pshl on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers, each viewable as SIZE int or SIZE unsigned elements;
   the i_align member is never referenced in this test (presumably it is
   there to give the union vector alignment -- TODO confirm).  */
union {
__m128i i_align;
int i32[SIZE];
unsigned u32[SIZE];
} a, b, c;
/* Kernel under test: for each index, shift the signed value b.i32[i]
   right by c.i32[i] bits and store it in a.i32[i].  The dg-final
   directive of this test expects the generated assembly to contain
   vpshad.  */
void
right_sign_shift32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.i32[i] = b.i32[i] >> c.i32[i];
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  /* Call the kernel this file actually defines; the previous spelling
     "right_sign_shfit32" named a function that does not exist.  */
  right_sign_shift32 ();
  exit (0);
}
/* { dg-final { scan-assembler "vpshad" } } */
/* Test that the compiler properly optimizes vector shift instructions into
psha/pshl on XOP systems. */
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mxop -ftree-vectorize" } */
extern void exit (int);
typedef long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
#define SIZE 10240
/* Operand buffers, each viewable as SIZE int or SIZE unsigned elements;
   the i_align member is never referenced in this test (presumably it is
   there to give the union vector alignment -- TODO confirm).  */
union {
__m128i i_align;
int i32[SIZE];
unsigned u32[SIZE];
} a, b, c;
/* Kernel under test: for each index, shift the unsigned value b.u32[i]
   right by c.i32[i] bits and store it in a.u32[i].  The dg-final
   directive of this test expects the generated assembly to contain
   vpshld.  */
void
right_uns_shift32 (void)
{
int i;
for (i = 0; i < SIZE; i++)
a.u32[i] = b.u32[i] >> c.i32[i];
}
/* Entry point: run the kernel once, then terminate with status 0.  */
int
main ()
{
  /* Call the kernel this file actually defines; the previous spelling
     "right_uns_shfit32" named a function that does not exist.  */
  right_uns_shift32 ();
  exit (0);
}
/* { dg-final { scan-assembler "vpshld" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment