Commit 5fbb13a7 by Kirill Yukhin Committed by H.J. Lu

Enable AVX512_4FMAPS and AVX512_4VNNIW instructions

This requires additional patch for register allocator from Vladimir
Makarov.

gcc/

2016-11-17  Kirill Yukhin  <kirill.yukhin@gmail.com>
	    Andrew Senkevich <andrew.senkevich@intel.com>

	* common/config/i386/i386-common.c
	(OPTION_MASK_ISA_AVX5124FMAPS_SET,
	OPTION_MASK_ISA_AVX5124FMAPS_UNSET,
	OPTION_MASK_ISA_AVX5124VNNIW_SET,
	OPTION_MASK_ISA_AVX5124VNNIW_UNSET): New.
	(ix86_handle_option): Handle OPT_mavx5124fmaps,
	OPT_mavx5124vnniw.
	* config.gcc: Add avx5124fmapsintrin.h, avx5124vnniwintrin.h.
	* config/i386/avx5124fmapsintrin.h: New file.
	* config/i386/avx5124vnniwintrin.h: Ditto.
	* config/i386/constraints.md (h): New constraint.
	* config/i386/cpuid.h: (bit_AVX5124VNNIW,
	bit_AVX5124FMAPS): New.
	* config/i386/driver-i386.c (host_detect_local_cpu):
	Detect avx5124fmaps, avx5124vnniw.
	* config/i386/i386-builtin-types.def: Add types
	V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF_V16SF_UHI,
	V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF,
	V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF,
	V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF_V4SF_UQI,
	V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI,
	V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI_V16SI_UHI.
	* config/i386/i386-builtin.def (__builtin_ia32_4fmaddps_mask,
	__builtin_ia32_4fmaddps, __builtin_ia32_4fmaddss,
	__builtin_ia32_4fmaddss_mask, __builtin_ia32_4fnmaddps_mask,
	__builtin_ia32_4fnmaddps, __builtin_ia32_4fnmaddss,
	__builtin_ia32_4fnmaddss_mask, __builtin_ia32_vp4dpwssd,
	__builtin_ia32_vp4dpwssd_mask, __builtin_ia32_vp4dpwssds,
	__builtin_ia32_vp4dpwssds_mask): New.
	* config/i386/i386-c.c (ix86_target_macros_internal):
	Define __AVX5124FMAPS__, __AVX5124VNNIW__.
	* config/i386/i386-modes.def: Fixed comment typos, added new
	modes (VECTOR_MODES (FLOAT, 256), VECTOR_MODE (INT, SI, 64)).
	* config/i386/i386.c (ix86_target_string): Add -mavx5124fmaps,
	-mavx5124vnniw.
	(PTA_AVX5124FMAPS, PTA_AVX5124VNNIW): Define.
	(ix86_option_override_internal): Handle new options.
	(ix86_valid_target_attribute_inner_p): Add avx5124fmaps,
	avx5124vnniw.
	(ix86_expand_builtin): Handle new builtins.
	(ix86_additional_allocno_class_p): New.
	* config/i386/i386.h (TARGET_AVX5124FMAPS,
	TARGET_AVX5124FMAPS_P,
	TARGET_AVX5124VNNIW,
	TARGET_AVX5124VNNIW_P): Define.
	(reg_class): Add MOD4_SSE_REGS.
	(MOD4_SSE_REG_P, MOD4_SSE_REGNO_P): New.
	* config/i386/i386.opt: Add mavx5124fmaps, mavx5124vnniw.
	* config/i386/immintrin.h: Include avx5124fmapsintrin.h,
	avx5124vnniwintrin.h.
	* config/i386/sse.md (unspec): Add UNSPEC_VP4FMADD,
	UNSPEC_VP4FNMADD,
	UNSPEC_VP4DPWSSD, UNSPEC_VP4DPWSSDS.
	(define_mode_iterator IMOD4): New.
	(define_mode_attr imod4_narrow): Ditto.
	(define_insn "mov<mode>"): Ditto.
	(define_insn "avx5124fmaddps_4fmaddps"): Ditto.
	(define_insn "avx5124fmaddps_4fmaddps_mask"): Ditto.
	(define_insn "avx5124fmaddps_4fmaddps_maskz"): Ditto.
	(define_insn "avx5124fmaddps_4fmaddss"): Ditto.
	(define_insn "avx5124fmaddps_4fmaddss_mask"): Ditto.
	(define_insn "avx5124fmaddps_4fmaddss_maskz"): Ditto.
	(define_insn "avx5124fmaddps_4fnmaddps"): Ditto.
	(define_insn "avx5124fmaddps_4fnmaddps_mask"): Ditto.
	(define_insn "avx5124fmaddps_4fnmaddps_maskz"): Ditto.
	(define_insn "avx5124fmaddps_4fnmaddss"): Ditto.
	(define_insn "avx5124fmaddps_4fnmaddss_mask"): Ditto.
	(define_insn "avx5124fmaddps_4fnmaddss_maskz"): Ditto.
	(define_insn "avx5124vnniw_vp4dpwssd"): Ditto.
	(define_insn "avx5124vnniw_vp4dpwssd_mask"): Ditto.
	(define_insn "avx5124vnniw_vp4dpwssd_maskz"): Ditto.
	(define_insn "avx5124vnniw_vp4dpwssds"): Ditto.
	(define_insn "avx5124vnniw_vp4dpwssds_mask"): Ditto.
	(define_insn "avx5124vnniw_vp4dpwssds_maskz"): Ditto.
	* init-regs.c (initialize_uninitialized_regs): Add emit_clobber call.
	* genmodes.c (mode_size_inline): Extend return type.
	* machmode.h (mode_size, mode_base_align): Extend type.

gcc/testsuite/

2016-11-17  Kirill Yukhin  <kirill.yukhin@gmail.com>
	    Andrew Senkevich <andrew.senkevich@intel.com>

	* gcc.target/i386/avx5124fmadd-v4fmaddps-1.c: New test.
	* gcc.target/i386/avx5124fmadd-v4fmaddps-2.c: Ditto.
	* gcc.target/i386/avx5124fmadd-v4fmaddss-1.c: Ditto.
	* gcc.target/i386/avx5124fmadd-v4fnmaddps-1.c: Ditto.
	* gcc.target/i386/avx5124fmadd-v4fnmaddps-2.c: Ditto.
	* gcc.target/i386/avx5124fmadd-v4fnmaddss-1.c: Ditto.
	* gcc.target/i386/avx5124fmaps-check.h: Ditto.
	* gcc.target/i386/avx5124vnniw-check.h: Ditto.
	* gcc.target/i386/avx5124vnniw-vp4dpwssd-1.c: Ditto.
	* gcc.target/i386/avx5124vnniw-vp4dpwssd-2.c: Ditto.
	* gcc.target/i386/avx5124vnniw-vp4dpwssds-1.c: Ditto.
	* gcc.target/i386/avx5124vnniw-vp4dpwssds-2.c: Ditto.
	* gcc.target/i386/avx512f-helper.h: Add avx5124fmaps-check.h,
	avx5124vnniw-check.h.
	* gcc.target/i386/i386.exp (check_effective_target_avx5124fmaps,
	check_effective_target_avx5124vnniw): New.
	* gcc.target/i386/m128-check.h (ESP_FLOAT, ESP_DOUBLE):
	Set under ifndef.
	* gcc.target/i386/sse-12.c: Add -mavx5124fmaps, -mavx5124vnniw.
	* gcc.target/i386/sse-13.c: Ditto.
	* g++.dg/other/i386-2.C: Ditto.
	* g++.dg/other/i386-3.C: Ditto.
	* gcc.target/i386/sse-22.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.

From-SVN: r242569
parent 80557bb7
......@@ -76,6 +76,8 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512VBMI_SET \
(OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512BW_SET)
#define OPTION_MASK_ISA_AVX5124FMAPS_SET OPTION_MASK_ISA_AVX5124FMAPS
#define OPTION_MASK_ISA_AVX5124VNNIW_SET OPTION_MASK_ISA_AVX5124VNNIW
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
......@@ -179,6 +181,8 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL
#define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA
#define OPTION_MASK_ISA_AVX512VBMI_UNSET OPTION_MASK_ISA_AVX512VBMI
#define OPTION_MASK_ISA_AVX5124FMAPS_UNSET OPTION_MASK_ISA_AVX5124FMAPS
#define OPTION_MASK_ISA_AVX5124VNNIW_UNSET OPTION_MASK_ISA_AVX5124VNNIW
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
......@@ -399,6 +403,12 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512F_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_UNSET;
/* Turn off additional isa flags. */
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
}
return true;
......@@ -441,6 +451,36 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
case OPT_mavx5124fmaps:
if (value)
{
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS_SET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124FMAPS_SET;
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
}
else
{
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
}
return true;
case OPT_mavx5124vnniw:
if (value)
{
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW_SET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124VNNIW_SET;
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
}
else
{
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
}
return true;
case OPT_mavx512dq:
if (value)
{
......
......@@ -374,8 +374,8 @@ i[34567]86-*-*)
xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h
avx512ifmaintrin.h avx512ifmavlintrin.h avx512vbmiintrin.h
avx512vbmivlintrin.h clwbintrin.h mwaitxintrin.h
clzerointrin.h pkuintrin.h"
avx512vbmivlintrin.h avx5124fmapsintrin.h avx5124vnniwintrin.h
clwbintrin.h mwaitxintrin.h clzerointrin.h pkuintrin.h"
;;
x86_64-*-*)
cpu_type=i386
......@@ -396,8 +396,8 @@ x86_64-*-*)
xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h
avx512ifmaintrin.h avx512ifmavlintrin.h avx512vbmiintrin.h
avx512vbmivlintrin.h clwbintrin.h mwaitxintrin.h
clzerointrin.h pkuintrin.h"
avx512vbmivlintrin.h avx5124fmapsintrin.h avx5124vnniwintrin.h
clwbintrin.h mwaitxintrin.h clzerointrin.h pkuintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
......
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#if !defined _IMMINTRIN_H_INCLUDED
# error "Never use <avx5124fmapsintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _AVX5124FMAPSINTRIN_H_INCLUDED
#define _AVX5124FMAPSINTRIN_H_INCLUDED
#ifndef __AVX5124FMAPS__
#pragma GCC push_options
#pragma GCC target("avx5124fmaps")
#define __DISABLE_AVX5124FMAPS__
#endif /* __AVX5124FMAPS__ */
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4fmadd_ps (__m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fmaddps ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) __A,
(__mmask16) __U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4fmadd_ps (__mmask16 __U,
__m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fmaddps_mask ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_4fmadd_ss (__m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fmaddss ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_4fmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) __A,
(__mmask8) __U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_4fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fmaddss_mask ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4fnmadd_ps (__m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fnmaddps ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, __m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) __A,
(__mmask16) __U);
}
extern __inline __m512
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4fnmadd_ps (__mmask16 __U,
__m512 __A, __m512 __B, __m512 __C,
__m512 __D, __m512 __E, __m128 *__F)
{
return (__m512) __builtin_ia32_4fnmaddps_mask ((__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(__v16sf) __A,
(const __v4sf *) __F,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_4fnmadd_ss (__m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fnmaddss ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_4fnmadd_ss (__m128 __A, __mmask8 __U, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) __A,
(__mmask8) __U);
}
extern __inline __m128
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_4fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C,
__m128 __D, __m128 __E, __m128 *__F)
{
return (__m128) __builtin_ia32_4fnmaddss_mask ((__v4sf) __B,
(__v4sf) __C,
(__v4sf) __D,
(__v4sf) __E,
(__v4sf) __A,
(const __v4sf *) __F,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U);
}
#ifdef __DISABLE_AVX5124FMAPS__
#undef __DISABLE_AVX5124FMAPS__
#pragma GCC pop_options
#endif /* __DISABLE_AVX5124FMAPS__ */
#endif /* _AVX5124FMAPSINTRIN_H_INCLUDED */
/* Copyright (C) 2015-2016 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#if !defined _IMMINTRIN_H_INCLUDED
# error "Never use <avx5124vnniwintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _AVX5124VNNIWINTRIN_H_INCLUDED
#define _AVX5124VNNIWINTRIN_H_INCLUDED
#ifndef __AVX5124VNNIW__
#pragma GCC push_options
#pragma GCC target("avx5124vnniw")
#define __DISABLE_AVX5124VNNIW__
#endif /* __AVX5124VNNIW__ */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4dpwssd_epi32 (__m512i __A, __m512i __B, __m512i __C,
__m512i __D, __m512i __E, __m128i *__F)
{
return (__m512i) __builtin_ia32_vp4dpwssd ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4dpwssd_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__m128i *__F)
{
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) __A,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4dpwssd_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__m128i *__F)
{
return (__m512i) __builtin_ia32_vp4dpwssd_mask ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) _mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_4dpwssds_epi32 (__m512i __A, __m512i __B, __m512i __C,
__m512i __D, __m512i __E, __m128i *__F)
{
return (__m512i) __builtin_ia32_vp4dpwssds ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_4dpwssds_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__m128i *__F)
{
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) __A,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_4dpwssds_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
__m512i __C, __m512i __D, __m512i __E,
__m128i *__F)
{
return (__m512i) __builtin_ia32_vp4dpwssds_mask ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F,
(__v16si) _mm512_setzero_ps (),
(__mmask16) __U);
}
#ifdef __DISABLE_AVX5124VNNIW__
#undef __DISABLE_AVX5124VNNIW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX5124VNNIW__ */
#endif /* _AVX5124VNNIWINTRIN_H_INCLUDED */
......@@ -112,6 +112,7 @@
;; f x87 register when 80387 floating point arithmetic is enabled
;; r SSE regs not requiring REX prefix when prefixes avoidance is enabled
;; and all SSE regs otherwise
;; h EVEX encodable SSE register with number factor of four
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
......@@ -160,6 +161,9 @@
"TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
(define_register_constraint "Yh" "TARGET_AVX512F ? MOD4_SSE_REGS : NO_REGS"
"@internal Any EVEX encodable SSE register, which has number factor of four.")
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG
;; g GOT memory operand.
......
......@@ -60,6 +60,8 @@
#define bit_MWAITX (1 << 29)
/* %edx */
#define bit_AVX5124VNNIW (1 << 2)
#define bit_AVX5124FMAPS (1 << 3)
#define bit_MMXEXT (1 << 22)
#define bit_LM (1 << 29)
#define bit_3DNOWP (1 << 30)
......
......@@ -414,6 +414,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0;
unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
bool arch;
......@@ -501,6 +502,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
has_prefetchwt1 = ecx & bit_PREFETCHWT1;
has_avx512vbmi = ecx & bit_AVX512VBMI;
has_pku = ecx & bit_OSPKE;
has_avx5124vnniw = edx & bit_AVX5124VNNIW;
has_avx5124fmaps = edx & bit_AVX5124FMAPS;
}
if (max_level >= 13)
......@@ -1021,6 +1024,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
......@@ -1033,8 +1038,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
fxsr, xsave, xsaveopt, avx512f, avx512er,
avx512cd, avx512pf, prefetchwt1, clflushopt,
xsavec, xsaves, avx512dq, avx512bw, avx512vl,
avx512ifma, avx512vbmi, clwb, mwaitx,
clzero, pku, NULL);
avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
clwb, mwaitx, clzero, pku, NULL);
}
done:
......
......@@ -526,6 +526,15 @@ DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, UNSIGNED, UNSIGNED, UNSIGNED)
DEF_FUNCTION_TYPE (VOID, PV8DI, V8DI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, V16SF, V16SF, PCV4SF, V16SF, UHI)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SF, V16SF, V16SF, PCV4SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, V4SF, V4SF, PCV4SF)
DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, V4SF, V4SF, PCV4SF, V4SF, UQI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, V16SI, V16SI, PCV4SI, V16SI, UHI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, V16SI, V16SI, PCV4SI)
# Instructions returning mask
DEF_FUNCTION_TYPE (UHI, UHI)
DEF_FUNCTION_TYPE (UHI, V16QI)
......
......@@ -2482,7 +2482,24 @@ BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__bui
BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
BDESC_END (ROUND_ARGS, MPX)
BDESC_END (ROUND_ARGS, ARGS2)
/* AVX512_4FMAPS and AVX512_4VNNIW builtins with variable number of arguments. Defined in additional ix86_isa_flags2. */
BDESC_FIRST (args2, ARGS2,
OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddps_mask, "__builtin_ia32_4fmaddps_mask", IX86_BUILTIN_4FMAPS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddps, "__builtin_ia32_4fmaddps", IX86_BUILTIN_4FMAPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF)
BDESC (OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddss, "__builtin_ia32_4fmaddss", IX86_BUILTIN_4FMASS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF)
BDESC (OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fmaddss_mask, "__builtin_ia32_4fmaddss_mask", IX86_BUILTIN_4FMASS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddps_mask, "__builtin_ia32_4fnmaddps_mask", IX86_BUILTIN_4FNMAPS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddps, "__builtin_ia32_4fnmaddps", IX86_BUILTIN_4FNMAPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_V16SF_V16SF_PCV4SF)
BDESC (OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddss, "__builtin_ia32_4fnmaddss", IX86_BUILTIN_4FNMASS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF)
BDESC (OPTION_MASK_ISA_AVX5124FMAPS, CODE_FOR_avx5124fmaddps_4fnmaddss_mask, "__builtin_ia32_4fnmaddss_mask", IX86_BUILTIN_4FNMASS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_V4SF_V4SF_PCV4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssd, "__builtin_ia32_vp4dpwssd", IX86_BUILTIN_4DPWSSD, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI)
BDESC (OPTION_MASK_ISA_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssd_mask, "__builtin_ia32_vp4dpwssd_mask", IX86_BUILTIN_4DPWSSD_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssds, "__builtin_ia32_vp4dpwssds", IX86_BUILTIN_4DPWSSDS, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI)
BDESC (OPTION_MASK_ISA_AVX5124VNNIW, CODE_FOR_avx5124vnniw_vp4dpwssds_mask, "__builtin_ia32_vp4dpwssds_mask", IX86_BUILTIN_4DPWSSDS_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_V16SI_V16SI_PCV4SI_V16SI_UHI)
BDESC_END (ARGS2, MPX)
/* Builtins for MPX. */
BDESC_FIRST (mpx, MPX,
......
......@@ -28,14 +28,14 @@ along with GCC; see the file COPYING3. If not see
static bool ix86_pragma_target_parse (tree, tree);
static void ix86_target_macros_internal
(HOST_WIDE_INT, enum processor_type, enum processor_type, enum fpmath_unit,
(HOST_WIDE_INT, HOST_WIDE_INT, enum processor_type, enum processor_type, enum fpmath_unit,
void (*def_or_undef) (cpp_reader *, const char *));
/* Internal function to either define or undef the appropriate system
macros. */
static void
ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
HOST_WIDE_INT isa_flag2,
enum processor_type arch,
enum processor_type tune,
enum fpmath_unit fpmath,
......@@ -376,6 +376,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVX512VBMI__");
if (isa_flag & OPTION_MASK_ISA_AVX512IFMA)
def_or_undef (parse_in, "__AVX512IFMA__");
if (isa_flag2 & OPTION_MASK_ISA_AVX5124VNNIW)
def_or_undef (parse_in, "__AVX5124VNNIW__");
if (isa_flag2 & OPTION_MASK_ISA_AVX5124FMAPS)
def_or_undef (parse_in, "__AVX5124FMAPS__");
if (isa_flag & OPTION_MASK_ISA_FMA)
def_or_undef (parse_in, "__FMA__");
if (isa_flag & OPTION_MASK_ISA_RTM)
......@@ -462,6 +466,9 @@ ix86_pragma_target_parse (tree args, tree pop_target)
HOST_WIDE_INT prev_isa;
HOST_WIDE_INT cur_isa;
HOST_WIDE_INT diff_isa;
HOST_WIDE_INT prev_isa2;
HOST_WIDE_INT cur_isa2;
HOST_WIDE_INT diff_isa2;
enum processor_type prev_arch;
enum processor_type prev_tune;
enum processor_type cur_arch;
......@@ -494,6 +501,9 @@ ix86_pragma_target_parse (tree args, tree pop_target)
prev_isa = prev_opt->x_ix86_isa_flags;
cur_isa = cur_opt->x_ix86_isa_flags;
diff_isa = (prev_isa ^ cur_isa);
prev_isa2 = prev_opt->x_ix86_isa_flags2;
cur_isa2 = cur_opt->x_ix86_isa_flags2;
diff_isa2 = (prev_isa2 ^ cur_isa2);
prev_arch = (enum processor_type) prev_opt->arch;
prev_tune = (enum processor_type) prev_opt->tune;
cur_arch = (enum processor_type) cur_opt->arch;
......@@ -509,6 +519,7 @@ ix86_pragma_target_parse (tree args, tree pop_target)
/* Undef all of the macros for that are no longer current. */
ix86_target_macros_internal (prev_isa & diff_isa,
prev_isa2 & diff_isa2,
prev_arch,
prev_tune,
(enum fpmath_unit) prev_opt->x_ix86_fpmath,
......@@ -523,6 +534,7 @@ ix86_pragma_target_parse (tree args, tree pop_target)
/* Define all of the macros for new options that were just turned on. */
ix86_target_macros_internal (cur_isa & diff_isa,
cur_isa2 & diff_isa2,
cur_arch,
cur_tune,
(enum fpmath_unit) cur_opt->x_ix86_fpmath,
......@@ -583,6 +595,7 @@ ix86_target_macros (void)
cpp_define (parse_in, "__GCC_ASM_FLAG_OUTPUTS__");
ix86_target_macros_internal (ix86_isa_flags,
ix86_isa_flags2,
ix86_arch,
ix86_tune,
ix86_fpmath,
......
......@@ -79,11 +79,12 @@ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */
VECTOR_MODES (INT, 64); /* V64QI V32HI V16SI V8DI */
VECTOR_MODES (INT, 128); /* V128QI V64HI V32SI V16DI */
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */
VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF */
VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF */
VECTOR_MODES (FLOAT, 8); /* V2SF */
VECTOR_MODES (FLOAT, 16); /* V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V8SF V4DF V2TF */
VECTOR_MODES (FLOAT, 64); /* V16SF V8DF V4TF */
VECTOR_MODES (FLOAT, 128); /* V32SF V16DF V8TF */
VECTOR_MODES (FLOAT, 256); /* V64SF V32DF V16TF */
VECTOR_MODE (INT, TI, 1); /* V1TI */
VECTOR_MODE (INT, DI, 1); /* V1DI */
VECTOR_MODE (INT, SI, 1); /* V1SI */
......@@ -91,6 +92,7 @@ VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (INT, QI, 12); /* V12QI */
VECTOR_MODE (INT, QI, 14); /* V14QI */
VECTOR_MODE (INT, HI, 6); /* V6HI */
VECTOR_MODE (INT, SI, 64); /* V64SI */
POINTER_BOUNDS_MODE (BND32, 8);
POINTER_BOUNDS_MODE (BND64, 16);
......
......@@ -81,6 +81,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_AVX512VBMI_P(x) TARGET_ISA_AVX512VBMI_P(x)
#define TARGET_AVX512IFMA TARGET_ISA_AVX512IFMA
#define TARGET_AVX512IFMA_P(x) TARGET_ISA_AVX512IFMA_P(x)
#define TARGET_AVX5124FMAPS TARGET_ISA_AVX5124FMAPS
#define TARGET_AVX5124FMAPS_P(x) TARGET_ISA_AVX5124FMAPS_P(x)
#define TARGET_AVX5124VNNIW TARGET_ISA_AVX5124VNNIW
#define TARGET_AVX5124VNNIW_P(x) TARGET_ISA_AVX5124VNNIW_P(x)
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x)
#define TARGET_SSE4A TARGET_ISA_SSE4A
......@@ -1089,7 +1093,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define HARD_REGNO_NREGS(REGNO, MODE) \
(STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
|| MASK_REGNO_P (REGNO) || BND_REGNO_P (REGNO) \
? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
? (COMPLEX_MODE_P (MODE) ? 2 : \
(((MODE == V64SFmode) || (MODE == V64SImode)) ? 4 : 1)) \
: ((MODE) == XFmode \
? (TARGET_64BIT ? 2 : 3) \
: ((MODE) == XCmode \
......@@ -1365,6 +1370,7 @@ enum reg_class
FLOAT_INT_SSE_REGS,
MASK_EVEX_REGS,
MASK_REGS,
MOD4_SSE_REGS,
ALL_REGS, LIM_REG_CLASSES
};
......@@ -1425,6 +1431,7 @@ enum reg_class
"FLOAT_INT_SSE_REGS", \
"MASK_EVEX_REGS", \
"MASK_REGS", \
"MOD4_SSE_REGS" \
"ALL_REGS" }
/* Define which registers fit in which classes. This is an initializer
......@@ -1465,9 +1472,10 @@ enum reg_class
{ 0x11ffff, 0x1fe0, 0x0 }, /* FLOAT_INT_REGS */ \
{ 0x1ff100ff,0xffffffe0, 0x1f }, /* INT_SSE_REGS */ \
{ 0x1ff1ffff,0xffffffe0, 0x1f }, /* FLOAT_INT_SSE_REGS */ \
{ 0x0, 0x0, 0x1fc0 }, /* MASK_EVEX_REGS */ \
{ 0x0, 0x0, 0x1fc0 }, /* MASK_EVEX_REGS */ \
{ 0x0, 0x0, 0x1fe0 }, /* MASK_REGS */ \
{ 0xffffffff,0xffffffff,0x1ffff } \
{ 0x1fe00000,0xffffe000, 0x1f }, /* MOD4_SSE_REGS */ \
{ 0xffffffff,0xffffffff,0x1ffff } \
}
/* The same information, inverted:
......@@ -1533,6 +1541,16 @@ enum reg_class
#define BND_REG_P(X) (REG_P (X) && BND_REGNO_P (REGNO (X)))
#define BND_REGNO_P(N) IN_RANGE ((N), FIRST_BND_REG, LAST_BND_REG)
#define MOD4_SSE_REG_P(X) (REG_P (X) && MOD4_SSE_REGNO_P (REGNO (X)))
#define MOD4_SSE_REGNO_P(N) ((N) == XMM0_REG \
|| (N) == XMM4_REG \
|| (N) == XMM8_REG \
|| (N) == XMM12_REG \
|| (N) == XMM16_REG \
|| (N) == XMM20_REG \
|| (N) == XMM24_REG \
|| (N) == XMM28_REG)
/* First floating point reg */
#define FIRST_FLOAT_REG FIRST_STACK_REG
#define STACK_TOP_P(X) (REG_P (X) && REGNO (X) == FIRST_FLOAT_REG)
......
......@@ -25,11 +25,17 @@ config/i386/i386-opts.h
Variable
HOST_WIDE_INT ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT
Variable
HOST_WIDE_INT ix86_isa_flags2 = 0
; A mask of ix86_isa_flags that includes bit X if X was set or cleared
; on the command line.
Variable
HOST_WIDE_INT ix86_isa_flags_explicit
Variable
HOST_WIDE_INT ix86_isa_flags2_explicit
; Additional target flags
Variable
int ix86_target_flags
......@@ -74,6 +80,10 @@ unsigned char branch_cost
;; which flags were passed by the user
TargetSave
HOST_WIDE_INT x_ix86_isa_flags2_explicit
;; which flags were passed by the user
TargetSave
HOST_WIDE_INT x_ix86_isa_flags_explicit
;; whether -mtune was not specified
......@@ -687,6 +697,14 @@ mavx512vbmi
Target Report Mask(ISA_AVX512VBMI) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VBMI built-in functions and code generation.
mavx5124fmaps
Target Report Mask(ISA_AVX5124FMAPS) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX5124FMAPS built-in functions and code generation.
mavx5124vnniw
Target Report Mask(ISA_AVX5124VNNIW) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX5124VNNIW built-in functions and code generation.
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation.
......
......@@ -68,6 +68,10 @@
#include <avx512vbmivlintrin.h>
#include <avx5124fmapsintrin.h>
#include <avx5124vnniwintrin.h>
#include <shaintrin.h>
#include <lzcntintrin.h>
......
......@@ -973,10 +973,10 @@ inline __attribute__((__always_inline__))\n\
#else\n\
extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
#endif\n\
unsigned char\n\
unsigned short\n\
mode_size_inline (machine_mode mode)\n\
{\n\
extern %sunsigned char mode_size[NUM_MACHINE_MODES];\n\
extern %sunsigned short mode_size[NUM_MACHINE_MODES];\n\
gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
switch (mode)\n\
{\n", adj_bytesize ? "" : "const ");
......@@ -1301,7 +1301,7 @@ emit_mode_size (void)
int c;
struct mode_data *m;
print_maybe_const_decl ("%sunsigned char", "mode_size",
print_maybe_const_decl ("%sunsigned short", "mode_size",
"NUM_MACHINE_MODES", bytesize);
for_all_modes (c, m)
......@@ -1492,7 +1492,7 @@ emit_mode_base_align (void)
int c;
struct mode_data *m;
print_maybe_const_decl ("%sunsigned char",
print_maybe_const_decl ("%sunsigned short",
"mode_base_align", "NUM_MACHINE_MODES",
alignment);
......
......@@ -104,6 +104,7 @@ initialize_uninitialized_regs (void)
bitmap_set_bit (already_genned, regno);
start_sequence ();
emit_clobber (reg);
emit_move_insn (reg, CONST0_RTX (GET_MODE (reg)));
move_insn = get_insns ();
end_sequence ();
......
......@@ -179,7 +179,7 @@ extern const unsigned char mode_class[NUM_MACHINE_MODES];
/* Get the size in bytes and bits of an object of mode MODE. */
extern CONST_MODE_SIZE unsigned char mode_size[NUM_MACHINE_MODES];
extern CONST_MODE_SIZE unsigned short mode_size[NUM_MACHINE_MODES];
#if GCC_VERSION >= 4001
#define GET_MODE_SIZE(MODE) \
((unsigned short) (__builtin_constant_p (MODE) \
......@@ -330,7 +330,7 @@ extern machine_mode get_best_mode (int, int,
/* Determine alignment, 1<=result<=BIGGEST_ALIGNMENT. */
extern CONST_MODE_BASE_ALIGN unsigned char mode_base_align[NUM_MACHINE_MODES];
extern CONST_MODE_BASE_ALIGN unsigned short mode_base_align[NUM_MACHINE_MODES];
extern unsigned get_mode_alignment (machine_mode);
......
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mclwb -mmwaitx -mclzero -mpku" } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mclwb -mmwaitx -mclzero -mpku" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h.h are usable with
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
avx5124vnniwintrin.h and mm_malloc.h.h are usable with
-O -pedantic-errors. */
#include <x86intrin.h>
......
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mclwb -mmwaitx -mclzero -mpku" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mclwb -mmwaitx -mclzero -mpku" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h are usable with
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
avx5124vnniwintrin.h and mm_malloc.h are usable with
-O -fkeep-inline-functions. */
#include <x86intrin.h>
/* { dg-do compile } */
/* { dg-options "-O2 -mavx5124fmaps" } */
/* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
__m512 a, b, c, d, e, f, g, x1, x2, x3;
__m128 *mem;
__mmask16 m;
int foo ()
{
x1 = _mm512_4fmadd_ps (a, b, c, d, e, mem);
x2 = _mm512_mask_4fmadd_ps (a, m, b, c, d, e, mem);
x3 = _mm512_maskz_4fmadd_ps (m, a, b, c, d, e, mem);
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx5124fmaps" } */
/* { dg-require-effective-target avx5124fmaps } */
#define ESP_FLOAT 1.0
#define AVX5124FMAPS
#include "avx512f-helper.h"
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
void
CALC (float *src1, float* src2, float *src3,
float *src4, float* prev_dst, float *mult, float *dst)
{
int i;
for (i = 0; i < SIZE; i++)
{
dst[i] = (double)prev_dst[i]
+ (double)src1[i] * (double)mult[0]
+ (double)src2[i] * (double)mult[1]
+ (double)src3[i] * (double)mult[2]
+ (double)src4[i] * (double)mult[3];
}
}
void
TEST (void)
{
int i, sign;
UNION_TYPE (AVX512F_LEN,) src1, src2, src3, src4, src5, dst, res1, res2, res3;
UNION_TYPE (128,) mult;
MASK_TYPE mask = MASK_VALUE;
float res_ref[SIZE];
sign = -1;
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign;
src3.a[i] = src1.a[i] * src1.a[i];
src4.a[i] = src2.a[i] * src2.a[i];
sign = sign * -1;
}
for (i = 0; i < 4; i++)
mult.a[i] = 3.1415 + i * 2.71828;
for (i = 0; i < SIZE; i++)
src5.a[i] = DEFAULT_VALUE;
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
res1.x = INTRINSIC (_4fmadd_ps) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
res2.x = INTRINSIC (_mask_4fmadd_ps) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
res3.x = INTRINSIC (_maskz_4fmadd_ps) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
if (UNION_FP_CHECK (AVX512F_LEN,) (res1, res_ref))
abort ();
MASK_MERGE () (res_ref, mask, SIZE);
if (UNION_FP_CHECK (AVX512F_LEN,) (res2, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, SIZE);
if (UNION_FP_CHECK (AVX512F_LEN,) (res3, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-O2 -mavx5124fmaps" } */
/* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
__m128 a, b, c, d, e, f, x1, x2, x3;
__m128 *mem;
__mmask8 m;
int foo ()
{
x1 = _mm_4fmadd_ss (a, b, c, d, e, mem);
x2 = _mm_mask_4fmadd_ss (a, m, b, c, d, e, mem);
x3 = _mm_maskz_4fmadd_ss (m, a, b, c, d, e, mem);
}
/* { dg-do compile } */
/* { dg-options "-O2 -mavx5124fmaps" } */
/* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fnmaddps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
__m512 a, b, c, d, e, f, g, x1, x2, x3;
__m128 *mem;
__mmask16 m;
int foo ()
{
x1 = _mm512_4fnmadd_ps (a, b, c, d, e, mem);
x2 = _mm512_mask_4fnmadd_ps (a, m, b, c, d, e, mem);
x3 = _mm512_maskz_4fnmadd_ps (m, a, b, c, d, e, mem);
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx5124fmaps" } */
/* { dg-require-effective-target avx5124fmaps } */
#define ESP_FLOAT 1.0
#define AVX5124FMAPS
#include "avx512f-helper.h"
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
void
CALC (float *src1, float* src2, float *src3,
float *src4, float* prev_dst, float *mult, float *dst)
{
int i;
for (i = 0; i < SIZE; i++)
{
dst[i] = (double)prev_dst[i]
- (double)src1[i] * (double)mult[0]
- (double)src2[i] * (double)mult[1]
- (double)src3[i] * (double)mult[2]
- (double)src4[i] * (double)mult[3];
}
}
void
TEST (void)
{
int i, sign;
UNION_TYPE (AVX512F_LEN,) src1, src2, src3, src4, src5, dst, res1, res2, res3;
UNION_TYPE (128,) mult;
MASK_TYPE mask = MASK_VALUE;
float res_ref[SIZE];
sign = -1;
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign;
src3.a[i] = src1.a[i] * src1.a[i];
src4.a[i] = src2.a[i] * src2.a[i];
sign = sign * -1;
}
for (i = 0; i < 4; i++)
mult.a[i] = 3.1415 + i * 2.71828;
for (i = 0; i < SIZE; i++)
src5.a[i] = DEFAULT_VALUE;
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
res1.x = INTRINSIC (_4fnmadd_ps) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
res2.x = INTRINSIC (_mask_4fnmadd_ps) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
res3.x = INTRINSIC (_maskz_4fnmadd_ps) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
if (UNION_FP_CHECK (AVX512F_LEN,) (res1, res_ref))
abort ();
MASK_MERGE () (res_ref, mask, SIZE);
if (UNION_FP_CHECK (AVX512F_LEN,) (res2, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, SIZE);
if (UNION_FP_CHECK (AVX512F_LEN,) (res3, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-O2 -mavx5124fmaps" } */
/* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "v4fnmaddss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
__m128 a, b, c, d, e, f, x1, x2, x3;
__m128 *mem;
__mmask8 m;
int foo ()
{
x1 = _mm_4fnmadd_ss (a, b, c, d, e, mem);
x2 = _mm_mask_4fnmadd_ss (a, m, b, c, d, e, mem);
x3 = _mm_maskz_4fnmadd_ss (m, a, b, c, d, e, mem);
}
#include <stdlib.h>
#include "cpuid.h"
#include "m512-check.h"
#include "avx512f-os-support.h"
static void avx5124fmaps_test (void);
static void __attribute__ ((noinline)) do_test (void)
{
avx5124fmaps_test ();
}
int
main ()
{
unsigned int eax, ebx, ecx, edx;
if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
return 0;
/* Run AVX512_4FMAPS test only if host has the support. */
if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
{
if (__get_cpuid_max (0, NULL) < 7)
return 0;
__cpuid_count (7, 0, eax, ebx, ecx, edx);
if ((avx512f_os_support ()) && ((edx & bit_AVX5124FMAPS) == bit_AVX5124FMAPS))
{
do_test ();
#ifdef DEBUG
printf ("PASSED\n");
#endif
return 0;
}
#ifdef DEBUG
printf ("SKIPPED\n");
#endif
}
#ifdef DEBUG
else
printf ("SKIPPED\n");
#endif
return 0;
}
#include <stdlib.h>
#include "cpuid.h"
#include "m512-check.h"
#include "avx512f-os-support.h"
static void avx5124vnniw_test (void);
static void __attribute__ ((noinline)) do_test (void)
{
avx5124vnniw_test ();
}
int
main ()
{
unsigned int eax, ebx, ecx, edx;
if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
return 0;
/* Run AVX512_4VNNIW test only if host has the support. */
if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE))
{
if (__get_cpuid_max (0, NULL) < 7)
return 0;
__cpuid_count (7, 0, eax, ebx, ecx, edx);
if ((avx512f_os_support ()) && ((edx & bit_AVX5124VNNIW) == bit_AVX5124VNNIW))
{
do_test ();
#ifdef DEBUG
printf ("PASSED\n");
#endif
return 0;
}
#ifdef DEBUG
printf ("SKIPPED\n");
#endif
}
#ifdef DEBUG
else
printf ("SKIPPED\n");
#endif
return 0;
}
/* { dg-do compile } */
/* { dg-options "-O2 -mavx5124vnniw" } */
/* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vp4dpwssd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
__m512i a, b, c, d, e, f, g, x1, x2, x3;
__m128i *mem;
__mmask16 m;
int foo ()
{
x1 = _mm512_4dpwssd_epi32 (a, b, c, d, e, mem);
x2 = _mm512_mask_4dpwssd_epi32 (a, m, b, c, d, e, mem);
x3 = _mm512_maskz_4dpwssd_epi32 (m, a, b, c, d, e, mem);
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx5124vnniw" } */
/* { dg-require-effective-target avx5124vnniw } */
#define AVX5124VNNIW
#include "avx512f-helper.h"
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
void
CALC (short *src1, short* src2, short *src3,
short *src4, int* prev_dst, short *mult, int *dst)
{
int i;
for (i = 0; i < SIZE; i++)
{
int p1dword, p2dword;
dst[i] = prev_dst[i];
p1dword = (int)(src1[2*i ]) * (int)(mult[0]);
p2dword = (int)(src1[2*i+1]) * (int)(mult[1]);
dst[i] += p1dword + p2dword;
p1dword = (int)(src2[2*i ]) * (int)(mult[2]);
p2dword = (int)(src2[2*i+1]) * (int)(mult[3]);
dst[i] += p1dword + p2dword;
p1dword = (int)(src3[2*i ]) * (int)(mult[4]);
p2dword = (int)(src3[2*i+1]) * (int)(mult[5]);
dst[i] += p1dword + p2dword;
p1dword = (int)(src4[2*i ]) * (int)(mult[6]);
p2dword = (int)(src4[2*i+1]) * (int)(mult[7]);
dst[i] += p1dword + p2dword;
}
}
void
TEST (void)
{
int i;
UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4;
UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3;
UNION_TYPE (128, i_w) mult;
MASK_TYPE mask = MASK_VALUE;
int res_ref[SIZE];
for (i = 0; i < SIZE * 2; i++)
{
src1.a[i] = 2 + 7 * i % 291;
src2.a[i] = 3 + 11 * (i % 377) * i;
src3.a[i] = src1.a[i] * src1.a[i];
src4.a[i] = src2.a[i] * src2.a[i];
}
for (i = 0; i < 8; i++)
mult.a[i] = 3 + i * 2;
for (i = 0; i < SIZE; i++)
src5.a[i] = DEFAULT_VALUE;
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
res1.x = INTRINSIC (_4dpwssd_epi32) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
res2.x = INTRINSIC (_mask_4dpwssd_epi32) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
res3.x = INTRINSIC (_maskz_4dpwssd_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
abort ();
}
/* { dg-do compile } */
/* { dg-options "-O2 -mavx5124vnniw" } */
/* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vp4dpwssds\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
__m512i a, b, c, d, e, f, g, x1, x2, x3;
__m128i *mem;
__mmask16 m;
int foo ()
{
x1 = _mm512_4dpwssds_epi32 (a, b, c, d, e, mem);
x2 = _mm512_mask_4dpwssds_epi32 (a, m, b, c, d, e, mem);
x3 = _mm512_maskz_4dpwssds_epi32 (m, a, b, c, d, e, mem);
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx5124vnniw" } */
/* { dg-require-effective-target avx5124vnniw } */
#define DEFAULT_VALUE 0x7ffffffe
#define AVX5124VNNIW
#include "avx512f-helper.h"
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
void
CALC (short *src1, short* src2, short *src3,
short *src4, int* prev_dst, short *mult, int *dst)
{
int i;
for (i = 0; i < SIZE; i++)
{
int p1dword, p2dword;
long long int tmp;
dst[i] = prev_dst[i];
p1dword = (int)(src1[2*i ]) * (int)(mult[0]);
p2dword = (int)(src1[2*i+1]) * (int)(mult[1]);
tmp = (long long)dst[i] + p1dword + p2dword;
if (tmp > 0x7fffffff)
dst[i] = 0x7fffffff;
else
dst[i] += p1dword + p2dword;
p1dword = (int)(src2[2*i ]) * (int)(mult[2]);
p2dword = (int)(src2[2*i+1]) * (int)(mult[3]);
tmp = (long long)dst[i] + p1dword + p2dword;
if (tmp > 0x7fffffff)
dst[i] = 0x7fffffff;
else
dst[i] += p1dword + p2dword;
p1dword = (int)(src3[2*i ]) * (int)(mult[4]);
p2dword = (int)(src3[2*i+1]) * (int)(mult[5]);
tmp = (long long)dst[i] + p1dword + p2dword;
if (tmp > 0x7fffffff)
dst[i] = 0x7fffffff;
else
dst[i] += p1dword + p2dword;
p1dword = (int)(src4[2*i ]) * (int)(mult[6]);
p2dword = (int)(src4[2*i+1]) * (int)(mult[7]);
tmp = (long long)dst[i] + p1dword + p2dword;
if (tmp > 0x7fffffff)
dst[i] = 0x7fffffff;
else
dst[i] += p1dword + p2dword;
}
}
void
TEST (void)
{
int i;
UNION_TYPE (AVX512F_LEN, i_w) src1, src2, src3, src4;
UNION_TYPE (AVX512F_LEN, i_d) src5, dst, res1, res2, res3;
UNION_TYPE (128, i_w) mult;
MASK_TYPE mask = MASK_VALUE;
int res_ref[SIZE];
for (i = 0; i < SIZE * 2; i++)
{
src1.a[i] = 2 + 7 * i % 291;
src2.a[i] = 3 + 11 * (i % 377) * i;
src3.a[i] = src1.a[i] * src1.a[i];
src4.a[i] = src2.a[i] * src2.a[i];
}
for (i = 0; i < 8; i++)
mult.a[i] = 3 + i * 2;
for (i = 0; i < SIZE; i++)
src5.a[i] = DEFAULT_VALUE;
CALC (src1.a, src2.a, src3.a, src4.a, src5.a, mult.a, res_ref);
res1.x = INTRINSIC (_4dpwssds_epi32) ( src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
res2.x = INTRINSIC (_mask_4dpwssds_epi32) (src5.x, mask, src1.x, src2.x, src3.x, src4.x, &mult.x);
res3.x = INTRINSIC (_maskz_4dpwssds_epi32) (mask, src5.x, src1.x, src2.x, src3.x, src4.x, &mult.x);
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
abort ();
}
......@@ -22,6 +22,10 @@
#include "avx512ifma-check.h"
#elif defined (AVX512VBMI) && !defined (AVX512VL)
#include "avx512vbmi-check.h"
#elif defined (AVX5124FMAPS) && !defined (AVX512VL)
#include "avx5124fmaps-check.h"
#elif defined (AVX5124VNNIW) && !defined (AVX512VL)
#include "avx5124vnniw-check.h"
#elif defined (AVX512VL)
#include "avx512vl-check.h"
#endif
......@@ -33,7 +37,9 @@
/* Value to be written into destination.
We have one value for all types so it must be small enough
to fit into signed char. */
#ifndef DEFAULT_VALUE
#define DEFAULT_VALUE 117
#endif
#define MAKE_MASK_MERGE(NAME, TYPE) \
static void \
......@@ -132,6 +138,12 @@ avx512ifma_test (void) { test_512 (); }
#elif defined (AVX512VBMI) && !defined (AVX512VL)
void
avx512vbmi_test (void) { test_512 (); }
#elif defined (AVX5124FMAPS) && !defined (AVX512VL)
void
avx5124fmaps_test (void) { test_512 (); }
#elif defined (AVX5124VNNIW) && !defined (AVX512VL)
void
avx5124vnniw_test (void) { test_512 (); }
#elif defined (AVX512VL)
void
avx512vl_test (void) { test_256 (); test_128 (); }
......
......@@ -366,6 +366,48 @@ proc check_effective_target_avx512vbmi { } {
} "-mavx512vbmi" ]
}
# Return 1 if avx512_4fmaps instructions can be compiled.
proc check_effective_target_avx5124fmaps { } {
return [check_no_compiler_messages avx5124fmaps object {
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
__v16sf
_mm512_mask_4fmadd_ps (__v16sf __DEST, __v16sf __A, __v16sf __B, __v16sf __C,
__v16sf __D, __v16sf __E, __v4sf *__F)
{
return (__v16sf) __builtin_ia32_4fmaddps_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __C,
(__v16sf) __D,
(__v16sf) __E,
(const __v4sf *) __F,
(__v16sf) __DEST,
0xffff);
}
} "-mavx5124fmaps" ]
}
# Return 1 if avx512_4vnniw instructions can be compiled.
proc check_effective_target_avx5124vnniw { } {
return [check_no_compiler_messages avx5124vnniw object {
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef int __v4si __attribute__ ((__vector_size__ (16)));
__v16si
_mm512_4dpwssd_epi32 (__v16si __A, __v16si __B, __v16si __C,
__v16si __D, __v16si __E, __v4si *__F)
{
return (__v16si) __builtin_ia32_vp4dpwssd ((__v16si) __B,
(__v16si) __C,
(__v16si) __D,
(__v16si) __E,
(__v16si) __A,
(const __v4si *) __F);
}
} "-mavx5124vnniw" ]
}
# If a testcase doesn't have special options, use these.
global DEFAULT_CFLAGS
if ![info exists DEFAULT_CFLAGS] then {
......
......@@ -108,8 +108,12 @@ CHECK_EXP (union128d, double, "%f")
CHECK_EXP (union128, float, "%f")
#ifndef ESP_FLOAT
#define ESP_FLOAT 0.000001
#endif
#ifndef ESP_DOUBLE
#define ESP_DOUBLE 0.000001
#endif
#define CHECK_ARRAY(ARRAY, TYPE, FMT) \
static int \
__attribute__((noinline, unused)) \
......
......@@ -3,7 +3,7 @@
popcntintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mclwb -mmwaitx -mclzero -mpku" } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mclwb -mmwaitx -mclzero -mpku" } */
#include <x86intrin.h>
......
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mclwb -mmwaitx -mclzero -mpku" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mclwb -mmwaitx -mclzero -mpku" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
......
......@@ -8,7 +8,8 @@
/* Test that the intrinsics compile with optimization. All of them
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h,
avx5124fmapsintrin.h, avx5124vnniwintrin.h and mm_malloc.h
that reference the proper builtin functions.
Defining away "extern" and "__inline" results in all of them being
......@@ -100,7 +101,7 @@
#ifndef DIFFERENT_PRAGMAS
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw")
#endif
/* Following intrinsics require immediate arguments. They
......
......@@ -7,7 +7,8 @@
/* Test that the intrinsics compile with optimization. All of them
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h,
avx5124fmapsintrin.h, avx5124vnniwintrin.h and mm_malloc.h
that reference the proper builtin functions.
Defining away "extern" and "__inline" results in all of them being
......@@ -594,6 +595,6 @@
#define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D)
#define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,clwb,mwaitx,clzero,pku")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,clwb,mwaitx,clzero,pku")
#include <x86intrin.h>
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment