Commit 31cfd832 by Thomas Koenig

re PR libfortran/78379 (Processor-specific versions for matmul)

2016-12-03  Thomas Koenig  <tkoenig@gcc.gnu.org>

        PR fortran/78379
        * config/i386/cpuinfo.c:  Move denums for processor vendors,
        processor type, processor subtypes and declaration of
        struct __processor_model into
        * config/i386/cpuinfo.h:  New header file.
        * Makefile.am:  Add dependence of m4/matmul_internal_m4 to
        mamtul files..
        * Makefile.in:  Regenerated.
        * acinclude.m4:  Check for AVX, AVX2 and AVX512F.
        * config.h.in:  Add HAVE_AVX, HAVE_AVX2 and HAVE_AVX512F.
        * configure:  Regenerated.
        * configure.ac:  Use checks for AVX, AVX2 and AVX_512F.
        * m4/matmul_internal.m4:  New file. working part of matmul.m4.
        * m4/matmul.m4:  Implement architecture-specific switching
        for AVX, AVX2 and AVX512F by including matmul_internal.m4
        multiple times.
        * generated/matmul_c10.c: Regenerated.
        * generated/matmul_c16.c: Regenerated.
        * generated/matmul_c4.c: Regenerated.
        * generated/matmul_c8.c: Regenerated.
        * generated/matmul_i1.c: Regenerated.
        * generated/matmul_i16.c: Regenerated.
        * generated/matmul_i2.c: Regenerated.
        * generated/matmul_i4.c: Regenerated.
        * generated/matmul_i8.c: Regenerated.
        * generated/matmul_r10.c: Regenerated.
        * generated/matmul_r16.c: Regenerated.
        * generated/matmul_r4.c: Regenerated.
        * generated/matmul_r8.c: Regenerated.

From-SVN: r243219
parent 802583a2
2016-12-03 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/78379
* config/i386/cpuinfo.c: Move denums for processor vendors,
processor type, processor subtypes and declaration of
struct __processor_model into
* config/i386/cpuinfo.h: New header file.
2016-12-02 Andre Vieira <andre.simoesdiasvieira@arm.com>
Thomas Preud'homme <thomas.preudhomme@arm.com>
......
......@@ -26,6 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include "cpuid.h"
#include "tsystem.h"
#include "auto-target.h"
#include "cpuinfo.h"
#ifdef HAVE_INIT_PRIORITY
#define CONSTRUCTOR_PRIORITY (101)
......@@ -36,97 +37,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
int __cpu_indicator_init (void)
__attribute__ ((constructor CONSTRUCTOR_PRIORITY));
/* Processor Vendor and Models. */
enum processor_vendor
{
VENDOR_INTEL = 1,
VENDOR_AMD,
VENDOR_OTHER,
VENDOR_MAX
};
/* Any new types or subtypes have to be inserted at the end. */
enum processor_types
{
INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
INTEL_SILVERMONT,
INTEL_KNL,
AMD_BTVER1,
AMD_BTVER2,
AMDFAM17H,
CPU_TYPE_MAX
};
enum processor_subtypes
{
INTEL_COREI7_NEHALEM = 1,
INTEL_COREI7_WESTMERE,
INTEL_COREI7_SANDYBRIDGE,
AMDFAM10H_BARCELONA,
AMDFAM10H_SHANGHAI,
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
CPU_SUBTYPE_MAX
};
/* ISA Features supported. New features have to be inserted at the end. */
enum processor_features
{
FEATURE_CMOV = 0,
FEATURE_MMX,
FEATURE_POPCNT,
FEATURE_SSE,
FEATURE_SSE2,
FEATURE_SSE3,
FEATURE_SSSE3,
FEATURE_SSE4_1,
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
FEATURE_SSE4_A,
FEATURE_FMA4,
FEATURE_XOP,
FEATURE_FMA,
FEATURE_AVX512F,
FEATURE_BMI,
FEATURE_BMI2,
FEATURE_AES,
FEATURE_PCLMUL,
FEATURE_AVX512VL,
FEATURE_AVX512BW,
FEATURE_AVX512DQ,
FEATURE_AVX512CD,
FEATURE_AVX512ER,
FEATURE_AVX512PF,
FEATURE_AVX512VBMI,
FEATURE_AVX512IFMA,
FEATURE_AVX5124VNNIW,
FEATURE_AVX5124FMAPS
};
struct __processor_model
{
unsigned int __cpu_vendor;
unsigned int __cpu_type;
unsigned int __cpu_subtype;
unsigned int __cpu_features[1];
} __cpu_model = { };
struct __processor_model __cpu_model = { };
/* Get the specific type of AMD CPU. */
......
/* Get CPU type and Features for x86 processors.
Copyright (C) 2012-2016 Free Software Foundation, Inc.
Contributed by Sriraman Tallam (tmsriram@google.com)
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Processor Vendor and Models. */
enum processor_vendor
{
VENDOR_INTEL = 1,
VENDOR_AMD,
VENDOR_OTHER,
VENDOR_MAX
};
/* Any new types or subtypes have to be inserted at the end. */
enum processor_types
{
INTEL_BONNELL = 1,
INTEL_CORE2,
INTEL_COREI7,
AMDFAM10H,
AMDFAM15H,
INTEL_SILVERMONT,
INTEL_KNL,
AMD_BTVER1,
AMD_BTVER2,
AMDFAM17H,
CPU_TYPE_MAX
};
enum processor_subtypes
{
INTEL_COREI7_NEHALEM = 1,
INTEL_COREI7_WESTMERE,
INTEL_COREI7_SANDYBRIDGE,
AMDFAM10H_BARCELONA,
AMDFAM10H_SHANGHAI,
AMDFAM10H_ISTANBUL,
AMDFAM15H_BDVER1,
AMDFAM15H_BDVER2,
AMDFAM15H_BDVER3,
AMDFAM15H_BDVER4,
AMDFAM17H_ZNVER1,
INTEL_COREI7_IVYBRIDGE,
INTEL_COREI7_HASWELL,
INTEL_COREI7_BROADWELL,
INTEL_COREI7_SKYLAKE,
INTEL_COREI7_SKYLAKE_AVX512,
CPU_SUBTYPE_MAX
};
/* ISA Features supported. New features have to be inserted at the end. */
enum processor_features
{
FEATURE_CMOV = 0,
FEATURE_MMX,
FEATURE_POPCNT,
FEATURE_SSE,
FEATURE_SSE2,
FEATURE_SSE3,
FEATURE_SSSE3,
FEATURE_SSE4_1,
FEATURE_SSE4_2,
FEATURE_AVX,
FEATURE_AVX2,
FEATURE_SSE4_A,
FEATURE_FMA4,
FEATURE_XOP,
FEATURE_FMA,
FEATURE_AVX512F,
FEATURE_BMI,
FEATURE_BMI2,
FEATURE_AES,
FEATURE_PCLMUL,
FEATURE_AVX512VL,
FEATURE_AVX512BW,
FEATURE_AVX512DQ,
FEATURE_AVX512CD,
FEATURE_AVX512ER,
FEATURE_AVX512PF,
FEATURE_AVX512VBMI,
FEATURE_AVX512IFMA,
FEATURE_AVX5124VNNIW,
FEATURE_AVX5124FMAPS
};
extern struct __processor_model
{
unsigned int __cpu_vendor;
unsigned int __cpu_type;
unsigned int __cpu_subtype;
unsigned int __cpu_features[1];
} __cpu_model;
2016-12-03 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/78379
* Makefile.am: Add dependence of m4/matmul_internal_m4 to
mamtul files..
* Makefile.in: Regenerated.
* acinclude.m4: Check for AVX, AVX2 and AVX512F.
* config.h.in: Add HAVE_AVX, HAVE_AVX2 and HAVE_AVX512F.
* configure: Regenerated.
* configure.ac: Use checks for AVX, AVX2 and AVX_512F.
* m4/matmul_internal.m4: New file. working part of matmul.m4.
* m4/matmul.m4: Implement architecture-specific switching
for AVX, AVX2 and AVX512F by including matmul_internal.m4
multiple times.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Regenerated.
* generated/matmul_c4.c: Regenerated.
* generated/matmul_c8.c: Regenerated.
* generated/matmul_i1.c: Regenerated.
* generated/matmul_i16.c: Regenerated.
* generated/matmul_i2.c: Regenerated.
* generated/matmul_i4.c: Regenerated.
* generated/matmul_i8.c: Regenerated.
* generated/matmul_r10.c: Regenerated.
* generated/matmul_r16.c: Regenerated.
* generated/matmul_r4.c: Regenerated.
* generated/matmul_r8.c: Regenerated.
2016-11-30 Andre Vehreschild <vehre@gcc.gnu.org>
* caf/single.c (_gfortran_caf_get_by_ref): Prevent compile time
......
......@@ -987,7 +987,7 @@ $(i_product_c): m4/product.m4 $(I_M4_DEPS1)
$(i_sum_c): m4/sum.m4 $(I_M4_DEPS1)
$(M4) -Dfile=$@ -I$(srcdir)/m4 sum.m4 > $@
$(i_matmul_c): m4/matmul.m4 $(I_M4_DEPS)
$(i_matmul_c): m4/matmul.m4 m4/matmul_internal.m4 $(I_M4_DEPS)
$(M4) -Dfile=$@ -I$(srcdir)/m4 matmul.m4 > $@
$(i_matmull_c): m4/matmull.m4 $(I_M4_DEPS)
......
......@@ -6053,7 +6053,7 @@ fpu-target.inc: fpu-target.h $(srcdir)/libgfortran.h
@MAINTAINER_MODE_TRUE@$(i_sum_c): m4/sum.m4 $(I_M4_DEPS1)
@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 sum.m4 > $@
@MAINTAINER_MODE_TRUE@$(i_matmul_c): m4/matmul.m4 $(I_M4_DEPS)
@MAINTAINER_MODE_TRUE@$(i_matmul_c): m4/matmul.m4 m4/matmul_internal.m4 $(I_M4_DEPS)
@MAINTAINER_MODE_TRUE@ $(M4) -Dfile=$@ -I$(srcdir)/m4 matmul.m4 > $@
@MAINTAINER_MODE_TRUE@$(i_matmull_c): m4/matmull.m4 $(I_M4_DEPS)
......
......@@ -393,3 +393,54 @@ AC_DEFUN([LIBGFOR_CHECK_STRERROR_R], [
[Define if strerror_r takes two arguments and is available in <string.h>.]),)
CFLAGS="$ac_save_CFLAGS"
])
dnl Check for AVX
AC_DEFUN([LIBGFOR_CHECK_AVX], [
ac_save_CFLAGS="$CFLAGS"
CFLAGS="-O2 -mavx"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
void _mm256_zeroall (void)
{
__builtin_ia32_vzeroall ();
}]], [[]])],
AC_DEFINE(HAVE_AVX, 1,
[Define if AVX instructions can be compiled.]),
[])
CFLAGS="$ac_save_CFLAGS"
])
dnl Check for AVX2
AC_DEFUN([LIBGFOR_CHECK_AVX2], [
ac_save_CFLAGS="$CFLAGS"
CFLAGS="-O2 -mavx2"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
__v4di
mm256_is32_andnotsi256 (__v4di __X, __v4di __Y)
{
return __builtin_ia32_andnotsi256 (__X, __Y);
}]], [[]])],
AC_DEFINE(HAVE_AVX2, 1,
[Define if AVX2 instructions can be compiled.]),
[])
CFLAGS="$ac_save_CFLAGS"
])
dnl Check for AVX512f
AC_DEFUN([LIBGFOR_CHECK_AVX512F], [
ac_save_CFLAGS="$CFLAGS"
CFLAGS="-O2 -mavx512f"
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
typedef double __m512d __attribute__ ((__vector_size__ (64)));
__m512d _mm512_add (__m512d a)
{
return __builtin_ia32_addpd512_mask (a, a, a, 1, 4);
}]], [[]])],
AC_DEFINE(HAVE_AVX512F, 1,
[Define if AVX512f instructions can be compiled.]),
[])
CFLAGS="$ac_save_CFLAGS"
])
......@@ -78,6 +78,15 @@
/* Define to 1 if the target supports __attribute__((visibility(...))). */
#undef HAVE_ATTRIBUTE_VISIBILITY
/* Define if AVX instructions can be compiled. */
#undef HAVE_AVX
/* Define if AVX2 instructions can be compiled. */
#undef HAVE_AVX2
/* Define if AVX512f instructions can be compiled. */
#undef HAVE_AVX512F
/* Define to 1 if you have the `cabs' function. */
#undef HAVE_CABS
......
......@@ -26174,6 +26174,93 @@ $as_echo "#define HAVE_CRLF 1" >>confdefs.h
fi
# Check whether we support AVX extensions
ac_save_CFLAGS="$CFLAGS"
CFLAGS="-O2 -mavx"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
void _mm256_zeroall (void)
{
__builtin_ia32_vzeroall ();
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
$as_echo "#define HAVE_AVX 1" >>confdefs.h
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CFLAGS="$ac_save_CFLAGS"
# Check wether we support AVX2 extensions
ac_save_CFLAGS="$CFLAGS"
CFLAGS="-O2 -mavx2"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
typedef long long __v4di __attribute__ ((__vector_size__ (32)));
__v4di
mm256_is32_andnotsi256 (__v4di __X, __v4di __Y)
{
return __builtin_ia32_andnotsi256 (__X, __Y);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
$as_echo "#define HAVE_AVX2 1" >>confdefs.h
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CFLAGS="$ac_save_CFLAGS"
# Check wether we support AVX512f extensions
ac_save_CFLAGS="$CFLAGS"
CFLAGS="-O2 -mavx512f"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
typedef double __m512d __attribute__ ((__vector_size__ (64)));
__m512d _mm512_add (__m512d a)
{
return __builtin_ia32_addpd512_mask (a, a, a, 1, 4);
}
int
main ()
{
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
$as_echo "#define HAVE_AVX512F 1" >>confdefs.h
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
CFLAGS="$ac_save_CFLAGS"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
# tests run on this system so they can be shared between configure
......
......@@ -609,6 +609,15 @@ LIBGFOR_CHECK_UNLINK_OPEN_FILE
# Check whether line terminator is LF or CRLF
LIBGFOR_CHECK_CRLF
# Check whether we support AVX extensions
LIBGFOR_CHECK_AVX
# Check wether we support AVX2 extensions
LIBGFOR_CHECK_AVX2
# Check wether we support AVX512f extensions
LIBGFOR_CHECK_AVX512F
AC_CACHE_SAVE
if test ${multilib} = yes; then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment