Commit 2ddd46d6 by Ilya Tocar Committed by H.J. Lu

Add FMA intrinsics and testcases.

gcc/

2011-08-30  Ilya Tocar  <ilya.tocar@intel.com>

	* config/i386/fmaintrin.h: New.
	* config.gcc: Add fmaintrin.h.
	* config/i386/i386.c
	(enum ix86_builtins) <IX86_BUILTIN_VFMADDSS3>: New.
	<IX86_BUILTIN_VFMADDSD3>: Likewise.
	* config/i386/sse.md (fmai_vmfmadd_<mode>): New.
	(*fmai_fmadd_<mode>): Likewise.
	(*fmai_fmsub_<mode>): Likewise.
	(*fmai_fnmadd_<mode>): Likewise.
	(*fmai_fnmsub_<mode>): Likewise.
	* config/i386/immintrin.h: Add fmaintrin.h.

gcc/testsuite/

2011-08-30  Ilya Tocar <ilya.tocar@intel.com>

	* gcc.target/i386/fma-check.h: New.
	* gcc.target/i386/fma-256-fmaddXX.c: New testcase.
	* gcc.target/i386/fma-256-fmaddsubXX.c: Likewise.
	* gcc.target/i386/fma-256-fmsubXX.c: Likewise.
	* gcc.target/i386/fma-256-fmsubaddXX.c: Likewise.
	* gcc.target/i386/fma-256-fnmaddXX.c: Likewise.
	* gcc.target/i386/fma-256-fnmsubXX.c: Likewise.
	* gcc.target/i386/fma-fmaddXX.c: Likewise.
	* gcc.target/i386/fma-fmaddsubXX.c: Likewise.
	* gcc.target/i386/fma-fmsubXX.c: Likewise.
	* gcc.target/i386/fma-fmsubaddXX.c: Likewise.
	* gcc.target/i386/fma-fnmaddXX.c: Likewise.
	* gcc.target/i386/fma-fnmsubXX.c: Likewise.
	* gcc.target/i386/fma-compile.c: Likewise.
	* gcc.target/i386/i386.exp (check_effective_target_fma): New.
	* gcc.target/i386/sse-12.c: Add -mfma.
	* gcc.target/i386/sse-13.c: Likewise.
	* gcc.target/i386/sse-14.c: Likewise.
	* gcc.target/i386/sse-22.c: Likewise.
	* gcc.target/i386/sse-23.c: Likewise.
	* g++.dg/other/i386-2.C: Likewise.
	* g++.dg/other/i386-3.C: Likewise.

From-SVN: r178311
parent c199ccf7
2011-08-30 Ilya Tocar <ilya.tocar@intel.com>
* config/i386/fmaintrin.h: New.
* config.gcc: Add fmaintrin.h.
* config/i386/i386.c
(enum ix86_builtins) <IX86_BUILTIN_VFMADDSS3>: New.
<IX86_BUILTIN_VFMADDSD3>: Likewise.
* config/i386/sse.md (fmai_vmfmadd_<mode>): New.
(*fmai_fmadd_<mode>): Likewise.
(*fmai_fmsub_<mode>): Likewise.
(*fmai_fnmadd_<mode>): Likewise.
(*fmai_fnmsub_<mode>): Likewise.
* config/i386/immintrin.h: Add fmaintrin.h.
2011-08-30 Bernd Schmidt <bernds@codesourcery.com>
* genautomata.c (NO_COMB_OPTION): New macro.
......@@ -353,7 +353,7 @@ i[34567]86-*-*)
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h
avx2intrin.h"
avx2intrin.h fmaintrin.h"
;;
x86_64-*-*)
cpu_type=i386
......@@ -366,7 +366,7 @@ x86_64-*-*)
immintrin.h x86intrin.h avxintrin.h xopintrin.h
ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h
avx2intrin.h"
avx2intrin.h fmaintrin.h"
need_64bit_hwint=yes
;;
ia64-*-*)
......
......@@ -24055,7 +24055,7 @@ enum ix86_builtins
IX86_BUILTIN_VEC_PERM_V4DF,
IX86_BUILTIN_VEC_PERM_V8SF,
/* FMA4 and XOP instructions. */
/* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD,
IX86_BUILTIN_VFMADDPS,
......@@ -24067,6 +24067,11 @@ enum ix86_builtins
IX86_BUILTIN_VFMADDSUBPS256,
IX86_BUILTIN_VFMADDSUBPD256,
/* FMA3 instructions. */
IX86_BUILTIN_VFMADDSS3,
IX86_BUILTIN_VFMADDSD3,
/* XOP instructions. */
IX86_BUILTIN_VPCMOV,
IX86_BUILTIN_VPCMOV_V2DI,
IX86_BUILTIN_VPCMOV_V4SI,
......@@ -25450,6 +25455,13 @@ static const struct builtin_description bdesc_multi_arg[] =
"__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
"__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
UNKNOWN, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
"__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
UNKNOWN, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
"__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
UNKNOWN, (int)MULTI_ARG_3_SF },
......@@ -72,6 +72,10 @@
#include <bmi2intrin.h>
#endif
#ifdef __FMA__
#include <fmaintrin.h>
#endif
#ifdef __RDRND__
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
......
......@@ -1719,6 +1719,89 @@
operands[4] = CONST0_RTX (<MODE>mode);
})
;; Expander for the scalar FMA builtins (used as CODE_FOR_fmai_vmfmadd_v4sf
;; and CODE_FOR_fmai_vmfmadd_v2df).  Element 0 of the result is
;; fma (op1, op2, op3); the vec_merge mask (const_int 1) takes every other
;; element from the second vec_merge arm, which here is operand 0 itself.
;; NOTE(review): the merge source is the destination (match_dup 0), not
;; operand 1 -- confirm the upper elements end up as the intrinsic expects.
(define_expand "fmai_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
(fma:VF_128
(match_operand:VF_128 1 "nonimmediate_operand")
(match_operand:VF_128 2 "nonimmediate_operand")
(match_operand:VF_128 3 "nonimmediate_operand"))
(match_dup 0)
(const_int 1)))]
"TARGET_FMA")
;; Scalar fmadd: element 0 = op1 * op2 + op3, other elements merged from
;; operand 0.  Operands 1 and 2 are marked commutative ("%").
;; Alternatives, in order:
;;   0: op1 tied to the destination, op2 reg/mem, op3 reg   -> vfmadd132
;;   1: op1 tied to the destination, op2 reg, op3 reg/mem   -> vfmadd213
;;   2: op3 tied to the destination, op1 reg, op2 reg/mem   -> vfmadd231
(define_insn "*fmai_fmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
(vec_merge:VF_128
(fma:VF_128
(match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
(match_dup 0)
(const_int 1)))]
"TARGET_FMA"
"@
vfmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Scalar fmsub: element 0 = op1 * op2 - op3 (operand 3 is wrapped in neg),
;; other elements merged from operand 0.  Operands 1 and 2 are marked
;; commutative ("%").  Alternatives, in order:
;;   0: op1 tied to the destination, op2 reg/mem, op3 reg   -> vfmsub132
;;   1: op1 tied to the destination, op2 reg, op3 reg/mem   -> vfmsub213
;;   2: op3 tied to the destination, op1 reg, op2 reg/mem   -> vfmsub231
(define_insn "*fmai_fmsub_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
(vec_merge:VF_128
(fma:VF_128
(match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x")
(match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
(neg:VF_128
(match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
(match_dup 0)
(const_int 1)))]
"TARGET_FMA"
"@
vfmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Scalar fnmadd: element 0 = -op1 * op2 + op3 (operand 1 is wrapped in
;; neg), other elements merged from operand 0.  Operands 1 and 2 are marked
;; commutative ("%").  Alternatives, in order:
;;   0: op1 tied to the destination, op2 reg/mem, op3 reg   -> vfnmadd132
;;   1: op1 tied to the destination, op2 reg, op3 reg/mem   -> vfnmadd213
;;   2: op3 tied to the destination, op1 reg, op2 reg/mem   -> vfnmadd231
(define_insn "*fmai_fnmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
(match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
(match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
(match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0"))
(match_dup 0)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmadd132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfnmadd213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfnmadd231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
;; Scalar fnmsub: element 0 = -op1 * op2 - op3 (operands 1 and 3 are both
;; wrapped in neg), other elements merged from operand 0.  Operands 1 and 2
;; are marked commutative ("%").  Alternatives, in order:
;;   0: op1 tied to the destination, op2 reg/mem, op3 reg   -> vfnmsub132
;;   1: op1 tied to the destination, op2 reg, op3 reg/mem   -> vfnmsub213
;;   2: op3 tied to the destination, op1 reg, op2 reg/mem   -> vfnmsub231
(define_insn "*fmai_fnmsub_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x,x")
(vec_merge:VF_128
(fma:VF_128
(neg:VF_128
(match_operand:VF_128 1 "nonimmediate_operand" "%0, 0,x"))
(match_operand:VF_128 2 "nonimmediate_operand" "xm, x,xm")
(neg:VF_128
(match_operand:VF_128 3 "nonimmediate_operand" " x,xm,0")))
(match_dup 0)
(const_int 1)))]
"TARGET_FMA"
"@
vfnmsub132<ssescalarmodesuffix>\t{%2, %3, %0|%0, %3, %2}
vfnmsub213<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
vfnmsub231<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
......
2011-08-30 Ilya Tocar <ilya.tocar@intel.com>
* gcc.target/i386/fma-check.h: New.
* gcc.target/i386/fma-256-fmaddXX.c: New testcase.
* gcc.target/i386/fma-256-fmaddsubXX.c: Likewise.
* gcc.target/i386/fma-256-fmsubXX.c: Likewise.
* gcc.target/i386/fma-256-fmsubaddXX.c: Likewise.
* gcc.target/i386/fma-256-fnmaddXX.c: Likewise.
* gcc.target/i386/fma-256-fnmsubXX.c: Likewise.
* gcc.target/i386/fma-fmaddXX.c: Likewise.
* gcc.target/i386/fma-fmaddsubXX.c: Likewise.
* gcc.target/i386/fma-fmsubXX.c: Likewise.
* gcc.target/i386/fma-fmsubaddXX.c: Likewise.
* gcc.target/i386/fma-fnmaddXX.c: Likewise.
* gcc.target/i386/fma-fnmsubXX.c: Likewise.
* gcc.target/i386/fma-compile.c: Likewise.
* gcc.target/i386/i386.exp (check_effective_target_fma): New.
* gcc.target/i386/sse-12.c: Add -mfma.
* gcc.target/i386/sse-13.c: Likewise.
* gcc.target/i386/sse-14.c: Likewise.
* gcc.target/i386/sse-22.c: Likewise.
* gcc.target/i386/sse-23.c: Likewise.
* g++.dg/other/i386-2.C: Likewise.
* g++.dg/other/i386-3.C: Likewise.
2011-08-30 Kirill Yukhin <kirill.yukhin@intel.com>
PR testsuite/50185
......
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h and mm_malloc.h.h are usable with -O -pedantic-errors. */
popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with
-O -pedantic-errors. */
#include <x86intrin.h>
......
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h and mm_malloc.h are usable with
popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with
-O -fkeep-inline-functions. */
#include <x86intrin.h>
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm256_fmadd_pd (A, B, C) with a per-element A * B + C
   reference; abort if check_union256d returns nonzero.  */
void
check_mm256_fmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
  union256d va, vb, vc, got;
  double want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmadd_pd (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] + vc.a[k];

  if (check_union256d (got, want))
    abort ();
}
/* Compare _mm256_fmadd_ps (A, B, C) with a per-element A * B + C
   reference; abort if check_union256 returns nonzero.  */
void
check_mm256_fmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
  union256 va, vb, vc, got;
  float want[8];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmadd_ps (__A, __B, __C);

  for (k = 0; k < 8; k++)
    want[k] = va.a[k] * vb.a[k] + vc.a[k];

  if (check_union256 (got, want))
    abort ();
}
/* Build three float and three double 256-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fmadd checks on them.  */
static void
fma_test (void)
{
  union256 fv[3];
  union256d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 8; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 4; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm256_fmadd_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm256_fmadd_ps (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm256_fmaddsub_ps (A, B, C) with a reference that adds C in
   odd elements and subtracts it in even ones; abort if check_union256
   returns nonzero.  */
void
check_mm256_fmaddsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  union256 va, vb, vc, got;
  float want[8];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmaddsub_ps (__A, __B, __C);

  for (k = 0; k < 8; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? vc.a[k] : -vc.a[k]);

  if (check_union256 (got, want))
    abort ();
}
/* Compare _mm256_fmaddsub_pd (A, B, C) with a reference that adds C in
   odd elements and subtracts it in even ones; abort if check_union256d
   returns nonzero.  */
void
check_mm256_fmaddsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  union256d va, vb, vc, got;
  double want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmaddsub_pd (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? vc.a[k] : -vc.a[k]);

  if (check_union256d (got, want))
    abort ();
}
/* Build three float and three double 256-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fmaddsub checks on them.  */
static void
fma_test (void)
{
  union256 fv[3];
  union256d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 8; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 4; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm256_fmaddsub_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm256_fmaddsub_ps (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm256_fmsub_pd (A, B, C) with a per-element A * B - C
   reference; abort if check_union256d returns nonzero.  */
void
check_mm256_fmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  union256d va, vb, vc, got;
  double want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmsub_pd (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] - vc.a[k];

  if (check_union256d (got, want))
    abort ();
}
/* Compare _mm256_fmsub_ps (A, B, C) with a per-element A * B - C
   reference; abort if check_union256 returns nonzero.  */
void
check_mm256_fmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  union256 va, vb, vc, got;
  float want[8];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmsub_ps (__A, __B, __C);

  for (k = 0; k < 8; k++)
    want[k] = va.a[k] * vb.a[k] - vc.a[k];

  if (check_union256 (got, want))
    abort ();
}
/* Build three float and three double 256-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fmsub checks on them.  */
static void
fma_test (void)
{
  union256 fv[3];
  union256d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 8; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 4; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm256_fmsub_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm256_fmsub_ps (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm256_fmsubadd_ps (A, B, C) with a reference that subtracts C
   in odd elements and adds it in even ones; abort if check_union256
   returns nonzero.  */
void
check_mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
  union256 va, vb, vc, got;
  float want[8];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmsubadd_ps (__A, __B, __C);

  for (k = 0; k < 8; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? -vc.a[k] : vc.a[k]);

  if (check_union256 (got, want))
    abort ();
}
/* Compare _mm256_fmsubadd_pd (A, B, C) with a reference that subtracts C
   in odd elements and adds it in even ones; abort if check_union256d
   returns nonzero.  */
void
check_mm256_fmsubadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
  union256d va, vb, vc, got;
  double want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fmsubadd_pd (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? -vc.a[k] : vc.a[k]);

  if (check_union256d (got, want))
    abort ();
}
/* Build three float and three double 256-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fmsubadd checks on them.  */
static void
fma_test (void)
{
  union256 fv[3];
  union256d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 8; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 4; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm256_fmsubadd_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm256_fmsubadd_ps (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm256_fnmadd_pd (A, B, C) with a per-element -A * B + C
   reference; abort if check_union256d returns nonzero.  */
void
check_mm256_fnmadd_pd (__m256d __A, __m256d __B, __m256d __C)
{
  union256d va, vb, vc, got;
  double want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fnmadd_pd (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = -va.a[k] * vb.a[k] + vc.a[k];

  if (check_union256d (got, want))
    abort ();
}
/* Compare _mm256_fnmadd_ps (A, B, C) with a per-element -A * B + C
   reference; abort if check_union256 returns nonzero.  */
void
check_mm256_fnmadd_ps (__m256 __A, __m256 __B, __m256 __C)
{
  union256 va, vb, vc, got;
  float want[8];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fnmadd_ps (__A, __B, __C);

  for (k = 0; k < 8; k++)
    want[k] = -va.a[k] * vb.a[k] + vc.a[k];

  if (check_union256 (got, want))
    abort ();
}
/* Build three float and three double 256-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fnmadd checks on them.  */
static void
fma_test (void)
{
  union256 fv[3];
  union256d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 8; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 4; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm256_fnmadd_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm256_fnmadd_ps (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm256_fnmsub_pd (A, B, C) with a per-element -A * B - C
   reference; abort if check_union256d returns nonzero.  */
void
check_mm256_fnmsub_pd (__m256d __A, __m256d __B, __m256d __C)
{
  union256d va, vb, vc, got;
  double want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fnmsub_pd (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = -va.a[k] * vb.a[k] - vc.a[k];

  if (check_union256d (got, want))
    abort ();
}
/* Compare _mm256_fnmsub_ps (A, B, C) with a per-element -A * B - C
   reference; abort if check_union256 returns nonzero.  */
void
check_mm256_fnmsub_ps (__m256 __A, __m256 __B, __m256 __C)
{
  union256 va, vb, vc, got;
  float want[8];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm256_fnmsub_ps (__A, __B, __C);

  for (k = 0; k < 8; k++)
    want[k] = -va.a[k] * vb.a[k] - vc.a[k];

  if (check_union256 (got, want))
    abort ();
}
/* Build three float and three double 256-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fnmsub checks on them.  */
static void
fma_test (void)
{
  union256 fv[3];
  union256d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 8; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 4; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm256_fnmsub_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm256_fnmsub_ps (fv[0].x, fv[1].x, fv[2].x);
}
#include <stdlib.h>
#include "cpuid.h"
/* Test body; defined by the test file that includes this header.  */
static void fma_test (void);

/* Out-of-line wrapper around the test body.  NOTE(review): noinline is
   presumably there to keep FMA code out of main, which must also run on
   hosts without FMA -- confirm.  */
static void __attribute__ ((noinline))
do_test (void)
{
  fma_test ();
}

/* Run fma_test only when the host can execute FMA code.  Always returns
   0: an unsupported host is a silent pass, while a failing check aborts
   inside the test body.  */
int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1 unavailable: nothing can be detected, so skip.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  /* FMA instructions are VEX-encoded, so the CPU feature bit alone is not
     enough: the OS must also have enabled extended state (OSXSAVE), or
     the first FMA instruction faults.  Require both bits.
     NOTE(review): a complete check would also read XCR0 via XGETBV.  */
  if ((ecx & (bit_FMA | bit_OSXSAVE)) == (bit_FMA | bit_OSXSAVE))
    do_test ();

  return 0;
}
/* Test that the compiler properly generates floating point multiply
and add instructions on FMA systems. */
/* { dg-do compile } */
/* { dg-options "-O2 -mfma" } */
#include <x86intrin.h>
/* fmadd family: one thin wrapper per packed 128-bit, packed 256-bit and
   scalar variant.  The dg-final directives in this file count the
   resulting instructions (two "ps", two "pd", one "ss", one "sd"), so
   each wrapper must keep producing exactly one vfmadd instruction.  */
__m128d
check_mm_fmadd_pd (__m128d a, __m128d b, __m128d c)
{
return _mm_fmadd_pd (a, b, c);
}
__m256d
check_mm256_fmadd_pd (__m256d a, __m256d b, __m256d c)
{
return _mm256_fmadd_pd (a, b, c);
}
__m128
check_mm_fmadd_ps (__m128 a, __m128 b, __m128 c)
{
return _mm_fmadd_ps (a, b, c);
}
__m256
check_mm256_fmadd_ps (__m256 a, __m256 b, __m256 c)
{
return _mm256_fmadd_ps (a, b, c);
}
__m128d
check_mm_fmadd_sd (__m128d a, __m128d b, __m128d c)
{
return _mm_fmadd_sd (a, b, c);
}
__m128
check_mm_fmadd_ss (__m128 a, __m128 b, __m128 c)
{
return _mm_fmadd_ss (a, b, c);
}
/* fmsub family: one thin wrapper per packed 128-bit, packed 256-bit and
   scalar variant.  The dg-final directives in this file count the
   resulting instructions (two "ps", two "pd", one "ss", one "sd"), so
   each wrapper must keep producing exactly one vfmsub instruction.  */
__m128d
check_mm_fmsub_pd (__m128d a, __m128d b, __m128d c)
{
return _mm_fmsub_pd (a, b, c);
}
__m256d
check_mm256_fmsub_pd (__m256d a, __m256d b, __m256d c)
{
return _mm256_fmsub_pd (a, b, c);
}
__m128
check_mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
{
return _mm_fmsub_ps (a, b, c);
}
__m256
check_mm256_fmsub_ps (__m256 a, __m256 b, __m256 c)
{
return _mm256_fmsub_ps (a, b, c);
}
__m128d
check_mm_fmsub_sd (__m128d a, __m128d b, __m128d c)
{
return _mm_fmsub_sd (a, b, c);
}
__m128
check_mm_fmsub_ss (__m128 a, __m128 b, __m128 c)
{
return _mm_fmsub_ss (a, b, c);
}
/* fnmadd family: one thin wrapper per packed 128-bit, packed 256-bit and
   scalar variant.  The dg-final directives in this file count the
   resulting instructions (two "ps", two "pd", one "ss", one "sd"), so
   each wrapper must keep producing exactly one vfnmadd instruction.  */
__m128d
check_mm_fnmadd_pd (__m128d a, __m128d b, __m128d c)
{
return _mm_fnmadd_pd (a, b, c);
}
__m256d
check_mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c)
{
return _mm256_fnmadd_pd (a, b, c);
}
__m128
check_mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
{
return _mm_fnmadd_ps (a, b, c);
}
__m256
check_mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c)
{
return _mm256_fnmadd_ps (a, b, c);
}
__m128d
check_mm_fnmadd_sd (__m128d a, __m128d b, __m128d c)
{
return _mm_fnmadd_sd (a, b, c);
}
__m128
check_mm_fnmadd_ss (__m128 a, __m128 b, __m128 c)
{
return _mm_fnmadd_ss (a, b, c);
}
/* fnmsub family: one thin wrapper per packed 128-bit, packed 256-bit and
   scalar variant.  The dg-final directives in this file count the
   resulting instructions (two "ps", two "pd", one "ss", one "sd"), so
   each wrapper must keep producing exactly one vfnmsub instruction.  */
__m128d
check_mm_fnmsub_pd (__m128d a, __m128d b, __m128d c)
{
return _mm_fnmsub_pd (a, b, c);
}
__m256d
check_mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c)
{
return _mm256_fnmsub_pd (a, b, c);
}
__m128
check_mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
{
return _mm_fnmsub_ps (a, b, c);
}
__m256
check_mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c)
{
return _mm256_fnmsub_ps (a, b, c);
}
__m128d
check_mm_fnmsub_sd (__m128d a, __m128d b, __m128d c)
{
return _mm_fnmsub_sd (a, b, c);
}
__m128
check_mm_fnmsub_ss (__m128 a, __m128 b, __m128 c)
{
return _mm_fnmsub_ss (a, b, c);
}
/* fmaddsub family: packed 128-bit and 256-bit wrappers only (no scalar
   forms are tested).  The dg-final directives in this file expect two
   "ps" and two "pd" vfmaddsub instructions in total.  */
__m128d
check_mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c)
{
return _mm_fmaddsub_pd (a, b, c);
}
__m256d
check_mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c)
{
return _mm256_fmaddsub_pd (a, b, c);
}
__m128
check_mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c)
{
return _mm_fmaddsub_ps (a, b, c);
}
__m256
check_mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c)
{
return _mm256_fmaddsub_ps (a, b, c);
}
/* fmsubadd family: packed 128-bit and 256-bit wrappers only (no scalar
   forms are tested).  The dg-final directives in this file expect two
   "ps" and two "pd" vfmsubadd instructions in total.  */
__m128d
check_mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c)
{
return _mm_fmsubadd_pd (a, b, c);
}
__m256d
check_mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
{
return _mm256_fmsubadd_pd (a, b, c);
}
__m128
check_mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c)
{
return _mm_fmsubadd_ps (a, b, c);
}
__m256
check_mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c)
{
return _mm256_fmsubadd_ps (a, b, c);
}
/* { dg-final { scan-assembler-times "vfmadd[^s]..ps" 2 } } */
/* { dg-final { scan-assembler-times "vfmsub[^s]..ps" 2 } } */
/* { dg-final { scan-assembler-times "vfnmadd...ps" 2 } } */
/* { dg-final { scan-assembler-times "vfnmsub...ps" 2 } } */
/* { dg-final { scan-assembler-times "vfmaddsub...ps" 2 } } */
/* { dg-final { scan-assembler-times "vfmsubadd...ps" 2 } } */
/* { dg-final { scan-assembler-times "vfmadd[^s]..pd" 2 } } */
/* { dg-final { scan-assembler-times "vfmsub[^s]..pd" 2 } } */
/* { dg-final { scan-assembler-times "vfnmadd...pd" 2 } } */
/* { dg-final { scan-assembler-times "vfnmsub...pd" 2 } } */
/* { dg-final { scan-assembler-times "vfmaddsub...pd" 2 } } */
/* { dg-final { scan-assembler-times "vfmsubadd...pd" 2 } } */
/* { dg-final { scan-assembler-times "vfmadd[^s]..ss" 1 } } */
/* { dg-final { scan-assembler-times "vfmsub[^s]..ss" 1 } } */
/* { dg-final { scan-assembler-times "vfnmadd...ss" 1 } } */
/* { dg-final { scan-assembler-times "vfnmsub...ss" 1 } } */
/* { dg-final { scan-assembler-times "vfmadd[^s]..sd" 1 } } */
/* { dg-final { scan-assembler-times "vfmsub[^s]..sd" 1 } } */
/* { dg-final { scan-assembler-times "vfnmadd...sd" 1 } } */
/* { dg-final { scan-assembler-times "vfnmsub...sd" 1 } } */
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm_fmadd_pd (A, B, C) with a per-element A * B + C
   reference; abort if check_union128d returns nonzero.  */
void
check_mm_fmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmadd_pd (__A, __B, __C);

  for (k = 0; k < 2; k++)
    want[k] = va.a[k] * vb.a[k] + vc.a[k];

  if (check_union128d (got, want))
    abort ();
}
/* Compare _mm_fmadd_ps (A, B, C) with a per-element A * B + C
   reference; abort if check_union128 returns nonzero.  */
void
check_mm_fmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmadd_ps (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] + vc.a[k];

  if (check_union128 (got, want))
    abort ();
}
/* Compare _mm_fmadd_sd (A, B, C) with a reference whose element 0 is
   A * B + C and whose upper element is copied from A; abort if
   check_union128d returns nonzero.  */
void
check_mm_fmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmadd_sd (__A, __B, __C);

  want[1] = va.a[1];
  want[0] = va.a[0] * vb.a[0] + vc.a[0];

  if (check_union128d (got, want))
    abort ();
}
/* Compare _mm_fmadd_ss (A, B, C) with a reference whose element 0 is
   A * B + C and whose upper three elements are copied from A; abort if
   check_union128 returns nonzero.  */
void
check_mm_fmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmadd_ss (__A, __B, __C);

  for (k = 1; k < 4; k++)
    want[k] = va.a[k];
  want[0] = va.a[0] * vb.a[0] + vc.a[0];

  if (check_union128 (got, want))
    abort ();
}
/* Build three float and three double 128-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run the four fmadd checks on them.  */
static void
fma_test (void)
{
  union128 fv[3];
  union128d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 4; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 2; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm_fmadd_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fmadd_sd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fmadd_ps (fv[0].x, fv[1].x, fv[2].x);
  check_mm_fmadd_ss (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm_fmaddsub_ps (A, B, C) with a reference that adds C in odd
   elements and subtracts it in even ones; abort if check_union128
   returns nonzero.  */
void
check_mm_fmaddsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmaddsub_ps (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? vc.a[k] : -vc.a[k]);

  if (check_union128 (got, want))
    abort ();
}
/* Compare _mm_fmaddsub_pd (A, B, C) with a reference that adds C in odd
   elements and subtracts it in even ones; abort if check_union128d
   returns nonzero.  */
void
check_mm_fmaddsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmaddsub_pd (__A, __B, __C);

  for (k = 0; k < 2; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? vc.a[k] : -vc.a[k]);

  if (check_union128d (got, want))
    abort ();
}
/* Build three float and three double 128-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fmaddsub checks on them.  */
static void
fma_test (void)
{
  union128 fv[3];
  union128d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 4; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 2; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm_fmaddsub_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fmaddsub_ps (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm_fmsub_pd (A, B, C) with a per-element A * B - C
   reference; abort if check_union128d returns nonzero.  */
void
check_mm_fmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmsub_pd (__A, __B, __C);

  for (k = 0; k < 2; k++)
    want[k] = va.a[k] * vb.a[k] - vc.a[k];

  if (check_union128d (got, want))
    abort ();
}
/* Compare _mm_fmsub_ps (A, B, C) with a per-element A * B - C
   reference; abort if check_union128 returns nonzero.  */
void
check_mm_fmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmsub_ps (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] - vc.a[k];

  if (check_union128 (got, want))
    abort ();
}
/* Compare _mm_fmsub_sd (A, B, C) with a reference whose element 0 is
   A * B - C and whose upper element is copied from A; abort if
   check_union128d returns nonzero.  */
void
check_mm_fmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmsub_sd (__A, __B, __C);

  want[1] = va.a[1];
  want[0] = va.a[0] * vb.a[0] - vc.a[0];

  if (check_union128d (got, want))
    abort ();
}
/* Compare _mm_fmsub_ss (A, B, C) with a reference whose element 0 is
   A * B - C and whose upper three elements are copied from A; abort if
   check_union128 returns nonzero.  */
void
check_mm_fmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmsub_ss (__A, __B, __C);

  for (k = 1; k < 4; k++)
    want[k] = va.a[k];
  want[0] = va.a[0] * vb.a[0] - vc.a[0];

  if (check_union128 (got, want))
    abort ();
}
/* Build three float and three double 128-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run the four fmsub checks on them.  */
static void
fma_test (void)
{
  union128 fv[3];
  union128d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 4; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 2; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm_fmsub_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fmsub_sd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fmsub_ps (fv[0].x, fv[1].x, fv[2].x);
  check_mm_fmsub_ss (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm_fmsubadd_ps (A, B, C) with a reference that subtracts C in
   odd elements and adds it in even ones; abort if check_union128
   returns nonzero.  */
void
check_mm_fmsubadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmsubadd_ps (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? -vc.a[k] : vc.a[k]);

  if (check_union128 (got, want))
    abort ();
}
/* Compare _mm_fmsubadd_pd (A, B, C) with a reference that subtracts C in
   odd elements and adds it in even ones; abort if check_union128d
   returns nonzero.  */
void
check_mm_fmsubadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fmsubadd_pd (__A, __B, __C);

  for (k = 0; k < 2; k++)
    want[k] = va.a[k] * vb.a[k] + (k % 2 == 1 ? -vc.a[k] : vc.a[k]);

  if (check_union128d (got, want))
    abort ();
}
/* Build three float and three double 128-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run both fmsubadd checks on them.  */
static void
fma_test (void)
{
  union128 fv[3];
  union128d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 4; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 2; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm_fmsubadd_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fmsubadd_ps (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm_fnmadd_ps (A, B, C) with a per-element -A * B + C
   reference; abort if check_union128 returns nonzero.  */
void
check_mm_fnmadd_ps (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmadd_ps (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = -va.a[k] * vb.a[k] + vc.a[k];

  if (check_union128 (got, want))
    abort ();
}
/* Compare _mm_fnmadd_pd (A, B, C) with a per-element -A * B + C
   reference; abort if check_union128d returns nonzero.  */
void
check_mm_fnmadd_pd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmadd_pd (__A, __B, __C);

  for (k = 0; k < 2; k++)
    want[k] = -va.a[k] * vb.a[k] + vc.a[k];

  if (check_union128d (got, want))
    abort ();
}
/* Compare _mm_fnmadd_sd (A, B, C) with a reference whose element 0 is
   -A * B + C and whose upper element is copied from A; abort if
   check_union128d returns nonzero.  */
void
check_mm_fnmadd_sd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmadd_sd (__A, __B, __C);

  want[1] = va.a[1];
  want[0] = -va.a[0] * vb.a[0] + vc.a[0];

  if (check_union128d (got, want))
    abort ();
}
/* Compare _mm_fnmadd_ss (A, B, C) with a reference whose element 0 is
   -A * B + C and whose upper three elements are copied from A; abort if
   check_union128 returns nonzero.  */
void
check_mm_fnmadd_ss (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmadd_ss (__A, __B, __C);

  for (k = 1; k < 4; k++)
    want[k] = va.a[k];
  want[0] = -va.a[0] * vb.a[0] + vc.a[0];

  if (check_union128 (got, want))
    abort ();
}
/* Build three float and three double 128-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run the four fnmadd checks on them.  */
static void
fma_test (void)
{
  union128 fv[3];
  union128d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 4; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 2; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm_fnmadd_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fnmadd_sd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fnmadd_ps (fv[0].x, fv[1].x, fv[2].x);
  check_mm_fnmadd_ss (fv[0].x, fv[1].x, fv[2].x);
}
/* { dg-do run } */
/* { dg-require-effective-target fma } */
/* { dg-options "-O2 -mfma" } */
#include "fma-check.h"
#include <x86intrin.h>
#include "m256-check.h"
/* Compare _mm_fnmsub_sd (A, B, C) with a reference whose element 0 is
   -A * B - C and whose upper element is copied from A; abort if
   check_union128d returns nonzero.  */
void
check_mm_fnmsub_sd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmsub_sd (__A, __B, __C);

  want[1] = va.a[1];
  want[0] = -va.a[0] * vb.a[0] - vc.a[0];

  if (check_union128d (got, want))
    abort ();
}
/* Compare _mm_fnmsub_ss (A, B, C) with a reference whose element 0 is
   -A * B - C and whose upper three elements are copied from A; abort if
   check_union128 returns nonzero.  */
void
check_mm_fnmsub_ss (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmsub_ss (__A, __B, __C);

  for (k = 1; k < 4; k++)
    want[k] = va.a[k];
  want[0] = -va.a[0] * vb.a[0] - vc.a[0];

  if (check_union128 (got, want))
    abort ();
}
/* Compare _mm_fnmsub_ps (A, B, C) with a per-element -A * B - C
   reference; abort if check_union128 returns nonzero.  */
void
check_mm_fnmsub_ps (__m128 __A, __m128 __B, __m128 __C)
{
  union128 va, vb, vc, got;
  float want[4];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmsub_ps (__A, __B, __C);

  for (k = 0; k < 4; k++)
    want[k] = -va.a[k] * vb.a[k] - vc.a[k];

  if (check_union128 (got, want))
    abort ();
}
/* Compare _mm_fnmsub_pd (A, B, C) with a per-element -A * B - C
   reference; abort if check_union128d returns nonzero.  */
void
check_mm_fnmsub_pd (__m128d __A, __m128d __B, __m128d __C)
{
  union128d va, vb, vc, got;
  double want[2];
  int k;

  va.x = __A;
  vb.x = __B;
  vc.x = __C;
  got.x = _mm_fnmsub_pd (__A, __B, __C);

  for (k = 0; k < 2; k++)
    want[k] = -va.a[k] * vb.a[k] - vc.a[k];

  if (check_union128d (got, want))
    abort ();
}
/* Build three float and three double 128-bit vectors whose element
   [v][lane] is v * lane + 3.5, then run the four fnmsub checks on them.  */
static void
fma_test (void)
{
  union128 fv[3];
  union128d dv[3];
  int v, lane;

  for (v = 0; v < 3; v++)
    {
      for (lane = 0; lane < 4; lane++)
        fv[v].a[lane] = v * lane + 3.5;
      for (lane = 0; lane < 2; lane++)
        dv[v].a[lane] = v * lane + 3.5;
    }

  check_mm_fnmsub_pd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fnmsub_sd (dv[0].x, dv[1].x, dv[2].x);
  check_mm_fnmsub_ps (fv[0].x, fv[1].x, fv[2].x);
  check_mm_fnmsub_ss (fv[0].x, fv[1].x, fv[2].x);
}
......@@ -172,6 +172,20 @@ proc check_effective_target_fma4 { } {
} "-O2 -mfma4" ]
}
# Return 1 if fma instructions can be compiled.
# The probe builds an object file with "-O2 -mfma" from a small function
# that calls __builtin_ia32_vfmaddps on three 16-byte float vectors; the
# result is whatever check_no_compiler_messages reports for that compile.
# NOTE(review): the probe function is named _mm_macc_ps, which looks copied
# from the fma4 check; the name is only a label inside the probe source.
proc check_effective_target_fma { } {
return [check_no_compiler_messages fma object {
typedef float __m128 __attribute__ ((__vector_size__ (16)));
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
__m128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
{
return (__m128) __builtin_ia32_vfmaddps ((__v4sf)__A,
(__v4sf)__B,
(__v4sf)__C);
}
} "-O2 -mfma" ]
}
# Return 1 if xop instructions can be compiled.
proc check_effective_target_xop { } {
return [check_no_compiler_messages xop object {
......
......@@ -3,7 +3,7 @@
popcntintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
#include <x86intrin.h>
......
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
#include <mm_malloc.h>
/* Test that the intrinsics compile with optimization. All of them
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that
reference the proper builtin functions.
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
that reference the proper builtin functions.
Defining away "extern" and "__inline" results in all of them being
compiled as proper functions. */
......
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma" } */
#include <mm_malloc.h>
/* Test that the intrinsics compile without optimization. All of them are
defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h,
fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h,
lwpintrin.h and mm_malloc.h that reference the proper builtin functions.
fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h,
lwpintrin.h, fmaintrin.h and mm_malloc.h that reference the proper
builtin functions.
Defining away "extern" and "__inline" results in all of them being compiled
as proper functions. */
......
......@@ -7,8 +7,8 @@
/* Test that the intrinsics compile with optimization. All of them
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that
reference the proper builtin functions.
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
that reference the proper builtin functions.
Defining away "extern" and "__inline" results in all of them being
compiled as proper functions. */
......@@ -255,9 +255,9 @@ test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
#endif
#include <popcntintrin.h>
/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT). */
/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT/FMA). */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt")
#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma")
#endif
#include <x86intrin.h>
/* xopintrin.h */
......
......@@ -6,8 +6,8 @@
/* Test that the intrinsics compile with optimization. All of them
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h and mm_malloc.h that
reference the proper builtin functions.
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h and mm_malloc.h
that reference the proper builtin functions.
Defining away "extern" and "__inline" results in all of them being
compiled as proper functions. */
......@@ -180,7 +180,7 @@
#define __builtin_ia32_gatherdiv4si(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si(X, Y, Z, K, 1)
#define __builtin_ia32_gatherdiv4si256(X, Y, Z, K, M) __builtin_ia32_gatherdiv4si256(X, Y, Z, K, 1)
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma")
#include <wmmintrin.h>
#include <smmintrin.h>
#include <mm3dnow.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment