Commit b76f1550 by Steven Munroe

config.gcc (powerpc*-*-*): Add bmi2intrin.h, bmiintrin.h, and x86intrin.h


[gcc]

2017-05-12  Steven Munroe  <munroesj@gcc.gnu.org>

	* config.gcc (powerpc*-*-*): Add bmi2intrin.h, bmiintrin.h,
	and x86intrin.h
	* config/rs6000/bmiintrin.h: New file.
	* config/rs6000/bmi2intrin.h: New file.
	* config/rs6000/x86intrin.h: New file.



[gcc/testsuite]

2017-05-12  Steven Munroe  <munroesj@gcc.gnu.org>

	* gcc.target/powerpc/bmi-andn-1.c: New file.
	* gcc.target/powerpc/bmi-andn-2.c: New file.
	* gcc.target/powerpc/bmi-bextr-1.c: New file.
	* gcc.target/powerpc/bmi-bextr-2.c: New file.
	* gcc.target/powerpc/bmi-bextr-4.c: New file.
	* gcc.target/powerpc/bmi-bextr-5.c: New file.
	* gcc.target/powerpc/bmi-blsi-1.c: New file.
	* gcc.target/powerpc/bmi-blsi-2.c: New file.
	* gcc.target/powerpc/bmi-blsmsk-1.c: New file.
	* gcc.target/powerpc/bmi-blsmsk-2.c: New file.
	* gcc.target/powerpc/bmi-blsr-1.c: New file.
	* gcc.target/powerpc/bmi-blsr-2.c: New file.
	* gcc.target/powerpc/bmi-check.h: New file.
	* gcc.target/powerpc/bmi-tzcnt-1.c: New file.
	* gcc.target/powerpc/bmi-tzcnt-2.c: New file.
	* gcc.target/powerpc/bmi2-bzhi32-1.c: New file.
	* gcc.target/powerpc/bmi2-bzhi64-1.c: New file.
	* gcc.target/powerpc/bmi2-bzhi64-1a.c: New file.
	* gcc.target/powerpc/bmi2-check.h: New file.
	* gcc.target/powerpc/bmi2-mulx32-1.c: New file.
	* gcc.target/powerpc/bmi2-mulx32-2.c: New file.
	* gcc.target/powerpc/bmi2-mulx64-1.c: New file.
	* gcc.target/powerpc/bmi2-mulx64-2.c: New file.
	* gcc.target/powerpc/bmi2-pdep32-1.c: New file.
	* gcc.target/powerpc/bmi2-pdep64-1.c: New file.
	* gcc.target/powerpc/bmi2-pext32-1.c: New file.
	* gcc.target/powerpc/bmi2-pext64-1.c: New file.
	* gcc.target/powerpc/bmi2-pext64-1a.c: New file.

From-SVN: r247988
parent 2a3fa75a
2017-05-12 Steven Munroe <munroesj@gcc.gnu.org>
* config.gcc (powerpc*-*-*): Add bmi2intrin.h, bmiintrin.h,
and x86intrin.h
* config/rs6000/bmiintrin.h: New file.
* config/rs6000/bmi2intrin.h: New file.
* config/rs6000/x86intrin.h: New file.
2017-05-12 Jeff Law <law@redhat.com>
* tree-vrp.c (vrp_dom_walker::before_dom_childern): Push unwinding
......
......@@ -444,7 +444,10 @@ nvptx-*-*)
;;
powerpc*-*-*)
cpu_type=rs6000
extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h htmintrin.h htmxlintrin.h"
extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h x86intrin.h"
extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h"
extra_headers="${extra_headers} spe.h paired.h"
case x$with_cpu in
xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
cpu_is_64bit=yes
......
/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This header is distributed to simplify porting x86_64 code that
makes explicit use of Intel intrinsics to powerpc64le.
It is the user's responsibility to determine if the results are
acceptable and make additional changes as necessary.
Note that much code that uses Intel intrinsics can be rewritten in
standard C or GNU C extensions, which are more portable and better
optimized across multiple targets. */
#if !defined _X86INTRIN_H_INCLUDED
# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _BMI2INTRIN_H_INCLUDED
#define _BMI2INTRIN_H_INCLUDED
/* Zero the high bits of __X starting at (0-based) bit index __Y,
   emulating the x86 BZHI instruction.

   Per the Intel definition the index is taken from bits 7:0 of __Y;
   an index of 0 yields 0 and an index >= 32 returns __X unchanged.
   The original single-expression form shifted by (32 - __Y), which
   is undefined behavior in C when __Y is 0 (shift by the full type
   width) and wrong for __Y >= 32; guard those cases explicitly.
   For a compile-time-constant index the branches fold away.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u32 (unsigned int __X, unsigned int __Y)
{
  unsigned int __index = __Y & 0xff;
  if (__index >= 32)
    return __X;
  if (__index == 0)
    return 0;
  return ((__X << (32 - __index)) >> (32 - __index));
}
/* Unsigned 32 x 32 -> 64-bit multiply (x86 MULX).  Returns the low
   32 bits of the product and stores the high 32 bits through __P.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
{
  unsigned long long __prod = (unsigned long long) __X;
  __prod *= __Y;
  *__P = (unsigned int) (__prod >> 32);
  return (unsigned int) __prod;
}
#ifdef __PPC64__
/* Zero the high bits of __X starting at (0-based) bit index __Y,
   emulating the x86 64-bit BZHI instruction.

   Per the Intel definition the index is taken from bits 7:0 of __Y;
   an index of 0 yields 0 and an index >= 64 returns __X unchanged.
   The original expression shifted by (64 - __Y), which is undefined
   behavior when __Y is 0 and wrong for __Y >= 64; guard explicitly.
   For a constant index the branches fold to the shift pair, which
   the compiler still reduces to an rldicl.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
{
  unsigned long long __index = __Y & 0xff;
  if (__index >= 64)
    return __X;
  if (__index == 0)
    return 0;
  return ((__X << (64 - __index)) >> (64 - __index));
}
/* __int128 requires base 64-bit. */
/* Unsigned 64 x 64 -> 128-bit multiply (x86 MULX).  Returns the low
   64 bits of the product and stores the high 64 bits through __P.
   Relies on the GNU unsigned __int128 type, available on 64-bit
   targets.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
	   unsigned long long *__P)
{
  unsigned __int128 __prod = (unsigned __int128) __X;
  __prod *= __Y;
  *__P = (unsigned long long) (__prod >> 64);
  return (unsigned long long) __prod;
}
#ifdef _ARCH_PWR7
/* popcount and bpermd require power7 minimum. */
/* Scalar emulation of the x86 PDEP (parallel bit deposit)
   instruction: successive low-order bits of __X are deposited into
   the result at the bit positions selected by the set bits of __M.
   NOTE(review): uses plain 'unsigned long' for 64-bit work — valid
   under the LP64 __PPC64__ ABI this block is guarded by.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u64 (unsigned long long __X, unsigned long long __M)
{
  unsigned long result = 0x0UL;
  const unsigned long mask = 0x8000000000000000UL;
  unsigned long m = __M;
  unsigned long c, t;
  unsigned long p;
  /* The pop-count of the mask gives the number of the bits from
     source to process.  This is also needed to shift bits from the
     source into the correct position for the result.  */
  p = 64 - __builtin_popcountl (__M);
  /* The loop is for the number of '1' bits in the mask and clearing
     each mask bit as it is processed.  */
  while (m != 0)
    {
      /* Mask bits are consumed from most-significant down; 'c' is the
	 bit position (from the left) of the current mask bit.  */
      c = __builtin_clzl (m);
      /* Align the next source bit with the current mask position.  */
      t = __X << (p - c);
      /* Clear the processed mask bit and deposit the source bit.  */
      m ^= (mask >> c);
      result |= (t & (mask >> c));
      p++;
    }
  return (result);
}
/* Scalar emulation of the x86 PEXT (parallel bit extract)
   instruction: the bits of __X selected by the set bits of __M are
   packed into the low-order bits of the result.  When the mask is a
   compile-time constant with at most 8 set bits, the POWER8 bpermd
   (bit permute doubleword) instruction is used instead of a loop.  */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u64 (unsigned long long __X, unsigned long long __M)
{
  unsigned long p = 0x4040404040404040UL; // initial bit permute control
  const unsigned long mask = 0x8000000000000000UL;
  unsigned long m = __M;
  unsigned long c;
  unsigned long result;
  /* If the mask is constant and selects 8 bits or less we can use
     the Power8 Bit permute instruction.  */
  if (__builtin_constant_p (__M) && (__builtin_popcountl (__M) <= 8))
    {
      /* Also if the pext mask is constant, then the popcount is
	 constant, we can evaluate the following loop at compile
	 time and use a constant bit permute vector.  */
      for (long i = 0; i < __builtin_popcountl (__M); i++)
	{
	  /* Append the position of each set mask bit (high to low)
	     to the permute-control doubleword.  */
	  c = __builtin_clzl (m);
	  p = (p << 8) | c;
	  m ^= (mask >> c);
	}
      result = __builtin_bpermd (p, __X);
    }
  else
    {
      /* General case: walk the mask bits from most significant to
	 least, shifting each selected source bit into place.  */
      p = 64 - __builtin_popcountl (__M);
      result = 0;
      /* We could use a for loop here, but that combined with
	 -funroll-loops can expand to a lot of code.  The while
	 loop avoids unrolling and the compiler commons the xor
	 from clearing the mask bit with the (m != 0) test.  The
	 result is a more compact loop setup and body.  */
      while (m != 0)
	{
	  unsigned long t;
	  c = __builtin_clzl (m);
	  t = (__X & (mask >> c)) >> (p - c);
	  m ^= (mask >> c);
	  result |= (t);
	  p++;
	}
    }
  return (result);
}
/* these 32-bit implementations depend on 64-bit pdep/pext
which depend on _ARCH_PWR7. */
/* 32-bit PDEP, implemented by zero-extending to the 64-bit
   emulation above; the upper mask bits are zero so the result
   fits in 32 bits.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pdep_u32 (unsigned int __X, unsigned int __Y)
{
  return _pdep_u64 (__X, __Y);
}
/* 32-bit PEXT, implemented by zero-extending to the 64-bit
   emulation above.  */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_pext_u32 (unsigned int __X, unsigned int __Y)
{
  return _pext_u64 (__X, __Y);
}
#endif /* _ARCH_PWR7 */
#endif /* __PPC64__ */
#endif /* _BMI2INTRIN_H_INCLUDED */
/* Copyright (C) 2010-2017 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This header is distributed to simplify porting x86_64 code that
makes explicit use of Intel intrinsics to powerpc64le.
It is the user's responsibility to determine if the results are
acceptable and make additional changes as necessary.
Note that much code that uses Intel intrinsics can be rewritten in
standard C or GNU C extensions, which are more portable and better
optimized across multiple targets. */
#if !defined _X86INTRIN_H_INCLUDED
# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _BMIINTRIN_H_INCLUDED
#define _BMIINTRIN_H_INCLUDED
/* Count trailing zero bits of a 16-bit value (x86 TZCNT).
   Intel defines TZCNT for a zero source: it returns the operand
   width (16 here).  __builtin_ctz (0) is undefined, so guard the
   zero case explicitly instead of passing it through.  */
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u16 (unsigned short __X)
{
  return __X == 0 ? 16 : __builtin_ctz (__X);
}
/* Bitwise AND of __Y with the one's complement of __X (x86 ANDN).  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__andn_u32 (unsigned int __X, unsigned int __Y)
{
  unsigned int __notx = ~__X;
  return __notx & __Y;
}
/* Extract __L bits from __X starting at bit position __P (x86 BEXTR
   explicit-operand form), via a shift-left/shift-right pair.
   NOTE(review): when __L is 0 (or __L + __P is 0) the shift counts
   reach 32, which is undefined behavior in C; Intel BEXTR would
   return 0 in that case — confirm callers never pass a zero
   length.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bextr_u32 (unsigned int __X, unsigned int __P, unsigned int __L)
{
  return ((__X << (32 - (__L + __P))) >> (32 - __L));
}
/* BEXTR with a packed control word in __Y: the start position is in
   bits 7:0 and the field length in bits 15:8.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextr_u32 (unsigned int __X, unsigned int __Y)
{
  unsigned int __start = __Y & 0xFF;
  unsigned int __len = (__Y >> 8) & 0xFF;
  return (_bextr_u32 (__X, __start, __len));
}
/* Isolate the lowest set bit of __X (x86 BLSI); zero yields zero.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u32 (unsigned int __X)
{
  /* ~x + 1 is the two's complement negation, identical to -x for
     unsigned types.  */
  return __X & (~__X + 1);
}
/* Published single-underscore BMI name; forwards to __blsi_u32.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsi_u32 (unsigned int __X)
{
  unsigned int __r = __blsi_u32 (__X);
  return __r;
}
/* Build a mask up to and including the lowest set bit of __X
   (x86 BLSMSK); zero yields all ones.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u32 (unsigned int __X)
{
  unsigned int __below = __X - 1;
  return __X ^ __below;
}
/* Published single-underscore BMI name; forwards to __blsmsk_u32.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsmsk_u32 (unsigned int __X)
{
  unsigned int __r = __blsmsk_u32 (__X);
  return __r;
}
/* Clear the lowest set bit of __X (x86 BLSR); zero yields zero.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u32 (unsigned int __X)
{
  return (__X - 1) & __X;
}
/* Published single-underscore BMI name; forwards to __blsr_u32.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsr_u32 (unsigned int __X)
{
  unsigned int __r = __blsr_u32 (__X);
  return __r;
}
/* Count trailing zero bits of a 32-bit value (x86 TZCNT).
   Intel defines TZCNT for a zero source: it returns the operand
   width (32).  __builtin_ctz (0) is undefined, so guard the zero
   case explicitly.  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u32 (unsigned int __X)
{
  return __X == 0 ? 32 : __builtin_ctz (__X);
}
/* Published single-underscore spelling of 32-bit TZCNT.  As with
   __tzcnt_u32, guard the zero input (Intel-defined result is 32;
   __builtin_ctz (0) is undefined behavior).  */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u32 (unsigned int __X)
{
  return __X == 0 ? 32 : __builtin_ctz (__X);
}
/* use the 64-bit shift, rotate, and count leading zeros instructions
for long long. */
#ifdef __PPC64__
/* Bitwise AND of __Y with the one's complement of __X (64-bit ANDN).  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__andn_u64 (unsigned long long __X, unsigned long long __Y)
{
  unsigned long long __notx = ~__X;
  return __notx & __Y;
}
/* Extract __L bits from __X starting at bit position __P (64-bit
   BEXTR, explicit-operand form), via a shift pair.
   NOTE(review): when __L is 0 (or __L + __P is 0) the shift counts
   reach 64, which is undefined behavior in C; Intel BEXTR would
   return 0 — confirm callers never pass a zero length.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bextr_u64 (unsigned long long __X, unsigned int __P, unsigned int __L)
{
  return ((__X << (64 - (__L + __P))) >> (64 - __L));
}
/* 64-bit BEXTR with a packed control word in __Y: start position in
   bits 7:0, field length in bits 15:8.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextr_u64 (unsigned long long __X, unsigned long long __Y)
{
  unsigned int __start = __Y & 0xFF;
  unsigned int __len = (__Y & 0xFF00) >> 8;
  return (_bextr_u64 (__X, __start, __len));
}
/* Isolate the lowest set bit of __X (64-bit BLSI); zero yields zero.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u64 (unsigned long long __X)
{
  /* ~x + 1 is the two's complement negation, identical to -x for
     unsigned types.  */
  return __X & (~__X + 1);
}
/* Published single-underscore BMI name; forwards to __blsi_u64.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsi_u64 (unsigned long long __X)
{
  unsigned long long __r = __blsi_u64 (__X);
  return __r;
}
/* Build a mask up to and including the lowest set bit of __X
   (64-bit BLSMSK); zero yields all ones.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsmsk_u64 (unsigned long long __X)
{
  unsigned long long __below = __X - 1;
  return __X ^ __below;
}
/* Published single-underscore BMI name; forwards to __blsmsk_u64.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsmsk_u64 (unsigned long long __X)
{
  unsigned long long __r = __blsmsk_u64 (__X);
  return __r;
}
/* Clear the lowest set bit of __X (64-bit BLSR); zero yields zero.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsr_u64 (unsigned long long __X)
{
  return (__X - 1) & __X;
}
/* Published single-underscore BMI name; forwards to __blsr_u64.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_blsr_u64 (unsigned long long __X)
{
  unsigned long long __r = __blsr_u64 (__X);
  return __r;
}
/* Count trailing zero bits of a 64-bit value (x86 TZCNT).
   Intel defines TZCNT for a zero source: it returns the operand
   width (64).  __builtin_ctzll (0) is undefined, so guard the zero
   case explicitly.  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u64 (unsigned long long __X)
{
  return __X == 0 ? 64 : __builtin_ctzll (__X);
}
/* Published single-underscore spelling of 64-bit TZCNT.  As with
   __tzcnt_u64, guard the zero input (Intel-defined result is 64;
   __builtin_ctzll (0) is undefined behavior).  */
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_tzcnt_u64 (unsigned long long __X)
{
  return __X == 0 ? 64 : __builtin_ctzll (__X);
}
#endif /* __PPC64__ */
#endif /* _BMIINTRIN_H_INCLUDED */
/* Copyright (C) 2008-2017 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
makes explicit use of Intel intrinsics to powerpc64le.
It is the user's responsibility to determine if the results are
acceptable and make additional changes as necessary.
Note that much code that uses Intel intrinsics can be rewritten in
standard C or GNU C extensions, which are more portable and better
optimized across multiple targets. */
#warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning."
#endif
#ifndef _X86INTRIN_H_INCLUDED
#define _X86INTRIN_H_INCLUDED
#include <bmiintrin.h>
#include <bmi2intrin.h>
#endif /* _X86INTRIN_H_INCLUDED */
2017-05-12 Steven Munroe <munroesj@gcc.gnu.org>
* gcc.target/powerpc/bmi-andn-1.c: New file.
* gcc.target/powerpc/bmi-andn-2.c: New file.
* gcc.target/powerpc/bmi-bextr-1.c: New file.
* gcc.target/powerpc/bmi-bextr-2.c: New file.
* gcc.target/powerpc/bmi-bextr-4.c: New file.
* gcc.target/powerpc/bmi-bextr-5.c: New file.
* gcc.target/powerpc/bmi-blsi-1.c: New file.
* gcc.target/powerpc/bmi-blsi-2.c: New file.
* gcc.target/powerpc/bmi-blsmsk-1.c: New file.
* gcc.target/powerpc/bmi-blsmsk-2.c: New file.
* gcc.target/powerpc/bmi-blsr-1.c: New file.
* gcc.target/powerpc/bmi-blsr-2.c: New file.
* gcc.target/powerpc/bmi-check.h: New file.
* gcc.target/powerpc/bmi-tzcnt-1.c: New file.
* gcc.target/powerpc/bmi-tzcnt-2.c: New file.
* gcc.target/powerpc/bmi2-bzhi32-1.c: New file.
* gcc.target/powerpc/bmi2-bzhi64-1.c: New file.
* gcc.target/powerpc/bmi2-bzhi64-1a.c: New file.
* gcc.target/powerpc/bmi2-check.h: New file.
* gcc.target/powerpc/bmi2-mulx32-1.c: New file.
* gcc.target/powerpc/bmi2-mulx32-2.c: New file.
* gcc.target/powerpc/bmi2-mulx64-1.c: New file.
* gcc.target/powerpc/bmi2-mulx64-2.c: New file.
* gcc.target/powerpc/bmi2-pdep32-1.c: New file.
* gcc.target/powerpc/bmi2-pdep64-1.c: New file.
* gcc.target/powerpc/bmi2-pext32-1.c: New file.
* gcc.target/powerpc/bmi2-pext64-1.c: New file.
* gcc.target/powerpc/bmi2-pext64-1a.c: New file.
2017-05-12 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/60430
......
/* { dg-do run } */
/* { dg-options "-O3 -m64" } */
/* { dg-require-effective-target lp64 } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference implementation of 64-bit ANDN; the extra 'dummy'
   argument keeps the compiler from recognizing and folding the
   andc pattern itself.  */
long long calc_andn_u64 (long long src1,
			 long long src2,
			 long long dummy)
{
  return (~src1 + dummy) & (src2);
}
/* Compare __andn_u64 against the reference over several values.  */
static void
bmi_test()
{
  unsigned i;
  long long src = 0xfacec0ffeefacec0;
  long long res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_andn_u64 (src, src+i, 0);
    res = __andn_u64 (src, src+i);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64" } */
/* { dg-require-effective-target lp64 } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference implementation of 32-bit ANDN; 'dummy' defeats pattern
   recognition by the compiler.  */
long long calc_andn_u32 (int src1, int src2, int dummy)
{
  return (~src1+dummy) & (src2);
}
/* Compare __andn_u32 against the reference over several values.  */
static void
bmi_test()
{
  unsigned i;
  int src = 0xfacec0ff;
  int res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_andn_u32 (src, src+i, 0);
    res = __andn_u32 (src, src+i);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference BEXTR: extract 'len' bits of src1 starting at 'start',
   both decoded from the packed control word src2 (start in bits
   7:0, length in bits 15:8), one bit at a time.  */
long long calc_bextr_u64 (unsigned long long src1,
			  unsigned long long src2)
{
  long long res = 0;
  unsigned char start = (src2 & 0xff);
  unsigned char len = (int) ((src2 >> 8) & 0xff);
  if (start < 64) {
    unsigned i;
    unsigned last = (start+len) < 64 ? start+len : 64;
    src1 >>= start;
    for (i=start; i<last; ++i) {
      res |= (src1 & 1) << (i-start);
      src1 >>= 1;
    }
  }
  return res;
}
/* Compare __bextr_u64 against the reference with pseudo-random
   start/length control words.  */
static void
bmi_test ()
{
  unsigned i;
  unsigned char start, len;
  unsigned long long src1 = 0xfacec0ffeefacec0;
  unsigned long long res, res_ref, src2;
  for (i=0; i<5; ++i) {
    start = (i * 1983) % 64;
    len = (i + (i * 1983)) % 64;
    src1 = src1 * 3;
    src2 = start | (((unsigned long long)len) << 8);
    res_ref = calc_bextr_u64 (src1, src2);
    res = __bextr_u64 (src1, src2);
    if (res != res_ref)
      abort ();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference 32-bit BEXTR: extract 'len' bits of src1 at 'start',
   decoded from the packed control word src2, one bit at a time.  */
unsigned calc_bextr_u32 (unsigned src1, unsigned src2)
{
  unsigned res = 0;
  unsigned char start = (src2 & 0xff);
  unsigned char len = (int) ((src2 >> 8) & 0xff);
  if (start < 32) {
    unsigned i;
    unsigned last = (start+len) < 32 ? start+len : 32;
    src1 >>= start;
    for (i=start; i<last; ++i) {
      res |= (src1 & 1) << (i-start);
      src1 >>= 1;
    }
  }
  return res;
}
/* Compare __bextr_u32 against the reference with pseudo-random
   start/length control words.  */
static void
bmi_test ()
{
  unsigned i;
  unsigned char start, len;
  unsigned src1 = 0xfacec0ff;
  unsigned res, res_ref, src2;
  for (i=0; i<5; ++i) {
    start = (i * 1983) % 32;
    len = (i + (i * 1983)) % 32;
    src1 = src1 * 3;
    src2 = start | (((unsigned)len) << 8);
    res_ref = calc_bextr_u32 (src1, src2);
    res = __bextr_u32 (src1, src2);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference 32-bit BEXTR (same as bmi-bextr-2.c) used to check the
   explicit-operand _bextr_u32 form.  */
unsigned calc_bextr_u32 (unsigned src1, unsigned src2)
{
  unsigned res = 0;
  unsigned char start = (src2 & 0xff);
  unsigned char len = (int) ((src2 >> 8) & 0xff);
  if (start < 32) {
    unsigned i;
    unsigned last = (start+len) < 32 ? start+len : 32;
    src1 >>= start;
    for (i=start; i<last; ++i) {
      res |= (src1 & 1) << (i-start);
      src1 >>= 1;
    }
  }
  return res;
}
/* Compare _bextr_u32 (separate start/len operands) against the
   packed-control-word reference.  */
static void
bmi_test ()
{
  unsigned i;
  unsigned char start, len;
  unsigned src1 = 0xfacec0ff;
  unsigned res, res_ref, src2;
  for (i=0; i<5; ++i) {
    start = i * 4;
    len = i * 4;
    src1 = src1 * 3;
    src2 = (start & 0xff) | ((len & 0xff) << 8);
    res_ref = calc_bextr_u32 (src1, src2);
    res = _bextr_u32 (src1, start, len);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference 64-bit BEXTR (same as bmi-bextr-1.c) used to check the
   explicit-operand _bextr_u64 form.  */
long long calc_bextr_u64 (unsigned long long src1,
			  unsigned long long src2)
{
  long long res = 0;
  unsigned char start = (src2 & 0xff);
  unsigned char len = (int) ((src2 >> 8) & 0xff);
  if (start < 64) {
    unsigned i;
    unsigned last = (start+len) < 64 ? start+len : 64;
    src1 >>= start;
    for (i=start; i<last; ++i) {
      res |= (src1 & 1) << (i-start);
      src1 >>= 1;
    }
  }
  return res;
}
/* Compare _bextr_u64 (separate start/len operands) against the
   packed-control-word reference.  */
static void
bmi_test ()
{
  unsigned i;
  unsigned char start, len;
  unsigned long long src1 = 0xfacec0ffeefacec0;
  unsigned long long res, res_ref, src2;
  for (i=0; i<5; ++i) {
    start = i * 4;
    len = i * 3;
    src1 = src1 * 3;
    src2 = (start & 0xff) | ((len & 0xff) << 8);
    res_ref = calc_bextr_u64 (src1, src2);
    res = _bextr_u64 (src1, start, len);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* To fool the compiler, so it does not generate blsi here. */
long long calc_blsi_u64 (long long src1, long long src2)
{
return (-src1) & (src2);
}
static void
bmi_test()
{
unsigned i;
long long src = 0xfacec0ffeefacec0;
long long res, res_ref;
for (i=0; i<5; ++i) {
src = (i + src) << i;
res_ref = calc_blsi_u64 (src, src);
res = __blsi_u64 (src);
if (res != res_ref)
abort();
}
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* To fool the compiler, so it does not generate blsi here. */
/* Reference 32-bit BLSI: isolate the lowest set bit (src1 and src2
   carry the same value).  */
int calc_blsi_u32 (int src1, int src2)
{
  return (-src1) & (src2);
}
/* Compare __blsi_u32 against the reference over several values.  */
static void
bmi_test()
{
  unsigned i;
  int src = 0xfacec0ff;
  int res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_blsi_u32 (src, src);
    res = __blsi_u32 (src);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Trick compiler in order not to generate target insn here. */
/* Reference BLSMSK: mask up to and including the lowest set bit
   (src1 and src2 carry the same value).  */
long long calc_blsmsk_u64 (long long src1, long long src2)
{
  return (src1-1) ^ (src2);
}
/* Compare __blsmsk_u64 against the reference over several values.  */
static void
bmi_test ()
{
  unsigned i;
  long long src = 0xfacec0ffeefacec0;
  long long res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_blsmsk_u64 (src, src);
    res = __blsmsk_u64 (src);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Trick compiler in order not to generate target insn here. */
/* Reference 32-bit BLSMSK (src1 and src2 carry the same value).  */
int calc_blsmsk_u32 (int src1, int src2)
{
  return (src1-1) ^ (src2);
}
/* Compare __blsmsk_u32 against the reference over several values.  */
static void
bmi_test ()
{
  unsigned i;
  int src = 0xfacec0ff;
  int res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_blsmsk_u32 (src, src);
    res = __blsmsk_u32 (src);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference BLSR: clear the lowest set bit (src1 and src2 carry the
   same value).  */
long long calc_blsr_u64 (long long src1, long long src2)
{
  return (src1-1) & (src2);
}
/* Compare __blsr_u64 against the reference over several values.  */
static void
bmi_test()
{
  unsigned i;
  long long src = 0xfacec0ffeefacec0;
  long long res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_blsr_u64 (src, src);
    res = __blsr_u64 (src);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference 32-bit BLSR (src1 and src2 carry the same value).  */
int calc_blsr_u32 (int src1, int src2)
{
  return (src1-1) & (src2);
}
/* Compare __blsr_u32 against the reference over several values.  */
static void
bmi_test ()
{
  unsigned i;
  int src = 0xfacec0ff;
  int res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_blsr_u32 (src, src);
    res = __blsr_u32 (src);
    if (res != res_ref)
      abort();
  }
}
#include <stdio.h>
#include <stdlib.h>
static void bmi_test (void);
/* Shared driver (bmi-check.h): each test file supplies bmi_test.
   Kept out-of-line so the intrinsics under test are not folded into
   main.  */
static void
__attribute__ ((noinline))
do_test (void)
{
  bmi_test ();
}
int
main ()
{
  /* Need 64-bit for 64-bit longs as single instruction. */
  /* Run only when the hardware supports 64-bit mode; otherwise the
     test is silently skipped and still reports success.  */
  if ( __builtin_cpu_supports ("ppc64") )
    {
      do_test ();
#ifdef DEBUG
      printf ("PASSED\n");
#endif
    }
#ifdef DEBUG
  else
    printf ("SKIPPED\n");
#endif
  return 0;
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference TZCNT: count trailing zeros one bit at a time; returns
   64 for a zero source.  */
long long calc_tzcnt_u64 (long long src)
{
  int i;
  int res = 0;
  while ( (res<64) && ((src&1) == 0)) {
    ++res;
    src >>= 1;
  }
  return res;
}
/* Compare __tzcnt_u64 against the reference over several values.  */
static void
bmi_test ()
{
  unsigned i;
  long long src = 0xfacec0ffeefacec0;
  long long res, res_ref;
  for (i=0; i<5; ++i) {
    src = (i + src) << i;
    res_ref = calc_tzcnt_u64 (src);
    res = __tzcnt_u64 (src);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O3 -m64 -fno-inline" } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi-check.h"
/* Reference 32-bit TZCNT: count trailing zeros one bit at a time;
   returns 32 for a zero source.  */
int calc_tzcnt_u32 (int src)
{
  int i;
  int res = 0;
  while ( (res<32) && ((src&1) == 0)) {
    ++res;
    src >>= 1;
  }
  return res;
}
/* Compare __tzcnt_u32 against the reference over several values.  */
static void
bmi_test ()
{
  unsigned i;
  int src = 0xfacec0ff;
  int res, res_ref;
  for (i=0; i<5; ++i) {
    src = i + (src << i);
    res_ref = calc_tzcnt_u32 (src);
    res = __tzcnt_u32 (src);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Reference BZHI: clear the (32 - l) high bits of 'a' one bit at a
   time.  Use an unsigned constant for the bit mask — the original
   '1 << (31 - i)' left-shifts into the sign bit when i == 0, which
   is undefined behavior for signed int.  */
__attribute__((noinline))
unsigned
calc_bzhi_u32 (unsigned a, int l)
{
  unsigned res = a;
  int i;
  for (i = 0; i < 32 - l; ++i)
    res &= ~(1U << (31 - i));
  return res;
}
/* Compare _bzhi_u32 against the reference for growing indices.  */
static void
bmi2_test ()
{
  unsigned i;
  unsigned src = 0xce7ace0f;
  unsigned res, res_ref;
  for (i = 0; i < 5; ++i) {
    src = src * (i + 1);
    res_ref = calc_bzhi_u32 (src, i * 2);
    res = _bzhi_u32 (src, i * 2);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Reference 64-bit BZHI: clear the (64 - l) high bits of 'a' one
   bit at a time.  Use an unsigned long long constant — the original
   '1LL << (63 - i)' shifts into the sign bit when i == 0, which is
   undefined behavior for signed long long.  */
__attribute__((noinline))
unsigned long long
calc_bzhi_u64 (unsigned long long a, int l)
{
  unsigned long long res = a;
  int i;
  for (i = 0; i < 64 - l; ++i)
    res &= ~(1ULL << (63 - i));
  return res;
}
/* Compare _bzhi_u64 against the reference for growing indices.  */
static void
bmi2_test ()
{
  unsigned i;
  unsigned long long src = 0xce7ace0ce7ace0ff;
  unsigned long long res, res_ref;
  for (i = 0; i < 5; ++i) {
    src = src * (i + 1);
    res_ref = calc_bzhi_u64 (src, i * 2);
    res = _bzhi_u64 (src, i * 2);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do compile } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
/* Code-generation check: sums _bzhi_u64 over several constant
   indices; the dg-final scans below require each call to collapse
   to a single rldicl.  */
unsigned long long
test__bzhi_u64_group (unsigned long long a)
{
  /* bzhi is implemented in source as shift left then shift right
     to clear the high order bits.
     For the case where the starting index is const, the compiler
     should reduces this to a single Rotate Left Doubleword
     Immediate then Clear Left (rldicl) instruction. */
  unsigned long long res;
  res = _bzhi_u64 (a, 8);
  res += _bzhi_u64 (a, 16);
  res += _bzhi_u64 (a, 24);
  res += _bzhi_u64 (a, 32);
  res += _bzhi_u64 (a, 40);
  res += _bzhi_u64 (a, 48);
  return (res);
}
/* the resulting assembler should have 6 X rldicl and no sld or
srd instructions. */
/* { dg-final { scan-assembler-times "rldicl" 6 } } */
/* { dg-final { scan-assembler-not "sld" } } */
/* { dg-final { scan-assembler-not "srd" } } */
#include <stdio.h>
#include <stdlib.h>
static void bmi2_test (void);
/* Shared driver (bmi2-check.h): each test file supplies bmi2_test.  */
static void
__attribute__ ((noinline))
do_test (void)
{
  bmi2_test ();
}
int
main ()
{
  /* The BMI2 test for pext test requires the Bit Permute doubleword
     (bpermd) instruction added in PowerISA 2.06 along with the VSX
     facility. So we can test for arch_2_06. */
  if ( __builtin_cpu_supports ("arch_2_06") )
    {
      do_test ();
#ifdef DEBUG
      printf ("PASSED\n");
#endif
    }
#ifdef DEBUG
  else
    printf ("SKIPPED\n");
#endif
  return 0;
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include "bmi2-check.h"
/* Reference widening multiply by repeated addition; 'volatile' on
   'a' keeps the compiler from strength-reducing the loop to a
   multiply.  */
__attribute__((noinline))
unsigned long long
calc_mul_u32 (unsigned volatile a, unsigned b)
{
  unsigned long long res = 0;
  int i;
  for (i = 0; i < b; ++i)
    res += a;
  return res;
}
/* The pattern the compiler should turn into a single widening
   multiply (mulhwu/mullw pair or equivalent).  */
__attribute__((noinline))
unsigned long long
gen_mulx (unsigned a, unsigned b)
{
  unsigned long long res;
  res = (unsigned long long)a * b;
  return res;
}
/* Compare the generated multiply against the additive reference.  */
static void
bmi2_test ()
{
  unsigned i;
  unsigned a = 0xce7ace0;
  unsigned b = 0xfacefff;
  unsigned long long res, res_ref;
  for (i = 0; i < 5; ++i) {
    a = a * (i + 1);
    b = b / (i + 1);
    res_ref = calc_mul_u32 (a, b);
    res = gen_mulx (a, b);
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Reference widening multiply by repeated addition; 'volatile'
   prevents strength reduction of the loop.  */
__attribute__((noinline))
unsigned long long
calc_mul_u32 (unsigned volatile a, unsigned b)
{
  unsigned long long res = 0;
  int i;
  for (i = 0; i < b; ++i)
    res += a;
  return res;
}
/* Out-of-line wrapper so the _mulx_u32 intrinsic is tested through
   a call boundary.  */
__attribute__((noinline))
unsigned calc_mulx_u32 (unsigned x, unsigned y, unsigned *res_h)
{
  return (unsigned) _mulx_u32 (x, y, res_h);
}
/* Reassemble the 64-bit product from the lo/hi halves returned by
   _mulx_u32 and compare against the additive reference.  */
static void
bmi2_test ()
{
  unsigned i;
  unsigned a = 0xce7ace0;
  unsigned b = 0xfacefff;
  unsigned res_l, res_h;
  unsigned long long res, res_ref;
  for (i = 0; i < 5; ++i) {
    a = a * (i + 1);
    b = b / (i + 1);
    res_ref = calc_mul_u32 (a, b);
    res_l = calc_mulx_u32 (a, b, &res_h);
    res = ((unsigned long long) res_h << 32) | res_l;
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include "bmi2-check.h"
/* Reference 128-bit product by repeated addition; 'volatile'
   prevents strength reduction of the loop.  */
__attribute__((noinline))
unsigned __int128
calc_mul_u64 (unsigned long long volatile a, unsigned long long b)
{
  unsigned __int128 res = 0;
  int i;
  for (i = 0; i < b; ++i)
    res += (unsigned __int128) a;
  return res;
}
/* Compare a direct __int128 widening multiply against the additive
   reference.  */
static void
bmi2_test ()
{
  unsigned i;
  unsigned long long a = 0xce7ace0ce7ace0;
  unsigned long long b = 0xface;
  unsigned __int128 res, res_ref;
  for (i=0; i<5; ++i) {
    a = a * (i + 1);
    b = b / (i + 1);
    res_ref = calc_mul_u64 (a, b);
    res = (unsigned __int128) a * b;
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Reference 128-bit product by repeated addition; 'volatile'
   prevents strength reduction of the loop.  */
__attribute__((noinline))
unsigned __int128
calc_mul_u64 (unsigned long long volatile a, unsigned long long b)
{
  unsigned __int128 res = 0;
  int i;
  for (i = 0; i < b; ++i)
    res += (unsigned __int128) a;
  return res;
}
/* Out-of-line wrapper so the _mulx_u64 intrinsic is tested through
   a call boundary.  */
__attribute__((noinline))
unsigned long long
calc_mulx_u64 (unsigned long long x,
	       unsigned long long y,
	       unsigned long long *res_h)
{
  return _mulx_u64 (x, y, res_h);
}
/* Reassemble the 128-bit product from the lo/hi halves returned by
   _mulx_u64 and compare against the additive reference.  */
static void
bmi2_test ()
{
  unsigned i;
  unsigned long long a = 0xce7ace0ce7ace0;
  unsigned long long b = 0xface;
  unsigned long long res_l, res_h;
  unsigned __int128 res, res_ref;
  for (i=0; i<5; ++i) {
    a = a * (i + 1);
    b = b / (i + 1);
    res_ref = calc_mul_u64 (a, b);
    res_l = calc_mulx_u64 (a, b, &res_h);
    res = ((unsigned __int128) res_h << 64) | res_l;
    if (res != res_ref)
      abort();
  }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Scalar reference implementation of the BMI2 PDEP (parallel bit
   deposit) operation on 32-bit values: successive low-order bits of A
   are deposited into the result at the positions where MASK has a 1
   bit; all other result bits are 0.  Noinline so the test exercises
   the intrinsic rather than folding both sides together.  */
__attribute__((noinline))
unsigned
calc_pdep_u32 (unsigned a, int mask)
{
  unsigned res = 0;
  int i, k = 0;
  for (i = 0; i < 32; ++i)
    /* 1U, not 1: "1 << 31" overflows signed int, which is undefined
       behavior.  */
    if (mask & (1U << i)) {
      res |= ((a & (1U << k)) >> k) << i;
      ++k;
    }
  return res;
}
/* Cross-check the _pdep_u32 intrinsic against the scalar reference
   calc_pdep_u32 over several value/mask pairs; abort on the first
   mismatch.  */
static void
bmi2_test ()
{
  unsigned round;
  unsigned value = 0xce7acc;
  unsigned expected, actual;

  for (round = 0; round < 5; round++)
    {
      value *= (round + 1);
      expected = calc_pdep_u32 (value, round * 3);
      actual = _pdep_u32 (value, round * 3);
      if (actual != expected)
	abort ();
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Scalar reference implementation of the BMI2 PDEP (parallel bit
   deposit) operation on 64-bit values: successive low-order bits of A
   are deposited into the result at the positions where MASK has a 1
   bit; all other result bits are 0.  Noinline so the test exercises
   the intrinsic rather than folding both sides together.  */
__attribute__((noinline))
unsigned long long
calc_pdep_u64 (unsigned long long a, unsigned long long mask)
{
  unsigned long long res = 0;
  unsigned long long i, k = 0;
  for (i = 0; i < 64; ++i)
    /* 1ULL, not 1LL: "1LL << 63" overflows signed long long, which is
       undefined behavior.  */
    if (mask & (1ULL << i)) {
      res |= ((a & (1ULL << k)) >> k) << i;
      ++k;
    }
  return res;
}
/* Cross-check the _pdep_u64 intrinsic against the scalar reference
   calc_pdep_u64 over several value/mask pairs; abort on the first
   mismatch.  */
static
void
bmi2_test ()
{
  unsigned long long round;
  unsigned long long value = 0xce7acce7acce7ac;
  unsigned long long expected, actual;

  for (round = 0; round < 5; round++)
    {
      value *= (round + 1);
      expected = calc_pdep_u64 (value, ~(round * 3));
      actual = _pdep_u64 (value, ~(round * 3));
      if (actual != expected)
	abort ();
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Scalar reference implementation of the BMI2 PEXT (parallel bit
   extract) operation on 32-bit values: the bits of A selected by the
   1 bits of MASK are packed into the low-order bits of the result.
   Noinline so the test exercises the intrinsic rather than folding
   both sides together.  */
__attribute__((noinline))
unsigned
calc_pext_u32 (unsigned a, unsigned mask)
{
  unsigned res = 0;
  int i, k = 0;
  for (i = 0; i < 32; ++i)
    /* 1U, not 1: "1 << 31" overflows signed int, which is undefined
       behavior.  */
    if (mask & (1U << i)) {
      res |= ((a & (1U << i)) >> i) << k;
      ++k;
    }
  return res;
}
/* Cross-check the _pext_u32 intrinsic against the scalar reference
   calc_pext_u32 over several value/mask pairs; abort on the first
   mismatch.  */
static void
bmi2_test ()
{
  unsigned round;
  unsigned value = 0xce7acc;
  unsigned expected, actual;

  for (round = 0; round < 5; round++)
    {
      value *= (round + 1);
      expected = calc_pext_u32 (value, ~(round * 3));
      actual = _pext_u32 (value, ~(round * 3));
      if (actual != expected)
	abort ();
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
#include "bmi2-check.h"
/* Scalar reference implementation of the BMI2 PEXT (parallel bit
   extract) operation on 64-bit values: the bits of A selected by the
   1 bits of MASK are packed into the low-order bits of the result.
   Noinline so the test exercises the intrinsic rather than folding
   both sides together.  */
__attribute__((noinline))
unsigned long long
calc_pext_u64 (unsigned long long a, unsigned long long mask)
{
  unsigned long long res = 0;
  int i, k = 0;
  for (i = 0; i < 64; ++i)
    /* 1ULL, not 1LL: "1LL << 63" overflows signed long long, which is
       undefined behavior.  */
    if (mask & (1ULL << i)) {
      res |= ((a & (1ULL << i)) >> i) << k;
      ++k;
    }
  return res;
}
/* Cross-check the _pext_u64 intrinsic against the scalar reference
   calc_pext_u64 over several value/mask pairs; abort on the first
   mismatch.  */
static void
bmi2_test ()
{
  unsigned long long round;
  unsigned long long value = 0xce7acce7acce7ac;
  unsigned long long expected, actual;

  for (round = 0; round < 5; round++)
    {
      value *= (round + 1);
      expected = calc_pext_u64 (value, ~(round * 3));
      actual = _pext_u64 (value, ~(round * 3));
      if (actual != expected)
	abort ();
    }
}
/* { dg-do compile } */
/* { dg-options "-O3 -m64 -mcpu=power7" } */
/* { dg-require-effective-target powerpc_vsx_ok } */
#define NO_WARN_X86_INTRINSICS 1
#include <x86intrin.h>
unsigned long long
test__pexp_cmask_u64 (unsigned long long a[4])
{
  /* The _pext implementation is nominally a popcount of the mask,
     followed by a loop that uses count-leading-zeros to locate the
     next bit to process.
     With a constant mask, the popcount should be folded and constant
     propagation should eliminate the mask-generation loop, leaving a
     single constant bpermd permute control word.
     This function verifies that the compiler replaces the mask
     popcount and loop with a constant bperm control word and emits
     just the bpermd for each extract (see the dg-final scans).  */
  const unsigned long mask = 0x00000000100000a4UL;
  unsigned long acc;

  acc = _pext_u64 (a[0], mask);
  acc = (acc << 8) | _pext_u64 (a[1], mask);
  acc = (acc << 8) | _pext_u64 (a[2], mask);
  acc = (acc << 8) | _pext_u64 (a[3], mask);
  return (acc);
}
/* the resulting assembler should have 4 X bpermd and no popcntd or
cntlzd instructions. */
/* { dg-final { scan-assembler-times "bpermd" 4 } } */
/* { dg-final { scan-assembler-not "popcntd" } } */
/* { dg-final { scan-assembler-not "cntlzd" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment