Commit 95a3261e by H.J. Lu Committed by H.J. Lu

sse4_2-check.h: New.

2007-06-02  H.J. Lu  <hongjiu.lu@intel.com>

	* gcc.target/i386/sse4_2-check.h: New.
	* gcc.target/i386/sse4_2-crc32b.c: Likewise.
	* gcc.target/i386/sse4_2-crc32.h: Likewise.
	* gcc.target/i386/sse4_2-crc32l.c: Likewise.
	* gcc.target/i386/sse4_2-crc32q.c: Likewise.
	* gcc.target/i386/sse4_2-crc32w.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpestri-1.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpestri-2.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpestrm-1.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpestrm-2.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpgtq.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpistri-1.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpistri-2.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpistrm-1.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpistrm-2.c: Likewise.
	* gcc.target/i386/sse4_2-pcmpstr.h: Likewise.
	* gcc.target/i386/sse4_2-popcnt.h: Likewise.
	* gcc.target/i386/sse4_2-popcntl.c: Likewise.
	* gcc.target/i386/sse4_2-popcntq.c: Likewise.

From-SVN: r125281
parent c7a69424
2007-06-02 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/sse4_2-check.h: New.
* gcc.target/i386/sse4_2-crc32b.c: Likewise.
* gcc.target/i386/sse4_2-crc32.h: Likewise.
* gcc.target/i386/sse4_2-crc32l.c: Likewise.
* gcc.target/i386/sse4_2-crc32q.c: Likewise.
* gcc.target/i386/sse4_2-crc32w.c: Likewise.
* gcc.target/i386/sse4_2-pcmpestri-1.c: Likewise.
* gcc.target/i386/sse4_2-pcmpestri-2.c: Likewise.
* gcc.target/i386/sse4_2-pcmpestrm-1.c: Likewise.
* gcc.target/i386/sse4_2-pcmpestrm-2.c: Likewise.
* gcc.target/i386/sse4_2-pcmpgtq.c: Likewise.
* gcc.target/i386/sse4_2-pcmpistri-1.c: Likewise.
* gcc.target/i386/sse4_2-pcmpistri-2.c: Likewise.
* gcc.target/i386/sse4_2-pcmpistrm-1.c: Likewise.
* gcc.target/i386/sse4_2-pcmpistrm-2.c: Likewise.
* gcc.target/i386/sse4_2-pcmpstr.h: Likewise.
* gcc.target/i386/sse4_2-popcnt.h: Likewise.
* gcc.target/i386/sse4_2-popcntl.c: Likewise.
* gcc.target/i386/sse4_2-popcntq.c: Likewise.
2007-06-01 Geoffrey Keating <geoffk@apple.com>
* gcc.dg/pie-link.c: New test.
#include <stdio.h>
#include <stdlib.h>
#include "../../gcc.dg/i386-cpuid.h"
static void sse4_2_test (void);
/* Test driver entry point.  Reads the CPUID feature word through
   i386_cpuid_ecx and calls sse4_2_test only when the SSE4.2 bit is
   set.  The process always exits with status 0.  */
int
main ()
{
  const unsigned long features = i386_cpuid_ecx ();
  const int has_sse4_2 = (features & bit_SSE4_2) != 0;

  /* The test body is skipped on hosts without SSE4.2 support.  */
  if (has_sse4_2)
    sse4_2_test ();

  exit (0);
}
#include "sse4_2-check.h"
#include <nmmintrin.h>
#include <string.h>
#define POLYNOMIAL 0x11EDC6F41LL
#define MAX_BUF 16
/* Shift the LEN-byte buffer BUF left by one bit, in place.  The top
   bit of byte k-1 is carried into bit 0 of byte k; the top bit of the
   last byte is dropped.  */
static void
shift_mem_by1 (unsigned char* buf, int len)
{
  int pos = len;

  /* Walk from the highest index down so every carry is read from a
     byte that has not been shifted yet.  */
  while (pos-- > 0)
    {
      unsigned char carry = 0;

      if (pos > 0 && (buf[pos - 1] & 0x80) != 0)
        carry = 1;
      buf[pos] = (unsigned char) ((buf[pos] << 1) | carry);
    }
}
/* One subtraction step of the polynomial division: XOR the first five
   bytes of DIV into the first five bytes of BUF.  */
static void
do_div (unsigned char* buf, unsigned char* div)
{
  unsigned char *const stop = buf + 5;

  while (buf != stop)
    *buf++ ^= *div++;
}
/* Compute the 32-bit remainder of the LEN-bit message in BUF divided
   by POLYNOMIAL, using bit-at-a-time long division.  LEN is a bit
   count; LEN / 8 bytes of BUF are copied into a local working buffer,
   so BUF itself is not modified.  Returns the four bytes that follow
   the first byte of the 5-byte division window, reinterpreted as an
   unsigned int.  */
static unsigned int
calc_rem (unsigned char* buf, int len)
{
/* Byte view of the divisor, handed to do_div.
   NOTE(review): reading the bytes of `ll' in this order presumably
   assumes a little-endian host -- confirm.  */
union
{
unsigned long long ll;
unsigned char c[8];
} divisor;
/* Byte view used to assemble the returned remainder.  */
union
{
unsigned int i;
unsigned char c[4];
} ret;
unsigned char *div_buf;
unsigned char divident[MAX_BUF];
int disp = len / 8;
int i;
/* Shift the polynomial left by 7 bits before it is viewed as bytes.  */
divisor.ll = POLYNOMIAL << 7LL;
memcpy (divident, buf, disp);
/* The division window is the last five bytes of the working copy.  */
div_buf = divident + disp - 5;
/* One iteration per message bit beyond the 32 remainder bits.  */
for (i = 0; i < len - 32; i++)
{
/* Subtract (XOR) the divisor only when the window's top bit is set,
   then shift the whole working buffer left by one bit.  */
if ((div_buf[4] & 0x80))
do_div (div_buf, divisor.c);
shift_mem_by1 (divident, disp);
}
memcpy (ret.c, div_buf + 1, sizeof (ret));
return ret.i;
}
/* Reverse the bit order of the whole LEN-byte buffer SRC in place:
   each byte has its bits mirrored, and the byte order is reversed as
   well.  LEN must not exceed MAX_BUF.  */
static void
reverse_bits (unsigned char *src, int len)
{
  unsigned char mirrored[MAX_BUF];
  int in, bit;

  for (in = 0; in < len; in++)
    {
      unsigned char flipped = 0;

      /* Bit BIT of the input byte becomes bit 7 - BIT of the output.  */
      for (bit = 0; bit < 8; bit++)
        if ((src[in] >> bit) & 1)
          flipped |= (unsigned char) (0x80 >> bit);

      /* Input byte IN lands at the mirrored position.  */
      mirrored[len - 1 - in] = flipped;
    }

  for (in = 0; in < len; in++)
    src[in] = mirrored[in];
}
/* Copy the LEN bytes of SRC into DST at a byte offset of SHFT / 8.
   The first LEN + SHFT / 8 bytes of DST are zeroed beforehand, so the
   bytes below the offset end up zero.  SHFT is given in bits, but only
   whole bytes are honoured.  */
static void
shift_mem ( unsigned char *src, unsigned char *dst, int len, int shft)
{
  const int byte_off = shft / 8;
  const unsigned char *in = src;
  const unsigned char *const stop = src + len;
  unsigned char *out;

  memset (dst, 0, len + byte_off);

  out = dst + byte_off;
  while (in < stop)
    *out++ = *in++;
}
/* XOR SRC into DST over LEN bits.  LEN is rounded down to whole
   bytes, so exactly LEN / 8 bytes of DST are updated.  */
static void
xor_mem (unsigned char *src, unsigned char *dst, int len)
{
  int remaining = len / 8;

  for (; remaining > 0; remaining--)
    *dst++ ^= *src++;
}
/* Software reference for the CRC32 intrinsic under test.  Folds the
   input value INP into the running checksum CRC and returns the new
   checksum.  Only the low four bytes of CRC take part, and the result
   is a 32-bit value cast to DST_T.  */
static DST_T
compute_crc32 (DST_T crc, SRC_T inp)
{
unsigned char crcbuf[sizeof (DST_T)];
unsigned char inbuf[sizeof (SRC_T)];
unsigned char tmp1[MAX_BUF], tmp2[MAX_BUF];
int crc_sh, xor_sz;
union
{
unsigned int i;
unsigned char c[4];
} ret;
/* crc_sh: width of the input in bits.  xor_sz: width of the combined
   message (32 CRC bits plus the input bits).  */
crc_sh = sizeof (SRC_T) * 8;
xor_sz = 32 + crc_sh;
memcpy (crcbuf, &crc, sizeof (DST_T));
memcpy (inbuf, &inp, sizeof (SRC_T));
/* Bit-reverse both operands before the division.  */
reverse_bits (crcbuf, 4);
reverse_bits (inbuf, sizeof (SRC_T));
/* tmp1 = input placed above four zero bytes; tmp2 = CRC placed above
   sizeof (SRC_T) zero bytes.  XOR-ing them yields the message that
   is divided.  */
shift_mem (inbuf, tmp1, sizeof (SRC_T), 32);
shift_mem (crcbuf, tmp2, 4, crc_sh);
xor_mem (tmp1, tmp2, xor_sz);
ret.i = calc_rem (tmp2, xor_sz);
/* Undo the bit reversal on the 32-bit remainder.  */
reverse_bits (ret.c, 4);
return (DST_T)ret.i;
}
#define NUM 1024
/* Fill NUM checksum/input pairs with rand () values and verify that
   the CRC32 intrinsic agrees with the software reference
   compute_crc32 on every pair.  Calls abort () on the first
   mismatch.  */
static void
sse4_2_test (void)
{
  DST_T dst[NUM];
  SRC_T src[NUM];
  int i;

  for (i = 0; i < NUM; i++)
    {
      /* For types wider than 4 bytes a second rand () value is OR-ed
         into the upper half so the high bits are exercised too.  */
      dst[i] = rand ();
      if (sizeof (DST_T) > 4)
        dst[i] |= (DST_T)rand () << (DST_T)(sizeof (DST_T) * 4);

      src[i] = rand ();
      /* The shift amount is derived from SRC_T, the type actually
         being widened, rather than from DST_T.  */
      if (sizeof (SRC_T) > 4)
        src[i] |= (SRC_T)rand () << (SRC_T)(sizeof (SRC_T) * 4);
    }

  for (i = 0; i < NUM; i++)
    if (CRC32 (dst[i], src[i]) != compute_crc32 (dst[i], src[i]))
      abort ();
}
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
/* Instantiate the shared CRC32 test body from sse4_2-crc32.h for
   _mm_crc32_u8: unsigned int checksum, unsigned char input.  */
#define CRC32 _mm_crc32_u8
#define DST_T unsigned int
#define SRC_T unsigned char
#include "sse4_2-crc32.h"
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
/* Instantiate the shared CRC32 test body from sse4_2-crc32.h for
   _mm_crc32_u32: unsigned int checksum, unsigned int input.  */
#define CRC32 _mm_crc32_u32
#define DST_T unsigned int
#define SRC_T unsigned int
#include "sse4_2-crc32.h"
/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
/* Instantiate the shared CRC32 test body from sse4_2-crc32.h for
   _mm_crc32_u64: unsigned long long checksum and input.  The dg-do
   line above restricts this test to lp64 targets.  */
#define CRC32 _mm_crc32_u64
#define DST_T unsigned long long
#define SRC_T unsigned long long
#include "sse4_2-crc32.h"
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
/* Instantiate the shared CRC32 test body from sse4_2-crc32.h for
   _mm_crc32_u16: unsigned int checksum, unsigned short input.  */
#define CRC32 _mm_crc32_u16
#define DST_T unsigned int
#define SRC_T unsigned short
#include "sse4_2-crc32.h"
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_MOST_SIGNIFICANT)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_MASKED_NEGATIVE_POLARITY | SIDD_LEAST_SIGNIFICANT)
/* Run one explicit-length index comparison with the compile-time
   immediate IMM and compute the reference index for it.  The
   immediate has to be a constant, hence a macro and not a function.  */
#define ESTRI_CASE(IMM) \
  do \
    { \
      res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM); \
      correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM, NULL); \
    } \
  while (0)

/* Compare _mm_cmpestri against the software model cmp_ei on NUM
   random operand pairs, with lengths drawn from 0..17 and one of the
   four IMM_VAL* modes picked at random per pair.  Calls abort () on
   the first mismatch.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  int res, correct, l1, l2;
  int i;

  /* Fill both operand arrays byte by byte with rand () values.  */
  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      l1 = rand () % 18;
      l2 = rand () % 18;

      switch (rand () % 4)
        {
        case 0:
          ESTRI_CASE (IMM_VAL0);
          break;
        case 1:
          ESTRI_CASE (IMM_VAL1);
          break;
        case 2:
          ESTRI_CASE (IMM_VAL2);
          break;
        default:
          ESTRI_CASE (IMM_VAL3);
          break;
        }

      if (res != correct)
        abort ();
    }
}
#undef ESTRI_CASE
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_MOST_SIGNIFICANT)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_MASKED_NEGATIVE_POLARITY | SIDD_LEAST_SIGNIFICANT)
/* Run the explicit-length index comparison and all five flag
   intrinsics with the compile-time immediate IMM, then compute the
   reference index and reference flags for the same operands.  */
#define ESTRI_FLAGS_CASE(IMM) \
  do \
    { \
      res = _mm_cmpestri (src1.x[i], l1, src2.x[i], l2, IMM); \
      cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM); \
      zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM); \
      sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM); \
      of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM); \
      af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM); \
      correct = cmp_ei (&src1.x[i], l1, &src2.x[i], l2, IMM, \
                        &correct_flags); \
    } \
  while (0)

/* Compare _mm_cmpestri and the _mm_cmpestr{c,z,s,o,a} flag intrinsics
   against the software model cmp_ei on NUM random operand pairs.
   Calls abort () when the index or the flags disagree.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  int res, correct, correct_flags, l1, l2;
  int flags, cf, zf, sf, of, af;
  int i;

  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      l1 = rand () % 18;
      l2 = rand () % 18;

      switch (rand () % 4)
        {
        case 0:
          ESTRI_FLAGS_CASE (IMM_VAL0);
          break;
        case 1:
          ESTRI_FLAGS_CASE (IMM_VAL1);
          break;
        case 2:
          ESTRI_FLAGS_CASE (IMM_VAL2);
          break;
        default:
          ESTRI_FLAGS_CASE (IMM_VAL3);
          break;
        }

      if (res != correct)
        abort ();

      /* Pack the individually read flags the same way cmp_flags does.  */
      flags = (cf ? CFLAG : 0) | (zf ? ZFLAG : 0)
              | (sf ? SFLAG : 0) | (of ? OFLAG : 0);
      if (flags != correct_flags)
        abort ();

      /* The "above" result must be the exact complement of
         (cf || zf).  */
      if ((af != 0) == (cf || zf))
        abort ();
    }
}
#undef ESTRI_FLAGS_CASE
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_BIT_MASK)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK)
/* Run one explicit-length mask comparison with the compile-time
   immediate IMM and compute the reference mask for it.  */
#define ESTRM_CASE(IMM) \
  do \
    { \
      res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM); \
      correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM, NULL); \
    } \
  while (0)

/* Compare _mm_cmpestrm against the software model cmp_em on NUM
   random operand pairs, with lengths drawn from 0..17 and one of the
   four IMM_VAL* modes picked at random per pair.  Calls abort () on
   the first mismatch.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  __m128i res, correct;
  int l1, l2;
  int i;

  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      l1 = rand () % 18;
      l2 = rand () % 18;

      switch (rand () % 4)
        {
        case 0:
          ESTRM_CASE (IMM_VAL0);
          break;
        case 1:
          ESTRM_CASE (IMM_VAL1);
          break;
        case 2:
          ESTRM_CASE (IMM_VAL2);
          break;
        default:
          ESTRM_CASE (IMM_VAL3);
          break;
        }

      /* The masks are compared bytewise.  */
      if (memcmp (&correct, &res, sizeof (res)) != 0)
        abort ();
    }
}
#undef ESTRM_CASE
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_BIT_MASK)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK)
/* Run the explicit-length mask comparison and all five flag
   intrinsics with the compile-time immediate IMM, then compute the
   reference mask and reference flags for the same operands.  */
#define ESTRM_FLAGS_CASE(IMM) \
  do \
    { \
      res = _mm_cmpestrm (src1.x[i], l1, src2.x[i], l2, IMM); \
      cf = _mm_cmpestrc (src1.x[i], l1, src2.x[i], l2, IMM); \
      zf = _mm_cmpestrz (src1.x[i], l1, src2.x[i], l2, IMM); \
      sf = _mm_cmpestrs (src1.x[i], l1, src2.x[i], l2, IMM); \
      of = _mm_cmpestro (src1.x[i], l1, src2.x[i], l2, IMM); \
      af = _mm_cmpestra (src1.x[i], l1, src2.x[i], l2, IMM); \
      correct = cmp_em (&src1.x[i], l1, &src2.x[i], l2, IMM, \
                        &correct_flags); \
    } \
  while (0)

/* Compare _mm_cmpestrm and the _mm_cmpestr{c,z,s,o,a} flag intrinsics
   against the software model cmp_em on NUM random operand pairs.
   Calls abort () when the mask or the flags disagree.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  __m128i res, correct;
  int correct_flags, l1, l2;
  int flags, cf, zf, sf, of, af;
  int i;

  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      l1 = rand () % 18;
      l2 = rand () % 18;

      switch (rand () % 4)
        {
        case 0:
          ESTRM_FLAGS_CASE (IMM_VAL0);
          break;
        case 1:
          ESTRM_FLAGS_CASE (IMM_VAL1);
          break;
        case 2:
          ESTRM_FLAGS_CASE (IMM_VAL2);
          break;
        default:
          ESTRM_FLAGS_CASE (IMM_VAL3);
          break;
        }

      if (memcmp (&correct, &res, sizeof (res)) != 0)
        abort ();

      /* Pack the individually read flags the same way cmp_flags does.  */
      flags = (cf ? CFLAG : 0) | (zf ? ZFLAG : 0)
              | (sf ? SFLAG : 0) | (of ? OFLAG : 0);
      if (flags != correct_flags)
        abort ();

      /* The "above" result must be the exact complement of
         (cf || zf).  */
      if ((af != 0) == (cf || zf))
        abort ();
    }
}
#undef ESTRM_FLAGS_CASE
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include <nmmintrin.h>
#define NUM 64
/* Check _mm_cmpgt_epi64 lane by lane.  The inputs are NUM signed
   64-bit values per operand with alternating sign; every result lane
   must be all ones when the first operand is greater and zero
   otherwise.  Calls abort () on the first mismatch.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM / 2];
      long long ll[NUM];
    } dst, src1, src2;
  int i, pair, sign = 1;
  long long expected;

  /* src1 holds +-i*i and src2 holds +-(i + 20); the sign flips on
     every element.  */
  for (i = 0; i < NUM; i++, sign = -sign)
    {
      src1.ll[i] = i * i * sign;
      src2.ll[i] = (i + 20) * sign;
    }

  /* Each vector covers two consecutive 64-bit lanes.  */
  for (pair = 0; 2 * pair < NUM; pair++)
    dst.x[pair] = _mm_cmpgt_epi64 (src1.x[pair], src2.x[pair]);

  for (i = 0; i < NUM; i++)
    {
      if (src1.ll[i] > src2.ll[i])
        expected = 0xFFFFFFFFFFFFFFFFLL;
      else
        expected = 0LL;

      if (dst.ll[i] != expected)
        abort ();
    }
}
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_MOST_SIGNIFICANT)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_MASKED_NEGATIVE_POLARITY | SIDD_MOST_SIGNIFICANT)
/* Run one implicit-length index comparison with the compile-time
   immediate IMM and compute the reference index for it.  */
#define ISTRI_CASE(IMM) \
  do \
    { \
      res = _mm_cmpistri (src1.x[i], src2.x[i], IMM); \
      correct = cmp_ii (&src1.x[i], &src2.x[i], IMM, NULL); \
    } \
  while (0)

/* Compare _mm_cmpistri against the software model cmp_ii on NUM
   random operand pairs, with one of the four IMM_VAL* modes picked at
   random per pair.  Calls abort () on the first mismatch.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  int res, correct;
  int i;

  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      switch (rand () % 4)
        {
        case 0:
          ISTRI_CASE (IMM_VAL0);
          break;
        case 1:
          ISTRI_CASE (IMM_VAL1);
          break;
        case 2:
          ISTRI_CASE (IMM_VAL2);
          break;
        default:
          ISTRI_CASE (IMM_VAL3);
          break;
        }

      if (res != correct)
        abort ();
    }
}
#undef ISTRI_CASE
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_MOST_SIGNIFICANT)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_MASKED_NEGATIVE_POLARITY | SIDD_MOST_SIGNIFICANT)
/* Run the implicit-length index comparison and all five flag
   intrinsics with the compile-time immediate IMM, then compute the
   reference index and reference flags for the same operands.  */
#define ISTRI_FLAGS_CASE(IMM) \
  do \
    { \
      res = _mm_cmpistri (src1.x[i], src2.x[i], IMM); \
      cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM); \
      zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM); \
      sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM); \
      of = _mm_cmpistro (src1.x[i], src2.x[i], IMM); \
      af = _mm_cmpistra (src1.x[i], src2.x[i], IMM); \
      correct = cmp_ii (&src1.x[i], &src2.x[i], IMM, \
                        &correct_flags); \
    } \
  while (0)

/* Compare _mm_cmpistri and the _mm_cmpistr{c,z,s,o,a} flag intrinsics
   against the software model cmp_ii on NUM random operand pairs.
   Calls abort () when the index or the flags disagree.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  int res, correct, correct_flags;
  int flags, cf, zf, sf, of, af;
  int i;

  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      switch (rand () % 4)
        {
        case 0:
          ISTRI_FLAGS_CASE (IMM_VAL0);
          break;
        case 1:
          ISTRI_FLAGS_CASE (IMM_VAL1);
          break;
        case 2:
          ISTRI_FLAGS_CASE (IMM_VAL2);
          break;
        default:
          ISTRI_FLAGS_CASE (IMM_VAL3);
          break;
        }

      if (res != correct)
        abort ();

      /* Pack the individually read flags the same way cmp_flags does.  */
      flags = (cf ? CFLAG : 0) | (zf ? ZFLAG : 0)
              | (sf ? SFLAG : 0) | (of ? OFLAG : 0);
      if (flags != correct_flags)
        abort ();

      /* The "above" result must be the exact complement of
         (cf || zf).  */
      if ((af != 0) == (cf || zf))
        abort ();
    }
}
#undef ISTRI_FLAGS_CASE
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_BIT_MASK)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_MASKED_NEGATIVE_POLARITY | SIDD_UNIT_MASK)
/* Run one implicit-length mask comparison with the compile-time
   immediate IMM and compute the reference mask for it.  */
#define ISTRM_CASE(IMM) \
  do \
    { \
      res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM); \
      correct = cmp_im (&src1.x[i], &src2.x[i], IMM, NULL); \
    } \
  while (0)

/* Compare _mm_cmpistrm against the software model cmp_im on NUM
   random operand pairs, with one of the four IMM_VAL* modes picked at
   random per pair.  Calls abort () on the first mismatch.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  __m128i res, correct;
  int i;

  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      switch (rand () % 4)
        {
        case 0:
          ISTRM_CASE (IMM_VAL0);
          break;
        case 1:
          ISTRM_CASE (IMM_VAL1);
          break;
        case 2:
          ISTRM_CASE (IMM_VAL2);
          break;
        default:
          ISTRM_CASE (IMM_VAL3);
          break;
        }

      /* The masks are compared bytewise.  */
      if (memcmp (&correct, &res, sizeof (res)) != 0)
        abort ();
    }
}
#undef ISTRM_CASE
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
#include "sse4_2-check.h"
#include "sse4_2-pcmpstr.h"
#define NUM 1024
#define IMM_VAL0 \
(SIDD_SBYTE_OPS | SIDD_CMP_RANGES | SIDD_MASKED_POSITIVE_POLARITY)
#define IMM_VAL1 \
(SIDD_UBYTE_OPS | SIDD_CMP_EQUAL_EACH | SIDD_NEGATIVE_POLARITY \
| SIDD_BIT_MASK)
#define IMM_VAL2 \
(SIDD_UWORD_OPS | SIDD_CMP_EQUAL_ANY | SIDD_MASKED_NEGATIVE_POLARITY)
#define IMM_VAL3 \
(SIDD_SWORD_OPS | SIDD_CMP_EQUAL_ORDERED \
| SIDD_POSITIVE_POLARITY | SIDD_UNIT_MASK)
/* Run the implicit-length mask comparison and all five flag
   intrinsics with the compile-time immediate IMM, then compute the
   reference mask and reference flags for the same operands.  */
#define ISTRM_FLAGS_CASE(IMM) \
  do \
    { \
      res = _mm_cmpistrm (src1.x[i], src2.x[i], IMM); \
      cf = _mm_cmpistrc (src1.x[i], src2.x[i], IMM); \
      zf = _mm_cmpistrz (src1.x[i], src2.x[i], IMM); \
      sf = _mm_cmpistrs (src1.x[i], src2.x[i], IMM); \
      of = _mm_cmpistro (src1.x[i], src2.x[i], IMM); \
      af = _mm_cmpistra (src1.x[i], src2.x[i], IMM); \
      correct = cmp_im (&src1.x[i], &src2.x[i], IMM, \
                        &correct_flags); \
    } \
  while (0)

/* Compare _mm_cmpistrm and the _mm_cmpistr{c,z,s,o,a} flag intrinsics
   against the software model cmp_im on NUM random operand pairs.
   Calls abort () when the mask or the flags disagree.  */
static void
sse4_2_test (void)
{
  union
    {
      __m128i x[NUM];
      char c[NUM *16];
    } src1, src2;
  __m128i res, correct;
  int correct_flags;
  int flags, cf, zf, sf, of, af;
  int i;

  for (i = 0; i < NUM *16; i++)
    {
      src1.c[i] = rand ();
      src2.c[i] = rand ();
    }

  for (i = 0; i < NUM; i++)
    {
      switch (rand () % 4)
        {
        case 0:
          ISTRM_FLAGS_CASE (IMM_VAL0);
          break;
        case 1:
          ISTRM_FLAGS_CASE (IMM_VAL1);
          break;
        case 2:
          ISTRM_FLAGS_CASE (IMM_VAL2);
          break;
        default:
          ISTRM_FLAGS_CASE (IMM_VAL3);
          break;
        }

      if (memcmp (&correct, &res, sizeof (res)) != 0)
        abort ();

      /* Pack the individually read flags the same way cmp_flags does.  */
      flags = (cf ? CFLAG : 0) | (zf ? ZFLAG : 0)
              | (sf ? SFLAG : 0) | (of ? OFLAG : 0);
      if (flags != correct_flags)
        abort ();

      /* The "above" result must be the exact complement of
         (cf || zf).  */
      if ((af != 0) == (cf || zf))
        abort ();
    }
}
#undef ISTRM_FLAGS_CASE
#include <nmmintrin.h>
#include <string.h>
#define CFLAG 0x00000001
#define ZFLAG 0x00000002
#define SFLAG 0x00000004
#define OFLAG 0x00000008
#define AFLAG 0x00000010
#define PFLAG 0x00000020
/* Fill the comparison matrix RES with element-wise equality results:
   RES[j][i] is 1 when X[i] == Y[j] and 0 otherwise.  The element
   count is derived from the element size of X:
   (sizeof (*X) ^ 3) * 8 gives 16 for 1-byte elements and 8 for 2-byte
   elements.  The expansion is a bare brace block, so the macro must
   be used where a compound statement is valid.  */
#define PCMPSTR_EQ(X, Y, RES) \
{ \
int __size = (sizeof (*X) ^ 3) * 8; \
int __i, __j; \
for (__i = 0; __i < __size; __i++) \
for (__j = 0; __j < __size; __j++) \
RES[__j][__i] = (X[__i] == Y[__j]); \
}
/* Fill the comparison matrix RES for range matching.  X is read as
   consecutive (low, high) pairs: for each pair starting at even index
   i, RES[j][i] is 1 when Y[j] >= X[i] and RES[j][i + 1] is 1 when
   Y[j] <= X[i + 1].  The element count is derived from the element
   size of X exactly as in PCMPSTR_EQ (16 for 1-byte elements, 8 for
   2-byte elements).  The expansion is a bare brace block.  */
#define PCMPSTR_RNG(X, Y, RES) \
{ \
int __size = (sizeof (*X) ^ 3) * 8; \
int __i, __j; \
for (__j = 0; __j < __size; __j++) \
for (__i = 0; __i < __size - 1; __i += 2) \
{ \
RES[__j][__i] = (Y[__j] >= X[__i]); \
RES[__j][__i+1] = (Y[__j] <= X[__i + 1]); \
} \
}
/* Overwrite the entries of the DIM x DIM comparison matrix RES that
   involve elements past the end of either operand.  Columns index the
   first operand (valid length LA), rows index the second operand
   (valid length LB).  The forced value depends on the aggregation
   mode held in bits 2-3 of MODE.  */
static void
override_invalid (unsigned char res[16][16], int la, int lb,
                  const int mode, int dim)
{
  const int aggregation = mode & 0x0C;
  int row, col;

  for (row = 0; row < dim; row++)
    for (col = 0; col < dim; col++)
      {
        if (col < la)
          {
            /* Valid element of the first operand: only rows past the
               end of the second operand are cleared.  */
            if (row >= lb)
              res[row][col] = 0;
            continue;
          }

        /* From here on the column is past the end of the first
           operand.  */
        if (aggregation == SIDD_CMP_EQUAL_ANY
            || aggregation == SIDD_CMP_RANGES)
          res[row][col] = 0;
        else if (aggregation == SIDD_CMP_EQUAL_EACH)
          res[row][col] = (row >= lb) ? 1 : 0;
        else if (aggregation == SIDD_CMP_EQUAL_ORDERED)
          res[row][col] = 1;
      }
}
/* Build the element-by-element comparison matrix RES for operands A
   and B.  Bits 0-1 of MODE select the element type (unsigned/signed,
   byte/word); bits 2-3 decide between range comparison and equality.
   The entries that involve elements beyond LA / LB are then adjusted
   by override_invalid.  */
static void
calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
             unsigned char res[16][16])
{
  union
    {
      __m128i x;
      signed char sc[16];
      unsigned char uc[16];
      signed short ss[8];
      unsigned short us[8];
    } lhs, rhs;
  const int use_ranges = (mode & 0x0C) == SIDD_CMP_RANGES;
  const int dim = (mode & 1) ? 8 : 16;

  lhs.x = a;
  rhs.x = b;

  /* The PCMPSTR_* macros expand to brace blocks, so every use stays
     inside its own braces.  */
  switch (mode & 3)
    {
    case SIDD_UBYTE_OPS:
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.uc, rhs.uc, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.uc, rhs.uc, res);
        }
      break;

    case SIDD_UWORD_OPS:
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.us, rhs.us, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.us, rhs.us, res);
        }
      break;

    case SIDD_SBYTE_OPS:
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.sc, rhs.sc, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.sc, rhs.sc, res);
        }
      break;

    case SIDD_SWORD_OPS:
      if (use_ranges)
        {
          PCMPSTR_RNG (lhs.ss, rhs.ss, res);
        }
      else
        {
          PCMPSTR_EQ (lhs.ss, rhs.ss, res);
        }
      break;
    }

  override_invalid (res, la, lb, mode, dim);
}
/* Software model of the intermediate result of the string compare
   instructions.  Builds the comparison matrix for operands A and B
   (lengths LA and LB), aggregates it according to bits 2-3 of MODE,
   applies the polarity selected by bits 4-5 of MODE, and returns the
   result masked to one bit per element (8 or 16 bits).  */
static int
calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
{
unsigned char mtx[16][16];
int i, j, k, dim, res = 0;
memset (mtx, 0, sizeof (mtx));
/* Bit 0 of MODE clear: 16 byte elements; set: 8 word elements.  */
dim = (mode & 1) == 0 ? 16 : 8;
/* Lengths are taken by absolute value and saturated at DIM.  */
if (la < 0)
la = -la;
if (lb < 0)
lb = -lb;
if (la > dim)
la = dim;
if (lb > dim)
lb = dim;
calc_matrix (a, la, b, lb, mode, mtx);
switch ((mode & 0x0C))
{
case SIDD_CMP_EQUAL_ANY:
/* Bit i is set when any entry in row i of the matrix is set.  */
for (i = 0; i < dim; i++)
for (j = 0; j < dim; j++)
if (mtx[i][j])
res |= (1 << i);
break;
case SIDD_CMP_RANGES:
/* Bit j is set when both bounds of some column pair (i, i + 1) hold
   for row j.  */
for (i = 0; i < dim; i += 2)
for(j = 0; j < dim; j++)
if (mtx[j][i] && mtx[j][i+1])
res |= (1 << j);
break;
case SIDD_CMP_EQUAL_EACH:
/* Bit i is taken from the diagonal entry.  */
for(i = 0; i < dim; i++)
if (mtx[i][i])
res |= (1 << i);
break;
case SIDD_CMP_EQUAL_ORDERED:
/* Bit i is set when every entry along the diagonal starting at
   row i, column 0 is set.  */
for(i = 0; i < dim; i++)
{
unsigned char val = 1;
for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
val &= mtx[k][j];
if (val)
res |= (1 << i);
else
res &= ~(1 << i);
}
break;
}
switch ((mode & 0x30))
{
case SIDD_POSITIVE_POLARITY:
case SIDD_MASKED_POSITIVE_POLARITY:
break;
case SIDD_NEGATIVE_POLARITY:
/* Invert every bit; the final mask below trims the excess.  */
res ^= -1;
break;
case SIDD_MASKED_NEGATIVE_POLARITY:
/* Invert only the low LB bits (LB was clamped to DIM above).  */
for (i = 0; i < lb; i++)
if (res & (1 << i))
res &= ~(1 << i);
else
res |= (1 << i);
break;
}
return res & ((dim == 8) ? 0xFF : 0xFFFF);
}
/* Compute the expected flag word for a string compare with operands
   A and B and intermediate result RES2.  The returned value is an OR
   of CFLAG, ZFLAG, SFLAG and OFLAG; AFLAG and PFLAG are never set.

   CF is set when RES2 is non-zero and OF mirrors bit 0 of RES2.
   When IS_IMPLICIT is non-zero, ZF/SF are set if any element of B/A
   is zero.  Otherwise ZF/SF are set when the absolute value of LB/LA
   is below the element count (16 for bytes, 8 for words).  */
static int
cmp_flags (__m128i a, int la, __m128i b, int lb,
           int mode, int res2, int is_implicit)
{
  union
    {
      __m128i x;
      unsigned char uc[16];
      unsigned short us[8];
    } dst, src;
  const int byte_elems = (mode & 1) == 0;
  const int nelem = byte_elems ? 16 : 8;
  int result = 0;
  int k;

  dst.x = a;
  src.x = b;

  if (res2 != 0)
    result |= CFLAG;
  if (res2 & 0x1)
    result |= OFLAG;

  if (!is_implicit)
    {
      /* Explicit lengths: compare their magnitudes with the element
         count.  */
      if (la < 0)
        la = -la;
      if (lb < 0)
        lb = -lb;
      if (lb < nelem)
        result |= ZFLAG;
      if (la < nelem)
        result |= SFLAG;
      return result;
    }

  /* Implicit lengths: look for a zero element in either operand.  */
  for (k = 0; k < nelem; k++)
    {
      const int src_zero = byte_elems ? src.uc[k] == 0 : src.us[k] == 0;
      const int dst_zero = byte_elems ? dst.uc[k] == 0 : dst.us[k] == 0;

      if (src_zero)
        result |= ZFLAG;
      if (dst_zero)
        result |= SFLAG;
    }

  return result;
}
/* Software model of the index-returning string compares.  Stores the
   intermediate result from calc_res in *RES2 and returns the position
   of one of its set bits: the highest when bit 6 (0x40) of MODE is
   set, the lowest otherwise.  When no bit is set, the element count
   (16 or 8) is returned.  */
static int
cmp_indexed (__m128i a, int la, __m128i b, int lb,
             const int mode, int *res2)
{
  const int dim = (mode & 1) ? 8 : 16;
  const int r2 = calc_res (a, la, b, lb, mode);
  int ndx = dim;
  int bit;

  if (mode & 0x40)
    {
      /* Scan downwards from the top element.  */
      bit = dim - 1;
      while (bit >= 0 && !(r2 & (1 << bit)))
        bit--;
      if (bit >= 0)
        ndx = bit;
    }
  else
    {
      /* Scan upwards; a miss leaves BIT equal to DIM.  */
      bit = 0;
      while (bit < dim && !(r2 & (1 << bit)))
        bit++;
      ndx = bit;
    }

  *res2 = r2;
  return ndx;
}
/* Software model of the mask-returning string compares.  Stores the
   intermediate result from calc_res in *RES2 and returns it as a
   vector mask.  When bit 6 (0x40) of MODE is set, each element of
   the result is all ones or zero according to the matching bit of
   the intermediate result.  Otherwise the intermediate result is
   copied into the lowest element of an otherwise zero vector, through
   the union's short/char view.  */
static __m128i
cmp_masked (__m128i a, int la, __m128i b, int lb,
            const int mode, int *res2)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } out;
  union
    {
      int i;
      char c[4];
      short s[2];
    } bits;
  const int dim = (mode & 1) == 0 ? 16 : 8;
  int k;

  bits.i = calc_res (a, la, b, lb, mode);
  memset (&out, 0, sizeof (out));

  if (!(mode & 0x40))
    {
      /* Bit mask: 16 result bits for byte elements, 8 for words.  */
      if (dim == 16)
        out.s[0] = bits.s[0];
      else
        out.c[0] = bits.c[0];
    }
  else if (dim == 8)
    {
      /* Unit mask over word elements.  */
      for (k = 0; k < dim; k++)
        out.s[k] = (bits.i & (1 << k)) ? -1 : 0;
    }
  else
    {
      /* Unit mask over byte elements.  */
      for (k = 0; k < dim; k++)
        out.c[k] = (bits.i & (1 << k)) ? -1 : 0;
    }

  *res2 = bits.i;
  return out.x;
}
/* Return the implicit length of operand A: the index of its first
   zero element, or the element count when no element is zero.  Bit 0
   of MODE selects 8 word elements (set) or 16 byte elements
   (clear).  */
static int
calc_str_len (__m128i a, const int mode)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } v;
  const int words = (mode & 1) != 0;
  const int dim = words ? 8 : 16;
  int n = 0;

  v.x = a;

  while (n < dim && (words ? v.s[n] : v.c[n]) != 0)
    n++;

  return n;
}
/* Reference for the explicit-length index compare: returns the
   expected index for *A (length LA) against *B (length LB) under
   MODE.  When FLAGS is not NULL, the expected flag word is stored
   through it.  */
static inline int
cmp_ei (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags)
{
  int res2;
  const int index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags == NULL)
    return index;

  *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
  return index;
}
/* Reference for the implicit-length index compare: the operand
   lengths are derived with calc_str_len, then the expected index for
   *A against *B under MODE is returned.  When FLAGS is not NULL, the
   expected flag word (implicit-length rules) is stored through it.  */
static inline int
cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
{
  const int la = calc_str_len (*a, mode);
  const int lb = calc_str_len (*b, mode);
  int res2;
  const int index = cmp_indexed (*a, la, *b, lb, mode, &res2);

  if (flags == NULL)
    return index;

  *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
  return index;
}
/* Reference for the explicit-length mask compare: returns the
   expected mask for *A (length LA) against *B (length LB) under
   MODE.  When FLAGS is not NULL, the expected flag word is stored
   through it.  */
static inline __m128i
cmp_em (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags )
{
  int res2;
  const __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);

  if (flags == NULL)
    return mask;

  *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0);
  return mask;
}
/* Reference for the implicit-length mask compare: the operand
   lengths are derived with calc_str_len, then the expected mask for
   *A against *B under MODE is returned.  When FLAGS is not NULL, the
   expected flag word (implicit-length rules) is stored through it.  */
static inline __m128i
cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
{
  const int la = calc_str_len (*a, mode);
  const int lb = calc_str_len (*b, mode);
  int res2;
  const __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2);

  if (flags == NULL)
    return mask;

  *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
  return mask;
}
#include "sse4_2-check.h"
#include <nmmintrin.h>
#define NUM 1024
/* Software reference for the POPCNT intrinsic: count the set bits of
   V by testing every bit position of TYPE in turn.  */
static int
compute_popcnt (TYPE v)
{
  int count = 0;
  int remaining = (int) (sizeof (v) * 8);

  /* Visit the positions from the top bit down to bit 0.  */
  while (remaining-- > 0)
    if ((v & ((TYPE)1 << (TYPE) remaining)) != 0)
      count++;

  return count;
}
/* Check the POPCNT intrinsic against compute_popcnt on NUM values
   built from rand ().  Calls abort () on the first mismatch.  */
static void
sse4_2_test (void)
{
  TYPE vals[NUM];
  TYPE res;
  int i;

  for (i = 0; i < NUM; i++)
    {
      vals[i] = rand ();
      /* For types wider than 4 bytes a second rand () value is OR-ed
         into the upper half.  */
      if (sizeof (TYPE) > 4)
        vals[i] |= (TYPE)rand() << (TYPE)(sizeof (TYPE) * 4);
    }

  for (i = 0; i < NUM; i++)
    {
      res = POPCNT (vals[i]);
      if (res != compute_popcnt (vals[i]))
        abort ();
    }
}
/* { dg-do run { target i?86-*-* x86_64-*-* } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
/* Instantiate the shared POPCNT test body from sse4_2-popcnt.h for
   _mm_popcnt_u32 on unsigned int values.  */
#define TYPE unsigned int
#define POPCNT _mm_popcnt_u32
#include "sse4_2-popcnt.h"
/* { dg-do run { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-require-effective-target sse4 } */
/* { dg-options "-O2 -msse4.2" } */
/* Instantiate the shared POPCNT test body from sse4_2-popcnt.h for
   _mm_popcnt_u64 on unsigned long long values.  The dg-do line above
   restricts this test to lp64 targets.  */
#define TYPE unsigned long long
#define POPCNT _mm_popcnt_u64
#include "sse4_2-popcnt.h"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment