Commit daff6cdf by Steven Munroe

Part 2/2 for contributing PPC64LE support for X86 SSE2 intrinsics.

This patch includes testsuite/gcc.target tests for the intrinsics
in emmintrin.h.  For these tests I added -Wno-psabi to dg-options
to suppress warnings associated with the vector ABI change in GCC5.

From-SVN: r254235
parent 09359ea3
2017-10-30 Steven Munroe <munroesj@gcc.gnu.org>
* sse2-check.h: New file.
* sse2-addpd-1.c: New file.
* sse2-addsd-1.c: New file.
* sse2-andnpd-1.c: New file.
* sse2-andpd-1.c: New file.
* sse2-cmppd-1.c: New file.
* sse2-cmpsd-1.c: New file.
* sse2-comisd-1.c: New file.
* sse2-comisd-2.c: New file.
* sse2-comisd-3.c: New file.
* sse2-comisd-4.c: New file.
* sse2-comisd-5.c: New file.
* sse2-comisd-6.c: New file.
* sse2-cvtdq2pd-1.c: New file.
* sse2-cvtdq2ps-1.c: New file.
* sse2-cvtpd2dq-1.c: New file.
* sse2-cvtpd2ps-1.c: New file.
* sse2-cvtps2dq-1.c: New file.
* sse2-cvtps2pd-1.c: New file.
* sse2-cvtsd2si-1.c: New file.
* sse2-cvtsd2si-2.c: New file.
* sse2-cvtsd2ss-1.c: New file.
* sse2-cvtsi2sd-1.c: New file.
* sse2-cvtsi2sd-2.c: New file.
* sse2-cvtss2sd-1.c: New file.
* sse2-cvttpd2dq-1.c: New file.
* sse2-cvttps2dq-1.c: New file.
* sse2-cvttsd2si-1.c: New file.
* sse2-cvttsd2si-2.c: New file.
* sse2-divpd-1.c: New file.
* sse2-divsd-1.c: New file.
* sse2-maxpd-1.c: New file.
* sse2-maxsd-1.c: New file.
* sse2-minpd-1.c: New file.
* sse2-minsd-1.c: New file.
* sse2-mmx.c: New file.
* sse2-movhpd-1.c: New file.
* sse2-movhpd-2.c: New file.
* sse2-movlpd-1.c: New file.
* sse2-movlpd-2.c: New file.
* sse2-movmskpd-1.c: New file.
* sse2-movq-1.c: New file.
* sse2-movq-2.c: New file.
* sse2-movq-3.c: New file.
* sse2-movsd-1.c: New file.
* sse2-movsd-2.c: New file.
* sse2-movsd-3.c: New file.
* sse2-mulpd-1.c: New file.
* sse2-mulsd-1.c: New file.
* sse2-orpd-1.c: New file.
* sse2-packssdw-1.c: New file.
* sse2-packsswb-1.c: New file.
* sse2-packuswb-1.c: New file.
* sse2-paddb-1.c: New file.
* sse2-paddd-1.c: New file.
* sse2-paddq-1.c: New file.
* sse2-paddsb-1.c: New file.
* sse2-paddsw-1.c: New file.
* sse2-paddusb-1.c: New file.
* sse2-paddusw-1.c: New file.
* sse2-paddw-1.c: New file.
* sse2-pavgb-1.c: New file.
* sse2-pavgw-1.c: New file.
* sse2-pcmpeqb-1.c: New file.
* sse2-pcmpeqd-1.c: New file.
* sse2-pcmpeqw-1.c: New file.
* sse2-pcmpgtb-1.c: New file.
* sse2-pcmpgtd-1.c: New file.
* sse2-pcmpgtw-1.c: New file.
* sse2-pextrw.c: New file.
* sse2-pinsrw.c: New file.
* sse2-pmaddwd-1.c: New file.
* sse2-pmaxsw-1.c: New file.
* sse2-pmaxub-1.c: New file.
* sse2-pminsw-1.c: New file.
* sse2-pminub-1.c: New file.
* sse2-pmovmskb-1.c: New file.
* sse2-pmulhuw-1.c: New file.
* sse2-pmulhw-1.c: New file.
* sse2-pmullw-1.c: New file.
* sse2-pmuludq-1.c: New file.
* sse2-psadbw-1.c: New file.
* sse2-pshufd-1.c: New file.
* sse2-pshufhw-1.c: New file.
* sse2-pshuflw-1.c: New file.
* sse2-pslld-1.c: New file.
* sse2-pslld-2.c: New file.
* sse2-pslldq-1.c: New file.
* sse2-psllq-1.c: New file.
* sse2-psllq-2.c: New file.
* sse2-psllw-1.c: New file.
* sse2-psllw-2.c: New file.
* sse2-psrad-1.c: New file.
* sse2-psrad-2.c: New file.
* sse2-psraw-1.c: New file.
* sse2-psraw-2.c: New file.
* sse2-psrld-1.c: New file.
* sse2-psrld-2.c: New file.
* sse2-psrldq-1.c: New file.
* sse2-psrlq-1.c: New file.
* sse2-psrlq-2.c: New file.
* sse2-psrlw-1.c: New file.
* sse2-psrlw-2.c: New file.
* sse2-psubb-1.c: New file.
* sse2-psubd-1.c: New file.
2017-10-30 Will Schmidt <will_schmidt@vnet.ibm.com>
* gcc.target/powerpc/fold-vec-perm-longlong.c: Update to use long long
......
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_addpd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_add_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d sum;

  /* Empty asm listing both operands as read/write vector-register values;
     presumably keeps the compiler from folding the intrinsic away.  */
  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_add_pd (s1, s2);
  return sum;
}
/* Compare _mm_add_pd with an element-by-element scalar sum; abort on
   mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expect[2];
  int i;

  lhs.x = _mm_set_pd (2134.3343,1234.635654);
  rhs.x = _mm_set_pd (41124.234,2344.2354);
  res.x = test (lhs.x, rhs.x);

  for (i = 0; i < 2; i++)
    expect[i] = lhs.a[i] + rhs.a[i];

  if (check_union128d (res, expect))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#include <stdint.h>
#include <stdio.h>
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_addsd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_add_sd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d sum;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_add_sd (s1, s2);
  return sum;
}
/* Check _mm_add_sd: element 0 must be the sum of the two low elements and
   element 1 must equal element 1 of the first operand.  With DEBUG set the
   mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expect[2];

  lhs.x = _mm_set_pd (2134.3343,1234.635654);
  rhs.x = _mm_set_pd (41124.234,2344.2354);
  res.x = test (lhs.x, rhs.x);

  expect[1] = lhs.a[1];
  expect[0] = lhs.a[0] + rhs.a[0];

  if (check_union128d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_addsd_1; check_union128d failed\n");
      printf ("\t [%f,%f] + [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1],
              rhs.a[0], rhs.a[1], res.a[0], res.a[1]);
      printf ("\t expect [%f,%f]\n", expect[0], expect[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_andnpd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_andnot_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d masked = _mm_andnot_pd (s1, s2);

  return masked;
}
/* Check _mm_andnot_pd against the integer expression (~a) & b computed on
   the same bit patterns; abort on mismatch.

   The operands are integer bit patterns, so they are kept in unions that
   expose the same storage as both long long and double.  Reading a
   long long array through a (double *) cast violates C's aliasing rules
   and may be miscompiled at -O3; the union form is also what the andpd
   test uses.  */
static void
TEST (void)
{
  union128d u, s1, s2;
  union
  {
    double d[2];
    long long ll[2];
  } source1, source2, e;

  source1.ll[0] = 34545;
  source1.ll[1] = 95567;
  source2.ll[0] = 674;
  source2.ll[1] = 57897;

  s1.x = _mm_loadu_pd (source1.d);
  s2.x = _mm_loadu_pd (source2.d);
  u.x = test (s1.x, s2.x);

  e.ll[0] = (~source1.ll[0]) & source2.ll[0];
  e.ll[1] = (~source1.ll[1]) & source2.ll[1];

  if (check_union128d (u, e.d))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_andpd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_and_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d masked = _mm_and_pd (s1, s2);

  return masked;
}
/* Check _mm_and_pd against an integer AND of the operands' bit patterns;
   abort on mismatch.  */
static void
TEST (void)
{
  /* Same storage viewed as doubles or as 64-bit integers.  */
  union
  {
    double d[2];
    long long ll[2];
  } bits1, bits2, expect;
  union128d lhs, rhs, res;
  int i;

  lhs.x = _mm_set_pd (34545, 95567);
  rhs.x = _mm_set_pd (674, 57897);
  _mm_storeu_pd (bits1.d, lhs.x);
  _mm_storeu_pd (bits2.d, rhs.x);
  res.x = test (lhs.x, rhs.x);

  for (i = 0; i < 2; i++)
    expect.ll[i] = bits1.ll[i] & bits2.ll[i];

  if (check_union128d (res, expect.d))
    abort ();
}
#include <stdlib.h>
/* Define this to enable the combination of VSX vector double and
SSE2 data types. */
#define __VSX_SSE2__ 1
#include "m128-check.h"
/* define DEBUG replace abort with printf on error. */
//#define DEBUG 1
#if 1
#define TEST sse2_test
static void sse2_test (void);
/* Non-inlined trampoline into the test body (sse2_test); main calls it
   once the CPU feature check has passed.  */
static void
__attribute__ ((noinline))
do_test (void)
{
sse2_test ();
}
/* Test driver: run the test body only when the CPU reports arch_2_07.
   Always returns 0; with DEBUG defined it prints PASSED or SKIPPED.  When
   __BUILTIN_CPU_SUPPORTS__ is not defined nothing is run at all.  */
int
main ()
{
#ifdef __BUILTIN_CPU_SUPPORTS__
  /* Most SSE2 (vector double) intrinsic operations require VSX
     instructions, but some operations may need only VMX
     instructions.  This is also true for SSE2 scalar doubles, as they
     imply that the "other half" of the vector remains unchanged or is
     set to zeros.  The VSX scalar operations leave the "other half"
     undefined and require additional merge operations.
     Some conversions (to/from integer) need the direct register
     transfer instructions from POWER8 for best performance.
     So we test for arch_2_07.  */
  if (!__builtin_cpu_supports ("arch_2_07"))
    {
#ifdef DEBUG
      printf ("SKIPPED\n");
#endif
      return 0;
    }

  do_test ();
#ifdef DEBUG
  printf ("PASSED\n");
#endif
#endif /* __BUILTIN_CPU_SUPPORTS__ */
  return 0;
}
#endif
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cmp_pd_1
#endif
#include <emmintrin.h>
#include <math.h>
/* Operands for the packed compares: element 0 differs between the two
   arrays, element 1 is equal.  */
double ps1[] = {2134.3343, 6678.346};
double ps2[] = {41124.234, 6678.346};
/* PDD is loaded into the destination before each compare; PD receives the
   stored compare result.  */
long long pdd[] = {1, 2};
long long pd[2];
/* Expected per-element masks, viewable as integers or as doubles.  */
union
{
  long long l[2];
  double d[2];
} pe;
/* Verify the stored compare result in PD against the expected masks in
   PE.  ID is the predicate name spliced into the _mm_cmp<ID>_pd intrinsic
   and DST is the raw vector result, used only for the diagnostic.

   On mismatch the details are printed and the test aborts.  Previously
   the mismatch was only printed, so the program still exited 0 and the
   failure was never reported as one.  */
void pd_check(char *id, __m128d dst)
{
  __v2di dest = (__v2di)dst;

  if (checkVl(pd, pe.l, 2))
    {
      printf("mm_cmp%s_pd FAILED\n", id);
      printf("dst [%lld, %lld], e.l[%lld, %lld]\n",
             dest[0], dest[1], pe.l[0], pe.l[1]);
      abort ();
    }
}
/* Run _mm_cmp<cmp>_pd on PS1/PS2 and check both elements.  REL0 and REL1
   are the scalar truth values expected for elements 0 and 1; true maps to
   an all-ones mask (-1) and false to 0.  DEST is first loaded from PDD and
   then overwritten by the compare result, which is stored to PD and handed
   to pd_check.  Uses the caller's SOURCE1, SOURCE2 and DEST variables.

   Wrapped in do { } while (0) so the expansion is a single statement, and
   the REL arguments are parenthesized so any expression can be passed.  */
#define CMP(cmp, rel0, rel1) \
  do \
    { \
      pe.l[0] = (rel0) ? -1 : 0; \
      pe.l[1] = (rel1) ? -1 : 0; \
      dest = _mm_loadu_pd((double*)pdd); \
      source1 = _mm_loadu_pd(ps1); \
      source2 = _mm_loadu_pd(ps2); \
      dest = _mm_cmp##cmp##_pd(source1, source2); \
      _mm_storeu_pd((double*) pd, dest); \
      pd_check("" #cmp "", dest); \
    } \
  while (0)
/* Exercise every _mm_cmp*_pd predicate on PS1/PS2, giving CMP the scalar
   truth value expected for each element.

   Ordered predicates (eq, lt, le, ge, gt, ord) are false when either
   operand is a NaN; the negated ones (neq, nlt, nle, nge, ngt, unord) are
   true in that case.  The ge/gt and nge/ngt expectations now follow the
   same ordered/unordered pattern as lt/le and nlt/nle; they had been
   written the other way round.  The current inputs contain no NaN, so the
   expected values are unchanged.
   NOTE(review): confirm the unordered behaviour of nge/ngt against the
   PowerPC emmintrin.h implementation before adding NaN inputs.  */
static void
TEST (void)
{
  __m128d source1, source2, dest;

  CMP(eq, !isunordered(ps1[0], ps2[0]) && ps1[0] == ps2[0],
      !isunordered(ps1[1], ps2[1]) && ps1[1] == ps2[1]);
  CMP(lt, !isunordered(ps1[0], ps2[0]) && ps1[0] < ps2[0],
      !isunordered(ps1[1], ps2[1]) && ps1[1] < ps2[1]);
  CMP(le, !isunordered(ps1[0], ps2[0]) && ps1[0] <= ps2[0],
      !isunordered(ps1[1], ps2[1]) && ps1[1] <= ps2[1]);
  CMP(unord, isunordered(ps1[0], ps2[0]),
      isunordered(ps1[1], ps2[1]));
  CMP(neq, isunordered(ps1[0], ps2[0]) || ps1[0] != ps2[0],
      isunordered(ps1[1], ps2[1]) || ps1[1] != ps2[1]);
  CMP(nlt, isunordered(ps1[0], ps2[0]) || ps1[0] >= ps2[0],
      isunordered(ps1[1], ps2[1]) || ps1[1] >= ps2[1]);
  CMP(nle, isunordered(ps1[0], ps2[0]) || ps1[0] > ps2[0],
      isunordered(ps1[1], ps2[1]) || ps1[1] > ps2[1]);
  CMP(ord, !isunordered(ps1[0], ps2[0]),
      !isunordered(ps1[1], ps2[1]));
  CMP(ge, !isunordered(ps1[0], ps2[0]) && ps1[0] >= ps2[0],
      !isunordered(ps1[1], ps2[1]) && ps1[1] >= ps2[1]);
  CMP(gt, !isunordered(ps1[0], ps2[0]) && ps1[0] > ps2[0],
      !isunordered(ps1[1], ps2[1]) && ps1[1] > ps2[1]);
  CMP(nge, isunordered(ps1[0], ps2[0]) || ps1[0] < ps2[0],
      isunordered(ps1[1], ps2[1]) || ps1[1] < ps2[1]);
  CMP(ngt, isunordered(ps1[0], ps2[0]) || ps1[0] <= ps2[0],
      isunordered(ps1[1], ps2[1]) || ps1[1] <= ps2[1]);
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cmp_sd_1
#endif
#include <emmintrin.h>
#include <math.h>
/* Operands for the scalar compares: element 0 differs between the two
   arrays, element 1 is equal.  */
double s1[] = {2134.3343, 6678.346};
double s2[] = {41124.234, 6678.346};
/* DD is loaded into the destination before each compare; D receives the
   stored compare result.  */
long long dd[] = {1, 2};
long long d[2];
/* Expected result, viewable as integers or as doubles.  */
union
{
  long long l[2];
  double d[2];
} e;
/* Verify the stored compare result in D against the expected value in E.
   ID is the predicate name spliced into the _mm_cmp<ID>_sd intrinsic and
   DST is the raw vector result, used only for the diagnostic.

   On mismatch the details are printed and the test aborts.  Previously
   the mismatch was only printed, so the program still exited 0 and the
   failure was never reported as one.  */
void check(char *id, __m128d dst)
{
  __v2di dest = (__v2di)dst;

  if (checkVl(d, e.l, 2))
    {
      printf("mm_cmp%s_sd FAILED\n", id);
      printf("dst [%lld, %lld], e.l[%lld]\n",
             dest[0], dest[1], e.l[0]);
      abort ();
    }
}
/* Run _mm_cmp<cmp>_sd on S1/S2 and check the result.  REL is the scalar
   truth value expected for element 0; true maps to an all-ones mask (-1)
   and false to 0.  DEST is first loaded from DD and then overwritten by
   the compare result, which is stored to D and handed to check.  Uses the
   caller's SOURCE1, SOURCE2 and DEST variables.

   Wrapped in do { } while (0) so the expansion is a single statement, and
   REL is parenthesized so any expression can be passed.  */
#define CMP(cmp, rel) \
  do \
    { \
      e.l[0] = (rel) ? -1 : 0; \
      dest = _mm_loadu_pd((double*)dd); \
      source1 = _mm_loadu_pd(s1); \
      source2 = _mm_loadu_pd(s2); \
      dest = _mm_cmp##cmp##_sd(source1, source2); \
      _mm_storeu_pd((double*) d, dest); \
      check("" #cmp "", dest); \
    } \
  while (0)
/* Exercise every _mm_cmp*_sd predicate on element 0 of S1/S2.  Element 1
   of the expected value is set once to S1[1], i.e. the upper element of
   the first operand.

   Ordered predicates (eq, lt, le, ge, gt, ord) are false when either
   operand is a NaN; the negated ones (neq, nlt, nle, nge, ngt, unord) are
   true in that case.  The ge/gt and nge/ngt expectations now follow the
   same ordered/unordered pattern as lt/le and nlt/nle; they had been
   written the other way round.  The current inputs contain no NaN, so the
   expected values are unchanged.
   NOTE(review): confirm the unordered behaviour of nge/ngt against the
   PowerPC emmintrin.h implementation before adding NaN inputs.  */
static void
TEST (void)
{
  __m128d source1, source2, dest;

  e.d[1] = s1[1];

  CMP(eq, !isunordered(s1[0], s2[0]) && s1[0] == s2[0]);
  CMP(lt, !isunordered(s1[0], s2[0]) && s1[0] < s2[0]);
  CMP(le, !isunordered(s1[0], s2[0]) && s1[0] <= s2[0]);
  CMP(unord, isunordered(s1[0], s2[0]));
  CMP(neq, isunordered(s1[0], s2[0]) || s1[0] != s2[0]);
  CMP(nlt, isunordered(s1[0], s2[0]) || s1[0] >= s2[0]);
  CMP(nle, isunordered(s1[0], s2[0]) || s1[0] > s2[0]);
  CMP(ord, !isunordered(s1[0], s2[0]));
  CMP(ge, !isunordered(s1[0], s2[0]) && s1[0] >= s2[0]);
  CMP(gt, !isunordered(s1[0], s2[0]) && s1[0] > s2[0]);
  CMP(nge, isunordered(s1[0], s2[0]) || s1[0] < s2[0]);
  CMP(ngt, isunordered(s1[0], s2[0]) || s1[0] <= s2[0]);
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_comi_sd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_comieq_sd.  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  flag = _mm_comieq_sd (s1, s2);
  return flag;
}
/* Check _mm_comieq_sd against the scalar == comparison of the low
   elements; abort on mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343,2344.2354);
  rhs.x = _mm_set_pd (41124.234,2344.2354);

  got[0] = test (lhs.x, rhs.x);
  want[0] = lhs.a[0] == rhs.a[0];

  if (checkVi (got, want, 1))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_comi_sd_2
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_comilt_sd.  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  flag = _mm_comilt_sd (s1, s2);
  return flag;
}
/* Check _mm_comilt_sd against the scalar < comparison of the low
   elements; abort on mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343,2344.2354);
  rhs.x = _mm_set_pd (41124.234,2344.2354);

  got[0] = test (lhs.x, rhs.x);
  want[0] = lhs.a[0] < rhs.a[0];

  if (checkVi (got, want, 1))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_comi_sd_3
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_comile_sd.  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  flag = _mm_comile_sd (s1, s2);
  return flag;
}
/* Check _mm_comile_sd against the scalar <= comparison of the low
   elements; abort on mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343,2344.2354);
  rhs.x = _mm_set_pd (41124.234,2344.2354);

  got[0] = test (lhs.x, rhs.x);
  want[0] = lhs.a[0] <= rhs.a[0];

  if (checkVi (got, want, 1))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_comi_sd_4
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_comigt_sd.  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  flag = _mm_comigt_sd (s1, s2);
  return flag;
}
/* Check _mm_comigt_sd against the scalar > comparison of the low
   elements; abort on mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343,12344.2354);
  rhs.x = _mm_set_pd (41124.234,2344.2354);

  got[0] = test (lhs.x, rhs.x);
  want[0] = lhs.a[0] > rhs.a[0];

  if (checkVi (got, want, 1))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_comi_sd_5
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_comige_sd.  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  flag = _mm_comige_sd (s1, s2);
  return flag;
}
/* Check _mm_comige_sd against the scalar >= comparison of the low
   elements; abort on mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343,2344.2354);
  rhs.x = _mm_set_pd (41124.234,2344.2354);

  got[0] = test (lhs.x, rhs.x);
  want[0] = lhs.a[0] >= rhs.a[0];

  if (checkVi (got, want, 1))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_comi_sd_6
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_comineq_sd.  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  flag = _mm_comineq_sd (s1, s2);
  return flag;
}
/* Check _mm_comineq_sd against the scalar != comparison of the low
   elements; abort on mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343,2344.2354);
  rhs.x = _mm_set_pd (41124.234,2344.2354);

  got[0] = test (lhs.x, rhs.x);
  want[0] = lhs.a[0] != rhs.a[0];

  if (checkVi (got, want, 1))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtepi32_pd
#endif
#include <emmintrin.h>
#ifdef _ARCH_PWR8
/* Out-of-line call site for _mm_cvtepi32_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128i p)
{
  __m128d converted = _mm_cvtepi32_pd (p);

  return converted;
}
#endif
/* Check _mm_cvtepi32_pd: elements 0 and 1 of the integer vector converted
   to double.  The body is compiled only when _ARCH_PWR8 is defined; with
   DEBUG set a mismatch is printed instead of aborting.  */
static void
TEST (void)
{
#ifdef _ARCH_PWR8
  union128i_d src;
  union128d res;
  double expect[2];
  int i;

  src.x = _mm_set_epi32 (123, 321, 456, 987);
  res.x = test (src.x);

  for (i = 0; i < 2; i++)
    expect[i] = (double)src.a[i];

  if (check_union128d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvtepi32_pd; check_union128d failed\n");
      printf ("\t [%d,%d, %d, %d] -> [%f,%f]\n",
              src.a[0], src.a[1], src.a[2], src.a[3],
              res.a[0], res.a[1]);
      printf ("\t expect [%f,%f]\n",
              expect[0], expect[1]);
    }
#else
    abort ();
#endif
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtepi32_ps
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtepi32_ps.  */
static __m128
__attribute__((noinline, unused))
test (__m128i p)
{
  __m128 converted = _mm_cvtepi32_ps (p);

  return converted;
}
/* Check _mm_cvtepi32_ps: each of the four integers converted to float;
   abort on mismatch.  */
static void
TEST (void)
{
  union128i_d src;
  union128 res;
  float expect[4];
  int i;

  src.x = _mm_set_epi32 (123, 321, 456, 987);
  res.x = test (src.x);

  for (i = 0; i < 4; i++)
    expect[i] = (float)src.a[i];

  if (check_union128 (res, expect))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtpd_epi32
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtpd_epi32.  */
static __m128i
__attribute__((noinline, unused))
test (__m128d p)
{
  __m128i converted = _mm_cvtpd_epi32 (p);

  return converted;
}
/* Check _mm_cvtpd_epi32: the two doubles rounded to int in elements 0-1,
   with elements 2-3 expected to be zero.  The expected rounding is
   modelled as (int)(x + 0.5).  With DEBUG set a mismatch is printed
   instead of aborting.  */
static void
TEST (void)
{
  union128d src;
  union128i_d res;
  int expect[4] = {0};
  int i;

  src.x = _mm_set_pd (2.78, 7777768.82);
  res.x = test (src.x);

  for (i = 0; i < 2; i++)
    expect[i] = (int)(src.a[i] + 0.5);

  if (check_union128i_d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvtpd_epi32; check_union128i_d failed\n");
      printf ("\t [%f,%f] -> [%d,%d,%d,%d]\n", src.a[0], src.a[1],
              res.a[0], res.a[1], res.a[2], res.a[3]);
      printf ("\t expect [%d,%d,%d,%d]\n", expect[0], expect[1],
              expect[2], expect[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtpd_ps
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtpd_ps.  */
static __m128
__attribute__((noinline, unused))
test (__m128d p)
{
  __m128 converted = _mm_cvtpd_ps (p);

  return converted;
}
/* Check _mm_cvtpd_ps: the two doubles narrowed to float in elements 0-1,
   with elements 2-3 expected to be zero.  With DEBUG set a mismatch is
   printed instead of aborting.  */
static void
TEST (void)
{
  union128d src;
  union128 res;
  float expect[4] = { 0.0 };
  int i;

  src.x = _mm_set_pd (123.321, 456.987);
  res.x = test (src.x);

  for (i = 0; i < 2; i++)
    expect[i] = (float)src.a[i];

  if (check_union128 (res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvtpd_ps; check_union128 failed\n");
      printf ("\t [%f,%f] -> [%f,%f,%f,%f]\n", src.a[0], src.a[1],
              res.a[0], res.a[1], res.a[2], res.a[3]);
      printf ("\t expect [%f,%f,%f,%f]\n", expect[0], expect[1],
              expect[2], expect[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtps2dq_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtps_epi32.  */
static __m128i
__attribute__((noinline, unused))
test (__m128 p)
{
  __m128i converted = _mm_cvtps_epi32 (p);

  return converted;
}
/* Check _mm_cvtps_epi32: each of the four floats rounded to int, with the
   expected rounding modelled as (int)(x + 0.5).  With DEBUG set a
   mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  union128 src;
  union128i_d res;
  int expect[4] = {0};
  int i;

  src.x = _mm_set_ps (2.78, 7777768.82, 2.331, 3.456);
  res.x = test (src.x);

  for (i = 0; i < 4; i++)
    expect[i] = (int)(src.a[i] + 0.5);

  if (check_union128i_d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvtps2dq_1; check_union128i_d failed\n");
      printf ("\t [%f,%f,%f,%f] -> [%d,%d,%d,%d]\n", src.a[0], src.a[1],
              src.a[2], src.a[3], res.a[0], res.a[1], res.a[2], res.a[3]);
      printf ("\t expect [%d,%d,%d,%d]\n", expect[0], expect[1],
              expect[2], expect[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtps2pd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtps_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128 p)
{
  __m128d converted = _mm_cvtps_pd (p);

  return converted;
}
/* Check _mm_cvtps_pd: elements 0 and 1 of the float vector widened to
   double.  With DEBUG set a mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  union128 src;
  union128d res;
  double expect[2];
  int i;

  src.x = _mm_set_ps (2.78, 7777768.82, 2.331, 3.456);
  res.x = test (src.x);

  for (i = 0; i < 2; i++)
    expect[i] = (double)src.a[i];

  if (check_union128d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvtps2pd_1; check_union128d failed\n");
      printf ("\t cvt\t [%f,%f,%f,%f] -> [%f,%f]\n", src.a[0], src.a[1],
              src.a[2], src.a[3], res.a[0], res.a[1]);
      printf ("\t expect\t [%f,%f]\n", expect[0], expect[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtsd2si_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtsd_si32.  */
static int
__attribute__((noinline, unused))
test (__m128d p)
{
  int converted = _mm_cvtsd_si32 (p);

  return converted;
}
/* Check _mm_cvtsd_si32: the low double rounded to int, with the expected
   rounding modelled as (int)(x + 0.5).  With DEBUG set a mismatch is
   printed instead of aborting.  */
static void
TEST (void)
{
  union128d src;
  int got;
  int want;

  src.x = _mm_set_pd (123.321, 456.987);
  got = test (src.x);
  want = (int)(src.a[0] + 0.5);

  if (got != want)
#if DEBUG
    {
      printf ("sse2_test_cvtsd2si_1; failed\n");
      printf ("\t [%f,%f] -> [%d]\n", src.a[0], src.a[1], got);
      printf ("\t expect [%d]\n", want);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtsd2si_2
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtsd_si64.  */
static long long
__attribute__((noinline, unused))
test (__m128d p)
{
  long long converted = _mm_cvtsd_si64 (p);

  return converted;
}
/* Check _mm_cvtsd_si64: the low double rounded to a 64-bit integer, with
   the expected rounding modelled as (long long)(x + 0.5).  With DEBUG set
   a mismatch is printed instead of aborting.

   The diagnostics use %lld because D and E are long long; the previous
   %ld did not match the argument type.  */
static void
TEST (void)
{
  union128d s;
  long long e;
  long long d;

  s.x = _mm_set_pd (829496729501.4, 429496729501.4);
  d = test (s.x);
  e = (long long)(s.a[0] + 0.5);

  if (d != e)
#if DEBUG
    {
      printf ("sse2_test_cvtsd2si_2; failed\n");
      printf ("\t [%f,%f] -> [%lld]\n", s.a[0], s.a[1], d);
      printf ("\t expect [%lld]\n", e);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtsd2ss_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtsd_ss.  */
static __m128
__attribute__((noinline, unused))
test (__m128 p1, __m128d p2)
{
  __m128 merged = _mm_cvtsd_ss (p1, p2);

  return merged;
}
/* Check _mm_cvtsd_ss: element 0 of the result is the low double narrowed
   to float, and elements 1-3 are expected to equal those of the float
   operand.  With DEBUG set a mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  double dvals[2] = {123.345, 67.3321};
  float expect[4] = {5633.098, 93.21, 3.34, 4555.2};
  union128d dsrc;
  union128 fsrc, res;

  dsrc.x = _mm_loadu_pd (dvals);
  /* The float operand starts out as the expected vector; only element 0
     of EXPECT is replaced below.  */
  fsrc.x = _mm_loadu_ps (expect);
  __asm("" : "+v"(dsrc.x), "+v"(fsrc.x));
  res.x = test(fsrc.x, dsrc.x);
  expect[0] = (float)dvals[0];

  if (check_union128(res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvtsd2ss_1; check_union128 failed\n");
      printf ("\t [%f,%f,%f,%f],[%f,%f]\n", fsrc.a[0], fsrc.a[1],
              fsrc.a[2], fsrc.a[3], dsrc.a[0], dsrc.a[1]);
      printf ("\t -> \t[%f,%f,%f,%f]\n", res.a[0], res.a[1], res.a[2],
              res.a[3]);
      printf ("\texpect\t[%f,%f,%f,%f]\n", expect[0], expect[1],
              expect[2], expect[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtsi2sd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtsi32_sd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d p, int b)
{
  __m128d merged;

  /* Empty asm listing the vector as a read/write vector-register value
     and the integer as a read/write general-register value.  */
  __asm("" : "+v"(p), "+r"(b));
  merged = _mm_cvtsi32_sd (p, b);
  return merged;
}
/* Check _mm_cvtsi32_sd: element 0 is the int converted to double and
   element 1 is expected to equal element 1 of the vector operand; abort
   on mismatch.  */
static void
TEST (void)
{
  union128d src, res;
  double expect[2];
  int value = 128;

  src.x = _mm_set_pd (123.321, 456.987);
  res.x = test (src.x, value);

  expect[1] = src.a[1];
  expect[0] = (double)value;

  if (check_union128d (res, expect))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtsi2sd_2
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtsi64_sd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d p, long long b)
{
  __m128d merged;

  /* Empty asm listing the vector as a read/write vector-register value
     and the integer as a read/write general-register value.  */
  __asm("" : "+v"(p), "+r"(b));
  merged = _mm_cvtsi64_sd (p, b);
  return merged;
}
/* Check _mm_cvtsi64_sd: element 0 is the 64-bit integer converted to
   double and element 1 is expected to equal element 1 of the vector
   operand; abort on mismatch.  */
static void
TEST (void)
{
  union128d src, res;
  double expect[2];
  long long value = 42949672951333LL;

  src.x = _mm_set_pd (123.321, 456.987);
  res.x = test (src.x, value);

  expect[1] = src.a[1];
  expect[0] = (double)value;

  if (check_union128d (res, expect))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvtss2sd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvtss_sd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d a, __m128 b)
{
  __m128d merged = _mm_cvtss_sd (a, b);

  return merged;
}
/* Check _mm_cvtss_sd: element 0 is the low float widened to double and
   element 1 is expected to equal element 1 of the double operand.  With
   DEBUG set a mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  union128d dsrc, res;
  union128 fsrc;
  double expect[2];

  dsrc.x = _mm_set_pd (123.321, 456.987);
  fsrc.x = _mm_set_ps (123.321, 456.987, 666.45, 231.987);
  res.x = test (dsrc.x, fsrc.x);

  expect[1] = dsrc.a[1];
  expect[0] = (double)fsrc.a[0];

  if (check_union128d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvtss2sd_1; check_union128d failed\n");
      printf ("\t [%f,%f], [%f,%f,%f,%f]\n", dsrc.a[0], dsrc.a[1],
              fsrc.a[0], fsrc.a[1], fsrc.a[2], fsrc.a[3]);
      printf ("\t -> \t[%f,%f]\n", res.a[0], res.a[1]);
      printf ("\texpect\t[%f,%f]\n", expect[0], expect[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvttpd_epi32
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvttpd_epi32.  */
static __m128i
__attribute__((noinline, unused))
test (__m128d p)
{
  __m128i converted = _mm_cvttpd_epi32 (p);

  return converted;
}
/* Check _mm_cvttpd_epi32: the two doubles converted with a C (int) cast in
   elements 0-1, with elements 2-3 expected to be zero.  With DEBUG set a
   mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  union128i_d res;
  union128d src;
  int expect[4] = {0};
  int i;

  src.x = _mm_set_pd (123.321, 456.987);
  res.x = test (src.x);

  for (i = 0; i < 2; i++)
    expect[i] = (int)src.a[i];

  if (check_union128i_d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_cvttpd_epi32; check_union128i_d failed\n");
      printf ("\t [%f,%f] -> [%d,%d,%d,%d]\n", src.a[0], src.a[1],
              res.a[0], res.a[1], res.a[2], res.a[3]);
      printf ("\t expect [%d,%d,%d,%d]\n", expect[0], expect[1],
              expect[2], expect[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvttps2dq_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvttps_epi32.  */
static __m128i
__attribute__((noinline, unused))
test (__m128 p)
{
  __m128i converted = _mm_cvttps_epi32 (p);

  return converted;
}
/* Check _mm_cvttps_epi32: each of the four floats converted with a C
   (int) cast; abort on mismatch.  */
static void
TEST (void)
{
  union128i_d res;
  union128 src;
  int expect[4] = {0};
  int i;

  src.x = _mm_set_ps (123.321, 456.987, 33.56, 7765.321);
  res.x = test (src.x);

  for (i = 0; i < 4; i++)
    expect[i] = (int)src.a[i];

  if (check_union128i_d (res, expect))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvttsd2si_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvttsd_si32.  */
static int
__attribute__((noinline, unused))
test (__m128d p)
{
  int converted;

  /* Empty asm listing the operand as a read/write vector-register value.  */
  __asm("" : "+v"(p));
  converted = _mm_cvttsd_si32 (p);
  return converted;
}
/* Check _mm_cvttsd_si32: the low double converted with a C (int) cast.
   With DEBUG set a mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  union128d src;
  int got;
  int want;

  src.x = _mm_set_pd (123.321, 456.987);
  got = test (src.x);
  want = (int)(src.a[0]);

  if (got != want)
#if DEBUG
    {
      printf ("sse2_test_cvttsd2si_1; failed\n");
      printf ("\t [%f,%f] -> [%d]\n", src.a[0], src.a[1], got);
      printf ("\t expect [%d]\n", want);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_cvttsd2si_2
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_cvttsd_si64.  */
static long long
__attribute__((noinline, unused))
test (__m128d p)
{
  long long converted;

  /* Empty asm listing the operand as a read/write vector-register value.  */
  __asm("" : "+v"(p));
  converted = _mm_cvttsd_si64 (p);
  return converted;
}
/* Check _mm_cvttsd_si64: the low double converted with a C (long long)
   cast; abort on mismatch.  */
static void
TEST (void)
{
  union128d src;
  long long got;
  long long want;

  src.x = _mm_set_pd (123.321, 42949672339501.4);
  got = test (src.x);
  want = (long long)(src.a[0]);

  if (got != want)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_divpd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_div_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d quotient;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  quotient = _mm_div_pd (s1, s2);
  return quotient;
}
/* Check _mm_div_pd against an element-by-element scalar division.  With
   DEBUG set a mismatch is printed instead of aborting.

   The diagnostic now shows '/' between the operands; it previously
   printed '*', which misreported the operation under test.  */
static void
TEST (void)
{
  union128d u, s1, s2;
  double e[2];

  s1.x = _mm_set_pd (2134.3343,1234.635654);
  s2.x = _mm_set_pd (41124.234,2344.2354);
  u.x = test (s1.x, s2.x);

  e[0] = s1.a[0] / s2.a[0];
  e[1] = s1.a[1] / s2.a[1];

  if (check_union128d (u, e))
#if DEBUG
    {
      printf ("sse2_test_divpd_1; check_union128d failed\n");
      printf ("\t [%f,%f] / [%f,%f] -> [%f,%f]\n", s1.a[0], s1.a[1], s2.a[0],
              s2.a[1], u.a[0], u.a[1]);
      printf ("\t expect [%f,%f]\n", e[0], e[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_divsd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_div_sd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d quotient;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  quotient = _mm_div_sd (s1, s2);
  return quotient;
}
/* Check _mm_div_sd: element 0 is the quotient of the low elements and
   element 1 is expected to equal element 1 of the first operand.  With
   DEBUG set a mismatch is printed instead of aborting.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expect[2];

  lhs.x = _mm_set_pd (2134.3343,1234.635654);
  rhs.x = _mm_set_pd (41124.234,2344.2354);
  res.x = test (lhs.x, rhs.x);

  expect[1] = lhs.a[1];
  expect[0] = lhs.a[0] / rhs.a[0];

  if (check_union128d (res, expect))
#if DEBUG
    {
      printf ("sse2_test_divsd_1; check_union128d failed\n");
      printf ("\t [%f,%f] / [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1],
              rhs.a[0], rhs.a[1], res.a[0], res.a[1]);
      printf ("\t expect [%f,%f]\n", expect[0], expect[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_maxpd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_max_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d larger;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  larger = _mm_max_pd (s1, s2);
  return larger;
}
/* Check _mm_max_pd against an element-by-element scalar maximum; abort on
   mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expect[2];
  int i;

  lhs.x = _mm_set_pd (2134.3343,1234.635654);
  rhs.x = _mm_set_pd (41124.234,2344.2354);
  res.x = test (lhs.x, rhs.x);

  for (i = 0; i < 2; i++)
    {
      /* The second operand is chosen unless the first is strictly
         greater.  */
      if (lhs.a[i] > rhs.a[i])
        expect[i] = lhs.a[i];
      else
        expect[i] = rhs.a[i];
    }

  if (check_union128d (res, expect))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_maxsd_1
#endif
#include <emmintrin.h>
/* Out-of-line call site for _mm_max_sd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d larger;

  /* Empty asm listing both operands as read/write vector-register values.  */
  __asm("" : "+v"(s1), "+v"(s2));
  larger = _mm_max_sd (s1, s2);
  return larger;
}
/* Check _mm_max_sd: element 0 is the larger of the two low elements and
   element 1 is expected to equal element 1 of the first operand.  With
   DEBUG set a mismatch is printed instead of aborting.

   The diagnostic now names this test (sse2_test_maxsd_1) and describes
   the operation as max; it previously said "sse2_test_maxsd_3" and showed
   '+', both apparently left over from another test.  */
static void
TEST (void)
{
  union128d u, s1, s2;
  double e[2];

  s1.x = _mm_set_pd (2134.3343,1234.635654);
  s2.x = _mm_set_pd (41124.234,2344.2354);
  u.x = test (s1.x, s2.x);

  e[0] = s1.a[0] > s2.a[0] ? s1.a[0]:s2.a[0];
  e[1] = s1.a[1];

  if (check_union128d (u, e))
#if DEBUG
    {
      printf ("sse2_test_maxsd_1; check_union128d failed\n");
      printf ("\t max ([%f,%f], [%f,%f]) -> [%f,%f]\n", s1.a[0], s1.a[1],
              s2.a[0], s2.a[1], u.a[0], u.a[1]);
      printf ("\t expect [%f,%f]\n", e[0], e[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_minpd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_min_pd; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d smaller;

  __asm("" : "+v"(s1), "+v"(s2));
  smaller = _mm_min_pd (s1, s2);
  return smaller;
}
/* Check _mm_min_pd against a per-lane scalar minimum; abort on any
   mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, got;
  double want[2];
  int lane;

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  got.x = test (lhs.x, rhs.x);

  for (lane = 0; lane < 2; lane++)
    {
      if (lhs.a[lane] < rhs.a[lane])
        want[lane] = lhs.a[lane];
      else
        want[lane] = rhs.a[lane];
    }

  if (check_union128d (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_minsd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_min_sd; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d smaller;

  __asm("" : "+v"(s1), "+v"(s2));
  smaller = _mm_min_sd (s1, s2);
  return smaller;
}
/* Check _mm_min_sd: lane 0 must be the smaller of the two low elements
   and lane 1 must be copied from the first operand.  With DEBUG set a
   mismatch is printed, otherwise it aborts.  */
static void
TEST (void)
{
  union128d u, s1, s2;
  double e[2];

  s1.x = _mm_set_pd (2134.3343, 1234.635654);
  s2.x = _mm_set_pd (41124.234, 2344.2354);
  u.x = test (s1.x, s2.x);

  e[0] = s1.a[0] < s2.a[0] ? s1.a[0] : s2.a[0];
  e[1] = s1.a[1];

  if (check_union128d (u, e))
#if DEBUG
    {
      /* Report under this test's own name and operation (the message
         used to say "minsd_3" and print the operands joined by '+').  */
      printf ("sse2_test_minsd_1; check_union128d failed\n");
      printf ("\t min ([%f,%f], [%f,%f]) -> [%f,%f]\n", s1.a[0], s1.a[1],
              s2.a[0], s2.a[1], u.a[0], u.a[1]);
      printf ("\t expect [%f,%f]\n", e[0], e[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#include "sse2-check.h"
#ifndef TEST
#define TEST sse2_test_mmx_1
#endif
#include <mmintrin.h>
/* Number of 64-bit words in each multi-word operand.  */
#define N 4
/* Operands and output of unsigned_add3; a and b are filled in by TEST.  */
unsigned long long a[N], b[N], result[N];
/* Sum words that TEST requires in result[] after the addition.  */
unsigned long long check_data[N] =
{ 0x101010101010100full,
0x1010101010101010ull,
0x1010101010101010ull,
0x1010101010101010ull };
/* Add the COUNT-word numbers A and B word by word, starting at index 0
   and feeding each word's carry into the next, storing the sum words in
   RESULT.  Returns the carry out of the last word.  */
__m64
unsigned_add3 (const __m64 * a, const __m64 * b,
               __m64 * result, unsigned int count)
{
  __m64 lhs, rhs, unit, total, cin, low_carry;
  unsigned int idx;

  cin = _mm_setzero_si64 ();
  /* Build the constant 1: zero minus the all-ones compare result.  */
  unit = _mm_cmpeq_pi8 (cin, cin);
  unit = _mm_sub_si64 (cin, unit);

  idx = 0;
  while (idx < count)
    {
      lhs = a[idx];
      rhs = b[idx];

      total = _mm_add_si64 (_mm_add_si64 (lhs, rhs), cin);
      result[idx] = total;

      /* Carry out of bit 0: (lhs & rhs) | ((lhs ^ rhs) & cin), low bit.  */
      low_carry = _mm_and_si64 (_mm_xor_si64 (lhs, rhs), cin);
      low_carry = _mm_or_si64 (_mm_and_si64 (lhs, rhs), low_carry);
      low_carry = _mm_and_si64 (low_carry, unit);

      /* Add the upper 63 bits plus that carry; bit 63 of the result is
         the carry out of the whole word.  */
      lhs = _mm_srli_si64 (lhs, 1);
      rhs = _mm_srli_si64 (rhs, 1);
      cin = _mm_add_si64 (_mm_add_si64 (lhs, rhs), low_carry);
      cin = _mm_srli_si64 (cin, 63);

      idx++;
    }

  return cin;
}
/* Fill a[] and b[] with fixed patterns, add them with unsigned_add3 and
   abort unless the final carry is 1 and result[] equals check_data[].  */
void __attribute__((noinline))
TEST (void)
{
  unsigned long long carry_out;
  int idx;

  /* Really long numbers.  */
  for (idx = N - 1; idx >= 0; idx--)
    {
      a[idx] = 0xd3d3d3d3d3d3d3d3ull;
      b[idx] = 0x3c3c3c3c3c3c3c3cull;
    }

  carry_out = (unsigned long long) unsigned_add3
    ((__m64 *)a, (__m64 *)b, (__m64 *)result, N);

  _mm_empty ();

  if (carry_out != 1)
    abort ();

  idx = 0;
  while (idx < N)
    {
      if (result[idx] != check_data[idx])
        abort ();
      idx++;
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movhpd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_loadh_pd; the empty asm makes the
   vector and the pointer opaque to the optimizer.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, double *p)
{
  __m128d loaded;

  __asm("" : "+v"(s1), "+b"(p));
  loaded = _mm_loadh_pd (s1, p);
  return loaded;
}
/* Check _mm_loadh_pd: lane 0 must keep the vector's low element and
   lane 1 must be the double read through the pointer.  */
static void
TEST (void)
{
  union128d base, got;
  double s2[2] = {41124.234,2344.2354};
  double want[2];

  base.x = _mm_set_pd (2134.3343, 1234.635654);
  got.x = test (base.x, s2);

  want[1] = s2[0];
  want[0] = base.a[0];

  if (check_union128d (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movhpd_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_storeh_pd; the empty asm makes the
   vector and the pointer opaque to the optimizer.  The call is a plain
   statement: ISO C does not allow `return expr;' in a function that
   returns void.  */
static void
__attribute__((noinline, unused))
test (double *p, __m128d a)
{
  __asm("" : "+v"(a), "+b"(p));
  _mm_storeh_pd (p, a);
}
/* Check _mm_storeh_pd: the double written through the pointer must
   equal the vector's high element.  */
static void
TEST (void)
{
  union128d src;
  double stored[1];
  double want[1];

  src.x = _mm_set_pd (2134.3343, 1234.635654);
  test (stored, src.x);

  want[0] = src.a[1];
  if (want[0] == stored[0])
    return;

  abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movlpd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_loadl_pd; the empty asm makes the
   vector and the pointer opaque to the optimizer.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d a, double *e)
{
  __m128d loaded;

  __asm("" : "+v"(a), "+b"(e));
  loaded = _mm_loadl_pd (a, e);
  return loaded;
}
/* Check _mm_loadl_pd: lane 0 must be the double read through the
   pointer and lane 1 must keep the vector's high element.  */
static void
TEST (void)
{
  union128d u, s1;
  double d[2] = {2134.3343,1234.635654};
  double e[2];

  s1.x = _mm_set_pd (41124.234, 2344.2354);
  /* The former `u.x = _mm_loadu_pd (d);' was a dead store: u.x is
     assigned unconditionally right here.  */
  u.x = test (s1.x, d);

  e[0] = d[0];
  e[1] = s1.a[1];

  if (check_union128d (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movlpd_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_storel_pd; the empty asm makes the
   vector and the pointer opaque to the optimizer.  The call is a plain
   statement: ISO C does not allow `return expr;' in a function that
   returns void.  */
static void
__attribute__((noinline, unused))
test (double *e, __m128d a)
{
  __asm("" : "+v"(a), "+b"(e));
  _mm_storel_pd (e, a);
}
/* Check _mm_storel_pd: after the store fills slot 0 and the high
   element is copied into slot 1 by hand, the array must match the
   source vector.  */
static void
TEST (void)
{
  union128d src;
  double out[2];

  src.x = _mm_set_pd (41124.234, 2344.2354);

  test (out, src.x);
  out[1] = src.a[1];

  if (check_union128d (src, out))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movmskpd_1
#endif
#include <emmintrin.h>
#ifdef _ARCH_PWR8
/* Non-inlined wrapper around _mm_movemask_pd; the empty asm makes the
   operand opaque to the optimizer.  */
static int
__attribute__((noinline, unused))
test (__m128d p)
{
  int mask;

  __asm("" : "+v"(p));
  mask = _mm_movemask_pd (p);
  return mask;
}
#endif
/* Check _mm_movemask_pd: bit N of the result must be set exactly when
   element N of the input is negative.  The body is compiled only when
   _ARCH_PWR8 is defined.  With DEBUG set a mismatch is printed,
   otherwise it aborts.  */
static void
TEST (void)
{
#ifdef _ARCH_PWR8
  double source[2] = {1.234, -2234.23};
  union128d s1;
  int d;
  int e;

  s1.x = _mm_loadu_pd (source);
  d = test (s1.x);

  e = 0;
  if (source[0] < 0)
    e |= 1;
  if (source[1] < 0)
    e |= 1 << 1;

  if (checkVi (&d, &e, 1))
#if DEBUG
    {
      /* Name the checker that is actually called above; the message
         used to blame check_union128d.  */
      printf ("sse2_test_movmskpd_1; checkVi failed\n");
      printf ("\t [%f,%f] -> [%d]\n",
              s1.a[0], s1.a[1], d);
      printf ("\t expect [%d]\n",
              e);
    }
#else
    abort ();
#endif
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movq_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_move_epi64; the empty asm makes the
   operand opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i b)
{
  __m128i moved;

  __asm("" : "+v"(b));
  moved = _mm_move_epi64 (b);
  return moved;
}
/* Check _mm_move_epi64: element 0 must be copied from the source and
   element 1 must be zero.  With DEBUG set a mismatch is printed,
   otherwise it aborts.  */
static void
TEST (void)
{
  union128i_q src, got;
  long long want[2] = { 0 };

  src.x = _mm_set_epi64x(12876, 3376590);
  got.x = test (src.x);
  want[0] = src.a[0];

  if (!check_union128i_q (got, want))
    return;

#if DEBUG
  printf ("sse2_test_movq_1; check_union128i_q failed\n");
  printf ("\t move_epi64 ([%llx, %llx]) -> [%llx, %llx]\n", src.a[0],
          src.a[1], got.a[0], got.a[1]);
  printf ("\t expect [%llx, %llx]\n", want[0], want[1]);
#else
  abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movq_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_cvtsi64_si128; the empty asm makes
   the scalar operand opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (long long b)
{
  __m128i widened;

  __asm("" : "+r" (b));
  widened = _mm_cvtsi64_si128 (b);
  return widened;
}
/* Check _mm_cvtsi64_si128: element 0 must hold the scalar and
   element 1 must be zero.  */
static void
TEST (void)
{
  union128i_q got;
  long long want[2] = {0};
  long long scalar = 4294967295133LL;

  got.x = test (scalar);
  want[0] = scalar;

  if (check_union128i_q (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movq_3
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_cvtsi128_si64; the empty asm makes
   the operand opaque to the optimizer.  */
static long long
__attribute__((noinline, unused))
test (__m128i b)
{
  long long low;

  __asm("" : "+v"(b));
  low = _mm_cvtsi128_si64 (b);
  return low;
}
/* Check _mm_cvtsi128_si64: the returned scalar must equal element 0 of
   the source vector.  */
static void
TEST (void)
{
  union128i_q src;
  long long got;

  src.x = _mm_set_epi64x (4294967295133LL, 3844294967295133LL);
  got = test (src.x);

  if (got == src.a[0])
    return;

  abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movsd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_load_sd.  */
static __m128d
__attribute__((noinline, unused))
test (double *p)
{
  __m128d loaded = _mm_load_sd (p);

  return loaded;
}
/* Check _mm_load_sd: lane 0 must be the double read through the
   pointer and lane 1 must be 0.0.  */
static void
TEST (void)
{
  union128d u;
  double d[2] = {128.023, 3345.1234};
  double e[2];

  /* The former `u.x = _mm_loadu_pd (e);' read the still-uninitialized
     array e and its result was overwritten immediately, so it is
     dropped.  */
  u.x = test (d);

  e[0] = d[0];
  e[1] = 0.0;

  if (check_union128d (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movsd_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_store_sd.  */
static void
__attribute__((noinline, unused))
test (double *p, __m128d a)
{
  _mm_store_sd (p, a);
  return;
}
/* Check _mm_store_sd: the double written through the pointer must
   equal the vector's low element.  */
static void
TEST (void)
{
  union128d src;
  double stored[1];
  double want[1];

  src.x = _mm_set_pd (128.023, 3345.1234);
  test (stored, src.x);
  want[0] = src.a[0];

  if (checkVd (stored, want, 1))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_movsd_3
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_move_sd; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d a, __m128d b)
{
  __m128d merged;

  __asm("" : "+v"(a), "+v"(b));
  merged = _mm_move_sd (a, b);
  return merged;
}
/* Check _mm_move_sd against a fixed expectation: lane 0 from the
   second operand, lane 1 from the first.  With DEBUG set a mismatch is
   printed, otherwise it aborts.  */
static void
TEST (void)
{
  union128d first, second, got;
  double want[2] = { 256.046, 3345.1234 };

  first.x = _mm_setr_pd (128.023, 3345.1234);
  second.x = _mm_setr_pd (256.046, 4533.1234);
  /* Hide the constant inputs from the optimizer before the call.  */
  __asm("" : "+v"(first.x), "+v"(second.x));
  got.x = test (first.x, second.x);

  if (!check_union128d (got, want))
    return;

#if DEBUG
  printf ("sse2_test_movsd_3; check_union128d failed\n");
  printf ("\t [%f,%f], [%f,%f] -> [%f,%f]\n", first.a[0], first.a[1],
          second.a[0], second.a[1], got.a[0], got.a[1]);
  printf ("\t expect [%f,%f]\n", want[0], want[1]);
#else
  abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_mulpd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_mul_pd; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d product;

  __asm("" : "+v"(s1), "+v"(s2));
  product = _mm_mul_pd (s1, s2);
  return product;
}
/* Check _mm_mul_pd against per-lane scalar products.  With DEBUG set a
   mismatch is printed, otherwise it aborts.  */
static void
TEST (void)
{
  union128d lhs, rhs, got;
  double want[2];
  int lane;

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  got.x = test (lhs.x, rhs.x);

  for (lane = 0; lane < 2; lane++)
    want[lane] = lhs.a[lane] * rhs.a[lane];

  if (!check_union128d (got, want))
    return;

#if DEBUG
  printf ("sse2_test_mul_pd_1; check_union128d failed\n");
  printf ("\t [%f,%f] * [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1], rhs.a[0],
          rhs.a[1], got.a[0], got.a[1]);
  printf ("\t expect [%f,%f]\n", want[0], want[1]);
#else
  abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_mulsd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_mul_sd; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d product;

  __asm("" : "+v"(s1), "+v"(s2));
  product = _mm_mul_sd (s1, s2);
  return product;
}
/* Check _mm_mul_sd: lane 0 must be the product of the low elements and
   lane 1 must be copied from the first operand.  With DEBUG set a
   mismatch is printed, otherwise it aborts.  */
static void
TEST (void)
{
  union128d lhs, rhs, got;
  double want[2];

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  got.x = test (lhs.x, rhs.x);

  want[0] = lhs.a[0] * rhs.a[0];
  want[1] = lhs.a[1];

  if (!check_union128d (got, want))
    return;

#if DEBUG
  printf ("sse2_test_mul_sd_1; check_union128d failed\n");
  printf ("\t [%f,%f] * [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1], rhs.a[0],
          rhs.a[1], got.a[0], got.a[1]);
  printf ("\t expect [%f,%f]\n", want[0], want[1]);
#else
  abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_orpd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_or_pd.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d merged = _mm_or_pd (s1, s2);

  return merged;
}
/* Check _mm_or_pd: the result must match the bitwise OR of the two
   inputs, computed here on their 64-bit integer images.  */
static void
TEST (void)
{
  union128d lhs, rhs, got;
  /* Lets the same 16 bytes be viewed as doubles or as integers.  */
  union
  {
    double d[2];
    long long ll[2];
  } lhs_bits, rhs_bits, want;
  int lane;

  lhs.x = _mm_set_pd (1234, 44386);
  rhs.x = _mm_set_pd (5198, 23098);
  _mm_storeu_pd (lhs_bits.d, lhs.x);
  _mm_storeu_pd (rhs_bits.d, rhs.x);

  got.x = test (lhs.x, rhs.x);

  for (lane = 0; lane < 2; lane++)
    want.ll[lane] = lhs_bits.ll[lane] | rhs_bits.ll[lane];

  if (check_union128d (got, want.d))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_packssdw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_packs_epi32; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i packed;

  __asm("" : "+v"(s1), "+v"(s2));
  packed = _mm_packs_epi32 (s1, s2);
  return packed;
}
/* Check _mm_packs_epi32: each 32-bit input is clamped to
   [-32768, 32767]; results from the first operand fill elements 0-3
   and those from the second fill elements 4-7.  With DEBUG set a
   mismatch is printed, otherwise it aborts.  */
static void
TEST (void)
{
  union128i_d s1, s2;
  union128i_w u;
  short e[8];
  int i;

  s1.x = _mm_set_epi32 (2134, -128, 655366, 9999);
  s2.x = _mm_set_epi32 (41124, 234, 2, -800900);
  u.x = test (s1.x, s2.x);

  /* Both halves of the expectation are filled in the same pass.  */
  for (i = 0; i < 4; i++)
    {
      e[i] = s1.a[i] > 32767 ? 32767
             : s1.a[i] < -32768 ? -32768 : s1.a[i];
      e[i+4] = s2.a[i] > 32767 ? 32767
               : s2.a[i] < -32768 ? -32768 : s2.a[i];
    }

  if (!check_union128i_w (u, e))
    return;

#if DEBUG
  printf ("sse2_test_packssdw_1; check_union128i_w failed\n");
  printf (
      "\t ([%x,%x,%x,%x], [%x,%x,%x,%x]) -> [%x,%x,%x,%x, %x,%x,%x,%x]\n",
      s1.a[0], s1.a[1], s1.a[2], s1.a[3], s2.a[0], s2.a[1], s2.a[2],
      s2.a[3], u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6],
      u.a[7]);
  printf ("\t expect [%x,%x,%x,%x, %x,%x,%x,%x]\n", e[0], e[1], e[2], e[3],
          e[4], e[5], e[6], e[7]);
#else
  abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_packsswb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_packs_epi16; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i packed;

  __asm("" : "+v"(s1), "+v"(s2));
  packed = _mm_packs_epi16 (s1, s2);
  return packed;
}
/* Check _mm_packs_epi16: each 16-bit input is clamped to [-128, 127];
   results from the first operand fill bytes 0-7 and those from the
   second fill bytes 8-15.  With DEBUG set a mismatch is printed,
   otherwise it aborts.  */
static void
TEST (void)
{
  union128i_w s1, s2;
  union128i_b u;
  char e[16];
  int i;

  s1.x = _mm_set_epi16 (2134, -128, 1234, 6354, 1002, 3004, 4050, 9999);
  s2.x = _mm_set_epi16 (41124, 234, 2344, 2354, 607, 1, 2, -8009);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 8; i++)
    {
      if (s1.a[i] > 127)
        e[i] = 127;
      else if (s1.a[i] < -128)
        e[i] = -128;
      else
        e[i] = s1.a[i];
    }

  for (i = 0; i < 8; i++)
    {
      if (s2.a[i] > 127)
        e[i+8] = 127;
      else if (s2.a[i] < -128)
        e[i+8] = -128;
      else
        e[i+8] = s2.a[i];
    }

  if (check_union128i_b (u, e))
#if DEBUG
    {
      /* Name the checker that is actually called above; the message
         used to say check_union128i_w.  */
      printf ("sse2_test_packsswb_1; check_union128i_b failed\n");
      printf ("\t ([%x,%x,%x,%x, %x,%x,%x,%x], [%x,%x,%x,%x, %x,%x,%x,%x])\n",
              s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
              s1.a[7], s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5],
              s2.a[6], s2.a[7]);
      printf ("\t\t -> [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
              u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
              u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
              u.a[15]);
      printf (
          "\t expect [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
          e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
          e[11], e[12], e[13], e[14], e[15]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_packuswb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_packus_epi16; the empty asm makes
   both operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i packed;

  __asm("" : "+v"(s1), "+v"(s2));
  packed = _mm_packus_epi16 (s1, s2);
  return packed;
}
/* Check _mm_packus_epi16: each 16-bit input is clamped to [0, 255];
   results from the first operand fill bytes 0-7 and those from the
   second fill bytes 8-15.  With DEBUG set a mismatch is printed,
   otherwise it aborts.  */
static void
TEST (void)
{
  union128i_w s1, s2;
  union128i_ub u;
  unsigned char e[16];
  int i, tmp;

  s1.x = _mm_set_epi16 (1, 2, 3, 4, -5, -6, -7, -8);
  s2.x = _mm_set_epi16 (-9, -10, -11, -12, 13, 14, 15, 16);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 8; i++)
    {
      tmp = s1.a[i] < 0 ? 0 : s1.a[i];
      tmp = tmp > 255 ? 255 : tmp;
      e[i] = tmp;

      tmp = s2.a[i] < 0 ? 0 : s2.a[i];
      tmp = tmp > 255 ? 255 : tmp;
      e[i+8] = tmp;
    }

  if (check_union128i_ub (u, e))
#if DEBUG
    {
      /* Name the checker that is actually called above; the message
         used to say check_union128i_w.  */
      printf ("sse2_test_packuswb_1; check_union128i_ub failed\n");
      printf ("\t ([%x,%x,%x,%x, %x,%x,%x,%x], [%x,%x,%x,%x, %x,%x,%x,%x])\n",
              s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
              s1.a[7], s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5],
              s2.a[6], s2.a[7]);
      printf ("\t\t -> [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
              u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
              u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
              u.a[15]);
      printf (
          "\t expect [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
          e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
          e[11], e[12], e[13], e[14], e[15]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_add_epi8; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_add_epi8 (s1, s2);
  return sum;
}
/* Check _mm_add_epi8 against element-wise byte sums; abort on any
   mismatch.  */
static void
TEST (void)
{
  union128i_b lhs, rhs, got;
  char want[16];
  int idx;

  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
  got.x = test (lhs.x, rhs.x);

  idx = 0;
  while (idx < 16)
    {
      want[idx] = lhs.a[idx] + rhs.a[idx];
      idx++;
    }

  if (check_union128i_b (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_add_epi32; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_add_epi32 (s1, s2);
  return sum;
}
/* Check _mm_add_epi32 against element-wise sums; abort on any
   mismatch.  */
static void
TEST (void)
{
  union128i_d lhs, rhs, got;
  int want[4];
  int idx;

  lhs.x = _mm_set_epi32 (30,90,-80,-40);
  rhs.x = _mm_set_epi32 (76, -100, -34, -78);
  got.x = test (lhs.x, rhs.x);

  idx = 0;
  while (idx < 4)
    {
      want[idx] = lhs.a[idx] + rhs.a[idx];
      idx++;
    }

  if (check_union128i_d (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddq_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_add_epi64; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_add_epi64 (s1, s2);
  return sum;
}
/* Check _mm_add_epi64 against element-wise sums; abort on any
   mismatch.  */
static void
TEST (void)
{
  union128i_q lhs, rhs, got;
  long long want[2];

  lhs.x = _mm_set_epi64x (90,-80);
  rhs.x = _mm_set_epi64x (76, -100);
  got.x = test (lhs.x, rhs.x);

  want[0] = lhs.a[0] + rhs.a[0];
  want[1] = lhs.a[1] + rhs.a[1];

  if (check_union128i_q (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddsb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_adds_epi8; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_adds_epi8 (s1, s2);
  return sum;
}
/* Check _mm_adds_epi8 against element-wise signed sums clamped to
   [-128, 127].  With DEBUG set a mismatch is printed, otherwise it
   aborts.  */
static void
TEST (void)
{
  union128i_b u, s1, s2;
  char e[16];
  int i, tmp;

  s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100, -34, -78, -39, 6, 3, 4, 5, 119);
  u.x = test (s1.x, s2.x);

  i = 0;
  while (i < 16)
    {
      /* Operands are read as signed bytes whatever plain char is.  */
      tmp = (signed char)s1.a[i] + (signed char)s2.a[i];
      if (tmp > 127)
        tmp = 127;
      if (tmp < -128)
        tmp = -128;
      e[i] = tmp;
      i++;
    }

  if (!check_union128i_b (u, e))
    return;

#if DEBUG
  printf ("sse2_test_paddsb_1; check_union128i_b failed\n");
  printf (
      "\tadds\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
      s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
      s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
      s1.a[14], s1.a[15]);
  printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
          s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5], s2.a[6],
          s2.a[7], s2.a[8], s2.a[9], s2.a[10], s2.a[11], s2.a[12], s2.a[13],
          s2.a[14], s2.a[15]);
  printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
          u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
          u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
          u.a[15]);
  printf (
      "\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
      e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
      e[11], e[12], e[13], e[14], e[15]);
#else
  abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddsw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_adds_epi16; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_adds_epi16 (s1, s2);
  return sum;
}
/* Check _mm_adds_epi16 against element-wise sums clamped to
   [-32768, 32767].  With DEBUG set a mismatch is printed, otherwise it
   aborts.  */
static void
TEST (void)
{
  union128i_w u, s1, s2;
  short e[8];
  int i, tmp;

  s1.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
  s2.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
  u.x = test (s1.x, s2.x);

  i = 0;
  while (i < 8)
    {
      tmp = s1.a[i] + s2.a[i];
      if (tmp > 32767)
        tmp = 32767;
      if (tmp < -32768)
        tmp = -32768;
      e[i] = tmp;
      i++;
    }

  if (!check_union128i_w (u, e))
    return;

#if DEBUG
  printf ("sse2_test_paddsw_1; check_union128i_w failed\n");
  printf ("\tadds\t([%x,%x,%x,%x, %x,%x,%x,%x],\n", s1.a[0], s1.a[1],
          s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6], s1.a[7]);
  printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x])\n", s2.a[0], s2.a[1], s2.a[2],
          s2.a[3], s2.a[4], s2.a[5], s2.a[6], s2.a[7]);
  printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2],
          u.a[3], u.a[4], u.a[5], u.a[6], u.a[7]);
  printf ("\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x]\n", e[0], e[1], e[2], e[3],
          e[4], e[5], e[6], e[7]);
#else
  abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddusb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_adds_epu8; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_adds_epu8 (s1, s2);
  return sum;
}
/* Check _mm_adds_epu8 against element-wise unsigned sums that saturate
   at 255.  With DEBUG set a mismatch is printed, otherwise it
   aborts.  */
static void
TEST (void)
{
  union128i_b u, s1, s2;
  char e[16] = {0};
  int i, tmp;

  s1.x = _mm_set_epi8 (30, 2, 3, 4, 10, 20, 30, 90, 80, 40, 100, 15, 98, 25, 98, 7);
  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 16; i++)
    {
      tmp = (unsigned char)s1.a[i] + (unsigned char)s2.a[i];
      /* Saturate at 0xFF.  The old code set tmp to -1 here and then
         reset every negative tmp to 0, so an overflowing sum produced
         an expected value of 0 instead of 0xFF.  The sum of two
         unsigned bytes is never negative, so no lower clamp is
         needed.  */
      if (tmp > 255)
        tmp = 255;
      e[i] = (char) tmp;
    }

  if (check_union128i_b (u, e))
#if DEBUG
    {
      printf ("sse2_test_paddusb_1; check_union128i_b failed\n");
      printf (
          "\tadds\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
          s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
          s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
          s1.a[14], s1.a[15]);
      printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
              s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5], s2.a[6],
              s2.a[7], s2.a[8], s2.a[9], s2.a[10], s2.a[11], s2.a[12], s2.a[13],
              s2.a[14], s2.a[15]);
      printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
              u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
              u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
              u.a[15]);
      printf (
          "\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
          e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
          e[11], e[12], e[13], e[14], e[15]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddusw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_adds_epu16; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_adds_epu16 (s1, s2);
  return sum;
}
/* Check _mm_adds_epu16 against element-wise unsigned sums that saturate
   at 65535; abort on any mismatch.  */
static void
TEST (void)
{
  union128i_w u, s1, s2;
  short e[8];
  int i, tmp;

  s1.x = _mm_set_epi16 (10,20,30,90,80,40,100,15);
  s2.x = _mm_set_epi16 (11, 98, 76, 100, 34, 78, 39, 14);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 8; i++)
    {
      /* Read the elements as unsigned 16-bit values, as the intrinsic
         does; adding them as signed shorts could never exceed 65535.  */
      tmp = (unsigned short) s1.a[i] + (unsigned short) s2.a[i];
      /* Saturate at 0xFFFF.  The old code set tmp to -1 here and then
         reset every negative tmp to 0, so an overflowing sum produced
         an expected value of 0 instead of 0xFFFF.  */
      if (tmp > 65535)
        tmp = 65535;
      e[i] = (short) tmp;
    }

  if (check_union128i_w (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_paddw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_add_epi16; the empty asm makes both
   operands opaque to the optimizer.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i sum;

  __asm("" : "+v"(s1), "+v"(s2));
  sum = _mm_add_epi16 (s1, s2);
  return sum;
}
/* Check _mm_add_epi16 against element-wise sums; abort on any
   mismatch.  */
static void
TEST (void)
{
  union128i_w lhs, rhs, got;
  short want[8];
  int idx;

  lhs.x = _mm_set_epi16 (10,20,30,90,-80,-40,-100,-15);
  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
  got.x = test (lhs.x, rhs.x);

  idx = 0;
  while (idx < 8)
    {
      want[idx] = lhs.a[idx] + rhs.a[idx];
      idx++;
    }

  if (check_union128i_w (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pavgb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_avg_epu8.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i average = _mm_avg_epu8 (s1, s2);

  return average;
}
/* Check _mm_avg_epu8 against the rounded-up average (a + b + 1) >> 1
   of each byte pair; abort on any mismatch.  */
static void
TEST (void)
{
  union128i_ub lhs, rhs, got;
  unsigned char want[16];
  int idx;

  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7);
  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
  got.x = test (lhs.x, rhs.x);

  idx = 0;
  while (idx < 16)
    {
      want[idx] = (lhs.a[idx] + rhs.a[idx]+1)>>1;
      idx++;
    }

  if (check_union128i_ub (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pavgw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_avg_epu16.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i average = _mm_avg_epu16 (s1, s2);

  return average;
}
/* Check _mm_avg_epu16 against the rounded-up average (a + b + 1) >> 1
   of each element pair; abort on any mismatch.  */
static void
TEST (void)
{
  union128i_uw lhs, rhs, got;
  unsigned short want[8];
  int idx;

  lhs.x = _mm_set_epi16 (10,20,30,90,80,40,100,15);
  rhs.x = _mm_set_epi16 (11, 98, 76, 100, 34, 78, 39, 14);
  got.x = test (lhs.x, rhs.x);

  idx = 0;
  while (idx < 8)
    {
      want[idx] = (lhs.a[idx] + rhs.a[idx]+1)>>1;
      idx++;
    }

  if (check_union128i_uw (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pcmpeqb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_cmpeq_epi8.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i mask = _mm_cmpeq_epi8 (s1, s2);

  return mask;
}
/* Check _mm_cmpeq_epi8: each result byte must be -1 where the inputs
   are equal and 0 elsewhere; abort on any mismatch.  */
static void
TEST (void)
{
  union128i_b lhs, rhs, got;
  char want[16];
  int idx;

  lhs.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7);
  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
  got.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 16; idx++)
    {
      if (lhs.a[idx] == rhs.a[idx])
        want[idx] = -1;
      else
        want[idx] = 0;
    }

  if (check_union128i_b (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pcmpeqd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_cmpeq_epi32.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i mask = _mm_cmpeq_epi32 (s1, s2);

  return mask;
}
/* Check _mm_cmpeq_epi32: each result element must be -1 where the
   inputs are equal and 0 elsewhere; abort on any mismatch.  */
static void
TEST (void)
{
  union128i_d lhs, rhs, got;
  int want[4];
  int idx;

  lhs.x = _mm_set_epi32 (98, 25, 98,7);
  rhs.x = _mm_set_epi32 (88, 44, 33, 229);
  got.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 4; idx++)
    {
      if (lhs.a[idx] == rhs.a[idx])
        want[idx] = -1;
      else
        want[idx] = 0;
    }

  if (check_union128i_d (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pcmpeqw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_cmpeq_epi16.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i mask = _mm_cmpeq_epi16 (s1, s2);

  return mask;
}
/* Check _mm_cmpeq_epi16: each result element must be -1 where the
   inputs are equal and 0 elsewhere; abort on any mismatch.  */
static void
TEST (void)
{
  union128i_w lhs, rhs, got;
  short want[8];
  int idx;

  lhs.x = _mm_set_epi16 (20,30,90,80,40,100,15,98);
  rhs.x = _mm_set_epi16 (34, 78, 39, 6, 3, 4, 5, 119);
  got.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 8; idx++)
    {
      if (lhs.a[idx] == rhs.a[idx])
        want[idx] = -1;
      else
        want[idx] = 0;
    }

  if (check_union128i_w (got, want))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pcmpgtb_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper around _mm_cmpgt_epi8.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i mask = _mm_cmpgt_epi8 (s1, s2);

  return mask;
}
/* Check _mm_cmpgt_epi8: each result byte must be -1 where the first
   input is greater (signed compare) and 0 elsewhere; abort on any
   mismatch.  */
static void
TEST (void)
{
  union128i_b u, s1, s2;
  char e[16];
  int i;

  s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,80,40,100,15,98, 25, 98,7);
  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100, 34, 78, 39, 6, 3, 4, 5, 119);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 16; i++)
    {
      /* The intrinsic compares signed bytes.  The signedness of plain
         char is implementation-defined, so make the reference compare
         signed explicitly.  */
      e[i] = ((signed char) s1.a[i] > (signed char) s2.a[i]) ? -1 : 0;
    }

  if (check_union128i_b (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pcmpgtd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_cmpgt_epi32 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_cmpgt_epi32 (s1, s2);
  return result;
}
/* Run test () on two vectors of four ints and compare the result with a
   scalar reference: -1 where the first input is greater, 0 elsewhere.
   A non-zero return from check_union128i_d is a failure.  */
static void
TEST (void)
{
  union128i_d actual, lhs, rhs;
  int expected[4];
  int lane = 0;

  lhs.x = _mm_set_epi32 (98, 25, 98,7);
  rhs.x = _mm_set_epi32 (88, 44, 33, 229);
  actual.x = test (lhs.x, rhs.x);

  /* Build the reference mask one lane at a time.  */
  while (lane < 4)
    {
      if (lhs.a[lane] > rhs.a[lane])
        expected[lane] = -1;
      else
        expected[lane] = 0;
      lane++;
    }

  if (check_union128i_d (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pcmpgtw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_cmpgt_epi16 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_cmpgt_epi16 (s1, s2);
  return result;
}
/* Run test () on two vectors of eight shorts and compare the result with
   a scalar reference: -1 where the first input is greater, 0 elsewhere.
   A non-zero return from check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w actual, lhs, rhs;
  short expected[8];
  int lane = 0;

  lhs.x = _mm_set_epi16 (20,30,90,80,40,100,15,98);
  rhs.x = _mm_set_epi16 (34, 78, 39, 6, 3, 4, 5, 119);
  actual.x = test (lhs.x, rhs.x);

  /* Build the reference mask one lane at a time.  */
  while (lane < 8)
    {
      if (lhs.a[lane] > rhs.a[lane])
        expected[lane] = -1;
      else
        expected[lane] = 0;
      lane++;
    }

  if (check_union128i_w (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pextrw_1
#endif
#include <emmintrin.h>
#define msk0 0
#define msk1 1
#define msk2 2
#define msk3 3
#define msk4 4
#define msk5 5
#define msk6 6
#define msk7 7
/* Extract each of the eight 16-bit elements with _mm_extract_epi16 and
   compare it with the same element read through the short view of the
   source union.  Any mismatch aborts.  */
static void
TEST (void)
{
  union
    {
      __m128i x;
      int i[4];
      short s[8];
    } src;
  /* Selector used for each extraction, in the same order as got[].  */
  int sel[8] = { msk0, msk1, msk2, msk3, msk4, msk5, msk6, msk7 };
  int got[8];
  int n;

  src.i[0] = 0x04030201;
  src.i[1] = 0x08070605;
  src.i[2] = 0x0C0B0A09;
  src.i[3] = 0x100F0E0D;

  /* Each call is written out so the selector is a literal constant.  */
  got[0] = _mm_extract_epi16 (src.x, msk0);
  got[1] = _mm_extract_epi16 (src.x, msk1);
  got[2] = _mm_extract_epi16 (src.x, msk2);
  got[3] = _mm_extract_epi16 (src.x, msk3);
  got[4] = _mm_extract_epi16 (src.x, msk4);
  got[5] = _mm_extract_epi16 (src.x, msk5);
  got[6] = _mm_extract_epi16 (src.x, msk6);
  got[7] = _mm_extract_epi16 (src.x, msk7);

  n = 0;
  while (n < 8)
    {
      if (got[n] != src.s [sel[n]])
        abort ();
      n++;
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pinsrw_1
#endif
#include <emmintrin.h>
#include <string.h>
#define msk0 0x00
#define msk1 0x01
#define msk2 0x02
#define msk3 0x03
#define msk4 0x04
#define msk5 0x05
#define msk6 0x06
#define msk7 0x07
/* Check _mm_insert_epi16 in two passes.  The first inserts ins[0] at each
   of the eight selectors; the second inserts ins[0] / ins[1] alternately
   at selector msk0.  Every result is compared byte-for-byte (memcmp)
   with a copy of the source whose selected short was overwritten through
   the union's short view.  */
static void
TEST (void)
{
  union
    {
      __m128i x;
      unsigned int i[4];
      unsigned short s[8];
    } got [8], src, want;
  int sel[8] = { msk0, msk1, msk2, msk3, msk4, msk5, msk6, msk7 };
  unsigned short ins[4] = { 3, 4, 5, 6 };
  int n;

  src.i[0] = 0x35251505;
  src.i[1] = 0x75655545;
  src.i[2] = 0xB5A59585;
  src.i[3] = 0xF5E5D5C5;

  /* Check pinsrw imm8, r32, xmm.  Each call is written out so the
     selector is a literal constant.  */
  got[0].x = _mm_insert_epi16 (src.x, ins[0], msk0);
  got[1].x = _mm_insert_epi16 (src.x, ins[0], msk1);
  got[2].x = _mm_insert_epi16 (src.x, ins[0], msk2);
  got[3].x = _mm_insert_epi16 (src.x, ins[0], msk3);
  got[4].x = _mm_insert_epi16 (src.x, ins[0], msk4);
  got[5].x = _mm_insert_epi16 (src.x, ins[0], msk5);
  got[6].x = _mm_insert_epi16 (src.x, ins[0], msk6);
  got[7].x = _mm_insert_epi16 (src.x, ins[0], msk7);

  n = 0;
  while (n < 8)
    {
      want.x = src.x;
      want.s[sel[n]] = ins[0];
      if (memcmp (&want, &got[n], sizeof (want)))
        abort ();
      n++;
    }

  /* Check pinsrw imm8, m16, xmm.  */
  n = 0;
  while (n < 8)
    {
      got[n].x = _mm_insert_epi16 (src.x, ins[n % 2], msk0);
      sel[n] = msk0;
      n++;
    }

  n = 0;
  while (n < 8)
    {
      want.x = src.x;
      want.s[sel[n]] = ins[n % 2];
      if (memcmp (&want, &got[n], sizeof (want)))
        abort ();
      n++;
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmaddwd_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_madd_epi16 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  result = _mm_madd_epi16 (s1, s2);
  return result;
}
/* Run test () on two vectors of eight shorts and compare the four int
   results with a scalar reference: each expected lane is the sum of the
   products of one adjacent pair of input elements.  A non-zero return
   from check_union128i_d is a failure.  */
static void
TEST (void)
{
  union128i_w lhs, rhs;
  union128i_d actual;
  int expected[4];
  int pair, lo, hi;

  lhs.x = _mm_set_epi16 (2134,3343,1234,6354, 1, 3, 4, 5);
  rhs.x = _mm_set_epi16 (41124,234,2344,2354,9, -1, -8, -10);
  actual.x = test (lhs.x, rhs.x);

  pair = 0;
  while (pair < 4)
    {
      lo = pair*2;
      hi = (pair*2) + 1;
      expected[pair] = (lhs.a[lo] * rhs.a[lo])+(lhs.a[hi] * rhs.a[hi]);
      pair++;
    }

  if (check_union128i_d (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmaxsw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_max_epi16 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_max_epi16 (s1, s2);
  return result;
}
/* Run test () on two vectors of eight shorts and compare the result with
   the larger element of each lane computed in scalar code.  A non-zero
   return from check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w actual, lhs, rhs;
  short expected[8];
  int lane = 0;

  lhs.x = _mm_set_epi16 (1,2,3,4,5,6,7,8);
  rhs.x = _mm_set_epi16 (8,7,6,5,4,3,2,1);
  actual.x = test (lhs.x, rhs.x);

  while (lane < 8)
    {
      if (lhs.a[lane] > rhs.a[lane])
        expected[lane] = lhs.a[lane];
      else
        expected[lane] = rhs.a[lane];
      lane++;
    }

  if (check_union128i_w (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmaxub_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_max_epu8 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_max_epu8 (s1, s2);
  return result;
}
/* Run test () on two vectors of sixteen bytes and compare the result
   with the larger element of each lane computed in scalar code.  A
   non-zero return from check_union128i_ub is a failure.  */
static void
TEST (void)
{
  union128i_ub actual, lhs, rhs;
  unsigned char expected[16];
  int lane = 0;

  lhs.x = _mm_set_epi8 (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
  rhs.x = _mm_set_epi8 (16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
  actual.x = test (lhs.x, rhs.x);

  while (lane < 16)
    {
      if (lhs.a[lane] > rhs.a[lane])
        expected[lane] = lhs.a[lane];
      else
        expected[lane] = rhs.a[lane];
      lane++;
    }

  if (check_union128i_ub (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pminsw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_min_epi16 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_min_epi16 (s1, s2);
  return result;
}
/* Run test () on two vectors of eight shorts and compare the result with
   the smaller element of each lane computed in scalar code.  A non-zero
   return from check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w actual, lhs, rhs;
  short expected[8];
  int lane = 0;

  lhs.x = _mm_set_epi16 (1,2,3,4,5,6,7,8);
  rhs.x = _mm_set_epi16 (8,7,6,5,4,3,2,1);
  actual.x = test (lhs.x, rhs.x);

  while (lane < 8)
    {
      if (lhs.a[lane] < rhs.a[lane])
        expected[lane] = lhs.a[lane];
      else
        expected[lane] = rhs.a[lane];
      lane++;
    }

  if (check_union128i_w (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pminub_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_min_epu8 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_min_epu8 (s1, s2);
  return result;
}
/* Run test () on two vectors of sixteen bytes and compare the result
   with the smaller element of each lane computed in scalar code.  A
   non-zero return from check_union128i_ub is a failure.  */
static void
TEST (void)
{
  union128i_ub actual, lhs, rhs;
  unsigned char expected[16];
  int lane = 0;

  lhs.x = _mm_set_epi8 (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
  rhs.x = _mm_set_epi8 (16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
  actual.x = test (lhs.x, rhs.x);

  while (lane < 16)
    {
      if (lhs.a[lane] < rhs.a[lane])
        expected[lane] = lhs.a[lane];
      else
        expected[lane] = rhs.a[lane];
      lane++;
    }

  if (check_union128i_ub (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmovmskb_1
#endif
#include <emmintrin.h>
#ifdef _ARCH_PWR8
/* Non-inlined wrapper: returns _mm_movemask_epi8 (s1).  */
static int
__attribute__((noinline, unused))
test (__m128i s1)
{
  int mask;

  mask = _mm_movemask_epi8 (s1);
  return mask;
}
#endif
/* Run test () on a vector of sixteen chars and compare the returned mask
   with a scalar reference that sets bit i when bit 7 of element i is
   set.  The body is compiled only when _ARCH_PWR8 is defined.  When
   DEBUG is non-zero a failure prints diagnostics instead of aborting.  */
static void
TEST (void)
{
#ifdef _ARCH_PWR8
  union128i_b s1;
  int got, want = 0;
  int bit;

  s1.x = _mm_set_epi8 (1,2,3,4,10,20,30,90,-80,-40,-100,-15,98, 25, 98,7);
  /* Empty asm taking the input as a read-write "+v" operand;
     presumably keeps the compiler from folding it -- confirm.  */
  __asm("" : "+v"(s1.x));
  got = test (s1.x);

  bit = 0;
  while (bit < 16)
    {
      if (s1.a[bit] & (1<<7))
        want = want | (1<<bit);
      bit++;
    }

  if (checkVi (&got, &want, 1))
#if DEBUG
    {
      printf ("sse2_test_pmovmskb_1; checkVi failed\n");
      printf ("\t ([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x], -> %x)\n",
              s1.a[0], s1.a[1], s1.a[2], s1.a[3],
              s1.a[4], s1.a[5], s1.a[6], s1.a[7],
              s1.a[8], s1.a[9], s1.a[10], s1.a[11],
              s1.a[12], s1.a[13], s1.a[14], s1.a[15],
              got);
      printf ("\t expect %x\n", want);
    }
#else
    abort ();
#endif
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmulhuw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_mulhi_epu16 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_mulhi_epu16 (s1, s2);
  return result;
}
/* Run test () on two vectors of eight 16-bit elements and compare the
   result with a scalar reference: each expected lane is the upper 16
   bits of the 32-bit product of the corresponding inputs.  A non-zero
   return from check_union128i_uw is a failure.  */
static void
TEST (void)
{
  union128i_uw u, s1, s2;
  unsigned short e[8];
  unsigned int product;
  int i;

  s1.x = _mm_set_epi16 (10,2067,3033,90,80,40,1000,15);
  s2.x = _mm_set_epi16 (11, 9834, 7444, 10222, 34, 7833, 39, 14);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 8; i++)
    {
      /* Multiply as unsigned int.  Left to the integer promotions the
         operands would be multiplied as int (assuming the union lanes
         are unsigned short, like e[]), and a product above INT_MAX
         would be signed overflow.  */
      product = (unsigned int) s1.a[i] * (unsigned int) s2.a[i];
      e[i] = product >> 16;
    }

  if (check_union128i_uw (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmulhw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_mulhi_epi16 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  result = _mm_mulhi_epi16 (s1, s2);
  return result;
}
/* Run test () on two vectors of eight shorts and compare the result with
   a scalar reference: each expected lane is bits 16..31 of the int
   product of the corresponding inputs.  When DEBUG is non-zero a failure
   prints diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_w u, s1, s2;
  short e[8];
  int lane, product;

  s1.x = _mm_set_epi16 (10,2067,-3033,90,80,40,-1000,15);
  s2.x = _mm_set_epi16 (11, 9834, 7444, -10222, 34, -7833, 39, 14);
  u.x = test (s1.x, s2.x);

  lane = 0;
  while (lane < 8)
    {
      product = s1.a[lane] * s2.a[lane];
      e[lane] = (product & 0xffff0000)>>16;
      lane++;
    }

  if (check_union128i_w (u, e))
#if DEBUG
    {
      printf ("sse2_test_pmulhw_1; check_union128i_w failed\n");
      printf ("\tmulhi\t([%x,%x,%x,%x, %x,%x,%x,%x],\n",
              s1.a[0], s1.a[1], s1.a[2], s1.a[3],
              s1.a[4], s1.a[5], s1.a[6], s1.a[7]);
      printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x])\n",
              s2.a[0], s2.a[1], s2.a[2], s2.a[3],
              s2.a[4], s2.a[5], s2.a[6], s2.a[7]);
      printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x]\n",
              u.a[0], u.a[1], u.a[2], u.a[3],
              u.a[4], u.a[5], u.a[6], u.a[7]);
      printf ("\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x]\n",
              e[0], e[1], e[2], e[3],
              e[4], e[5], e[6], e[7]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#define NO_WARN_X86_INTRINSICS 1
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmullw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_mullo_epi16 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  result = _mm_mullo_epi16 (s1, s2);
  return result;
}
/* Run test () on two vectors of eight shorts and compare the result with
   a scalar reference: each expected lane is the int product of the
   corresponding inputs converted to short.  A non-zero return from
   check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w actual, lhs, rhs;
  short expected[8];
  int lane, product;

  lhs.x = _mm_set_epi16 (10,2067,-3033,90,80,40,-1000,15);
  rhs.x = _mm_set_epi16 (11, 9834, 7444, -10222, 34, -7833, 39, 14);
  actual.x = test (lhs.x, rhs.x);

  lane = 0;
  while (lane < 8)
    {
      product = lhs.a[lane] * rhs.a[lane];
      /* Storing into a short keeps only the low half of the product.  */
      expected[lane] = product;
      lane++;
    }

  if (check_union128i_w (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pmuludq_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_mul_epu32 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  result = _mm_mul_epu32 (s1, s2);
  return result;
}
/* Run test () on two vectors of four 32-bit elements and compare the two
   64-bit results with a scalar reference: the products of elements 0 and
   2 of the inputs.  When DEBUG is non-zero a failure prints diagnostics
   instead of aborting.  */
static void
TEST (void)
{
  union128i_d s1, s2;
  union128i_q u;
  long long e[2];

  s1.x = _mm_set_epi32 (10,2067,3033,905);
  s2.x = _mm_set_epi32 (11, 9834, 7444, 10222);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s1.x), "+v"(s2.x));
  u.x = test (s1.x, s2.x);

  /* The intrinsic is an unsigned 32x32->64 multiply, so treat the
     operands as unsigned and widen before multiplying.  A plain
     multiplication of the union lanes (presumably int) would be done in
     32 bits and could overflow for larger inputs.  */
  e[0] = (unsigned long long) (unsigned int) s1.a[0] * (unsigned int) s2.a[0];
  e[1] = (unsigned long long) (unsigned int) s1.a[2] * (unsigned int) s2.a[2];

  if (check_union128i_q (u, e))
#if DEBUG
    {
      printf ("sse2_test_pmuludq_1; check_union128i_q failed\n");
      printf ("\t ([%x,%x,%x,%x], [%x,%x,%x,%x], -> [%llx, %llx])\n", s1.a[0],
              s1.a[1], s1.a[2], s1.a[3], s2.a[0], s2.a[1], s2.a[2], s2.a[3],
              u.a[0], u.a[1]);
      printf ("\t expect [%llx, %llx]\n", e[0], e[1]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psadbw_1
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_sad_epu8 (s1, s2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i result;

  result = _mm_sad_epu8 (s1, s2);
  return result;
}
/* Run test () on two vectors of sixteen bytes and compare the result,
   viewed as eight shorts, with a scalar reference: element 0 holds the
   sum of absolute differences of bytes 0..7, element 4 the sum for bytes
   8..15, and every other element is 0.  When DEBUG is non-zero a failure
   prints diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_ub s1, s2;
  union128i_w u;
  short e[8] = { 0 };
  unsigned char diff[16];
  int n;

  s1.x = _mm_set_epi8 (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16);
  s2.x = _mm_set_epi8 (16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
  u.x = test (s1.x, s2.x);

  n = 0;
  while (n < 16)
    {
      diff [n] = __builtin_abs (s1.a[n] - s2.a[n]);
      /* The low eight bytes accumulate into e[0], the high eight into
         e[4].  */
      if (n < 8)
        e[0] += diff[n];
      else
        e[4] += diff[n];
      n++;
    }

  if (check_union128i_w (u, e))
#if DEBUG
    {
      printf ("sse2_test_psadbw_1; check_union128i_w failed\n");
      printf ("\tadds\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
              s1.a[0], s1.a[1], s1.a[2], s1.a[3],
              s1.a[4], s1.a[5], s1.a[6], s1.a[7],
              s1.a[8], s1.a[9], s1.a[10], s1.a[11],
              s1.a[12], s1.a[13], s1.a[14], s1.a[15]);
      printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
              s2.a[0], s2.a[1], s2.a[2], s2.a[3],
              s2.a[4], s2.a[5], s2.a[6], s2.a[7],
              s2.a[8], s2.a[9], s2.a[10], s2.a[11],
              s2.a[12], s2.a[13], s2.a[14], s2.a[15]);
      printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x]\n",
              u.a[0], u.a[1], u.a[2], u.a[3],
              u.a[4], u.a[5], u.a[6], u.a[7]);
      printf ("\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x]\n",
              e[0], e[1], e[2], e[3],
              e[4], e[5], e[6], e[7]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pshufd_1
#endif
#define N 0xec
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_shuffle_epi32 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_shuffle_epi32 (s1, N);
  return result;
}
/* Run test () on a vector of four ints and compare the result with a
   scalar reference: expected lane i is the source element selected by
   the i-th 2-bit field of N.  When DEBUG is non-zero a failure prints
   diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_d u, s1;
  int e[4] = { 0 };
  int lane, sel;

  s1.x = _mm_set_epi32 (16,15,14,13);
  u.x = test (s1.x);

  lane = 0;
  while (lane < 4)
    {
      /* Pull out the 2-bit selector for this lane.  */
      sel = (N >> (2*lane)) & 0x3;
      e[lane] = s1.a[sel];
      lane++;
    }

  if (check_union128i_d(u, e))
#if DEBUG
    {
      printf ("sse2_test_pshufd_1; check_union128i_d failed\n");
      printf ("\t ([%x,%x,%x,%x]) -> [%x,%x,%x,%x]\n",
              s1.a[0], s1.a[1], s1.a[2], s1.a[3],
              u.a[0], u.a[1], u.a[2], u.a[3]);
      printf ("\t expect [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pshufhw_1
#endif
#define N 0xec
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_shufflehi_epi16 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_shufflehi_epi16 (s1, N);
  return result;
}
/* Run test () on a vector of two 64-bit elements and compare the result,
   viewed as eight shorts, with a scalar reference.  Expected shorts 0..3
   are the four 16-bit fields of source element 0 in order; expected
   shorts 4..7 are the 16-bit fields of source element 1 picked by the
   four 2-bit fields of N.  When DEBUG is non-zero a failure prints
   diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_q s1;
  union128i_w u;
  short e[8] = { 0 };
  int k;
  int field[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 };
  int sel[4];

  s1.x = _mm_set_epi64x (0xabcde,0xef58a234);
  u.x = test (s1.x);

  k = 0;
  while (k < 4)
    {
      /* Low half: copied straight through.  */
      e[k] = (s1.a[0]>>(16 * k)) & 0xffff;
      /* High half: indexed by the k-th 2-bit selector of N.  */
      sel[k] = (N & field[k])>>(2*k);
      e[k+4] = (s1.a[1] >> (16 * sel[k])) & 0xffff;
      k++;
    }

  if (check_union128i_w(u, e))
#if DEBUG
    {
      union128i_w s;
      s.x = s1.x;
      printf ("sse2_test_pshufhw_1; check_union128i_w failed\n");
      printf ("\t ([%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx])\n",
              s.a[0], s.a[1], s.a[2], s.a[3],
              s.a[4], s.a[5], s.a[6], s.a[7]);
      printf ("\t\t -> [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n",
              u.a[0], u.a[1], u.a[2], u.a[3],
              u.a[4], u.a[5], u.a[6], u.a[7]);
      printf ("\t expect [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n",
              e[0], e[1], e[2], e[3],
              e[4], e[5], e[6], e[7]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pshuflw_1
#endif
#define N 0xec
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_shufflelo_epi16 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_shufflelo_epi16 (s1, N);
  return result;
}
/* Run test () on a vector of two 64-bit elements and compare the result,
   viewed as eight shorts, with a scalar reference.  Expected shorts 4..7
   are the four 16-bit fields of source element 1 in order; expected
   shorts 0..3 are the 16-bit fields of source element 0 picked by the
   four 2-bit fields of N.  When DEBUG is non-zero a failure prints
   diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_q s1;
  union128i_w u;
  short e[8] = { 0 };
  int k;
  int field[4] = { 0x3, 0x3<<2, 0x3<<4, 0x3<<6 };
  int sel[4];

  s1.x = _mm_set_epi64x (0xabcde,0xef58a234);
  u.x = test (s1.x);

  k = 0;
  while (k < 4)
    {
      /* High half: copied straight through.  */
      e[k+4] = (s1.a[1]>>(16 * k)) & 0xffff;
      /* Low half: indexed by the k-th 2-bit selector of N.  */
      sel[k] = (N & field[k])>>(2*k);
      e[k] = (s1.a[0] >> (16 * sel[k])) & 0xffff;
      k++;
    }

  if (check_union128i_w(u, e))
#if DEBUG
    {
      union128i_w s;
      s.x = s1.x;
      printf ("sse2_test_pshuflw_1; check_union128i_w failed\n");
      printf ("\t ([%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx])\n",
              s.a[0], s.a[1], s.a[2], s.a[3],
              s.a[4], s.a[5], s.a[6], s.a[7]);
      printf ("\t\t -> [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n",
              u.a[0], u.a[1], u.a[2], u.a[3],
              u.a[4], u.a[5], u.a[6], u.a[7]);
      printf ("\t expect [%hx,%hx,%hx,%hx, %hx,%hx,%hx,%hx]\n",
              e[0], e[1], e[2], e[3],
              e[4], e[5], e[6], e[7]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pslld_1
#endif
#define N 0xf
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_slli_epi32 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_slli_epi32 (s1, N);
  return result;
}
/* Run test () on a vector of four ints and compare the result with each
   source element shifted left by N in scalar code.  When N is 32 or more
   the reference stays all zero.  A non-zero return from
   check_union128i_d is a failure.  */
static void
TEST (void)
{
  union128i_d actual, src;
  int expected[4] = {0};
  int lane;

  src.x = _mm_set_epi32 (1, -2, 3, 4);
  actual.x = test (src.x);

  if (N < 32)
    {
      lane = 0;
      while (lane < 4)
        {
          expected[lane] = src.a[lane] << N;
          lane++;
        }
    }

  if (check_union128i_d (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pslld_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_sll_epi32 (s1, c).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i c)
{
  __m128i result;

  result = _mm_sll_epi32 (s1, c);
  return result;
}
/* Run test () on a vector of four ints with the shift count taken from
   element 0 of a second vector, and compare the result with each source
   element shifted left by that count in scalar code.  When the count is
   32 or more the reference stays all zero.  When DEBUG is non-zero a
   failure prints diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_d u, s;
  union128i_q c;
  int e[4] = { 0 };
  int lane;

  s.x = _mm_set_epi32 (2, -3, 0x7000, 0x9000);
  c.x = _mm_set_epi64x (12, 23);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s.x), "+v"(c.x));
  u.x = test (s.x, c.x);

  if (c.a[0] < 32)
    {
      lane = 0;
      while (lane < 4)
        {
          e[lane] = s.a[lane] << c.a[0];
          lane++;
        }
    }

  if (check_union128i_d (u, e))
#if DEBUG
    {
      printf ("sse2_test_pslld_2; check_union128i_d failed\n");
      printf ("\tsll\t([%x,%x,%x,%x], [%llx,%llx]\n",
              s.a[0], s.a[1], s.a[2], s.a[3],
              c.a[0], c.a[1]);
      printf ("\t ->\t [%x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2], u.a[3]);
      printf ("\texpect\t [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_pslldq_1
#endif
#define N 0x5
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_slli_si128 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_slli_si128 (s1, N);
  return result;
}
/* Run test () on sixteen bytes loaded from src[] and compare the result
   with a scalar reference: src[] moved up by N byte positions, with the
   first N bytes left at zero.  When DEBUG is non-zero a failure prints
   diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_b u, s;
  char src[16] =
    { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
  char e[16] =
    { 0 };
  int dst;

  s.x = _mm_loadu_si128 ((__m128i *) src);
  u.x = test (s.x);

  /* Walk the destination positions; each takes the byte N places
     below it.  */
  dst = N;
  while (dst < 16)
    {
      e[dst] = src[dst - N];
      dst++;
    }

  if (check_union128i_b (u, e))
#if DEBUG
    {
      printf ("sse2_test_pslldq_1; check_union128i_b failed\n");
      printf ("\t s ([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
              s.a[0], s.a[1], s.a[2], s.a[3],
              s.a[4], s.a[5], s.a[6], s.a[7],
              s.a[8], s.a[9], s.a[10], s.a[11],
              s.a[12], s.a[13], s.a[14], s.a[15]);
      printf ("\t u ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
              u.a[0], u.a[1], u.a[2], u.a[3],
              u.a[4], u.a[5], u.a[6], u.a[7],
              u.a[8], u.a[9], u.a[10], u.a[11],
              u.a[12], u.a[13], u.a[14], u.a[15]);
      printf ("\t expect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
              e[0], e[1], e[2], e[3],
              e[4], e[5], e[6], e[7],
              e[8], e[9], e[10], e[11],
              e[12], e[13], e[14], e[15]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psllq_1
#endif
#define N 60
#include <emmintrin.h>
#ifdef _ARCH_PWR8
/* Non-inlined wrapper: returns _mm_slli_epi64 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_slli_epi64 (s1, N);
  return result;
}
#endif
/* Run test () on a vector of two 64-bit elements and compare the result
   with each source element shifted left by N in scalar code.  When N is
   64 or more the reference stays all zero.  The body is compiled only
   when _ARCH_PWR8 is defined.  */
static void
TEST (void)
{
#ifdef _ARCH_PWR8
  union128i_q actual, src;
  long long expected[2] = {0};
  int lane;

  src.x = _mm_set_epi64x (-1, 0xf);
  actual.x = test (src.x);

  if (N < 64)
    {
      lane = 0;
      while (lane < 2)
        {
          expected[lane] = src.a[lane] << N;
          lane++;
        }
    }

  if (check_union128i_q (actual, expected))
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psllq_2
#endif
#include <emmintrin.h>
#ifdef _ARCH_PWR8
/* Non-inlined wrapper: returns _mm_sll_epi64 (s1, c).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i c)
{
  __m128i result;

  result = _mm_sll_epi64 (s1, c);
  return result;
}
#endif
/* Run test () on a vector of two 64-bit elements with the shift count
   taken from element 0 of a second vector, and compare the result with
   each source element shifted left by that count in scalar code.  When
   the count is 64 or more the reference stays all zero.  The body is
   compiled only when _ARCH_PWR8 is defined.  */
static void
TEST (void)
{
#ifdef _ARCH_PWR8
  union128i_q actual, src, count;
  long long expected[2] = {0};
  int lane;

  src.x = _mm_set_epi64x (-1, 0xf);
  count.x = _mm_set_epi64x (60,50);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(src.x), "+v"(count.x));
  actual.x = test (src.x, count.x);

  if (count.a[0] < 64)
    {
      lane = 0;
      while (lane < 2)
        {
          expected[lane] = src.a[lane] << count.a[0];
          lane++;
        }
    }

  if (check_union128i_q (actual, expected))
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psllw_1
#endif
#define N 0xb
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_slli_epi16 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_slli_epi16 (s1, N);
  return result;
}
/* Run test () on a vector of eight shorts and compare the result with
   each source element shifted left by N in scalar code.  When N is 16 or
   more the reference stays all zero.  A non-zero return from
   check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w actual, src;
  short expected[8] = {0};
  int lane;

  src.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000);
  actual.x = test (src.x);

  if (N < 16)
    {
      lane = 0;
      while (lane < 8)
        {
          expected[lane] = src.a[lane] << N;
          lane++;
        }
    }

  if (check_union128i_w (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psllw_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_sll_epi16 (s1, c).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i c)
{
  __m128i result;

  result = _mm_sll_epi16 (s1, c);
  return result;
}
/* Run test () on a vector of eight shorts with the shift count taken
   from element 0 of a second vector, and compare the result with each
   source element shifted left by that count in scalar code.  When the
   count is 16 or more the reference stays all zero.  A non-zero return
   from check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w actual, src;
  union128i_q count;
  short expected[8] = {0};
  int lane;

  src.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000);
  count.x = _mm_set_epi64x (12, 13);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(src.x), "+v"(count.x));
  actual.x = test (src.x, count.x);

  if (count.a[0] < 16)
    {
      lane = 0;
      while (lane < 8)
        {
          expected[lane] = src.a[lane] << count.a[0];
          lane++;
        }
    }

  if (check_union128i_w (actual, expected))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrad_1
#endif
#define N 0xf
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_srai_epi32 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_srai_epi32 (s1, N);
  return result;
}
/* Run test () on a vector of four ints and compare the result with each
   source element arithmetically shifted right by N in scalar code.  A
   non-zero return from check_union128i_d is a failure.  */
static void
TEST (void)
{
  union128i_d u, s;
  int e[4] = {0};
  int shift;
  int i;

  s.x = _mm_set_epi32 (1, -2, 3, 4);
  u.x = test (s.x);

  /* An arithmetic right shift by 32 or more fills each element with its
     sign bit rather than zero; a shift by 31 gives the same value, so
     clamp the count instead of leaving the reference at zero.  */
  shift = N < 32 ? N : 31;
  for (i = 0; i < 4; i++)
    e[i] = s.a[i] >> shift;

  if (check_union128i_d (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrad_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_sra_epi32 (s1, count).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i count)
{
  __m128i result;

  result = _mm_sra_epi32 (s1, count);
  return result;
}
/* Run test () on a vector of four ints with the shift count taken from
   element 0 of a second vector, and compare the result with each source
   element arithmetically shifted right by that count in scalar code.  A
   non-zero return from check_union128i_d is a failure.  */
static void
TEST (void)
{
  union128i_d u, s;
  union128i_q c;
  int e[4] = {0};
  unsigned long long count;
  int shift;
  int i;

  s.x = _mm_set_epi32 (1, -2, 3, 4);
  c.x = _mm_set_epi64x (16, 29);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s.x), "+v"(c.x));
  u.x = test (s.x, c.x);

  /* The count is an unsigned 64-bit quantity.  A count of 32 or more
     fills each element with its sign bit rather than zero; a shift by 31
     gives the same value, so clamp instead of leaving the reference at
     zero.  */
  count = c.a[0];
  shift = count < 32 ? (int) count : 31;
  for (i = 0; i < 4; i++)
    e[i] = s.a[i] >> shift;

  if (check_union128i_d (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psraw_1
#endif
#define N 0xb
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_srai_epi16 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_srai_epi16 (s1, N);
  return result;
}
/* Run test () on a vector of eight shorts and compare the result with
   each source element arithmetically shifted right by N in scalar code.
   A non-zero return from check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w u, s;
  short e[8] = {0};
  int shift;
  int i;

  s.x = _mm_set_epi16 (1, -2, 3, 4, -5, 6, 0x7000, 0x9000);
  u.x = test (s.x);

  /* An arithmetic right shift by 16 or more fills each element with its
     sign bit rather than zero; a shift by 15 gives the same value, so
     clamp the count instead of leaving the reference at zero.  */
  shift = N < 16 ? N : 15;
  for (i = 0; i < 8; i++)
    e[i] = s.a[i] >> shift;

  if (check_union128i_w (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psraw_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_sra_epi16 (s1, c).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i c)
{
  __m128i result;

  result = _mm_sra_epi16 (s1, c);
  return result;
}
/* Run test () on a vector of eight shorts with the shift count taken
   from element 0 of a second vector, and compare the result with each
   source element arithmetically shifted right by that count in scalar
   code.  A non-zero return from check_union128i_w is a failure.  */
static void
TEST (void)
{
  union128i_w u, s;
  union128i_q c;
  short e[8] = {0};
  unsigned long long count;
  int shift;
  int i;

  s.x = _mm_set_epi16 (1, -2, 3, 4, 5, 6, -0x7000, 0x9000);
  c.x = _mm_set_epi64x (12, 13);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s.x), "+v"(c.x));
  u.x = test (s.x, c.x);

  /* The count is an unsigned 64-bit quantity.  A count of 16 or more
     fills each element with its sign bit rather than zero; a shift by 15
     gives the same value, so clamp instead of leaving the reference at
     zero.  */
  count = c.a[0];
  shift = count < 16 ? (int) count : 15;
  for (i = 0; i < 8; i++)
    e[i] = s.a[i] >> shift;

  if (check_union128i_w (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrld_1
#endif
#define N 0xf
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_srli_epi32 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_srli_epi32 (s1, N);
  return result;
}
/* Run test () on a vector of four ints and compare the result with each
   source element, converted to unsigned int, shifted right by N in
   scalar code.  When N is 32 or more the reference stays all zero.  When
   DEBUG is non-zero a failure prints diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_d u, s;
  int e[4] = { 0 };
  unsigned int bits;
  int lane;

  s.x = _mm_set_epi32 (1, -2, 3, 4);
  u.x = test (s.x);

  if (N < 32)
    {
      lane = 0;
      while (lane < 4)
        {
          /* Go through unsigned so the shift brings in zeros.  */
          bits = s.a[lane];
          e[lane] = bits >> N;
          lane++;
        }
    }

  if (check_union128i_d (u, e))
#if DEBUG
    {
      printf ("sse2_test_psrld_1; check_union128i_d failed\n");
      printf ("\tsrl\t([%x,%x,%x,%x],%d\n",
              s.a[0], s.a[1], s.a[2], s.a[3], N);
      printf ("\t ->\t [%x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2], u.a[3]);
      printf ("\texpect\t [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrld_2
#endif
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_srl_epi32 (s1, c).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i c)
{
  __m128i result;

  result = _mm_srl_epi32 (s1, c);
  return result;
}
/* Run test () on a vector of four ints with the shift count taken from
   element 0 of a second vector, and compare the result with each source
   element, converted to unsigned int, shifted right by that count in
   scalar code.  When the count is 32 or more the reference stays all
   zero.  When DEBUG is non-zero a failure prints diagnostics instead of
   aborting.  */
static void
TEST (void)
{
  union128i_d u, s;
  union128i_q c;
  int e[4] = { 0 };
  unsigned int bits;
  int lane;

  s.x = _mm_set_epi32 (2, -3, 0x7000, 0x9000);
  c.x = _mm_set_epi64x (12, 23);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(s.x), "+v"(c.x));
  u.x = test (s.x, c.x);

  if (c.a[0] < 32)
    {
      lane = 0;
      while (lane < 4)
        {
          /* Go through unsigned so the shift brings in zeros.  */
          bits = s.a[lane];
          e[lane] = bits >> c.a[0];
          lane++;
        }
    }

  if (check_union128i_d (u, e))
#if DEBUG
    {
      printf ("sse2_test_psrld_2; check_union128i_d failed\n");
      printf ("\tsrld\t([%x,%x,%x,%x], [%llx,%llx]\n",
              s.a[0], s.a[1], s.a[2], s.a[3],
              c.a[0], c.a[1]);
      printf ("\t ->\t [%x,%x,%x,%x]\n", u.a[0], u.a[1], u.a[2], u.a[3]);
      printf ("\texpect\t [%x,%x,%x,%x]\n", e[0], e[1], e[2], e[3]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrldq_1
#endif
#define N 0x5
#include <emmintrin.h>
/* Non-inlined wrapper: returns _mm_srli_si128 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_srli_si128 (s1, N);
  return result;
}
/* Run test () on sixteen bytes loaded from src[] and compare the result
   with a scalar reference: src[] moved down by N byte positions, with
   the last N bytes left at zero.  When DEBUG is non-zero a failure
   prints diagnostics instead of aborting.  */
static void
TEST (void)
{
  union128i_b u, s;
  char src[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16 };
  char e[16] = { 0 };
  int from;

  s.x = _mm_loadu_si128 ((__m128i *)src);
  u.x = test (s.x);

  /* Walk the source positions; each lands N places lower.  */
  from = N;
  while (from < 16)
    {
      e[from - N] = src[from];
      from++;
    }

  if (check_union128i_b (u, e))
#if DEBUG
    {
      printf ("sse2_test_psrldq_1; check_union128i_b failed\n");
      printf ("\tsrl\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
              s.a[0], s.a[1], s.a[2], s.a[3],
              s.a[4], s.a[5], s.a[6], s.a[7],
              s.a[8], s.a[9], s.a[10], s.a[11],
              s.a[12], s.a[13], s.a[14], s.a[15]);
      printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
              u.a[0], u.a[1], u.a[2], u.a[3],
              u.a[4], u.a[5], u.a[6], u.a[7],
              u.a[8], u.a[9], u.a[10], u.a[11],
              u.a[12], u.a[13], u.a[14], u.a[15]);
      printf ("\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
              e[0], e[1], e[2], e[3],
              e[4], e[5], e[6], e[7],
              e[8], e[9], e[10], e[11],
              e[12], e[13], e[14], e[15]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrlq_1
#endif
#define N 60
#include <emmintrin.h>
#ifdef _ARCH_PWR8
/* Non-inlined wrapper: returns _mm_srli_epi64 (s1, N).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i result;

  result = _mm_srli_epi64 (s1, N);
  return result;
}
#endif
/* Run test () on a vector of two 64-bit elements and compare the result
   with each source element, converted to unsigned long long, shifted
   right by N in scalar code.  When N is 64 or more the reference stays
   all zero.  The body is compiled only when _ARCH_PWR8 is defined.  */
static void
TEST (void)
{
#ifdef _ARCH_PWR8
  union128i_q actual, src;
  long long expected[2] = {0};
  unsigned long long bits;
  int lane;

  src.x = _mm_set_epi64x (-1, 0xf);
  actual.x = test (src.x);

  if (N < 64)
    {
      lane = 0;
      while (lane < 2)
        {
          /* Go through unsigned so the shift brings in zeros.  */
          bits = src.a[lane];
          expected[lane] = bits >> N;
          lane++;
        }
    }

  if (check_union128i_q (actual, expected))
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrlq_2
#endif
#include <emmintrin.h>
#ifdef _ARCH_PWR8
/* Non-inlined wrapper: returns _mm_srl_epi64 (s1, c).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i c)
{
  __m128i result;

  result = _mm_srl_epi64 (s1, c);
  return result;
}
#endif
/* Run test () on a vector of two 64-bit elements with the shift count
   taken from element 0 of a second vector, and compare the result with
   each source element, converted to unsigned long long, shifted right by
   that count in scalar code.  When the count is 64 or more the reference
   stays all zero.  The body is compiled only when _ARCH_PWR8 is
   defined.  */
static void
TEST (void)
{
#ifdef _ARCH_PWR8
  union128i_q actual, src, count;
  long long expected[2] = {0};
  unsigned long long bits;
  int lane;

  src.x = _mm_set_epi64x (-1, 0xf);
  count.x = _mm_set_epi64x (60,50);
  /* Empty asm taking both inputs as read-write "+v" operands;
     presumably keeps the compiler from folding them -- confirm.  */
  __asm("" : "+v"(src.x), "+v"(count.x));
  actual.x = test (src.x, count.x);

  if (count.a[0] < 64)
    {
      lane = 0;
      while (lane < 2)
        {
          /* Go through unsigned so the shift brings in zeros.  */
          bits = src.a[lane];
          expected[lane] = bits >> count.a[0];
          lane++;
        }
    }

  if (check_union128i_q (actual, expected))
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrlw_1
#endif
#define N 0xb
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper: shift S1 with _mm_srli_epi16 using the
   compile-time count N.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1)
{
  __m128i shifted = _mm_srli_epi16 (s1, N);

  return shifted;
}
/* Run test() on a fixed input and compare against a scalar model: each
   16-bit element, viewed as unsigned short, shifted right by N.  Aborts
   on a mismatch.  */
static void
TEST (void)
{
  union128i_w src, res;
  short expected[8] = { 0 };
  int lane;

  src.x = _mm_set_epi16 (1, -2, 3, -4, 5, 6, 0x7000, 0x9000);
  res.x = test (src.x);

  /* For N >= 16 the expectation stays at its all-zero initial value.  */
  if (N < 16)
    {
      for (lane = 0; lane < 8; lane++)
        {
          unsigned short bits = src.a[lane];

          expected[lane] = bits >> N;
        }
    }

  if (check_union128i_w (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psrlw_2
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper: shift S1 with _mm_srl_epi16 using the
   count vector C.  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i c)
{
  __m128i shifted = _mm_srl_epi16 (s1, c);

  return shifted;
}
/* Run test() on a fixed source/count pair and compare against a scalar
   model: each 16-bit element, viewed as unsigned short, shifted right by
   element 0 of the 64-bit count vector.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_w src, res;
  union128i_q cnt;
  short expected[8] = { 0 };
  int lane;

  src.x = _mm_set_epi16 (1, -2, 3, 4, 5, 6, -0x7000, 0x9000);
  cnt.x = _mm_set_epi64x (12, 13);
  /* Empty asm taking both vectors as "+v" in/out operands; presumably it
     keeps the compiler from folding the constants -- confirm.  */
  __asm("" : "+v"(src.x), "+v"(cnt.x));
  res.x = test (src.x, cnt.x);

  /* For a count of 16 or more the expectation stays all zero.  */
  if (cnt.a[0] < 16)
    {
      for (lane = 0; lane < 8; lane++)
        {
          unsigned short bits = src.a[lane];

          expected[lane] = bits >> cnt.a[0];
        }
    }

  if (check_union128i_w (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubb_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_sub_epi8 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_sub_epi8 (s1, s2);
  return diff;
}
/* Run test() on two fixed byte vectors and compare against the
   element-wise difference stored back into a char.  Aborts on a
   mismatch.  */
static void
TEST (void)
{
  union128i_b lhs, rhs, res;
  char expected[16];
  int idx;

  lhs.x = _mm_set_epi8 (1, 2, 3, 4, 10, 20, 30, 90,
                        -80, -40, -100, -15, 98, 25, 98, 7);
  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100,
                        -34, -78, -39, 6, 3, 4, 5, 119);
  res.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 16; idx++)
    {
      expected[idx] = lhs.a[idx] - rhs.a[idx];
    }

  if (check_union128i_b (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_sub_epi32 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_sub_epi32 (s1, s2);
  return diff;
}
/* Run test() on two fixed 32-bit vectors and compare against the
   element-wise scalar difference.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_d lhs, rhs, res;
  int expected[4];
  int idx;

  lhs.x = _mm_set_epi32 (30, 90, -80, -40);
  rhs.x = _mm_set_epi32 (76, -100, -34, -78);
  res.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 4; idx++)
    {
      expected[idx] = lhs.a[idx] - rhs.a[idx];
    }

  if (check_union128i_d (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubq_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_sub_epi64 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_sub_epi64 (s1, s2);
  return diff;
}
/* Run test() on two fixed 64-bit vectors and compare against the
   element-wise scalar difference.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_q lhs, rhs, res;
  long long expected[2];
  int idx;

  lhs.x = _mm_set_epi64x (90, -80);
  rhs.x = _mm_set_epi64x (76, -100);
  res.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 2; idx++)
    {
      expected[idx] = lhs.a[idx] - rhs.a[idx];
    }

  if (check_union128i_q (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubsb_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_subs_epi8 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_subs_epi8 (s1, s2);
  return diff;
}
/* Run test() on two fixed byte vectors and compare against a scalar
   model: the signed-char difference computed in int and clamped to
   [-128, 127].  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_b lhs, rhs, res;
  char expected[16];
  int idx;

  lhs.x = _mm_set_epi8 (1, 2, 3, 4, 10, 20, 30, 90,
                        -80, -40, -100, -15, 98, 25, 98, 7);
  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100,
                        -34, -78, -39, 6, 3, 4, 5, 119);
  res.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 16; idx++)
    {
      /* The explicit casts make the model independent of whether plain
         char is signed.  */
      int diff = (signed char) lhs.a[idx] - (signed char) rhs.a[idx];

      if (diff > 127)
        diff = 127;
      else if (diff < -128)
        diff = -128;

      expected[idx] = diff;
    }

  if (check_union128i_b (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubsw_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_subs_epi16 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_subs_epi16 (s1, s2);
  return diff;
}
/* Run test() on two fixed 16-bit vectors and compare against a scalar
   model: the element difference computed in int and clamped to
   [-32768, 32767].  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_w lhs, rhs, res;
  short expected[8];
  int idx;

  lhs.x = _mm_set_epi16 (10, 20, 30, 90, -80, -40, -100, -15);
  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
  res.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 8; idx++)
    {
      int diff = lhs.a[idx] - rhs.a[idx];

      if (diff > 32767)
        diff = 32767;
      else if (diff < -32768)
        diff = -32768;

      expected[idx] = diff;
    }

  if (check_union128i_w (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubusb_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_subs_epu8 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_subs_epu8 (s1, s2);
  return diff;
}
/* Run test() on two fixed byte vectors and compare against a scalar
   model: the unsigned-char difference computed in int and clamped at
   zero.  On a mismatch the test aborts; when DEBUG is non-zero it
   prints the operands, the result and the expectation instead (and
   does not abort).  */
static void
TEST (void)
{
  union128i_b u, s1, s2;
  char e[16] = { 0 };
  int i, tmp;

  s1.x = _mm_set_epi8 (30, 2, 3, 4, 10, 20, 30, 90,
                       80, 40, 100, 15, 98, 25, 98, 7);
  s2.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, 100,
                       34, 78, 39, 6, 3, 4, 5, 119);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 16; i++)
    {
      /* The difference of two unsigned bytes lies in [-255, 255], so
         only the lower bound can be exceeded.  */
      tmp = (unsigned char)s1.a[i] - (unsigned char)s2.a[i];
      if (tmp < 0)
        tmp = 0;
      e[i] = tmp;
    }

  if (check_union128i_b (u, e))
#if DEBUG
    {
      printf ("sse2_test_psubusb_1; check_union128i_b failed\n");
      printf (
	  "\tsubs\t([%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x],\n",
	  s1.a[0], s1.a[1], s1.a[2], s1.a[3], s1.a[4], s1.a[5], s1.a[6],
	  s1.a[7], s1.a[8], s1.a[9], s1.a[10], s1.a[11], s1.a[12], s1.a[13],
	  s1.a[14], s1.a[15]);
      printf ("\t\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x])\n",
	      s2.a[0], s2.a[1], s2.a[2], s2.a[3], s2.a[4], s2.a[5], s2.a[6],
	      s2.a[7], s2.a[8], s2.a[9], s2.a[10], s2.a[11], s2.a[12], s2.a[13],
	      s2.a[14], s2.a[15]);
      printf ("\t ->\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
	      u.a[0], u.a[1], u.a[2], u.a[3], u.a[4], u.a[5], u.a[6], u.a[7],
	      u.a[8], u.a[9], u.a[10], u.a[11], u.a[12], u.a[13], u.a[14],
	      u.a[15]);
      printf (
	  "\texpect\t [%x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x, %x,%x,%x,%x]\n",
	  e[0], e[1], e[2], e[3], e[4], e[5], e[6], e[7], e[8], e[9], e[10],
	  e[11], e[12], e[13], e[14], e[15]);
    }
#else
    abort ();
#endif
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubusw_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_subs_epu16 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_subs_epu16 (s1, s2);
  return diff;
}
/* Run test() on two fixed 16-bit vectors and compare against a scalar
   model: the unsigned-short difference computed in int and clamped at
   zero.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_w u, s1, s2;
  short e[8];
  int i, tmp;

  s1.x = _mm_set_epi16 (10,20,30,90,80,40,100,15);
  s2.x = _mm_set_epi16 (11, 98, 76, 100, 34, 78, 39, 14);
  u.x = test (s1.x, s2.x);

  for (i = 0; i < 8; i++)
    {
      /* The difference of two unsigned 16-bit values lies in
         [-65535, 65535], so only the lower bound can be exceeded.  */
      tmp = (unsigned short)s1.a[i] - (unsigned short)s2.a[i];
      if (tmp < 0)
        tmp = 0;
      e[i] = tmp;
    }

  if (check_union128i_w (u, e))
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_psubw_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_sub_epi16 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_sub_epi16 (s1, s2);
  return diff;
}
/* Run test() on two fixed 16-bit vectors and compare against the
   element-wise difference stored back into a short.  Aborts on a
   mismatch.  */
static void
TEST (void)
{
  union128i_w lhs, rhs, res;
  short expected[8];
  int idx;

  lhs.x = _mm_set_epi16 (10, 20, 30, 90, -80, -40, -100, -15);
  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
  res.x = test (lhs.x, rhs.x);

  for (idx = 0; idx < 8; idx++)
    {
      expected[idx] = lhs.a[idx] - rhs.a[idx];
    }

  if (check_union128i_w (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpckhbw_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpackhi_epi8 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpackhi_epi8 (s1, s2);
  return merged;
}
/* Run test() on two fixed byte vectors and compare against a scalar
   model that interleaves elements 8..15 of the first operand with
   elements 8..15 of the second.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_b lhs, rhs, res;
  char expected[16];
  int from, to;

  lhs.x = _mm_set_epi8 (1, 2, 3, 4, 10, 20, 30, 90,
                        -80, -40, -100, -15, 98, 25, 98, 7);
  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100,
                        -34, -78, -39, 6, 3, 4, 5, 119);
  res.x = test (lhs.x, rhs.x);

  for (from = 8, to = 0; from < 16; from++, to += 2)
    {
      expected[to] = lhs.a[from];
      expected[to + 1] = rhs.a[from];
    }

  if (check_union128i_b (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpckhdq_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpackhi_epi32 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpackhi_epi32 (s1, s2);
  return merged;
}
/* Run test() on two fixed 32-bit vectors and compare against a scalar
   model that interleaves elements 2..3 of the first operand with
   elements 2..3 of the second.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_d lhs, rhs, res;
  int expected[4];
  int from, to;

  lhs.x = _mm_set_epi32 (10, 20, -80, -40);
  rhs.x = _mm_set_epi32 (11, -34, -78, -39);
  res.x = test (lhs.x, rhs.x);

  for (from = 2, to = 0; from < 4; from++, to += 2)
    {
      expected[to] = lhs.a[from];
      expected[to + 1] = rhs.a[from];
    }

  if (check_union128i_d (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpckhqdq_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpackhi_epi64 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpackhi_epi64 (s1, s2);
  return merged;
}
/* Run test() on two fixed 64-bit vectors and expect element 1 of the
   first operand followed by element 1 of the second.  Aborts on a
   mismatch.  */
static void
TEST (void)
{
  union128i_q lhs, rhs, res;
  long long expected[2];

  lhs.x = _mm_set_epi64x (10, -40);
  rhs.x = _mm_set_epi64x (1134, -7839);
  res.x = test (lhs.x, rhs.x);

  expected[1] = rhs.a[1];
  expected[0] = lhs.a[1];

  if (check_union128i_q (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpckhwd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpackhi_epi16 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpackhi_epi16 (s1, s2);
  return merged;
}
/* Run test() on two fixed 16-bit vectors and compare against a scalar
   model that interleaves elements 4..7 of the first operand with
   elements 4..7 of the second.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_w lhs, rhs, res;
  short expected[8];
  int from, to;

  lhs.x = _mm_set_epi16 (10, 20, 30, 90, -80, -40, -100, -15);
  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
  res.x = test (lhs.x, rhs.x);

  for (from = 4, to = 0; from < 8; from++, to += 2)
    {
      expected[to] = lhs.a[from];
      expected[to + 1] = rhs.a[from];
    }

  if (check_union128i_w (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpcklbw_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpacklo_epi8 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpacklo_epi8 (s1, s2);
  return merged;
}
/* Run test() on two fixed byte vectors and compare against a scalar
   model that interleaves elements 0..7 of the first operand with
   elements 0..7 of the second.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_b lhs, rhs, res;
  char expected[16];
  int from, to;

  lhs.x = _mm_set_epi8 (1, 2, 3, 4, 10, 20, 30, 90,
                        -80, -40, -100, -15, 98, 25, 98, 7);
  rhs.x = _mm_set_epi8 (88, 44, 33, 22, 11, 98, 76, -100,
                        -34, -78, -39, 6, 3, 4, 5, 119);
  res.x = test (lhs.x, rhs.x);

  for (from = 0, to = 0; from < 8; from++, to += 2)
    {
      expected[to] = lhs.a[from];
      expected[to + 1] = rhs.a[from];
    }

  if (check_union128i_b (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpckldq_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpacklo_epi32 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpacklo_epi32 (s1, s2);
  return merged;
}
/* Run test() on two fixed 32-bit vectors and compare against a scalar
   model that interleaves elements 0..1 of the first operand with
   elements 0..1 of the second.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_d lhs, rhs, res;
  int expected[4];
  int from, to;

  lhs.x = _mm_set_epi32 (10, 20, -80, -40);
  rhs.x = _mm_set_epi32 (11, -34, -78, -39);
  res.x = test (lhs.x, rhs.x);

  for (from = 0, to = 0; from < 2; from++, to += 2)
    {
      expected[to] = lhs.a[from];
      expected[to + 1] = rhs.a[from];
    }

  if (check_union128i_d (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpcklqdq_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpacklo_epi64 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpacklo_epi64 (s1, s2);
  return merged;
}
/* Run test() on two fixed 64-bit vectors and expect element 0 of the
   first operand followed by element 0 of the second.  Aborts on a
   mismatch.  */
static void
TEST (void)
{
  union128i_q lhs, rhs, res;
  long long expected[2];

  lhs.x = _mm_set_epi64x (10, -40);
  rhs.x = _mm_set_epi64x (1134, -7839);
  res.x = test (lhs.x, rhs.x);

  expected[1] = rhs.a[0];
  expected[0] = lhs.a[0];

  if (check_union128i_q (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_punpcklwd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpacklo_epi16 (S1, S2).  */
static __m128i
__attribute__((noinline, unused))
test (__m128i s1, __m128i s2)
{
  __m128i merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpacklo_epi16 (s1, s2);
  return merged;
}
/* Run test() on two fixed 16-bit vectors and compare against a scalar
   model that interleaves elements 0..3 of the first operand with
   elements 0..3 of the second.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128i_w lhs, rhs, res;
  short expected[8];
  int from, to;

  lhs.x = _mm_set_epi16 (10, 20, 30, 90, -80, -40, -100, -15);
  rhs.x = _mm_set_epi16 (11, 98, 76, -100, -34, -78, -39, 14);
  res.x = test (lhs.x, rhs.x);

  for (from = 0, to = 0; from < 4; from++, to += 2)
    {
      expected[to] = lhs.a[from];
      expected[to + 1] = rhs.a[from];
    }

  if (check_union128i_w (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_shufpd_1
#endif
#define N 0xab
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper: combine S1 and S2 with _mm_shuffle_pd
   using the compile-time selector N.  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d shuffled = _mm_shuffle_pd (s1, s2, N);

  return shuffled;
}
/* Run test() on two fixed double vectors and compare against a scalar
   model: bit 0 of N picks the element of the first operand for result
   element 0, bit 1 of N picks the element of the second operand for
   result element 1.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expected[2] = { 0.0, 0.0 };

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (453.345635, 54646.464356);
  res.x = test (lhs.x, rhs.x);

  /* A set selector bit chooses element 1, a clear bit element 0.  */
  expected[0] = lhs.a[(N >> 0) & 1];
  expected[1] = rhs.a[(N >> 1) & 1];

  if (check_union128d (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_sqrt_pd_1
#endif
#include <emmintrin.h>
#include <math.h>
/* Out-of-line (noinline) wrapper around _mm_sqrt_pd (S1).  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1)
{
  __m128d roots = _mm_sqrt_pd (s1);

  return roots;
}
/* Run test() on a fixed double vector and compare against a reference
   built one element at a time with _mm_load_sd / _mm_sqrt_sd /
   _mm_store_sd.  On a mismatch the test aborts; when DEBUG is non-zero
   it prints the input, result and expectation instead (and does not
   abort).  */
static void
TEST (void)
{
  union128d src, res;
  /* First operand handed to _mm_sqrt_sd; only element 0 of each
     _mm_sqrt_sd result is stored into the expectation.  */
  __m128d bogus = { 123.0, 456.0 };
  double expected[2];
  int lane;

  src.x = _mm_set_pd (2134.3343, 1234.635654);
  res.x = test (src.x);

  for (lane = 0; lane < 2; lane++)
    {
      __m128d root = _mm_sqrt_sd (bogus, _mm_load_sd (&src.a[lane]));

      _mm_store_sd (&expected[lane], root);
    }

  if (check_union128d (res, expected))
    {
#if DEBUG
      printf ("sse2_test_sqrt_pd_1; check_union128d failed\n");
      printf ("\t [%f,%f] -> [%f,%f]\n", src.a[0], src.a[1], res.a[0],
              res.a[1]);
      printf ("\t expect [%f,%f]\n", expected[0], expected[1]);
#else
      abort ();
#endif
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_subpd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_sub_pd (S1, S2).  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_sub_pd (s1, s2);
  return diff;
}
/* Run test() on two fixed double vectors and compare against the
   element-wise scalar difference.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expected[2];
  int lane;

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  res.x = test (lhs.x, rhs.x);

  for (lane = 0; lane < 2; lane++)
    {
      expected[lane] = lhs.a[lane] - rhs.a[lane];
    }

  if (check_union128d (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_subsd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_sub_sd (S1, S2).  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d diff;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  diff = _mm_sub_sd (s1, s2);
  return diff;
}
/* Run test() on two fixed double vectors and expect element 0 to be the
   scalar difference and element 1 to be copied from the first operand.
   On a mismatch the test aborts; when DEBUG is non-zero it prints the
   operands, result and expectation instead (and does not abort).  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expected[2];

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  res.x = test (lhs.x, rhs.x);

  expected[1] = lhs.a[1];
  expected[0] = lhs.a[0] - rhs.a[0];

  if (check_union128d (res, expected))
    {
#if DEBUG
      printf ("sse2_test_subsd_1; check_union128d failed\n");
      printf ("\t [%f,%f] - [%f,%f] -> [%f,%f]\n", lhs.a[0], lhs.a[1],
              rhs.a[0], rhs.a[1], res.a[0], res.a[1]);
      printf ("\t expect [%f,%f]\n", expected[0], expected[1]);
#else
      abort ();
#endif
    }
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_ucomisd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_ucomieq_sd (S1, S2).  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag = _mm_ucomieq_sd (s1, s2);

  return flag;
}
/* Run test() on two fixed double vectors and compare its result with
   the scalar `==' of their element 0 values.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343, 2344.2354);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);

  want[0] = lhs.a[0] == rhs.a[0];
  got[0] = test (lhs.x, rhs.x);

  if (checkVi (got, want, 1) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_ucomisd_2
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_ucomilt_sd (S1, S2).  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag = _mm_ucomilt_sd (s1, s2);

  return flag;
}
/* Run test() on two fixed double vectors and compare its result with
   the scalar `<' of their element 0 values.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);

  want[0] = lhs.a[0] < rhs.a[0];
  got[0] = test (lhs.x, rhs.x);

  if (checkVi (got, want, 1) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_ucomisd_3
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_ucomile_sd (S1, S2).  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag = _mm_ucomile_sd (s1, s2);

  return flag;
}
/* Run test() on two fixed double vectors and compare its result with
   the scalar `<=' of their element 0 values.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1] = { 0 };
  int want[1] = { 0 };

  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);

  want[0] = lhs.a[0] <= rhs.a[0];
  got[0] = test (lhs.x, rhs.x);

  if (checkVi (got, want, 1) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_ucomisd_4
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_ucomigt_sd (S1, S2).  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag = _mm_ucomigt_sd (s1, s2);

  return flag;
}
/* Run test() on two fixed double vectors and compare its result with
   the scalar `>' of their element 0 values.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);

  want[0] = lhs.a[0] > rhs.a[0];
  got[0] = test (lhs.x, rhs.x);

  if (checkVi (got, want, 1) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_ucomisd_5
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_ucomige_sd (S1, S2).  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag = _mm_ucomige_sd (s1, s2);

  return flag;
}
/* Run test() on two fixed double vectors and compare its result with
   the scalar `>=' of their element 0 values.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);

  want[0] = lhs.a[0] >= rhs.a[0];
  got[0] = test (lhs.x, rhs.x);

  if (checkVi (got, want, 1) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_ucomisd_6
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_ucomineq_sd (S1, S2).  */
static int
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  int flag = _mm_ucomineq_sd (s1, s2);

  return flag;
}
/* Run test() on two fixed double vectors and compare its result with
   the scalar `!=' of their element 0 values.  Aborts on a mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs;
  int got[1];
  int want[1];

  lhs.x = _mm_set_pd (2134.3343, 12344.2354);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);

  want[0] = lhs.a[0] != rhs.a[0];
  got[0] = test (lhs.x, rhs.x);

  if (checkVi (got, want, 1) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_unpckhpd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpackhi_pd (S1, S2).  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpackhi_pd (s1, s2);
  return merged;
}
/* Run test() on two fixed double vectors and expect element 1 of the
   first operand followed by element 1 of the second.  Aborts on a
   mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expected[2];

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  res.x = test (lhs.x, rhs.x);

  expected[1] = rhs.a[1];
  expected[0] = lhs.a[1];

  if (check_union128d (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_unpcklpd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_unpacklo_pd (S1, S2).  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d merged;

  /* Empty asm with both arguments as "+v" in/out operands; presumably a
     barrier against constant folding -- confirm.  */
  __asm("" : "+v"(s1), "+v"(s2));
  merged = _mm_unpacklo_pd (s1, s2);
  return merged;
}
/* Run test() on two fixed double vectors and expect element 0 of the
   first operand followed by element 0 of the second.  Aborts on a
   mismatch.  */
static void
TEST (void)
{
  union128d lhs, rhs, res;
  double expected[2];

  lhs.x = _mm_set_pd (2134.3343, 1234.635654);
  rhs.x = _mm_set_pd (41124.234, 2344.2354);
  res.x = test (lhs.x, rhs.x);

  expected[1] = rhs.a[0];
  expected[0] = lhs.a[0];

  if (check_union128d (res, expected) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O3 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target lp64 } */
/* { dg-require-effective-target p8vector_hw } */
#ifndef CHECK_H
#define CHECK_H "sse2-check.h"
#endif
#include CHECK_H
#ifndef TEST
#define TEST sse2_test_xorpd_1
#endif
#include <emmintrin.h>
/* Out-of-line (noinline) wrapper around _mm_xor_pd (S1, S2).  */
static __m128d
__attribute__((noinline, unused))
test (__m128d s1, __m128d s2)
{
  __m128d mixed = _mm_xor_pd (s1, s2);

  return mixed;
}
/* Run test() on two fixed double vectors and compare against a
   reference formed by XOR-ing the operands' 64-bit integer images.
   Aborts on a mismatch.  */
static void
TEST (void)
{
  /* Lets the same 16 bytes be accessed as doubles or as 64-bit
     integers.  */
  union
  {
    double d[2];
    long long l[2];
  } lhs_bits, rhs_bits, expected;
  union128d lhs, rhs, res;
  int lane;

  lhs.x = _mm_set_pd (11.1321456, 2.287332);
  rhs.x = _mm_set_pd (3.37768, 4.43222234);

  /* Spill both operands so their bits can be read through the l[]
     member.  */
  _mm_storeu_pd (lhs_bits.d, lhs.x);
  _mm_storeu_pd (rhs_bits.d, rhs.x);

  res.x = test (lhs.x, rhs.x);

  for (lane = 0; lane < 2; lane++)
    {
      expected.l[lane] = lhs_bits.l[lane] ^ rhs_bits.l[lane];
    }

  if (check_union128d (res, expected.d) != 0)
    abort ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment