Commit 1c4547f1 by Paul A. Clarke Committed by Paul Clarke

[rs6000] Add _mm_blend_epi16 and _mm_blendv_epi8

Add compatibility implementations of _mm_blend_epi16 and _mm_blendv_epi8
intrinsics.

Respective test cases are copied almost verbatim (minor changes to
the dejagnu head lines) from i386.

2019-07-22  Paul A. Clarke  <pc@us.ibm.com>

[gcc]

	* config/rs6000/smmintrin.h (_mm_blend_epi16): New.
	(_mm_blendv_epi8): New.

[gcc/testsuite]

	* gcc.target/powerpc/sse4_1-check.h: New.
	* gcc.target/powerpc/sse4_1-pblendvb.c: New.
	* gcc.target/powerpc/sse4_1-pblendw.c: New.
	* gcc.target/powerpc/sse4_1-pblendw-2.c: New.

From-SVN: r273698
parent 46ebb491
2019-07-22 Paul A. Clarke <pc@us.ibm.com>
* config/rs6000/smmintrin.h (_mm_blend_epi16): New.
(_mm_blendv_epi8): New.
2019-07-22 Richard Biener <rguenther@suse.de>
PR tree-optimization/91221
......
...@@ -66,4 +66,24 @@ _mm_extract_ps (__m128 __X, const int __N) ...@@ -66,4 +66,24 @@ _mm_extract_ps (__m128 __X, const int __N)
return ((__v4si)__X)[__N & 3]; return ((__v4si)__X)[__N & 3];
} }
/* Blend the packed 16-bit elements of __A and __B under control of the
   immediate __imm8: element i of the result is taken from __B when bit i
   of __imm8 is set, and from __A otherwise.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blend_epi16 (__m128i __A, __m128i __B, const int __imm8)
{
  /* vec_gb is specified for vector unsigned char operands, so the
     splatted immediate is built with an unsigned element type.  */
  __v16qu __charmask = vec_splats ((unsigned char) __imm8);
  /* Gather bits by byte: with every input byte equal to __imm8, each
     output byte becomes 0x00 or 0xFF according to one immediate bit.  */
  __charmask = vec_gb (__charmask);
  /* Widen the byte mask to halfwords.  The signed unpack sign-extends,
     turning 0xFF into 0xFFFF and leaving 0x00 as 0x0000.  */
  __v8hu __shortmask = (__v8hu) vec_unpackh ((__v16qi) __charmask);
#ifdef __BIG_ENDIAN__
  /* Reverse the element order so immediate bit i lines up with element i
     on big-endian targets.  */
  __shortmask = vec_reve (__shortmask);
#endif
  return (__m128i) vec_sel ((__v8hu) __A, (__v8hu) __B, __shortmask);
}
/* Variable byte blend: each result byte is taken from __B when the most
   significant bit of the corresponding __mask byte is set, and from __A
   otherwise.  */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_blendv_epi8 (__m128i __A, __m128i __B, __m128i __mask)
{
  /* An algebraic right shift by 7 replicates each byte's top bit across
     the whole byte, yielding a 0x00 / 0xFF selector for vec_sel.  */
  __v16qu __select = vec_sra ((__v16qu) __mask,
			      vec_splats ((unsigned char) 0x07));

  return (__m128i) vec_sel ((__v16qu) __A, (__v16qu) __B, __select);
}
#endif #endif
2019-07-22 Paul A. Clarke <pc@us.ibm.com>
* gcc.target/powerpc/sse4_1-check.h: New.
* gcc.target/powerpc/sse4_1-pblendvb.c: New.
* gcc.target/powerpc/sse4_1-pblendw.c: New.
* gcc.target/powerpc/sse4_1-pblendw-2.c: New.
2019-07-22 Eric Botcazou <ebotcazou@adacore.com>
* gnat.dg/fixedpnt6.adb: New testcase.
......
#include <stdio.h>
#include <stdlib.h>
#include "m128-check.h"
//#define DEBUG 1
#define TEST sse4_1_test
static void sse4_1_test (void);
/* Out-of-line wrapper around the test body sse4_1_test.  The noinline
   attribute keeps this call from being inlined into its caller.  */
static void
__attribute__ ((noinline))
do_test (void)
{
sse4_1_test ();
}
/* Program entry point: runs the test via do_test and returns 0.  When
   DEBUG is defined, "PASSED" is printed once do_test has returned.  */
int
main ()
{
do_test ();
#ifdef DEBUG
printf ("PASSED\n");
#endif
return 0;
}
/* { dg-do run } */
/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target p8vector_hw } */
#define NO_WARN_X86_INTRINSICS 1
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
#include <smmintrin.h>
#include <string.h>
#define NUM 20
/* Fill SRC1, SRC2 and MASK (NUM * 16 bytes each) with deterministic
   patterns.  The sign applied to the source values alternates with the
   index, and each mask byte is derived from the index and from the two
   source bytes just stored.  */
static void
init_pblendvb (unsigned char *src1, unsigned char *src2,
	       unsigned char *mask)
{
  int idx;

  for (idx = 0; idx < NUM * 16; idx++)
    {
      /* +1 for even indices, -1 for odd ones.  */
      const int sign = (idx & 1) ? -1 : 1;

      src1[idx] = idx * idx * sign;
      src2[idx] = (idx + 20) * sign;

      /* The stored (truncated) bytes are read back on purpose.  */
      const int mixed = src1[idx] | src2[idx] | (idx * 3);
      mask[idx] = (idx % 3) + ((idx * (14 + sign)) ^ mixed);
    }
}
/* Build the expected 16-byte blend result and compare it with *DST.
   Byte J is expected to come from SRC2 when bit 7 of MASK[J] is set and
   from SRC1 otherwise.  Returns the memcmp result, i.e. 0 on a match.  */
static int
check_pblendvb (__m128i *dst, unsigned char *src1,
		unsigned char *src2, unsigned char *mask)
{
  unsigned char expected[16];
  int lane;

  for (lane = 0; lane < 16; lane++)
    expected[lane] = (mask[lane] & 0x80) ? src2[lane] : src1[lane];

  return memcmp (dst, expected, sizeof (expected));
}
/* Test body: run _mm_blendv_epi8 over NUM vectors of generated data and
   abort on the first result that differs from the scalar reference.  */
static void
TEST (void)
{
  union
    {
      __m128i x[NUM];
      unsigned char c[NUM * 16];
    } dst, src1, src2, mask;
  int vec;

  init_pblendvb (src1.c, src2.c, mask.c);

  for (vec = 0; vec < NUM; vec++)
    {
      /* Byte offset of vector VEC within the flat byte views.  */
      const int first = vec * 16;

      dst.x[vec] = _mm_blendv_epi8 (src1.x[vec], src2.x[vec], mask.x[vec]);

      if (check_pblendvb (&dst.x[vec], src1.c + first, src2.c + first,
			  mask.c + first) != 0)
	abort ();
    }
}
/* { dg-do run } */
/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target p8vector_hw } */
#define NO_WARN_X86_INTRINSICS 1
#include "sse4_1-check.h"
#include <smmintrin.h>
#include <string.h>
#define NUM 20
#undef MASK
#define MASK 0xfe
/* Fill SRC1 and SRC2 (NUM * 8 shorts each) with deterministic values
   whose sign alternates with the index: SRC1 gets +/- index squared and
   SRC2 gets +/- (index + 20).  */
static void
init_pblendw (short *src1, short *src2)
{
  int idx;

  for (idx = 0; idx < NUM * 8; idx++)
    {
      /* +1 for even indices, -1 for odd ones.  */
      const int sign = (idx & 1) ? -1 : 1;

      src1[idx] = idx * idx * sign;
      src2[idx] = (idx + 20) * sign;
    }
}
/* Build the expected 8-element blend result and compare it with *DST.
   Element J is expected to come from SRC2 when bit J of MASK is set and
   from SRC1 otherwise.  Returns the memcmp result, i.e. 0 on a match.  */
static int
check_pblendw (__m128i *dst, short *src1, short *src2)
{
  short expected[8];
  int lane;

  for (lane = 0; lane < 8; lane++)
    expected[lane] = (MASK & (1 << lane)) ? src2[lane] : src1[lane];

  return memcmp (dst, expected, sizeof (expected));
}
/* Test body: exercise _mm_blend_epi16 with the immediate MASK, first
   over NUM vectors of generated data and then against an all-zero
   vector in both operand positions.  Aborts on the first mismatch with
   the scalar reference.  */
static void
sse4_1_test (void)
{
  union
    {
      __m128i x[NUM];
      short s[NUM * 8];
    } dst, src1, src2;
  union
    {
      __m128i x;
      short s[8];
    } zero;
  __m128i zero_blended_in, zero_as_base;
  int vec;

  init_pblendw (src1.s, src2.s);

  /* Check pblendw imm8, m128, xmm */
  for (vec = 0; vec < NUM; vec++)
    {
      /* Element offset of vector VEC within the flat short views.  */
      const int first = vec * 8;

      dst.x[vec] = _mm_blend_epi16 (src1.x[vec], src2.x[vec], MASK);

      if (check_pblendw (&dst.x[vec], src1.s + first, src2.s + first) != 0)
	abort ();
    }

  /* Check pblendw imm8, xmm, xmm */
  zero.x = _mm_setzero_si128 ();
  zero_blended_in = _mm_blend_epi16 (dst.x[2], zero.x, MASK);
  zero_as_base = _mm_blend_epi16 (zero.x, dst.x[2], MASK);

  /* dst.s + 16 is the short view of dst.x[2].  */
  if (check_pblendw (&zero_blended_in, dst.s + 16, zero.s) != 0)
    abort ();

  if (check_pblendw (&zero_as_base, zero.s, dst.s + 16) != 0)
    abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mpower8-vector -Wno-psabi" } */
/* { dg-require-effective-target p8vector_hw } */
#define NO_WARN_X86_INTRINSICS 1
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
#include <smmintrin.h>
#include <string.h>
#define NUM 20
#ifndef MASK
#define MASK 0x0f
#endif
/* Populate SRC1 and SRC2 (NUM * 8 shorts each) with a fixed pattern:
   SRC1[k] is k squared and SRC2[k] is k + 20, both negated for odd k.  */
static void
init_pblendw (short *src1, short *src2)
{
  int k = 0;
  int sign = 1;

  while (k < NUM * 8)
    {
      src1[k] = k * k * sign;
      src2[k] = (k + 20) * sign;

      /* Flip the sign for the next element.  */
      sign = -sign;
      k++;
    }
}
/* Compute the reference blend of SRC1 and SRC2 under MASK (element J is
   taken from SRC2 when bit J of MASK is set, otherwise from SRC1) and
   compare it bytewise with *DST.  Returns 0 when they are identical.  */
static int
check_pblendw (__m128i *dst, short *src1, short *src2)
{
  short reference[8];
  int j = 0;

  while (j < 8)
    {
      if (MASK & (1 << j))
	reference[j] = src2[j];
      else
	reference[j] = src1[j];
      j++;
    }

  return memcmp (dst, reference, sizeof (reference));
}
/* Test body: verify _mm_blend_epi16 with the immediate MASK against the
   scalar reference, first on NUM generated vectors and then with an
   all-zero vector used as either operand.  Any mismatch aborts.  */
static void
TEST (void)
{
  union
    {
      __m128i x[NUM];
      short s[NUM * 8];
    } dst, src1, src2;
  union
    {
      __m128i x;
      short s[8];
    } zero;
  __m128i second_zero, first_zero;
  int n;

  init_pblendw (src1.s, src2.s);

  /* Check pblendw imm8, m128, xmm */
  for (n = 0; n < NUM; n++)
    {
      short *const a = &src1.s[n * 8];
      short *const b = &src2.s[n * 8];

      dst.x[n] = _mm_blend_epi16 (src1.x[n], src2.x[n], MASK);

      if (check_pblendw (&dst.x[n], a, b) != 0)
	abort ();
    }

  /* Check pblendw imm8, xmm, xmm */
  zero.x = _mm_setzero_si128 ();
  second_zero = _mm_blend_epi16 (dst.x[2], zero.x, MASK);
  first_zero = _mm_blend_epi16 (zero.x, dst.x[2], MASK);

  /* &dst.s[16] is the short view of dst.x[2].  */
  if (check_pblendw (&second_zero, &dst.s[16], &zero.s[0]) != 0)
    abort ();

  if (check_pblendw (&first_zero, &zero.s[0], &dst.s[16]) != 0)
    abort ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment