Commit eef4632c by Paul A. Clarke Committed by Segher Boessenkool

rs6000: Fix _mm_slli_epi{32,64} for shift values 16 through 31 and negative (PR83402)

The powerpc versions of _mm_slli_epi32 and _mm_slli_epi64 in emmintrin.h
do not properly handle shift values between 16 and 31, inclusive.
They set up the shift count with vec_splat_s32, which only accepts a
*5-bit signed* literal, i.e. a value in the range -16 to 15.  Values
above 15 produce an error:

  error: argument 1 must be a 5-bit signed literal

The fix is to restrict the use of vec_splat_s32 to shift values < 16
and to use vec_splats otherwise.

Also, _mm_slli_epi{16,32,64}, when given a negative shift value,
should always return a vector of {0}.


	PR target/83402
	* config/rs6000/emmintrin.h (_mm_slli_epi{16,32,64}):
	Ensure that vec_splat_s32 is only called with 0 <= shift < 16.
	Ensure negative shifts result in {0}.

gcc/testsuite/
	PR target/83402
	* gcc.target/powerpc/sse2-psllw-1.c: Refactor and add tests for
	several values:  positive, negative, and zero.
	* gcc.target/powerpc/sse2-pslld-1.c: Same.
	* gcc.target/powerpc/sse2-psllq-1.c: Same.

From-SVN: r259380
parent 53bdbcbc
2018-04-13 Paul A. Clarke <pc@us.ibm.com>
PR target/83402
* config/rs6000/emmintrin.h (_mm_slli_epi{16,32,64}):
Ensure that vec_splat_s32 is only called with 0 <= shift < 16.
Ensure negative shifts result in {0}.
2018-04-13 Vladimir Makarov <vmakarov@redhat.com> 2018-04-13 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/79916 PR rtl-optimization/79916
......
...@@ -1488,7 +1488,7 @@ _mm_slli_epi16 (__m128i __A, int __B) ...@@ -1488,7 +1488,7 @@ _mm_slli_epi16 (__m128i __A, int __B)
__v8hu lshift; __v8hu lshift;
__v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 }; __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 };
if (__B < 16) if (__B >= 0 && __B < 16)
{ {
if (__builtin_constant_p(__B)) if (__builtin_constant_p(__B))
lshift = (__v8hu) vec_splat_s16(__B); lshift = (__v8hu) vec_splat_s16(__B);
...@@ -1507,9 +1507,9 @@ _mm_slli_epi32 (__m128i __A, int __B) ...@@ -1507,9 +1507,9 @@ _mm_slli_epi32 (__m128i __A, int __B)
__v4su lshift; __v4su lshift;
__v4si result = { 0, 0, 0, 0 }; __v4si result = { 0, 0, 0, 0 };
if (__B < 32) if (__B >= 0 && __B < 32)
{ {
if (__builtin_constant_p(__B)) if (__builtin_constant_p(__B) && __B < 16)
lshift = (__v4su) vec_splat_s32(__B); lshift = (__v4su) vec_splat_s32(__B);
else else
lshift = vec_splats ((unsigned int) __B); lshift = vec_splats ((unsigned int) __B);
...@@ -1527,16 +1527,11 @@ _mm_slli_epi64 (__m128i __A, int __B) ...@@ -1527,16 +1527,11 @@ _mm_slli_epi64 (__m128i __A, int __B)
__v2du lshift; __v2du lshift;
__v2di result = { 0, 0 }; __v2di result = { 0, 0 };
if (__B < 64) if (__B >= 0 && __B < 64)
{ {
if (__builtin_constant_p(__B)) if (__builtin_constant_p(__B) && __B < 16)
{
if (__B < 32)
lshift = (__v2du) vec_splat_s32(__B); lshift = (__v2du) vec_splat_s32(__B);
else else
lshift = (__v2du) vec_splats((unsigned long long)__B);
}
else
lshift = (__v2du) vec_splats ((unsigned int) __B); lshift = (__v2du) vec_splats ((unsigned int) __B);
result = vec_vsld ((__v2di) __A, lshift); result = vec_vsld ((__v2di) __A, lshift);
......
2018-04-13 Paul A. Clarke <pc@us.ibm.com>
PR target/83402
* gcc.target/powerpc/sse2-psllw-1.c: Refactor and add tests for
several values: positive, negative, and zero.
* gcc.target/powerpc/sse2-pslld-1.c: Same.
* gcc.target/powerpc/sse2-psllq-1.c: Same.
2018-04-13 Vladimir Makarov <vmakarov@redhat.com> 2018-04-13 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/79916 PR rtl-optimization/79916
......
...@@ -13,32 +13,50 @@ ...@@ -13,32 +13,50 @@
#define TEST sse2_test_pslld_1 #define TEST sse2_test_pslld_1
#endif #endif
#define N 0xf
#include <emmintrin.h> #include <emmintrin.h>
static __m128i #define TEST_FUNC(id, N) \
__attribute__((noinline, unused)) static __m128i \
test (__m128i s1) __attribute__((noinline, unused)) \
{ test##id (__m128i s1) \
return _mm_slli_epi32 (s1, N); { \
} return _mm_slli_epi32 (s1, N); \
}
TEST_FUNC(0, 0)
TEST_FUNC(15, 15)
TEST_FUNC(16, 16)
TEST_FUNC(31, 31)
TEST_FUNC(neg1, -1)
TEST_FUNC(neg16, -16)
TEST_FUNC(neg32, -32)
TEST_FUNC(neg64, -64)
TEST_FUNC(neg128, -128)
#define TEST_CODE(id, N) \
{ \
int e[4] = {0}; \
union128i_d u, s; \
int i; \
s.x = _mm_set_epi32 (1, -2, 3, 4); \
u.x = test##id (s.x); \
if (N >= 0 && N < 32) \
for (i = 0; i < 4; i++) \
e[i] = s.a[i] << (N * (N >= 0)); \
if (check_union128i_d (u, e)) \
abort (); \
}
static void static void
TEST (void) TEST (void)
{ {
union128i_d u, s; TEST_CODE(0, 0);
int e[4] = {0}; TEST_CODE(15, 15);
int i; TEST_CODE(16, 16);
TEST_CODE(31, 31);
s.x = _mm_set_epi32 (1, -2, 3, 4); TEST_CODE(neg1, -1);
TEST_CODE(neg16, -16);
u.x = test (s.x); TEST_CODE(neg32, -32);
TEST_CODE(neg64, -64);
if (N < 32) TEST_CODE(neg128, -128);
for (i = 0; i < 4; i++)
e[i] = s.a[i] << N;
if (check_union128i_d (u, e))
abort ();
} }
...@@ -13,36 +13,56 @@ ...@@ -13,36 +13,56 @@
#define TEST sse2_test_psllq_1 #define TEST sse2_test_psllq_1
#endif #endif
#define N 60
#include <emmintrin.h> #include <emmintrin.h>
#ifdef _ARCH_PWR8 #ifdef _ARCH_PWR8
static __m128i #define TEST_FUNC(id, N) \
__attribute__((noinline, unused)) static __m128i \
test (__m128i s1) __attribute__((noinline, unused)) \
{ test##id (__m128i s1) \
return _mm_slli_epi64 (s1, N); { \
} return _mm_slli_epi64 (s1, N); \
}
TEST_FUNC(0, 0)
TEST_FUNC(15, 15)
TEST_FUNC(16, 16)
TEST_FUNC(31, 31)
TEST_FUNC(63, 63)
TEST_FUNC(neg1, -1)
TEST_FUNC(neg16, -16)
TEST_FUNC(neg32, -32)
TEST_FUNC(neg64, -64)
TEST_FUNC(neg128, -128)
#endif #endif
#define TEST_CODE(id, N) \
{ \
union128i_q u, s; \
long long e[2] = {0}; \
int i; \
s.x = _mm_set_epi64x (-1, 0xf); \
u.x = test##id (s.x); \
if (N >= 0 && N < 64) \
for (i = 0; i < 2; i++) \
e[i] = s.a[i] << (N * (N >= 0)); \
if (check_union128i_q (u, e)) \
abort (); \
}
static void static void
TEST (void) TEST (void)
{ {
#ifdef _ARCH_PWR8 #ifdef _ARCH_PWR8
union128i_q u, s; TEST_CODE(0, 0);
long long e[2] = {0}; TEST_CODE(15, 15);
int i; TEST_CODE(16, 16);
TEST_CODE(31, 31);
s.x = _mm_set_epi64x (-1, 0xf); TEST_CODE(63, 63);
TEST_CODE(neg1, -1);
u.x = test (s.x); TEST_CODE(neg16, -16);
TEST_CODE(neg32, -32);
if (N < 64) TEST_CODE(neg64, -64);
for (i = 0; i < 2; i++) TEST_CODE(neg128, -128);
e[i] = s.a[i] << N;
if (check_union128i_q (u, e))
abort ();
#endif #endif
} }
...@@ -13,32 +13,48 @@ ...@@ -13,32 +13,48 @@
#define TEST sse2_test_psllw_1 #define TEST sse2_test_psllw_1
#endif #endif
#define N 0xb
#include <emmintrin.h> #include <emmintrin.h>
static __m128i #define TEST_FUNC(id, N) \
__attribute__((noinline, unused)) static __m128i \
test (__m128i s1) __attribute__((noinline, unused)) \
{ test##id (__m128i s1) \
return _mm_slli_epi16 (s1, N); { \
} return _mm_slli_epi16 (s1, N); \
}
TEST_FUNC(0, 0)
TEST_FUNC(15, 15)
TEST_FUNC(16, 16)
TEST_FUNC(neg1, -1)
TEST_FUNC(neg16, -16)
TEST_FUNC(neg32, -32)
TEST_FUNC(neg64, -64)
TEST_FUNC(neg128, -128)
#define TEST_CODE(id, N) \
{ \
short e[8] = {0}; \
union128i_w u, s; \
int i; \
s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000); \
u.x = test##id (s.x); \
if (N >= 0 && N < 16) \
for (i = 0; i < 8; i++) \
e[i] = s.a[i] << (N * (N >= 0)); \
if (check_union128i_w (u, e)) \
abort (); \
}
static void static void
TEST (void) TEST (void)
{ {
union128i_w u, s; TEST_CODE(0, 0);
short e[8] = {0}; TEST_CODE(15, 15);
int i; TEST_CODE(16, 16);
TEST_CODE(neg1, -1);
s.x = _mm_set_epi16 (1, 2, 3, 4, 5, 6, 0x7000, 0x9000); TEST_CODE(neg16, -16);
TEST_CODE(neg32, -32);
u.x = test (s.x); TEST_CODE(neg64, -64);
TEST_CODE(neg128, -128);
if (N < 16)
for (i = 0; i < 8; i++)
e[i] = s.a[i] << N;
if (check_union128i_w (u, e))
abort ();
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment