[rs6000] x86 vector intrinsics compatibility: clean-ups for 32bit support

Implement various corrections in the compatibility implementations of the x86 vector intrinsics found after enabling 32bit mode for the associated test cases. (Actual enablement coming in a subsequent patch.)

2018-10-26  Paul A. Clarke  <pc@us.ibm.com>

gcc/ChangeLog:

	* config/rs6000/mmintrin.h: Enable 32bit compilation.
	* config/rs6000/xmmintrin.h: Likewise.

From-SVN: r265535

[rs6000] x86 vector intrinsics compatibility: clean-ups for 32bit support
Implement various corrections in the compatibility implementations of the x86 vector intrinsics found after enabling 32bit mode for the associated test cases. (Actual enablement coming in a subsequent patch.)

2018-10-26  Paul A. Clarke  <pc@us.ibm.com>

gcc/ChangeLog:

	* config/rs6000/mmintrin.h: Enable 32bit compilation.
	* config/rs6000/xmmintrin.h: Likewise.

From-SVN: r265535
dbafa0f5 · Paul A. Clarke · Paul Clarke · dbd93b9d · dbafa0f5 · dbafa0f5
Commit dbafa0f5 authored Oct 26, 2018 by Paul A. Clarke Committed by Paul Clarke Oct 26, 2018
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 61 additions and 61 deletions

gcc/ChangeLog
+5 -0

gcc/config/rs6000/mmintrin.h
+35 -37

gcc/config/rs6000/xmmintrin.h
+21 -24

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2018-10-26  Paul A. Clarke  <pc@us.ibm.com>
+
+	* config/rs6000/mmintrin.h: Enable 32bit compilation.
+	* config/rs6000/xmmintrin.h: Likewise.
+
 2018-10-26  Paul A. Clarke  <pc@us.ibm.com>

 	* config/rs6000/xmmintrin.h (_mm_extract_pi16): Fix for big-endian.
--- a/gcc/config/rs6000/mmintrin.h
+++ b/gcc/config/rs6000/mmintrin.h
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -996,7 +996,7 @@ _mm_cvtps_pi32 (__m128 __A)
  rounded = vec_rint(temp);
  result = (__vector unsigned long long) vec_cts (rounded, 0);

-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
+  return (__m64) ((vector long long) result)[0];
 }

 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1053,7 +1053,7 @@ _mm_cvttps_pi32 (__m128 __A)
  temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
  result = (__vector unsigned long long) vec_cts (temp, 0);

-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
+  return (__m64) ((vector long long) result)[0];
 }

 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1104,7 +1104,7 @@ _mm_cvtpi32_ps (__m128        __A, __m64        __B)
  __vector signed int vm1;
  __vector float vf1;

-  vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B);
+  vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
  vf1 = (__vector float) vec_ctf (vm1, 0);

  return ((__m128) (__vector unsigned long long)
@@ -1126,7 +1126,7 @@ _mm_cvtpi16_ps (__m64 __A)
  __vector signed int vi4;
  __vector float vf1;

-  vs8 = (__vector signed short) __builtin_pack_vector_int128 (__A, __A);
+  vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
  vi4 = vec_vupklsh (vs8);
  vf1 = (__vector float) vec_ctf (vi4, 0);

@@ -1143,7 +1143,7 @@ _mm_cvtpu16_ps (__m64 __A)
  __vector unsigned int vi4;
  __vector float vf1;

-  vs8 = (__vector unsigned short) __builtin_pack_vector_int128 (__A, __A);
+  vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
  vi4 = (__vector unsigned int) vec_vmrglh (vs8, zero);
  vf1 = (__vector float) vec_ctf (vi4, 0);

@@ -1159,7 +1159,7 @@ _mm_cvtpi8_ps (__m64 __A)
  __vector signed int vi4;
  __vector float vf1;

-  vc16 = (__vector signed char) __builtin_pack_vector_int128 (__A, __A);
+  vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
  vs8 = vec_vupkhsb (vc16);
  vi4 = vec_vupkhsh (vs8);
  vf1 = (__vector float) vec_ctf (vi4, 0);
@@ -1179,7 +1179,7 @@ _mm_cvtpu8_ps (__m64  __A)
  __vector unsigned int vi4;
  __vector float vf1;

-  vc16 = (__vector unsigned char) __builtin_pack_vector_int128 (__A, __A);
+  vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
  vs8 = (__vector unsigned short) vec_vmrglb (vc16, zero);
  vi4 = (__vector unsigned int) vec_vmrghh (vs8,
 					    (__vector unsigned short) zero);
@@ -1195,7 +1195,7 @@ _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
  __vector signed int vi4;
  __vector float vf4;

-  vi4 = (__vector signed int) __builtin_pack_vector_int128 (__B, __A);
+  vi4 = (__vector signed int) (__vector unsigned long long) { __B, __A };
  vf4 = (__vector float) vec_ctf (vi4, 0);
  return (__m128) vf4;
 }
@@ -1212,7 +1212,7 @@ _mm_cvtps_pi16(__m128 __A)
  temp = vec_cts (rounded, 0);
  result = (__vector unsigned long long) vec_pack (temp, temp);

-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
+  return (__m64) ((vector long long) result)[0];
 }

 /* Convert the four SPFP values in A to four signed 8-bit integers.  */
@@ -1224,15 +1224,12 @@ _mm_cvtps_pi8(__m128 __A)
  static const __vector signed int zero = {0, 0, 0, 0};
  __vector signed short tmp_s;
  __vector signed char res_v;
-  __m64 result;

  rounded = vec_rint(__A);
  tmp_i = vec_cts (rounded, 0);
  tmp_s = vec_pack (tmp_i, zero);
  res_v = vec_pack (tmp_s, tmp_s);
-  result = (__m64) __builtin_unpack_vector_int128 ((__vector __int128)res_v, 0);
-
-  return (result);
+  return (__m64) ((vector long long) res_v)[0];
 }

 /* Selects four specific SPFP values from A and B based on MASK.  */
@@ -1432,7 +1429,7 @@ _mm_max_pi16 (__m64 __A, __m64 __B)
  b = (__vector signed short)vec_splats (__B);
  c = (__vector __bool short)vec_cmpgt (a, b);
  r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
  __m64_union m1, m2, res;

@@ -1470,7 +1467,7 @@ _mm_max_pu8 (__m64 __A, __m64 __B)
  b = (__vector unsigned char)vec_splats (__B);
  c = (__vector __bool char)vec_cmpgt (a, b);
  r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
  __m64_union m1, m2, res;
  long i;
@@ -1506,7 +1503,7 @@ _mm_min_pi16 (__m64 __A, __m64 __B)
  b = (__vector signed short)vec_splats (__B);
  c = (__vector __bool short)vec_cmplt (a, b);
  r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
  __m64_union m1, m2, res;

@@ -1544,7 +1541,7 @@ _mm_min_pu8 (__m64 __A, __m64 __B)
  b = (__vector unsigned char)vec_splats (__B);
  c = (__vector __bool char)vec_cmplt (a, b);
  r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
  __m64_union m1, m2, res;
  long i;
@@ -1572,7 +1569,7 @@ _m_pminub (__m64 __A, __m64 __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_pi8 (__m64 __A)
 {
-  unsigned long p = 0x0008101820283038UL; // permute control for sign bits
+  unsigned long long p = 0x0008101820283038UL; // permute control for sign bits

  return __builtin_bpermd (p, __A);
 }
@@ -1603,7 +1600,7 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
  w1 = vec_vmulouh (a, b);
  c = (__vector unsigned short)vec_perm (w0, w1, xform1);

-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }

 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1646,7 +1643,7 @@ _mm_shuffle_pi16 (__m64 __A, int const __N)
  p = vec_splats (t.as_m64);
  a = vec_splats (__A);
  r = vec_perm (a, a, (__vector unsigned char)p);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 }

 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1686,7 +1683,7 @@ _mm_avg_pu8 (__m64 __A, __m64 __B)
  a = (__vector unsigned char)vec_splats (__A);
  b = (__vector unsigned char)vec_splats (__B);
  c = vec_avg (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }

 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1704,7 +1701,7 @@ _mm_avg_pu16 (__m64 __A, __m64 __B)
  a = (__vector unsigned short)vec_splats (__A);
  b = (__vector unsigned short)vec_splats (__B);
  c = vec_avg (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }

 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1726,8 +1723,8 @@ _mm_sad_pu8 (__m64  __A, __m64  __B)
    { 0, 0, 0, 0 };
  unsigned short result;

-  a = (__vector unsigned char) __builtin_pack_vector_int128 (0UL, __A);
-  b = (__vector unsigned char) __builtin_pack_vector_int128 (0UL, __B);
+  a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
+  b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
  vmin = vec_min (a, b);
  vmax = vec_max (a, b);
  vabsdiff = vec_sub (vmax, vmin);