Commit dbafa0f5 by Paul A. Clarke, committed by Paul Clarke

[rs6000] x86 vector intrinsics compatibility: clean-ups for 32bit support

Implement various corrections, found after enabling 32bit mode for the
associated test cases, in the compatibility implementations of the x86
vector intrinsics.  (Actual enablement coming in a subsequent patch.)

2018-10-26  Paul A. Clarke  <pc@us.ibm.com>

gcc/ChangeLog:

	* config/rs6000/mmintrin.h: Enable 32bit compilation.
	* config/rs6000/xmmintrin.h: Likewise.

From-SVN: r265535
parent dbd93b9d
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2018-10-25  Paul A. Clarke  <pc@us.ibm.com>
+
+	* config/rs6000/mmintrin.h: Enable 32bit compilation.
+	* config/rs6000/xmmintrin.h: Likewise.
+
 2018-10-26  Paul A. Clarke  <pc@us.ibm.com>
 
 	* config/rs6000/xmmintrin.h (_mm_extract_pi16): Fix for big-endian.
--- a/gcc/config/rs6000/mmintrin.h
+++ b/gcc/config/rs6000/mmintrin.h
@@ -112,7 +112,6 @@ _m_to_int (__m64 __i)
   return _mm_cvtsi64_si32 (__i);
 }
 
-#ifdef __powerpc64__
 /* Convert I to a __m64 object.  */
 
 /* Intel intrinsic.  */
@@ -173,9 +172,9 @@ _mm_packs_pi16 (__m64 __m1, __m64 __m2)
   __vector signed short vm1;
   __vector signed char vresult;
 
-  vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1);
+  vm1 = (__vector signed short) (__vector unsigned long long) { __m2, __m1 };
   vresult = vec_vpkshss (vm1, vm1);
-  return (__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0);
+  return (__m64) ((vector long long) vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
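
For context, _mm_packs_pi16 packs the eight signed 16-bit elements of its two
arguments into eight signed 8-bit elements with saturation.  A usage sketch,
assuming a POWER toolchain with these compatibility headers on the include
path:

    #include <mmintrin.h>
    #include <stdio.h>

    int
    main (void)
    {
      __m64 a = _mm_set_pi16 (300, -300, 5, -5);  /* 300/-300 saturate to 127/-128 */
      __m64 b = _mm_set_pi16 (1, 2, 3, 4);
      __m64 r = _mm_packs_pi16 (a, b);
      printf ("%016llx\n", (unsigned long long) r);
      return 0;
    }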
@@ -193,9 +192,9 @@ _mm_packs_pi32 (__m64 __m1, __m64 __m2)
   __vector signed int vm1;
   __vector signed short vresult;
 
-  vm1 = (__vector signed int)__builtin_pack_vector_int128 (__m2, __m1);
+  vm1 = (__vector signed int) (__vector unsigned long long) { __m2, __m1 };
   vresult = vec_vpkswss (vm1, vm1);
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0));
+  return (__m64) ((vector long long) vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -213,9 +212,9 @@ _mm_packs_pu16 (__m64 __m1, __m64 __m2)
   __vector signed short vm1;
   __vector unsigned char vresult;
 
-  vm1 = (__vector signed short)__builtin_pack_vector_int128 (__m2, __m1);
+  vm1 = (__vector signed short) (__vector unsigned long long) { __m2, __m1 };
   vresult = vec_vpkshus (vm1, vm1);
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)vresult, 0));
+  return (__m64) ((vector long long) vresult)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -236,7 +235,7 @@ _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_mergel (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -317,7 +316,7 @@ _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_mergel (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 1));
+  return (__m64) ((vector long long) c)[1];
 #else
   __m64_union m1, m2, res;
 
@@ -398,7 +397,7 @@ _mm_add_pi8 (__m64 __m1, __m64 __m2)
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_add (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -434,7 +433,7 @@ _mm_add_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_add (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -466,7 +465,7 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2)
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = vec_add (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -496,7 +495,7 @@ _mm_sub_pi8 (__m64 __m1, __m64 __m2)
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_sub (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -532,7 +531,7 @@ _mm_sub_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_sub (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -564,7 +563,7 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2)
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = vec_sub (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -754,7 +753,7 @@ _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = (__vector signed char)vec_cmpgt (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -791,7 +790,7 @@ _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = (__vector signed short)vec_cmpeq (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -822,7 +821,7 @@ _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = (__vector signed short)vec_cmpgt (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -855,7 +854,7 @@ _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = (__vector signed int)vec_cmpeq (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -884,7 +883,7 @@ _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
   a = (__vector signed int)vec_splats (__m1);
   b = (__vector signed int)vec_splats (__m2);
   c = (__vector signed int)vec_cmpgt (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -915,7 +914,7 @@ _mm_adds_pi8 (__m64 __m1, __m64 __m2)
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -933,7 +932,7 @@ _mm_adds_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -951,7 +950,7 @@ _mm_adds_pu8 (__m64 __m1, __m64 __m2)
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -970,7 +969,7 @@ _mm_adds_pu16 (__m64 __m1, __m64 __m2)
   a = (__vector unsigned short)vec_splats (__m1);
   b = (__vector unsigned short)vec_splats (__m2);
   c = vec_adds (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -989,7 +988,7 @@ _mm_subs_pi8 (__m64 __m1, __m64 __m2)
   a = (__vector signed char)vec_splats (__m1);
   b = (__vector signed char)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1008,7 +1007,7 @@ _mm_subs_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1027,7 +1026,7 @@ _mm_subs_pu8 (__m64 __m1, __m64 __m2)
   a = (__vector unsigned char)vec_splats (__m1);
   b = (__vector unsigned char)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1046,7 +1045,7 @@ _mm_subs_pu16 (__m64 __m1, __m64 __m2)
   a = (__vector unsigned short)vec_splats (__m1);
   b = (__vector unsigned short)vec_splats (__m2);
   c = vec_subs (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1068,7 +1067,7 @@ _mm_madd_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = vec_vmsumshm (a, b, zero);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1096,7 +1095,7 @@ _mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
   w1 = vec_vmulosh (a, b);
   c = (__vector signed short)vec_perm (w0, w1, xform1);
 
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1115,7 +1114,7 @@ _mm_mullo_pi16 (__m64 __m1, __m64 __m2)
   a = (__vector signed short)vec_splats (__m1);
   b = (__vector signed short)vec_splats (__m2);
   c = a * b;
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1136,7 +1135,7 @@ _mm_sll_pi16 (__m64 __m, __m64 __count)
       m = (__vector signed short)vec_splats (__m);
       c = (__vector unsigned short)vec_splats ((unsigned short)__count);
       r = vec_sl (m, (__vector unsigned short)c);
-      return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+      return (__m64) ((vector long long) r)[0];
     }
   else
     return (0);
@@ -1205,7 +1204,7 @@ _mm_sra_pi16 (__m64 __m, __m64 __count)
      m = (__vector signed short)vec_splats (__m);
      c = (__vector unsigned short)vec_splats ((unsigned short)__count);
      r = vec_sra (m, (__vector unsigned short)c);
-     return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+     return (__m64) ((vector long long) r)[0];
     }
   else
     return (0);
@@ -1274,7 +1273,7 @@ _mm_srl_pi16 (__m64 __m, __m64 __count)
      m = (__vector unsigned short)vec_splats (__m);
      c = (__vector unsigned short)vec_splats ((unsigned short)__count);
      r = vec_sr (m, (__vector unsigned short)c);
-     return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+     return (__m64) ((vector long long) r)[0];
     }
   else
     return (0);
@@ -1417,7 +1416,7 @@ _mm_set1_pi16 (short __w)
   __vector signed short w;
 
   w = (__vector signed short)vec_splats (__w);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)w, 0));
+  return (__m64) ((vector long long) w)[0];
 #else
   __m64_union res;
 
@@ -1437,7 +1436,7 @@ _mm_set1_pi8 (signed char __b)
   __vector signed char b;
 
   b = (__vector signed char)vec_splats (__b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)b, 0));
+  return (__m64) ((vector long long) b)[0];
 #else
   __m64_union res;
 
@@ -1452,5 +1451,4 @@ _mm_set1_pi8 (signed char __b)
   return (res.as_m64);
 #endif
 }
-#endif /* __powerpc64__ */
 
 #endif /* _MMINTRIN_H_INCLUDED */
--- a/gcc/config/rs6000/xmmintrin.h
+++ b/gcc/config/rs6000/xmmintrin.h
@@ -996,7 +996,7 @@ _mm_cvtps_pi32 (__m128 __A)
   rounded = vec_rint(temp);
   result = (__vector unsigned long long) vec_cts (rounded, 0);
 
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
+  return (__m64) ((vector long long) result)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1053,7 +1053,7 @@ _mm_cvttps_pi32 (__m128 __A)
   temp = (__v4sf) vec_splat ((__vector long long)__A, 0);
   result = (__vector unsigned long long) vec_cts (temp, 0);
 
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
+  return (__m64) ((vector long long) result)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1104,7 +1104,7 @@ _mm_cvtpi32_ps (__m128 __A, __m64 __B)
   __vector signed int vm1;
   __vector float vf1;
 
-  vm1 = (__vector signed int) __builtin_pack_vector_int128 (__B, __B);
+  vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B};
   vf1 = (__vector float) vec_ctf (vm1, 0);
 
   return ((__m128) (__vector unsigned long long)
@@ -1126,7 +1126,7 @@ _mm_cvtpi16_ps (__m64 __A)
   __vector signed int vi4;
   __vector float vf1;
 
-  vs8 = (__vector signed short) __builtin_pack_vector_int128 (__A, __A);
+  vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A };
   vi4 = vec_vupklsh (vs8);
   vf1 = (__vector float) vec_ctf (vi4, 0);
 
@@ -1143,7 +1143,7 @@ _mm_cvtpu16_ps (__m64 __A)
   __vector unsigned int vi4;
   __vector float vf1;
 
-  vs8 = (__vector unsigned short) __builtin_pack_vector_int128 (__A, __A);
+  vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A };
   vi4 = (__vector unsigned int) vec_vmrglh (vs8, zero);
   vf1 = (__vector float) vec_ctf (vi4, 0);
 
@@ -1159,7 +1159,7 @@ _mm_cvtpi8_ps (__m64 __A)
   __vector signed int vi4;
   __vector float vf1;
 
-  vc16 = (__vector signed char) __builtin_pack_vector_int128 (__A, __A);
+  vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A };
   vs8 = vec_vupkhsb (vc16);
   vi4 = vec_vupkhsh (vs8);
   vf1 = (__vector float) vec_ctf (vi4, 0);
@@ -1179,7 +1179,7 @@ _mm_cvtpu8_ps (__m64 __A)
   __vector unsigned int vi4;
   __vector float vf1;
 
-  vc16 = (__vector unsigned char) __builtin_pack_vector_int128 (__A, __A);
+  vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A };
   vs8 = (__vector unsigned short) vec_vmrglb (vc16, zero);
   vi4 = (__vector unsigned int) vec_vmrghh (vs8,
					     (__vector unsigned short) zero);
@@ -1195,7 +1195,7 @@ _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
   __vector signed int vi4;
   __vector float vf4;
 
-  vi4 = (__vector signed int) __builtin_pack_vector_int128 (__B, __A);
+  vi4 = (__vector signed int) (__vector unsigned long long) { __B, __A };
   vf4 = (__vector float) vec_ctf (vi4, 0);
   return (__m128) vf4;
 }
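
Note that the new initializer syntax preserves the old builtin's operand
order: the first initializer becomes vector element 0, so { __B, __A } keeps
__B in the same position that __builtin_pack_vector_int128 (__B, __A) gave
it.  A small, generic illustration of that ordering (not taken from the
patch):

    #include <altivec.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned long long lo = 0x1111111111111111ULL;
      unsigned long long hi = 0x2222222222222222ULL;
      __vector unsigned long long v = { lo, hi };

      /* Element numbering follows initializer order, so this prints
         the 1111... value first regardless of storage endianness.  */
      printf ("%llx %llx\n", v[0], v[1]);
      return 0;
    }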
@@ -1212,7 +1212,7 @@ _mm_cvtps_pi16(__m128 __A)
   temp = vec_cts (rounded, 0);
   result = (__vector unsigned long long) vec_pack (temp, temp);
 
-  return ((__m64) __builtin_unpack_vector_int128 ((__vector __int128)result, 0));
+  return (__m64) ((vector long long) result)[0];
 }
 
 /* Convert the four SPFP values in A to four signed 8-bit integers.  */
@@ -1224,15 +1224,12 @@ _mm_cvtps_pi8(__m128 __A)
   static const __vector signed int zero = {0, 0, 0, 0};
   __vector signed short tmp_s;
   __vector signed char res_v;
-  __m64 result;
 
   rounded = vec_rint(__A);
   tmp_i = vec_cts (rounded, 0);
   tmp_s = vec_pack (tmp_i, zero);
   res_v = vec_pack (tmp_s, tmp_s);
-  result = (__m64) __builtin_unpack_vector_int128 ((__vector __int128)res_v, 0);
-
-  return (result);
+  return (__m64) ((vector long long) res_v)[0];
 }
 
 /* Selects four specific SPFP values from A and B based on MASK.  */
@@ -1432,7 +1429,7 @@ _mm_max_pi16 (__m64 __A, __m64 __B)
   b = (__vector signed short)vec_splats (__B);
   c = (__vector __bool short)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -1470,7 +1467,7 @@ _mm_max_pu8 (__m64 __A, __m64 __B)
   b = (__vector unsigned char)vec_splats (__B);
   c = (__vector __bool char)vec_cmpgt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
   long i;
@@ -1506,7 +1503,7 @@ _mm_min_pi16 (__m64 __A, __m64 __B)
   b = (__vector signed short)vec_splats (__B);
   c = (__vector __bool short)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
 
@@ -1544,7 +1541,7 @@ _mm_min_pu8 (__m64 __A, __m64 __B)
   b = (__vector unsigned char)vec_splats (__B);
   c = (__vector __bool char)vec_cmplt (a, b);
   r = vec_sel (b, a, c);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 #else
   __m64_union m1, m2, res;
   long i;
@@ -1572,7 +1569,7 @@ _m_pminub (__m64 __A, __m64 __B)
 extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _mm_movemask_pi8 (__m64 __A)
 {
-  unsigned long p = 0x0008101820283038UL; // permute control for sign bits
+  unsigned long long p = 0x0008101820283038UL; // permute control for sign bits
 
   return __builtin_bpermd (p, __A);
 }
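
The _mm_movemask_pi8 fix matters only in 32bit mode: on an ILP32 PowerPC
target, unsigned long is 32 bits, so the 64-bit bpermd permute-control
constant (which selects the sign bit of each of the eight bytes) would be
truncated; unsigned long long is 64 bits in both modes.  A generic
illustration of the hazard, not taken from the patch:

    #include <stdio.h>

    int
    main (void)
    {
      /* On ILP32, sizeof (unsigned long) == 4 and assigning this constant
         to an unsigned long would truncate it; unsigned long long is at
         least 64 bits on every target.  */
      unsigned long long p = 0x0008101820283038ULL;

      printf ("long: %zu bytes, long long: %zu bytes\n",
              sizeof (unsigned long), sizeof (unsigned long long));
      printf ("p = %016llx\n", p);
      return 0;
    }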
@@ -1603,7 +1600,7 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
   w1 = vec_vmulouh (a, b);
   c = (__vector unsigned short)vec_perm (w0, w1, xform1);
 
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1646,7 +1643,7 @@ _mm_shuffle_pi16 (__m64 __A, int const __N)
   p = vec_splats (t.as_m64);
   a = vec_splats (__A);
   r = vec_perm (a, a, (__vector unsigned char)p);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)r, 0));
+  return (__m64) ((vector long long) r)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1686,7 +1683,7 @@ _mm_avg_pu8 (__m64 __A, __m64 __B)
   a = (__vector unsigned char)vec_splats (__A);
   b = (__vector unsigned char)vec_splats (__B);
   c = vec_avg (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1704,7 +1701,7 @@ _mm_avg_pu16 (__m64 __A, __m64 __B)
   a = (__vector unsigned short)vec_splats (__A);
   b = (__vector unsigned short)vec_splats (__B);
   c = vec_avg (a, b);
-  return (__builtin_unpack_vector_int128 ((__vector __int128)c, 0));
+  return (__m64) ((vector long long) c)[0];
 }
 
 extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -1726,8 +1723,8 @@ _mm_sad_pu8 (__m64 __A, __m64 __B)
     { 0, 0, 0, 0 };
   unsigned short result;
 
-  a = (__vector unsigned char) __builtin_pack_vector_int128 (0UL, __A);
-  b = (__vector unsigned char) __builtin_pack_vector_int128 (0UL, __B);
+  a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A };
+  b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B };
   vmin = vec_min (a, b);
   vmax = vec_max (a, b);
   vabsdiff = vec_sub (vmax, vmin);
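
_mm_sad_pu8 computes the sum of absolute differences of eight unsigned
bytes; the { 0UL, __A } initializers place the operand in one doubleword and
zero the other, so the min/max/subtract sequence sees zeros in the unused
lanes.  A scalar reference model of the operation, for illustration only:

    #include <stdio.h>

    /* Scalar equivalent of the sum-of-absolute-differences that
       _mm_sad_pu8 performs over eight unsigned bytes.  */
    static unsigned int
    sad8 (const unsigned char *a, const unsigned char *b)
    {
      unsigned int sum = 0;
      for (int i = 0; i < 8; i++)
        sum += (a[i] > b[i]) ? (a[i] - b[i]) : (b[i] - a[i]);
      return sum;
    }

    int
    main (void)
    {
      unsigned char a[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };
      unsigned char b[8] = { 80, 70, 60, 50, 40, 30, 20, 10 };
      printf ("%u\n", sad8 (a, b));  /* prints 320 */
      return 0;
    }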
...