Commit 077084dd by Jan Hubicka Committed by Jan Hubicka

i386.c (builtin_description): Add IX86_BUILTIN_PUNPCKHQDQ128.

	* i386.c (builtin_description): Add IX86_BUILTIN_PUNPCKHQDQ128.
	(ix86_expand_builtin): Fix MASKMOVDQU expansion.
	* i386.h (ix86_builtins): Add IX86_BUILTIN_PUNPCKHQDQ128.
	* i386.md (mmx_punpck?dq): Simplify.
	(sse2_punpcklqdq): Fix.
	(sse2_punpckhqdq): New.
	* xmmintrin.h (_mm_unpackhi_epi64): New.

	* xmmintrin.h (_mm_cvt*, _mm_stream_pd): Fix prototypes.
	(_mm_shufflehi_epi16, _mm_shufflelo_epi16): Fix typo.

From-SVN: r58412
parent 874994a9
Tue Oct 22 19:07:03 CEST 2002 Jan Hubicka <jh@suse.cz>
* i386.c (builtin_description): Add IX86_BUILTIN_PUNPCKHQDQ128.
(ix86_expand_builtin): Fix MASKMOVDQU expansion.
* i386.h (ix86_builtins): Add IX86_BUILTIN_PUNPCKHQDQ128.
* i386.md (mmx_punpck?dq): Simplify.
(sse2_punpcklqdq): Fix.
(sse2_punpckhqdq): New.
* xmmintrin.h (_mm_unpackhi_epi64): New.
* xmmintrin.h (_mm_cvt*, _mm_stream_pd): Fix prototypes.
(_mm_shufflehi_epi16, _mm_shufflelo_epi16): Fix typo.
2002-10-22 Nathan Sidwell <nathan@codesourcery.com> 2002-10-22 Nathan Sidwell <nathan@codesourcery.com>
PR c++/7209 PR c++/7209
......
...@@ -12115,6 +12115,7 @@ static const struct builtin_description bdesc_2arg[] = ...@@ -12115,6 +12115,7 @@ static const struct builtin_description bdesc_2arg[] =
{ MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
{ MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
...@@ -13145,6 +13146,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore) ...@@ -13145,6 +13146,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
return target; return target;
case IX86_BUILTIN_MASKMOVQ: case IX86_BUILTIN_MASKMOVQ:
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ icode = (fcode == IX86_BUILTIN_MASKMOVQ
? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq) ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
: CODE_FOR_sse2_maskmovdqu); : CODE_FOR_sse2_maskmovdqu);
......
...@@ -2427,6 +2427,7 @@ enum ix86_builtins ...@@ -2427,6 +2427,7 @@ enum ix86_builtins
IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHBW128,
IX86_BUILTIN_PUNPCKHWD128, IX86_BUILTIN_PUNPCKHWD128,
IX86_BUILTIN_PUNPCKHDQ128, IX86_BUILTIN_PUNPCKHDQ128,
IX86_BUILTIN_PUNPCKHQDQ128,
IX86_BUILTIN_PUNPCKLBW128, IX86_BUILTIN_PUNPCKLBW128,
IX86_BUILTIN_PUNPCKLWD128, IX86_BUILTIN_PUNPCKLWD128,
IX86_BUILTIN_PUNPCKLDQ128, IX86_BUILTIN_PUNPCKLDQ128,
......
...@@ -19694,9 +19694,7 @@ ...@@ -19694,9 +19694,7 @@
(define_insn "mmx_punpckhdq" (define_insn "mmx_punpckhdq"
[(set (match_operand:V2SI 0 "register_operand" "=y") [(set (match_operand:V2SI 0 "register_operand" "=y")
(vec_merge:V2SI (vec_merge:V2SI
(vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") (match_operand:V2SI 1 "register_operand" "0")
(parallel [(const_int 0)
(const_int 1)]))
(vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
(parallel [(const_int 1) (parallel [(const_int 1)
(const_int 0)])) (const_int 0)]))
...@@ -19758,9 +19756,7 @@ ...@@ -19758,9 +19756,7 @@
(vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0") (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
(parallel [(const_int 1) (parallel [(const_int 1)
(const_int 0)])) (const_int 0)]))
(vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y") (match_operand:V2SI 2 "register_operand" "y")
(parallel [(const_int 0)
(const_int 1)]))
(const_int 1)))] (const_int 1)))]
"TARGET_MMX" "TARGET_MMX"
"punpckldq\t{%2, %0|%0, %2}" "punpckldq\t{%2, %0|%0, %2}"
...@@ -21548,16 +21544,29 @@ ...@@ -21548,16 +21544,29 @@
(define_insn "sse2_punpcklqdq" (define_insn "sse2_punpcklqdq"
[(set (match_operand:V2DI 0 "register_operand" "=x") [(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_merge:V2DI (vec_merge:V2DI
(match_operand:V2DI 1 "register_operand" "0")
(vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x") (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
(parallel [(const_int 1) (parallel [(const_int 1)
(const_int 0)])) (const_int 0)]))
(match_operand:V2DI 1 "register_operand" "0")
(const_int 1)))] (const_int 1)))]
"TARGET_SSE2" "TARGET_SSE2"
"punpcklqdq\t{%2, %0|%0, %2}" "punpcklqdq\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt") [(set_attr "type" "ssecvt")
(set_attr "mode" "TI")]) (set_attr "mode" "TI")])
(define_insn "sse2_punpckhqdq"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_merge:V2DI
(match_operand:V2DI 1 "register_operand" "0")
(vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
(parallel [(const_int 1)
(const_int 0)]))
(const_int 1)))]
"TARGET_SSE2"
"punpckhqdq\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
;; SSE2 moves ;; SSE2 moves
(define_insn "sse2_movapd" (define_insn "sse2_movapd"
......
...@@ -1827,16 +1827,16 @@ _mm_cvtepi32_pd (__m128i __A) ...@@ -1827,16 +1827,16 @@ _mm_cvtepi32_pd (__m128i __A)
return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A); return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
} }
static __inline __m128d static __inline __m128
_mm_cvtepi32_ps (__m128i __A) _mm_cvtepi32_ps (__m128i __A)
{ {
return (__m128d)__builtin_ia32_cvtdq2ps ((__v4si) __A); return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
} }
static __inline __m128d static __inline __m128i
_mm_cvtpd_epi32 (__m128d __A) _mm_cvtpd_epi32 (__m128d __A)
{ {
return (__m128d)__builtin_ia32_cvtpd2dq ((__v2df) __A); return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
} }
static __inline __m64 static __inline __m64
...@@ -1845,16 +1845,16 @@ _mm_cvtpd_pi32 (__m128d __A) ...@@ -1845,16 +1845,16 @@ _mm_cvtpd_pi32 (__m128d __A)
return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A); return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
} }
static __inline __m128d static __inline __m128
_mm_cvtpd_ps (__m128d __A) _mm_cvtpd_ps (__m128d __A)
{ {
return (__m128d)__builtin_ia32_cvtpd2ps ((__v2df) __A); return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
} }
static __inline __m128d static __inline __m128i
_mm_cvttpd_epi32 (__m128d __A) _mm_cvttpd_epi32 (__m128d __A)
{ {
return (__m128d)__builtin_ia32_cvttpd2dq ((__v2df) __A); return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
} }
static __inline __m64 static __inline __m64
...@@ -1869,20 +1869,20 @@ _mm_cvtpi32_pd (__m64 __A) ...@@ -1869,20 +1869,20 @@ _mm_cvtpi32_pd (__m64 __A)
return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A); return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
} }
static __inline __m128d static __inline __m128i
_mm_cvtps_epi32 (__m128d __A) _mm_cvtps_epi32 (__m128 __A)
{ {
return (__m128d)__builtin_ia32_cvtps2dq ((__v4sf) __A); return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
} }
static __inline __m128d static __inline __m128i
_mm_cvttps_epi32 (__m128d __A) _mm_cvttps_epi32 (__m128 __A)
{ {
return (__m128d)__builtin_ia32_cvttps2dq ((__v4sf) __A); return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
} }
static __inline __m128d static __inline __m128d
_mm_cvtps_pd (__m128d __A) _mm_cvtps_pd (__m128 __A)
{ {
return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A); return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
} }
...@@ -1899,10 +1899,10 @@ _mm_cvttsd_si32 (__m128d __A) ...@@ -1899,10 +1899,10 @@ _mm_cvttsd_si32 (__m128d __A)
return __builtin_ia32_cvttsd2si ((__v2df) __A); return __builtin_ia32_cvttsd2si ((__v2df) __A);
} }
static __inline __m128d static __inline __m128
_mm_cvtsd_ss (__m128d __A, __m128d __B) _mm_cvtsd_ss (__m128 __A, __m128d __B)
{ {
return (__m128d)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B); return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
} }
static __inline __m128d static __inline __m128d
...@@ -1912,7 +1912,7 @@ _mm_cvtsi32_sd (__m128d __A, int __B) ...@@ -1912,7 +1912,7 @@ _mm_cvtsi32_sd (__m128d __A, int __B)
} }
static __inline __m128d static __inline __m128d
_mm_cvtss_sd (__m128d __A, __m128d __B) _mm_cvtss_sd (__m128d __A, __m128 __B)
{ {
return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B); return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
} }
...@@ -1998,6 +1998,12 @@ _mm_unpackhi_epi32 (__m128i __A, __m128i __B) ...@@ -1998,6 +1998,12 @@ _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
} }
static __inline __m128i static __inline __m128i
_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
}
static __inline __m128i
_mm_unpacklo_epi8 (__m128i __A, __m128i __B) _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
{ {
return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B); return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
...@@ -2359,8 +2365,8 @@ _mm_mulhi_epu16 (__m128i __A, __m128i __B) ...@@ -2359,8 +2365,8 @@ _mm_mulhi_epu16 (__m128i __A, __m128i __B)
return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B); return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
} }
#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw128 ((__v8hi)__A, __B)) #define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __B))
#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw128 ((__v8hi)__A, __B)) #define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __B))
#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B)) #define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)__A, __B))
static __inline void static __inline void
...@@ -2400,9 +2406,9 @@ _mm_stream_si128 (__m128i *__A, __m128i __B) ...@@ -2400,9 +2406,9 @@ _mm_stream_si128 (__m128i *__A, __m128i __B)
} }
static __inline void static __inline void
_mm_stream_pd (__m128d *__A, __m128d __B) _mm_stream_pd (double *__A, __m128d __B)
{ {
__builtin_ia32_movntpd (__A, (__v2df)__B); __builtin_ia32_movntpd ((__m128d *)__A, (__v2df)__B);
} }
static __inline __m128i static __inline __m128i
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment