Commit ed9b5396 authored by Richard Henderson, committed by Richard Henderson

i386.c (IX86_BUILTIN_MOVQ, [...]): Remove.

        * config/i386/i386.c (IX86_BUILTIN_MOVQ, IX86_BUILTIN_LOADD,
        IX86_BUILTIN_STORED, IX86_BUILTIN_MOVQ2DQ,
        IX86_BUILTIN_MOVDQ2Q): Remove.
        (IX86_BUILTIN_VEC_EXT_V4SI): New.
        (ix86_init_mmx_sse_builtins, ix86_expand_builtin): Update to match.
        (ix86_expand_vector_extract): For V4S[FI], extract element 0 after
        shuffling.
        * config/i386/sse.md (sse_concatv2sf): Accept zero operand 2.
        (sse2_pextrw): Fix immediate constraint.
        (sse2_loadq, sse2_loadq_rex64): Remove.
        * config/i386/emmintrin.h (_mm_cvtsi128_si32, _mm_cvtsi128_si64x):
        Use __builtin_ia32_vec_ext_<size>.
        (_mm_cvtsi32_si128, _mm_cvtsi64x_si128): Use _mm_set_epi<size>.

From-SVN: r93604
parent 17284759
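The user-visible behavior of the rewired <emmintrin.h> intrinsics does not change; they are simply reimplemented on top of the generic vector-extract builtin and _mm_set_epi32/_mm_set_epi64x, as the diff below shows. A minimal usage sketch of the intrinsics this commit touches (ordinary SSE2 intrinsics, assuming a compiler invoked with -msse2):

    #include <emmintrin.h>

    int low_lane (__m128i v)
    {
      /* Element 0 of the four packed 32-bit integers.  */
      return _mm_cvtsi128_si32 (v);
    }

    __m128i from_scalar (int x)
    {
      /* Builds the vector { x, 0, 0, 0 }.  */
      return _mm_cvtsi32_si128 (x);
    }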
config/i386/emmintrin.h:

@@ -195,20 +195,17 @@ _mm_storer_pd (double *__P, __m128d __A)
 static __inline int
 _mm_cvtsi128_si32 (__m128i __A)
 {
-  int __tmp;
-  __builtin_ia32_stored (&__tmp, (__v4si)__A);
-  return __tmp;
+  return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
 }
 
 #ifdef __x86_64__
 static __inline long long
 _mm_cvtsi128_si64x (__m128i __A)
 {
-  return __builtin_ia32_movdq2q ((__v2di)__A);
+  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
 }
 #endif
 
 static __inline __m128d
 _mm_add_pd (__m128d __A, __m128d __B)
 {

@@ -1377,14 +1374,14 @@ _mm_mfence (void)
 static __inline __m128i
 _mm_cvtsi32_si128 (int __A)
 {
-  return (__m128i) __builtin_ia32_loadd (&__A);
+  return _mm_set_epi32 (0, 0, 0, __A);
 }
 
 #ifdef __x86_64__
 static __inline __m128i
 _mm_cvtsi64x_si128 (long long __A)
 {
-  return (__m128i) __builtin_ia32_movq2dq (__A);
+  return _mm_set_epi64x (0, __A);
 }
 #endif
config/i386/i386.c:

@@ -12265,9 +12265,6 @@ enum ix86_builtins
   IX86_BUILTIN_LOADDQU,
   IX86_BUILTIN_STOREDQU,
-  IX86_BUILTIN_MOVQ,
-  IX86_BUILTIN_LOADD,
-  IX86_BUILTIN_STORED,
 
   IX86_BUILTIN_PACKSSWB,
   IX86_BUILTIN_PACKSSDW,

@@ -12498,8 +12495,6 @@ enum ix86_builtins
   IX86_BUILTIN_MASKMOVDQU,
   IX86_BUILTIN_MOVMSKPD,
   IX86_BUILTIN_PMOVMSKB128,
-  IX86_BUILTIN_MOVQ2DQ,
-  IX86_BUILTIN_MOVDQ2Q,
 
   IX86_BUILTIN_PACKSSWB128,
   IX86_BUILTIN_PACKSSDW128,

@@ -12607,6 +12602,7 @@ enum ix86_builtins
   IX86_BUILTIN_VEC_EXT_V2DF,
   IX86_BUILTIN_VEC_EXT_V2DI,
   IX86_BUILTIN_VEC_EXT_V4SF,
+  IX86_BUILTIN_VEC_EXT_V4SI,
   IX86_BUILTIN_VEC_EXT_V8HI,
   IX86_BUILTIN_VEC_EXT_V4HI,
   IX86_BUILTIN_VEC_SET_V8HI,
@@ -13154,8 +13150,6 @@ ix86_init_mmx_sse_builtins (void)
     = build_function_type_list (V2SI_type_node,
                                 V2SF_type_node, V2SF_type_node, NULL_TREE);
   tree pint_type_node = build_pointer_type (integer_type_node);
-  tree pcint_type_node = build_pointer_type (
-    build_type_variant (integer_type_node, 1, 0));
   tree pdouble_type_node = build_pointer_type (double_type_node);
   tree pcdouble_type_node = build_pointer_type (
     build_type_variant (double_type_node, 1, 0));

@@ -13168,12 +13162,6 @@ ix86_init_mmx_sse_builtins (void)
                                 intTI_type_node, intTI_type_node, NULL_TREE);
   tree void_ftype_pcvoid
     = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
-  tree v2di_ftype_di
-    = build_function_type_list (V2DI_type_node,
-                                long_long_unsigned_type_node, NULL_TREE);
-  tree di_ftype_v2di
-    = build_function_type_list (long_long_unsigned_type_node,
-                                V2DI_type_node, NULL_TREE);
   tree v4sf_ftype_v4si
     = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
   tree v4si_ftype_v4sf

@@ -13285,13 +13273,6 @@ ix86_init_mmx_sse_builtins (void)
   tree void_ftype_pchar_v16qi
     = build_function_type_list (void_type_node,
                                 pchar_type_node, V16QI_type_node, NULL_TREE);
-  tree v4si_ftype_pcint
-    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
-  tree void_ftype_pcint_v4si
-    = build_function_type_list (void_type_node,
-                                pcint_type_node, V4SI_type_node, NULL_TREE);
-  tree v2di_ftype_v2di
-    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
   tree float80_type;
   tree float128_type;

@@ -13479,8 +13460,6 @@ ix86_init_mmx_sse_builtins (void)
   /* SSE2 */
   def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
-  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
-  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);

@@ -13534,10 +13513,7 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
-  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
-  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
   def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);

@@ -13622,6 +13598,11 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
               ftype, IX86_BUILTIN_VEC_EXT_V4SF);
 
+  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
+                                    integer_type_node, NULL_TREE);
+  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
+               ftype, IX86_BUILTIN_VEC_EXT_V4SI);
+
   ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                     integer_type_node, NULL_TREE);
   def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
@@ -14399,13 +14380,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     case IX86_BUILTIN_LOADDQU:
       return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
-    case IX86_BUILTIN_LOADD:
-      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
     case IX86_BUILTIN_STOREDQU:
       return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
-    case IX86_BUILTIN_STORED:
-      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
 
     case IX86_BUILTIN_MONITOR:
       arg0 = TREE_VALUE (arglist);

@@ -14447,6 +14423,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     case IX86_BUILTIN_VEC_EXT_V2DF:
     case IX86_BUILTIN_VEC_EXT_V2DI:
     case IX86_BUILTIN_VEC_EXT_V4SF:
+    case IX86_BUILTIN_VEC_EXT_V4SI:
     case IX86_BUILTIN_VEC_EXT_V8HI:
     case IX86_BUILTIN_VEC_EXT_V4HI:
       return ix86_expand_vec_ext_builtin (arglist, target);

@@ -14480,8 +14457,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
     if (d->code == fcode)
       return ix86_expand_sse_comi (d, arglist, target);
 
-  /* @@@ Should really do something sensible here.  */
-  return 0;
+  gcc_unreachable ();
 }
 
 /* Store OPERAND to the memory after reload is completed.  This means

@@ -16402,6 +16378,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
         }
       vec = tmp;
       use_vec_extr = true;
+      elt = 0;
       break;
 
     case V4SImode:

@@ -16431,6 +16408,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
            }
          vec = tmp;
          use_vec_extr = true;
+         elt = 0;
        }
      else
        {
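The two added elt = 0; assignments implement the ChangeLog note "extract element 0 after shuffling": for V4SF/V4SI the expander first shuffles the requested element into lane 0, so the subsequent vec_extract has to read lane 0 rather than the original index. Expressed with intrinsics, the sequence generated for, say, element 2 of a V4SI is roughly the following sketch (illustration only, not the emitted RTL):

    #include <emmintrin.h>

    int extract_lane2 (__m128i v)
    {
      __m128i t = _mm_shuffle_epi32 (v, 2);  /* bring lane 2 into lane 0 */
      return _mm_cvtsi128_si32 (t);          /* then extract lane 0      */
    }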
config/i386/sse.md:

@@ -1067,16 +1067,18 @@
 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
 ;; alternatives pretty much forces the MMX alternative to be chosen.
 (define_insn "*sse_concatv2sf"
-  [(set (match_operand:V2SF 0 "register_operand"     "=x,*y")
+  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SF
-         (match_operand:SF 1 "register_operand"      " 0, 0")
-         (match_operand:SF 2 "register_operand"      " x,*y")))]
+         (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+         (match_operand:SF 2 "vector_move_operand"  " x,C,*y, C")))]
   "TARGET_SSE"
   "@
    unpcklps\t{%2, %0|%0, %2}
-   punpckldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog,mmxcvt")
-   (set_attr "mode" "V4SF,DI")])
+   movss\t{%1, %0|%0, %1}
+   punpckldq\t{%2, %0|%0, %2}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+   (set_attr "mode" "V4SF,SF,DI,DI")])
 
 (define_insn "*sse_concatv4sf"
   [(set (match_operand:V4SF 0 "register_operand"     "=x,x")
@@ -2671,7 +2673,7 @@
        (zero_extend:SI
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
-           (parallel [(match_operand:SI 2 "const_0_to_7_operand" "0")]))))]
+           (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
   "TARGET_SSE2"
   "pextrw\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sselog")
@@ -2865,48 +2867,6 @@
   operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
 })
 
-(define_expand "sse2_loadq"
-  [(set (match_operand:V2DI 0 "register_operand" "")
-       (vec_merge:V2DI
-         (vec_duplicate:V2DI
-           (match_operand:DI 1 "nonimmediate_operand" ""))
-         (match_dup 2)
-         (const_int 1)))]
-  "TARGET_SSE"
-  "operands[2] = CONST0_RTX (V2DImode);")
-
-(define_insn "*sse2_loadq"
-  [(set (match_operand:V2DI 0 "register_operand"       "=Y,?Y,Y,x")
-       (vec_merge:V2DI
-         (vec_duplicate:V2DI
-           (match_operand:DI 1 "nonimmediate_operand" " m,*y,Y,0"))
-         (match_operand:V2DI 2 "vector_move_operand"  " C, C,0,x")
-         (const_int 1)))]
-  "TARGET_SSE && !TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movq2dq\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}
-   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}"
-  [(set_attr "type" "ssemov,ssemov,ssemov,sselog")
-   (set_attr "mode" "TI,TI,TI,V4SF")])
-
-(define_insn "*sse2_loadq_rex64"
-  [(set (match_operand:V2DI 0 "register_operand"       "=x,?x,?x,x")
-       (vec_merge:V2DI
-         (vec_duplicate:V2DI
-           (match_operand:DI 1 "nonimmediate_operand" " m,*y, r,x"))
-         (match_operand:V2DI 2 "vector_move_operand"  " C, C, C,0")
-         (const_int 1)))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movq2dq\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "TI")])
-
 (define_insn "*vec_dupv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
        (vec_duplicate:V4SI