Commit 1c47af84 by Richard Henderson Committed by Richard Henderson

emmintrin.h (_mm_loadh_pd): Don't cast pointer arg to __v2si.

        * config/i386/emmintrin.h (_mm_loadh_pd): Don't cast pointer arg
        to __v2si.
        (_mm_storeh_pd, _mm_loadl_pd, _mm_storel_pd): Likewise.
        * config/i386/i386.c (ix86_init_mmx_sse_builtins): Use double* or
        const double* for __builtin_ia32_loadhpd, __builtin_ia32_loadlpd,
        __builtin_ia32_storehpd, __builtin_ia32_storelpd.
        (ix86_expand_builtin): Update to match.
        (ix86_expand_vector_init): Use sse2_loadlpd.
        * config/i386/i386.md (vec_setv2df): Use sse2_loadlpd, sse2_loadhpd.
        (vec_extractv2df): Use sse2_storelpd, sse2_storehpd.
        (sse2_storehpd, sse2_loadhpd, sse2_storelpd, sse2_loadlpd): New.
        (sse2_movhpd): Remove.

From-SVN: r92536
parent 5e5f01b9
2004-12-22 Richard Henderson <rth@redhat.com>
* config/i386/emmintrin.h (_mm_loadh_pd): Don't cast pointer arg
to __v2si.
(_mm_storeh_pd, _mm_loadl_pd, _mm_storel_pd): Likewise.
* config/i386/i386.c (ix86_init_mmx_sse_builtins): Use double* or
const double* for __builtin_ia32_loadhpd, __builtin_ia32_loadlpd,
__builtin_ia32_storehpd, __builtin_ia32_storelpd.
(ix86_expand_builtin): Update to match.
(ix86_expand_vector_init): Use sse2_loadlpd.
* config/i386/i386.md (vec_setv2df): Use sse2_loadlpd, sse2_loadhpd.
(vec_extractv2df): Use sse2_storelpd, sse2_storehpd.
(sse2_storehpd, sse2_loadhpd, sse2_storelpd, sse2_loadlpd): New.
(sse2_movhpd): Remove.
2004-12-23 Alan Modra <amodra@bigpond.net.au>
PR target/18751
......
......@@ -937,25 +937,25 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B)
static __inline __m128d
_mm_loadh_pd (__m128d __A, double const *__B)
{
return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
}
static __inline void
_mm_storeh_pd (double *__A, __m128d __B)
{
__builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
__builtin_ia32_storehpd (__A, (__v2df)__B);
}
static __inline __m128d
_mm_loadl_pd (__m128d __A, double const *__B)
{
return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
}
static __inline void
_mm_storel_pd (double *__A, __m128d __B)
{
__builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
__builtin_ia32_storelpd (__A, (__v2df)__B);
}
static __inline int
......
......@@ -12551,12 +12551,9 @@ ix86_init_mmx_sse_builtins (void)
V2DF_type_node, V2DF_type_node,
integer_type_node,
NULL_TREE);
tree v2df_ftype_v2df_pv2si
tree v2df_ftype_v2df_pcdouble
= build_function_type_list (V2DF_type_node,
V2DF_type_node, pv2si_type_node, NULL_TREE);
tree void_ftype_pv2si_v2df
= build_function_type_list (void_type_node,
pv2si_type_node, V2DF_type_node, NULL_TREE);
V2DF_type_node, pcdouble_type_node, NULL_TREE);
tree void_ftype_pdouble_v2df
= build_function_type_list (void_type_node,
pdouble_type_node, V2DF_type_node, NULL_TREE);
......@@ -12858,10 +12855,10 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREHPD);
def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORELPD);
def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
......@@ -13405,8 +13402,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_LOADLPD:
icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
: fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
: fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
: CODE_FOR_sse2_movsd);
: fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
: CODE_FOR_sse2_loadlpd);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
......@@ -13430,12 +13427,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IX86_BUILTIN_STOREHPS:
case IX86_BUILTIN_STORELPS:
case IX86_BUILTIN_STOREHPD:
case IX86_BUILTIN_STORELPD:
icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
: fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
: fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
: CODE_FOR_sse2_movsd);
: CODE_FOR_sse_movlps);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
......@@ -13451,7 +13444,28 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (! pat)
return 0;
emit_insn (pat);
return 0;
return const0_rtx;
case IX86_BUILTIN_STOREHPD:
case IX86_BUILTIN_STORELPD:
icode = (fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_storehpd
: CODE_FOR_sse2_storelpd);
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
pat = GEN_FCN (icode) (op0, op1);
if (! pat)
return 0;
emit_insn (pat);
return const0_rtx;
case IX86_BUILTIN_MOVNTPS:
return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
......@@ -15189,24 +15203,29 @@ ix86_expand_vector_init (rtx target, rtx vals)
/* ... values where only first field is non-constant are best loaded
from the pool and overwritten via move later. */
if (!i)
if (i == 0)
{
rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
GET_MODE_INNER (mode), 0);
op = force_reg (mode, op);
XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
switch (GET_MODE (target))
{
case V2DFmode:
emit_insn (gen_sse2_movsd (target, target, op));
break;
case V4SFmode:
case V2DFmode:
emit_insn (gen_sse2_loadlpd (target, target, XVECEXP (vals, 0, 0)));
break;
case V4SFmode:
{
/* ??? We can represent this better. */
rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
GET_MODE_INNER (mode), 0);
op = force_reg (mode, op);
emit_insn (gen_sse_movss (target, target, op));
break;
default:
break;
}
break;
default:
break;
}
return;
}
......
......@@ -4734,16 +4734,10 @@
switch (INTVAL (operands[2]))
{
case 0:
emit_insn (gen_sse2_movsd (operands[0], operands[0],
simplify_gen_subreg (V2DFmode, operands[1],
DFmode, 0)));
emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1]));
break;
case 1:
{
rtx op1 = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
emit_insn (gen_sse2_unpcklpd (operands[0], operands[0], op1));
}
emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1]));
break;
default:
abort ();
......@@ -4760,14 +4754,10 @@
switch (INTVAL (operands[2]))
{
case 0:
emit_move_insn (operands[0], gen_lowpart (DFmode, operands[1]));
emit_insn (gen_sse2_storelpd (operands[0], operands[1]));
break;
case 1:
{
rtx dest = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
emit_insn (gen_sse2_unpckhpd (dest, operands[1], operands[1]));
}
emit_insn (gen_sse2_storehpd (operands[0], operands[1]));
break;
default:
abort ();
......@@ -23731,17 +23721,103 @@
[(set_attr "type" "ssemov")
(set_attr "mode" "TI")])
(define_insn "sse2_movhpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
(vec_merge:V2DF
(match_operand:V2DF 1 "nonimmediate_operand" "0,0")
(match_operand:V2DF 2 "nonimmediate_operand" "m,x")
(const_int 1)))]
"TARGET_SSE2 && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
"movhpd\t{%2, %0|%0, %2}"
;; Store the high double of the source vector into the double destination.
(define_insn "sse2_storehpd"
[(set (match_operand:DF 0 "nonimmediate_operand" "=m,Y,Y")
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o")
(parallel [(const_int 1)])))]
"TARGET_SSE2"
"@
movhpd\t{%1, %0|%0, %1}
unpckhpd\t%0, %0
#"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
;; After reload, extracting element 1 of an in-memory V2DF into a register
;; is rewritten as a plain DFmode move from byte offset 8 of that memory.
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(vec_select:DF
(match_operand:V2DF 1 "memory_operand" "")
(parallel [(const_int 1)])))]
"TARGET_SSE2 && reload_completed"
[(const_int 0)]
{
emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8));
DONE;
})
;; Load the high double of the target vector from the source scalar.
(define_insn "sse2_loadhpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,o")
(vec_concat:V2DF
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
(parallel [(const_int 0)]))
(match_operand:DF 2 "nonimmediate_operand" " m,Y,Y")))]
"TARGET_SSE2"
"@
movhpd\t{%2, %0|%0, %2}
unpcklpd\t{%2, %0|%0, %2}
#"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
;; After reload, replacing element 1 of an in-memory V2DF with a register
;; scalar (element 0 is kept, via match_dup 0) is rewritten as a plain
;; DFmode move to byte offset 8 of that memory.
(define_split
[(set (match_operand:V2DF 0 "memory_operand" "")
(vec_concat:V2DF
(vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
(match_operand:DF 1 "register_operand" "")))]
"TARGET_SSE2 && reload_completed"
[(const_int 0)]
{
emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]);
DONE;
})
;; Store the low double of the source vector into the double destination.
;; The preparation code always emits a plain DFmode move of the low part of
;; operand 1 and finishes with DONE, so the RTL template is never emitted
;; itself.  It selects element 0 so that it describes the same (low) element
;; the body actually moves.
(define_expand "sse2_storelpd"
[(set (match_operand:DF 0 "nonimmediate_operand" "")
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand" "")
(parallel [(const_int 0)])))]
"TARGET_SSE2"
{
operands[1] = gen_lowpart (DFmode, operands[1]);
emit_move_insn (operands[0], operands[1]);
DONE;
})
;; Load the low double of the target vector from the source scalar.
(define_insn "sse2_loadlpd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
(vec_concat:V2DF
(match_operand:DF 2 "nonimmediate_operand" " m,Y,Y")
(vec_select:DF
(match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
(parallel [(const_int 1)]))))]
"TARGET_SSE2"
"@
movlpd\t{%2, %0|%0, %2}
movsd\t{%2, %0|%0, %2}
movlpd\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
;; Merge the low part of the source vector into the low part of the target.
(define_insn "sse2_movsd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
(vec_merge:V2DF
(match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
(match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y")
(const_int 2)))]
"TARGET_SSE2"
"@movsd\t{%2, %0|%0, %2}
movlpd\t{%2, %0|%0, %2}
movlpd\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF,V2DF,V2DF")])
(define_expand "sse2_loadsd"
[(match_operand:V2DF 0 "register_operand" "")
(match_operand:DF 1 "memory_operand" "")]
......@@ -23763,24 +23839,6 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF")])
;; ??? We ought to be using ix86_binary_operator_ok on this pattern, so
;; that we enforce the whole matching memory thing through combine et al.
;; But that requires that things be set up properly when invoked via an
;; intrinsic, which we don't do. Which leads to instantiate virtual regs
;; lossage, as seen compiling gcc.dg/i386-sse-2.c for x86_64 at -O0.
(define_insn "sse2_movsd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
(vec_merge:V2DF
(match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
(match_operand:V2DF 2 "nonimmediate_operand" "x,m,x")
(const_int 2)))]
"TARGET_SSE2"
"@movsd\t{%2, %0|%0, %2}
movlpd\t{%2, %0|%0, %2}
movlpd\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "DF,V2DF,V2DF")])
(define_insn "sse2_storesd"
[(set (match_operand:DF 0 "memory_operand" "=m")
(vec_select:DF
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment