Commit f8ca7923 by Jan Hubicka Committed by Jan Hubicka

re PR target/8322 (SSE2 intrinsics broken?)

	* i386.h (CONST_DOUBLE_OK_FOR_LETTER_P): Remove 'H'
	* i386.md (movsf*, movdf*): Use 'C' instead of 'H'
	* md.texi (machine dependent constraints): Document 'C'

	* simplify-rtx.c (simplify_subreg): Fix const_int->vector subregging.

	* i386.c (ix86_expand_vector_move): Fix.

	* i386.c (ix86_expand_builtin): Use sse2_maskmovdqu_rex64.
	* i386.md (sse2_maskmovdqu_rex64): New pattern

	PR target/8322
	* xmmintrin.h (_mm_stream_pi, _mm_stream_pd): Fix cast.
	* i386.c (ix86_init_mmx_sse_builtins): Fix type.

From-SVN: r58631
parent c95d07f8
Tue Oct 29 19:32:16 CET 2002 Jan Hubicka <jh@suse.cz>
* i386.h (CONST_DOUBLE_OK_FOR_LETTER_P): Remove 'H'
* i386.md (movsf*, movdf*): Use 'C' instead of 'H'
* md.texi (machine dependent constraints): Document 'C'
* simplify-rtx.c (simplify_subreg): Fix const_int->vector subregging.
* i386.c (ix86_expand_vector_move): Fix.
* i386.c (ix86_expand_builtin): Use sse2_maskmovdqu_rex64.
* i386.md (sse2_maskmovdqu_rex64): New pattern
PR target/8322
* xmmintrin.h (_mm_stream_pi, _mm_stream_pd): Fix cast.
* i386.c (ix86_init_mmx_sse_builtins): Fix type.
2002-10-29 Jason Thorpe <thorpej@wasabisystems.com>
* gthr-posix.h: Include <unistd.h> for feature tests.
......
......@@ -7799,14 +7799,10 @@ ix86_expand_vector_move (mode, operands)
if ((reload_in_progress | reload_completed) == 0
&& register_operand (operands[0], mode)
&& CONSTANT_P (operands[1]))
{
rtx addr = gen_reg_rtx (Pmode);
emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
operands[1] = gen_rtx_MEM (mode, addr);
}
operands[1] = force_const_mem (mode, operands[1]);
/* Make operand1 a register if it isn't already. */
if ((reload_in_progress | reload_completed) == 0
if (!no_new_pseudos
&& !register_operand (operands[0], mode)
&& !register_operand (operands[1], mode))
{
......@@ -12287,10 +12283,10 @@ ix86_init_mmx_sse_builtins ()
/* @@@ the type is bogus */
tree v4sf_ftype_v4sf_pv2si
= build_function_type_list (V4SF_type_node,
V4SF_type_node, pv2di_type_node, NULL_TREE);
V4SF_type_node, pv2si_type_node, NULL_TREE);
tree void_ftype_pv2si_v4sf
= build_function_type_list (void_type_node,
pv2di_type_node, V4SF_type_node, NULL_TREE);
pv2si_type_node, V4SF_type_node, NULL_TREE);
tree void_ftype_pfloat_v4sf
= build_function_type_list (void_type_node,
pfloat_type_node, V4SF_type_node, NULL_TREE);
......@@ -13145,7 +13141,8 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case IX86_BUILTIN_MASKMOVDQU:
icode = (fcode == IX86_BUILTIN_MASKMOVQ
? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
: CODE_FOR_sse2_maskmovdqu);
: (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
: CODE_FOR_sse2_maskmovdqu));
/* Note the arg order is different from the operand order. */
arg1 = TREE_VALUE (arglist);
arg2 = TREE_VALUE (TREE_CHAIN (arglist));
......
......@@ -1411,7 +1411,7 @@ enum reg_class
#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
((C) == 'G' ? standard_80387_constant_p (VALUE) \
: ((C) == 'H' ? standard_sse_constant_p (VALUE) : 0))
: 0)
/* A C expression that defines the optional machine-dependent
constraint letters that can be used to segregate specific types of
......
......@@ -2095,7 +2095,7 @@
(define_insn "*movsf_1"
[(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y")
(match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,H,x,xm#rf,x#rf,rm,*y,*y"))]
(match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& (reload_in_progress || reload_completed
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
......@@ -2280,7 +2280,7 @@
(define_insn "*movdf_nointeger"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m,f#Y,*r,o,Y#f,Y#f,Y#f,m")
(match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,H,Y#f,YHm#f,Y#f"))]
(match_operand:DF 1 "general_operand" "fm#Y,f#Y,G,*roF,F*r,C,Y#f,YHm#f,Y#f"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& (optimize_size || !TARGET_INTEGER_DFMODE_MOVES)
&& (reload_in_progress || reload_completed
......@@ -2341,7 +2341,7 @@
(define_insn "*movdf_integer"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#Yr,m,f#Yr,r#Yf,o,Y#rf,Y#rf,Y#rf,m")
(match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,H,Y#rf,Ym#rf,Y#rf"))]
(match_operand:DF 1 "general_operand" "fm#Yr,f#Yr,G,roF#Yf,Fr#Yf,C,Y#rf,Ym#rf,Y#rf"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& !optimize_size && TARGET_INTEGER_DFMODE_MOVES
&& (reload_in_progress || reload_completed
......@@ -20552,6 +20552,17 @@
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
;; Variant of the SSE2 maskmovdqu pattern whose store address (operand 0)
;; is a DImode register with constraint "D".  ix86_expand_builtin picks
;; this pattern instead of sse2_maskmovdqu when TARGET_64BIT is set.
;;
;; Operands 1 and 2 are V16QI registers (constraint "x"); they are wrapped
;; in UNSPEC_MASKMOV and the result is stored to the V16QI memory location
;; addressed by operand 0.
(define_insn "sse2_maskmovdqu_rex64"
[(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
(match_operand:V16QI 2 "register_operand" "x")]
UNSPEC_MASKMOV))]
;; NOTE(review): the insn condition does not itself test TARGET_64BIT even
;; though operand 0 is DImode -- confirm this is intentional.
"TARGET_SSE2"
;; @@@ check ordering of operands in intel/nonintel syntax
;; Only operands 1 and 2 are printed; operand 0 does not appear in the
;; template (presumably the instruction uses its address register
;; implicitly -- confirm against the ISA reference).
"maskmovdqu\t{%2, %1|%1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "mode" "TI")])
(define_insn "sse2_movntv2df"
[(set (match_operand:V2DF 0 "memory_operand" "=m")
(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
......
......@@ -1037,7 +1037,7 @@ _mm_prefetch (void *__P, enum _mm_hint __I)
static __inline void
_mm_stream_pi (__m64 *__P, __m64 __A)
{
__builtin_ia32_movntq (__P, (long long)__A);
__builtin_ia32_movntq ((unsigned long long *)__P, (unsigned long long)__A);
}
/* Likewise. The address must be 16-byte aligned. */
......@@ -2426,7 +2426,7 @@ _mm_stream_si128 (__m128i *__A, __m128i __B)
static __inline void
_mm_stream_pd (double *__A, __m128d __B)
{
__builtin_ia32_movntpd ((__m128d *)__A, (__v2df)__B);
__builtin_ia32_movntpd (__A, (__v2df)__B);
}
static __inline __m128i
......
......@@ -1606,6 +1606,10 @@ Second floating point register
@item c
@samp{c} register
@item C
Specifies a constant that can be easily constructed in an SSE register
without loading it from memory.
@item d
@samp{d} register
......
......@@ -2399,7 +2399,10 @@ simplify_subreg (outermode, op, innermode, byte)
/* This might fail, e.g. if taking a subreg from a SYMBOL_REF. */
/* ??? It would be nice if we could actually make such subregs
on targets that allow such relocations. */
elt = simplify_subreg (submode, op, innermode, byte);
if (byte >= GET_MODE_UNIT_SIZE (innermode))
elt = CONST0_RTX (submode);
else
elt = simplify_subreg (submode, op, innermode, byte);
if (! elt)
return NULL_RTX;
RTVEC_ELT (v, i) = elt;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment