Commit 1194ca05 by Jan Hubicka Committed by Jan Hubicka

i386.c (classify_argument): Pass MMX arguments in memory

	* i386.c (classify_argument): Pass MMX arguments in memory.
	(ix86_expand_builtin): Expand proper address mode for clflush.
	* i386.md (movdqa): Fix typo.
	(sse2_clflush): Accept DImode addresses.

	* xmmintrin.h (_mm_sqrt_sd): Accept two arguments.
	(_mm_max_sd): Fix pasto.
	(_mm_storeh_pd, _mm_storel_pd): Fix.

	* i386.c (bdesc_comi): Fix to match specification.
	(ix86_expand_sse_comi): Emit the comparison properly.
	* i386.md (sse_comi, sse2_comi, sse_ucomi, sse2_ucomi):
	Do not use comparison operator.
	(vnmaskcmp): Fix template.

	* xmmintrin.h (_mm_cvtps_pi16): Fix.

From-SVN: r58321
parent 0d8b229b
Sun Oct 20 00:31:31 CEST 2002 Jan Hubicka <jh@suse.cz>
* i386.c (classify_argument): Pass MMX arguments in memory.
(ix86_expand_builtin): Expand proper address mode for clflush.
* i386.md (movdqa): Fix typo.
(sse2_clflush): Accept DImode addresses.
* xmmintrin.h (_mm_sqrt_sd): Accept two arguments.
(_mm_max_sd): Fix pasto.
(_mm_storeh_pd, _mm_storel_pd): Fix.
* i386.c (bdesc_comi): Fix to match specification.
(ix86_expand_sse_comi): Emit the comparison properly.
* i386.md (sse_comi, sse2_comi, sse_ucomi, sse2_ucomi):
Do not use comparison operator.
(vnmaskcmp): Fix template.
* xmmintrin.h (_mm_cvtps_pi16): Fix.
2002-10-19 Sebastian Pop <s.pop@laposte.net>
* dependence.c : Removed.
......
......@@ -1922,8 +1922,7 @@ classify_argument (mode, type, classes, bit_offset)
case V2SImode:
case V4HImode:
case V8QImode:
classes[0] = X86_64_SSE_CLASS;
return 1;
return 0;
case BLKmode:
case VOIDmode:
return 0;
......@@ -11755,30 +11754,30 @@ struct builtin_description
static const struct builtin_description bdesc_comi[] =
{
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
{ MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
{ MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_2arg[] =
......@@ -12881,14 +12880,14 @@ ix86_expand_sse_comi (d, arglist, target)
op1 = copy_to_mode_reg (mode1, op1);
op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
pat = GEN_FCN (d->icode) (op0, op1, op2);
pat = GEN_FCN (d->icode) (op0, op1);
if (! pat)
return 0;
emit_insn (pat);
emit_insn (gen_rtx_SET (VOIDmode,
gen_rtx_STRICT_LOW_PART (VOIDmode, target),
gen_rtx_fmt_ee (comparison, QImode,
gen_rtx_REG (CCmode, FLAGS_REG),
SET_DEST (pat),
const0_rtx)));
return SUBREG_REG (target);
......@@ -13349,9 +13348,8 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
arg0 = TREE_VALUE (arglist);
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
icode = CODE_FOR_sse2_clflush;
mode0 = insn_data[icode].operand[0].mode;
if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
op0 = copy_to_mode_reg (mode0, op0);
if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
op0 = copy_to_mode_reg (Pmode, op0);
emit_insn (gen_sse2_clflush (op0));
return 0;
......
......@@ -17828,7 +17828,7 @@
(match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
"TARGET_SSE"
;; @@@ let's try to use movaps here.
"movdga\t{%1, %0|%0, %1}"
"movdqa\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF")])
......@@ -18883,13 +18883,12 @@
(define_insn "sse_comi"
[(set (reg:CCFP 17)
(match_operator:CCFP 2 "sse_comparison_operator"
[(vec_select:SF
(compare:CCFP (vec_select:SF
(match_operand:V4SF 0 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:SF
(match_operand:V4SF 1 "register_operand" "x")
(parallel [(const_int 0)]))]))]
(parallel [(const_int 0)]))))]
"TARGET_SSE"
"comiss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecmp")
......@@ -18897,13 +18896,12 @@
(define_insn "sse_ucomi"
[(set (reg:CCFPU 17)
(match_operator:CCFPU 2 "sse_comparison_operator"
[(vec_select:SF
(compare:CCFPU (vec_select:SF
(match_operand:V4SF 0 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:SF
(match_operand:V4SF 1 "register_operand" "x")
(parallel [(const_int 0)]))]))]
(parallel [(const_int 0)]))))]
"TARGET_SSE"
"ucomiss\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecmp")
......@@ -20409,7 +20407,12 @@
[(match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "x")])))]
"TARGET_SSE2"
"cmpn%D3pd\t{%2, %0|%0, %2}"
{
if (GET_CODE (operands[3]) == UNORDERED)
return "cmpordps\t{%2, %0|%0, %2}";
else
return "cmpn%D3pd\t{%2, %0|%0, %2}";
}
[(set_attr "type" "ssecmp")
(set_attr "mode" "V2DF")])
......@@ -20436,19 +20439,23 @@
(subreg:V2DI (match_dup 1) 0)
(const_int 1)))]
"TARGET_SSE2"
"cmp%D3sd\t{%2, %0|%0, %2}"
{
if (GET_CODE (operands[3]) == UNORDERED)
return "cmpordsd\t{%2, %0|%0, %2}";
else
return "cmpn%D3sd\t{%2, %0|%0, %2}";
}
[(set_attr "type" "ssecmp")
(set_attr "mode" "DF")])
(define_insn "sse2_comi"
[(set (reg:CCFP 17)
(match_operator:CCFP 2 "sse_comparison_operator"
[(vec_select:DF
(compare:CCFP (vec_select:DF
(match_operand:V2DF 0 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "x")
(parallel [(const_int 0)]))]))]
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"comisd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecmp")
......@@ -20456,13 +20463,12 @@
(define_insn "sse2_ucomi"
[(set (reg:CCFPU 17)
(match_operator:CCFPU 2 "sse_comparison_operator"
[(vec_select:DF
(compare:CCFPU (vec_select:DF
(match_operand:V2DF 0 "register_operand" "x")
(parallel [(const_int 0)]))
(vec_select:DF
(match_operand:V2DF 1 "register_operand" "x")
(parallel [(const_int 0)]))]))]
(parallel [(const_int 0)]))))]
"TARGET_SSE2"
"ucomisd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecmp")
......@@ -21630,7 +21636,7 @@
(set_attr "mode" "V2DF")])
(define_insn "sse2_clflush"
[(unspec_volatile [(match_operand:SI 0 "address_operand" "p")]
[(unspec_volatile [(match_operand 0 "address_operand" "p")]
UNSPECV_CLFLUSH)]
"TARGET_SSE2"
"clflush %0"
......
......@@ -606,7 +606,7 @@ _mm_cvtps_pi16(__m128 __A)
__v4sf __losf = __builtin_ia32_movhlps (__hisf, __hisf);
__v2si __hisi = __builtin_ia32_cvtps2pi (__hisf);
__v2si __losi = __builtin_ia32_cvtps2pi (__losf);
return (__m64) __builtin_ia32_packssdw (__losi, __hisi);
return (__m64) __builtin_ia32_packssdw (__hisi, __losi);
}
/* Convert the four SPFP values in A to four signed 8-bit integers. */
......@@ -1293,10 +1293,12 @@ _mm_sqrt_pd (__m128d __A)
return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
}
/* Return pair {sqrt (B[0]), A[1]}. */
static __inline __m128d
_mm_sqrt_sd (__m128d __A)
_mm_sqrt_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__A);
__v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
}
static __inline __m128d
......@@ -1320,7 +1322,7 @@ _mm_max_pd (__m128d __A, __m128d __B)
static __inline __m128d
_mm_max_sd (__m128d __A, __m128d __B)
{
return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
}
static __inline __m128d
......@@ -1696,10 +1698,9 @@ _mm_loadh_pd (__m128d __A, double *__B)
}
static __inline void
_mm_storeh_pd (__m128d *__A, double __B)
_mm_storeh_pd (double *__A, __m128d __B)
{
__v2df __tmp = __builtin_ia32_loadsd (&__B);
__builtin_ia32_storehpd ((__v2si *)__A, __tmp);
__builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
}
static __inline __m128d
......@@ -1709,10 +1710,9 @@ _mm_loadl_pd (__m128d __A, double *__B)
}
static __inline void
_mm_storel_pd (__m128d *__A, double __B)
_mm_storel_pd (double *__A, __m128d __B)
{
__v2df __tmp = __builtin_ia32_loadsd (&__B);
__builtin_ia32_storelpd ((__v2si *)__A, __tmp);
__builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
}
static __inline int
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment