Commit ab8efbd8 by Richard Henderson Committed by Richard Henderson

i386.c (ix86_prepare_sse_fp_compare_args): Split ...

        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
        (ix86_expand_sse_fp_minmax): ... from ...
        (ix86_expand_fp_movcc): ... here.
        (ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc.
        * config/i386/i386-protos.h: Update.
        * config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New.
        (sse_setccsf, sse_setccdf): Allow before reload.
        (movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove.
        (movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove.
        (ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New.
        * config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New.
        (anddf3, nanddf3, iordf3, xordf3): New.

From-SVN: r98068
parent 0b90f180
2005-04-12 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
(ix86_expand_sse_fp_minmax): ... from ...
(ix86_expand_fp_movcc): ... here.
(ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc.
* config/i386/i386-protos.h: Update.
* config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New.
(sse_setccsf, sse_setccdf): Allow before reload.
(movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove.
(movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove.
(ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New.
* config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New.
(anddf3, nanddf3, iordf3, xordf3): New.
2005-04-12 Jeff Law <law@redhat.com>
* Makefile.in (OBJS-common): Add tree-ssa-uncprop.o.
......
...@@ -150,7 +150,6 @@ extern void ix86_expand_branch (enum rtx_code, rtx); ...@@ -150,7 +150,6 @@ extern void ix86_expand_branch (enum rtx_code, rtx);
extern int ix86_expand_setcc (enum rtx_code, rtx); extern int ix86_expand_setcc (enum rtx_code, rtx);
extern int ix86_expand_int_movcc (rtx[]); extern int ix86_expand_int_movcc (rtx[]);
extern int ix86_expand_fp_movcc (rtx[]); extern int ix86_expand_fp_movcc (rtx[]);
extern void ix86_split_sse_movcc (rtx[]);
extern int ix86_expand_int_addcc (rtx[]); extern int ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int); extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
extern void x86_initialize_trampoline (rtx, rtx, rtx); extern void x86_initialize_trampoline (rtx, rtx, rtx);
......
...@@ -10025,6 +10025,180 @@ ix86_expand_int_movcc (rtx operands[]) ...@@ -10025,6 +10025,180 @@ ix86_expand_int_movcc (rtx operands[])
return 1; /* DONE */ return 1; /* DONE */
} }
/* Swap, force into registers, or otherwise massage the two operands
to an sse comparison with a mask result. Thus we differ a bit from
ix86_prepare_fp_compare_args which expects to produce a flags result.
The DEST operand exists to help determine whether to commute commutative
operators. The POP0/POP1 operands are updated in place. The new
comparison code is returned, or UNKNOWN if not implementable. */
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  bool swap_operands = false;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly; nothing to do.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* For commutative operators, canonicalize the destination operand
	 to be first in the comparison - this helps reload to avoid
	 extra moves.  Only swap when DEST matches the second operand.  */
      swap_operands = dest && rtx_equal_p (dest, *pop1);
      break;

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly.  Swap the comparison operands
	 to transform into something that is supported.  */
      swap_operands = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (swap_operands)
    {
      rtx scratch = *pop0;
      *pop0 = *pop1;
      *pop1 = scratch;
      code = swap_condition (code);
    }

  return code;
}
/* Detect conditional moves that exactly match min/max operational
semantics. Note that this is IEEE safe, as long as we don't
interchange the operands.
Returns FALSE if this conditional move doesn't match a MIN/MAX,
and TRUE if the operation is successful and instructions are emitted. */
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  rtx rhs;
  bool is_min;

  /* Only LT (and its inverse, UNGE, after swapping the move arms)
     corresponds to the min/max instruction semantics.  */
  switch (code)
    {
    case LT:
      break;

    case UNGE:
      {
	rtx swap = if_true;
	if_true = if_false;
	if_false = swap;
      }
      break;

    default:
      return false;
    }

  /* The comparison operands must be exactly the two move arms:
     (a < b ? a : b) is min, (a < b ? b : a) is max.  */
  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (flag_finite_math_only && flag_unsafe_math_optimizations)
    /* Fast-math: the generic SMIN/SMAX RTL is fine.  */
    rhs = gen_rtx_fmt_ee (is_min ? SMIN : SMAX, mode, if_true, if_false);
  else
    {
      /* IEEE semantics: wrap in a non-commutative UNSPEC so the
	 operands are never interchanged behind our back.  */
      if_true = force_reg (mode, if_true);
      rhs = gen_rtx_UNSPEC (mode, gen_rtvec (2, if_true, if_false),
			    is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, rhs));
  return true;
}
/* Expand an SSE conditional move DEST = (CMP_OP0 code CMP_OP1)
   ? OP_TRUE : OP_FALSE using a comparison mask and bitwise logic:
   the cmpss/cmpsd result is all-ones or all-zeros, so the select is
   (mask & true) | (~mask & false).  */

static void
ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		       rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx mask, cmp, sel;

  cmp_op0 = force_reg (mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, mode))
    cmp_op1 = force_reg (mode, cmp_op1);

  /* Compute the mask into DEST directly when that is safe; otherwise
     (or when optimizing, to keep pseudos for CSE) use a fresh pseudo.  */
  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    mask = gen_reg_rtx (mode);
  else
    mask = dest;

  cmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
  gcc_assert (sse_comparison_operator (cmp, VOIDmode));
  emit_insn (gen_rtx_SET (VOIDmode, mask, cmp));

  if (op_false == CONST0_RTX (mode))
    {
      /* Select against zero: just mask & true.  */
      op_true = force_reg (mode, op_true);
      sel = gen_rtx_AND (mode, mask, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, sel));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      /* Zero on true: just ~mask & false (andnps/andnpd form).  */
      op_false = force_reg (mode, op_false);
      sel = gen_rtx_AND (mode, gen_rtx_NOT (mode, mask), op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, sel));
    }
  else
    {
      /* General case: (true & mask) | (~mask & false).  */
      rtx true_part, false_dst;

      op_true = force_reg (mode, op_true);
      op_false = force_reg (mode, op_false);

      true_part = gen_reg_rtx (mode);
      false_dst = optimize ? gen_reg_rtx (mode) : dest;

      emit_insn (gen_rtx_SET (VOIDmode, true_part,
			      gen_rtx_AND (mode, op_true, mask)));
      emit_insn (gen_rtx_SET (VOIDmode, false_dst,
			      gen_rtx_AND (mode,
					   gen_rtx_NOT (mode, mask),
					   op_false)));
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      gen_rtx_IOR (mode, false_dst, true_part)));
    }
}
int int
ix86_expand_fp_movcc (rtx operands[]) ix86_expand_fp_movcc (rtx operands[])
{ {
...@@ -10034,88 +10208,30 @@ ix86_expand_fp_movcc (rtx operands[]) ...@@ -10034,88 +10208,30 @@ ix86_expand_fp_movcc (rtx operands[])
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
{ {
rtx cmp_op0, cmp_op1, if_true, if_false; enum machine_mode cmode;
rtx clob;
enum machine_mode vmode, cmode;
bool is_minmax = false;
cmp_op0 = ix86_compare_op0;
cmp_op1 = ix86_compare_op1;
if_true = operands[2];
if_false = operands[3];
/* Since we've no cmove for sse registers, don't force bad register /* Since we've no cmove for sse registers, don't force bad register
allocation just to gain access to it. Deny movcc when the allocation just to gain access to it. Deny movcc when the
comparison mode doesn't match the move mode. */ comparison mode doesn't match the move mode. */
cmode = GET_MODE (cmp_op0); cmode = GET_MODE (ix86_compare_op0);
if (cmode == VOIDmode) if (cmode == VOIDmode)
cmode = GET_MODE (cmp_op1); cmode = GET_MODE (ix86_compare_op1);
if (cmode != mode) if (cmode != mode)
return 0; return 0;
/* We have no LTGT as an operator. We could implement it with code = ix86_prepare_sse_fp_compare_args (operands[0], code,
NE & ORDERED, but this requires an extra temporary. It's &ix86_compare_op0,
not clear that it's worth it. */ &ix86_compare_op1);
if (code == LTGT || code == UNEQ) if (code == UNKNOWN)
return 0; return 0;
/* Massage condition to satisfy sse_comparison_operator. Try if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
to canonicalize the destination operand to be first in the ix86_compare_op1, operands[2],
comparison - this helps reload to avoid extra moves. */ operands[3]))
if (!sse_comparison_operator (operands[1], VOIDmode) return 1;
|| (COMMUTATIVE_P (operands[1])
&& rtx_equal_p (operands[0], cmp_op1)))
{
tmp = cmp_op0;
cmp_op0 = cmp_op1;
cmp_op1 = tmp;
code = swap_condition (code);
}
/* Detect conditional moves that exactly match min/max operational
semantics. Note that this is IEEE safe, as long as we don't
interchange the operands. Which is why we keep this in the form
if an IF_THEN_ELSE instead of reducing to SMIN/SMAX. */
if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
{
if (((cmp_op0 == if_true && cmp_op1 == if_false)
|| (cmp_op0 == if_false && cmp_op1 == if_true)))
{
is_minmax = true;
if (code == UNGE)
{
code = LT;
tmp = if_true;
if_true = if_false;
if_false = tmp;
}
}
}
if (mode == SFmode)
vmode = V4SFmode;
else if (mode == DFmode)
vmode = V2DFmode;
else
gcc_unreachable ();
cmp_op0 = force_reg (mode, cmp_op0);
if (!nonimmediate_operand (cmp_op1, mode))
cmp_op1 = force_reg (mode, cmp_op1);
tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
gcc_assert (sse_comparison_operator (tmp, VOIDmode));
tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
if (!is_minmax)
{
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
}
emit_insn (tmp); ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0,
ix86_compare_op1, operands[2], operands[3]);
return 1; return 1;
} }
...@@ -10166,100 +10282,6 @@ ix86_expand_fp_movcc (rtx operands[]) ...@@ -10166,100 +10282,6 @@ ix86_expand_fp_movcc (rtx operands[])
return 1; return 1;
} }
/* Split a pre-reload SSE conditional-move pattern into the comparison
   and the mask/merge logic operations, working in the vector mode of
   SCRATCH so the full 128-bit logical instructions can be used.
   operands[0] = dest, operands[1] = vector-mode scratch,
   operands[2] = true value, operands[3] = false value,
   operands[4] = the comparison rtx.  */
void
ix86_split_sse_movcc (rtx operands[])
{
  rtx dest, scratch, cmp, op_true, op_false, x;
  enum machine_mode mode, vmode;

  /* Note that the operator CMP has been set up with matching constraints
     such that dest is valid for the comparison.  Unless one of the true
     or false operands are zero, the true operand has already been placed
     in SCRATCH.  */
  dest = operands[0];
  scratch = operands[1];
  op_true = operands[2];
  op_false = operands[3];
  cmp = operands[4];

  mode = GET_MODE (dest);
  vmode = GET_MODE (scratch);

  /* We need to make sure that the TRUE and FALSE operands are out of the
     way of the destination.  Marking the destination earlyclobber doesn't
     work, since we want matching constraints for the actual comparison, so
     at some point we always wind up having to do a copy ourselves here.

     We very much prefer the TRUE value to be in SCRATCH.  If it turns out
     that FALSE overlaps DEST, then we invert the comparison so that we
     still only have to do one move.  */
  if (rtx_equal_p (op_false, dest))
    {
      enum rtx_code code;

      if (rtx_equal_p (op_true, dest))
	{
	  /* ??? Really ought not happen.  It means some optimizer managed
	     to prove the operands were identical, but failed to fold the
	     conditional move to a straight move.  Do so here, because
	     otherwise we'll generate incorrect code.  And since they're
	     both already in the destination register, nothing to do.  */
	  return;
	}

      /* Save FALSE into SCRATCH, swap the arms, and invert the test
	 (maybe_unordered so NaN behavior is preserved).  */
      x = gen_rtx_REG (mode, REGNO (scratch));
      emit_move_insn (x, op_false);
      op_false = op_true;
      op_true = x;

      code = GET_CODE (cmp);
      code = reverse_condition_maybe_unordered (code);
      cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
    }
  else if (op_true == CONST0_RTX (mode))
    ;
  else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
    ;
  else
    {
      /* General case: get TRUE out of the way into SCRATCH.  */
      x = gen_rtx_REG (mode, REGNO (scratch));
      emit_move_insn (x, op_true);
      op_true = x;
    }

  /* Emit the comparison; DEST now holds the all-ones/all-zeros mask.  */
  emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
  /* Do the merge logic in the vector mode of SCRATCH.  */
  dest = simplify_gen_subreg (vmode, dest, mode, 0);

  if (op_false == CONST0_RTX (mode))
    {
      /* dest = mask & true.  */
      op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
      x = gen_rtx_AND (vmode, dest, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      op_false = simplify_gen_subreg (vmode, op_false, mode, 0);

      if (op_true == CONST0_RTX (mode))
	{
	  /* dest = ~mask & false.  */
	  x = gen_rtx_NOT (vmode, dest);
	  x = gen_rtx_AND (vmode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
      else
	{
	  /* dest = (true & mask) | (~mask & false), with the true half
	     accumulated in SCRATCH.  */
	  x = gen_rtx_AND (vmode, scratch, dest);
	  emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
	  x = gen_rtx_NOT (vmode, dest);
	  x = gen_rtx_AND (vmode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	  x = gen_rtx_IOR (vmode, dest, scratch);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}
/* Expand conditional increment or decrement using adb/sbb instructions. /* Expand conditional increment or decrement using adb/sbb instructions.
The default case using setcc followed by the conditional move can be The default case using setcc followed by the conditional move can be
done by generic code. */ done by generic code. */
......
...@@ -104,6 +104,8 @@ ...@@ -104,6 +104,8 @@
; Generic math support ; Generic math support
(UNSPEC_COPYSIGN 50) (UNSPEC_COPYSIGN 50)
(UNSPEC_IEEE_MIN 51) ; not commutative
(UNSPEC_IEEE_MAX 52) ; not commutative
; x87 Floating point ; x87 Floating point
(UNSPEC_SIN 60) (UNSPEC_SIN 60)
...@@ -12462,17 +12464,14 @@ ...@@ -12462,17 +12464,14 @@
;; The SSE store flag instructions saves 0 or 0xffffffff to the result. ;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
;; subsequent logical operations are used to imitate conditional moves. ;; subsequent logical operations are used to imitate conditional moves.
;; 0xffffffff is NaN, but not in normalized form, so we can't represent ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
;; it directly. Further holding this value in pseudo register might bring ;; it directly.
;; problem in implicit normalization in spill code.
;; So we don't define FLOAT_STORE_FLAG_VALUE and create these
;; instructions after reload by splitting the conditional move patterns.
(define_insn "*sse_setccsf" (define_insn "*sse_setccsf"
[(set (match_operand:SF 0 "register_operand" "=x") [(set (match_operand:SF 0 "register_operand" "=x")
(match_operator:SF 1 "sse_comparison_operator" (match_operator:SF 1 "sse_comparison_operator"
[(match_operand:SF 2 "register_operand" "0") [(match_operand:SF 2 "register_operand" "0")
(match_operand:SF 3 "nonimmediate_operand" "xm")]))] (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
"TARGET_SSE && reload_completed" "TARGET_SSE"
"cmp%D1ss\t{%3, %0|%0, %3}" "cmp%D1ss\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp") [(set_attr "type" "ssecmp")
(set_attr "mode" "SF")]) (set_attr "mode" "SF")])
...@@ -12482,7 +12481,7 @@ ...@@ -12482,7 +12481,7 @@
(match_operator:DF 1 "sse_comparison_operator" (match_operator:DF 1 "sse_comparison_operator"
[(match_operand:DF 2 "register_operand" "0") [(match_operand:DF 2 "register_operand" "0")
(match_operand:DF 3 "nonimmediate_operand" "Ym")]))] (match_operand:DF 3 "nonimmediate_operand" "Ym")]))]
"TARGET_SSE2 && reload_completed" "TARGET_SSE2"
"cmp%D1sd\t{%3, %0|%0, %3}" "cmp%D1sd\t{%3, %0|%0, %3}"
[(set_attr "type" "ssecmp") [(set_attr "type" "ssecmp")
(set_attr "mode" "DF")]) (set_attr "mode" "DF")])
...@@ -17707,51 +17706,6 @@ ...@@ -17707,51 +17706,6 @@
"(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH" "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
;; These versions of min/max are aware of the instruction's behavior
;; wrt -0.0 and NaN inputs.  If we don't care about either, then we
;; should have used the smin/smax expanders in the first place.

;; (a < b ? a : b) with operand 1 tied to the output: exactly minss.
(define_insn "*movsfcc_1_sse_min"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(if_then_else:SF
	 (lt:SF (match_operand:SF 1 "register_operand" "0")
		(match_operand:SF 2 "nonimmediate_operand" "xm"))
	 (match_dup 1)
	 (match_dup 2)))]
  "TARGET_SSE_MATH"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; (b < a ? a : b) with operand 1 tied to the output: exactly maxss.
(define_insn "*movsfcc_1_sse_max"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(if_then_else:SF
	 (lt:SF (match_operand:SF 2 "nonimmediate_operand" "xm")
		(match_operand:SF 1 "nonimmediate_operand" "0"))
	 (match_dup 1)
	 (match_dup 2)))]
  "TARGET_SSE_MATH"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; General SF conditional move: kept as IF_THEN_ELSE with a V4SF
;; scratch until after reload, then split into cmpss + mask logic
;; by ix86_split_sse_movcc.
(define_insn_and_split "*movsfcc_1_sse"
  [(set (match_operand:SF 0 "register_operand" "=x,x,x")
	(if_then_else:SF
	 (match_operator:SF 4 "sse_comparison_operator"
	  [(match_operand:SF 5 "register_operand" "0,0,0")
	   (match_operand:SF 6 "nonimmediate_operand" "xm,xm,xm")])
	 (match_operand:SF 2 "reg_or_0_operand" "C,x,x")
	 (match_operand:SF 3 "reg_or_0_operand" "x,C,x")))
   (clobber (match_scratch:V4SF 1 "=&x,&x,&x"))]
  "TARGET_SSE_MATH"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  ix86_split_sse_movcc (operands);
  DONE;
})
(define_insn "*movsfcc_1_387" (define_insn "*movsfcc_1_387"
[(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f") [(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f")
(if_then_else:SF (match_operator 1 "fcmov_comparison_operator" (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
...@@ -17776,51 +17730,6 @@ ...@@ -17776,51 +17730,6 @@
"(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)" "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;") "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
;; These versions of min/max are aware of the instruction's behavior
;; wrt -0.0 and NaN inputs.  If we don't care about either, then we
;; should have used the smin/smax expanders in the first place.

;; (a < b ? a : b) with operand 1 tied to the output: exactly minsd.
(define_insn "*movdfcc_1_sse_min"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(if_then_else:DF
	 (lt:DF (match_operand:DF 1 "register_operand" "0")
		(match_operand:DF 2 "nonimmediate_operand" "xm"))
	 (match_dup 1)
	 (match_dup 2)))]
  "TARGET_SSE2 && TARGET_SSE_MATH"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; (b < a ? a : b) with operand 1 tied to the output: exactly maxsd.
(define_insn "*movdfcc_1_sse_max"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(if_then_else:DF
	 (lt:DF (match_operand:DF 2 "nonimmediate_operand" "xm")
		(match_operand:DF 1 "nonimmediate_operand" "0"))
	 (match_dup 1)
	 (match_dup 2)))]
  "TARGET_SSE2 && TARGET_SSE_MATH"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; General DF conditional move: kept as IF_THEN_ELSE with a V2DF
;; scratch until after reload, then split into cmpsd + mask logic
;; by ix86_split_sse_movcc.
(define_insn_and_split "*movdfcc_1_sse"
  [(set (match_operand:DF 0 "register_operand" "=x,x,x")
	(if_then_else:DF
	 (match_operator:DF 4 "sse_comparison_operator"
	  [(match_operand:DF 5 "register_operand" "0,0,0")
	   (match_operand:DF 6 "nonimmediate_operand" "xm,xm,xm")])
	 (match_operand:DF 2 "reg_or_0_operand" "C,x,x")
	 (match_operand:DF 3 "reg_or_0_operand" "x,C,x")))
   (clobber (match_scratch:V2DF 1 "=&x,&x,&x"))]
  "TARGET_SSE2 && TARGET_SSE_MATH"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  ix86_split_sse_movcc (operands);
  DONE;
})
(define_insn "*movdfcc_1" (define_insn "*movdfcc_1"
[(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f") [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator" (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
...@@ -17935,6 +17844,52 @@ ...@@ -17935,6 +17844,52 @@
[(set_attr "type" "sseadd") [(set_attr "type" "sseadd")
(set_attr "mode" "DF")]) (set_attr "mode" "DF")])
;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.  The UNSPEC wrapper prevents the optimizers
;; from treating the operands as interchangeable.

(define_insn "*ieee_sminsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(unspec:SF [(match_operand:SF 1 "register_operand" "0")
		    (match_operand:SF 2 "nonimmediate_operand" "xm")]
	 UNSPEC_IEEE_MIN))]
  "TARGET_SSE_MATH"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

(define_insn "*ieee_smaxsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(unspec:SF [(match_operand:SF 1 "register_operand" "0")
		    (match_operand:SF 2 "nonimmediate_operand" "xm")]
	 UNSPEC_IEEE_MAX))]
  "TARGET_SSE_MATH"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

(define_insn "*ieee_smindf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DF 1 "register_operand" "0")
		    (match_operand:DF 2 "nonimmediate_operand" "xm")]
	 UNSPEC_IEEE_MIN))]
  "TARGET_SSE2 && TARGET_SSE_MATH"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

(define_insn "*ieee_smaxdf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(unspec:DF [(match_operand:DF 1 "register_operand" "0")
		    (match_operand:DF 2 "nonimmediate_operand" "xm")]
	 UNSPEC_IEEE_MAX))]
  "TARGET_SSE2 && TARGET_SSE_MATH"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
;; Conditional addition patterns ;; Conditional addition patterns
(define_expand "addqicc" (define_expand "addqicc"
[(match_operand:QI 0 "register_operand" "") [(match_operand:QI 0 "register_operand" "")
......
...@@ -773,6 +773,47 @@ ...@@ -773,6 +773,47 @@
[(set_attr "type" "sselog") [(set_attr "type" "sselog")
(set_attr "mode" "V4SF")]) (set_attr "mode" "V4SF")])
;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
;; (Note mode attribute V4SF: the whole vector register is operated on.)

(define_insn "*andsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(and:SF (match_operand:SF 1 "register_operand" "0")
		(match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; and-not: (~op1) & op2, matching andnps operand order.
(define_insn "*nandsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
		(match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

(define_insn "*iorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(ior:SF (match_operand:SF 1 "register_operand" "0")
		(match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

(define_insn "*xorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
	(xor:SF (match_operand:SF 1 "register_operand" "0")
		(match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; Parallel single-precision floating point conversion operations ;; Parallel single-precision floating point conversion operations
...@@ -1624,7 +1665,7 @@ ...@@ -1624,7 +1665,7 @@
[(set (match_operand:V2DF 0 "register_operand" "=x") [(set (match_operand:V2DF 0 "register_operand" "=x")
(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0") (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm")))] (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2 && ix86_binary_operator_ok (AND, V4SFmode, operands)" "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
"andpd\t{%2, %0|%0, %2}" "andpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog") [(set_attr "type" "sselog")
(set_attr "mode" "V2DF")]) (set_attr "mode" "V2DF")])
...@@ -1670,6 +1711,47 @@ ...@@ -1670,6 +1711,47 @@
[(set_attr "type" "sselog") [(set_attr "type" "sselog")
(set_attr "mode" "V2DF")]) (set_attr "mode" "V2DF")])
;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
;; (Note mode attribute V2DF: the whole vector register is operated on.)

(define_insn "*anddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(and:DF (match_operand:DF 1 "register_operand" "0")
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; and-not: (~op1) & op2, matching andnpd operand order.
(define_insn "*nanddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

(define_insn "*iordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(ior:DF (match_operand:DF 1 "register_operand" "0")
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

(define_insn "*xordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(xor:DF (match_operand:DF 1 "register_operand" "0")
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; Parallel double-precision floating point conversion operations ;; Parallel double-precision floating point conversion operations
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment