Commit ab8efbd8 by Richard Henderson, committed by Richard Henderson

i386.c (ix86_prepare_sse_fp_compare_args): Split ...

        * config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
        (ix86_expand_sse_fp_minmax): ... from ...
        (ix86_expand_fp_movcc): ... here.
        (ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc.
        * config/i386/i386-protos.h: Update.
        * config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New.
        (sse_setccsf, sse_setccdf): Allow before reload.
        (movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove.
        (movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove.
        (ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New.
        * config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New.
        (anddf3, nanddf3, iordf3, xordf3): New.

From-SVN: r98068
--- gcc/ChangeLog
+++ gcc/ChangeLog
+2005-04-12  Richard Henderson  <rth@redhat.com>
+
+	* config/i386/i386.c (ix86_prepare_sse_fp_compare_args): Split ...
+	(ix86_expand_sse_fp_minmax): ... from ...
+	(ix86_expand_fp_movcc): ... here.
+	(ix86_expand_sse_movcc): Rewrite from ix86_split_sse_movcc.
+	* config/i386/i386-protos.h: Update.
+	* config/i386/i386.md (UNSPEC_IEEE_MIN, UNSPEC_IEEE_MAX): New.
+	(sse_setccsf, sse_setccdf): Allow before reload.
+	(movsfcc_1_sse_min, movsfcc_1_sse_max, movsfcc_1_sse): Remove.
+	(movdfcc_1_sse_min, movdfcc_1_sse_max, movdfcc_1_sse): Remove.
+	(ieee_sminsf3, ieee_smaxsf3, ieee_smindf3, ieee_smaxdf3): New.
+	* config/i386/sse.md (andsf3, nandsf3, iorsf3, xorsf3): New.
+	(anddf3, nanddf3, iordf3, xordf3): New.
+
 2005-04-12  Jeff Law  <law@redhat.com>
 
 	* Makefile.in (OBJS-common): Add tree-ssa-uncprop.o.
--- gcc/config/i386/i386-protos.h
+++ gcc/config/i386/i386-protos.h
@@ -150,7 +150,6 @@ extern void ix86_expand_branch (enum rtx_code, rtx);
 extern int ix86_expand_setcc (enum rtx_code, rtx);
 extern int ix86_expand_int_movcc (rtx[]);
 extern int ix86_expand_fp_movcc (rtx[]);
-extern void ix86_split_sse_movcc (rtx[]);
 extern int ix86_expand_int_addcc (rtx[]);
 extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
 extern void x86_initialize_trampoline (rtx, rtx, rtx);
--- gcc/config/i386/i386.c
+++ gcc/config/i386/i386.c
@@ -10025,97 +10025,213 @@ ix86_expand_int_movcc (rtx operands[])
   return 1; /* DONE */
 }
 
-int
-ix86_expand_fp_movcc (rtx operands[])
-{
-  enum machine_mode mode = GET_MODE (operands[0]);
-  enum rtx_code code = GET_CODE (operands[1]);
-  rtx tmp, compare_op, second_test, bypass_test;
-
-  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
-    {
-      rtx cmp_op0, cmp_op1, if_true, if_false;
-      rtx clob;
-      enum machine_mode vmode, cmode;
-      bool is_minmax = false;
-
-      cmp_op0 = ix86_compare_op0;
-      cmp_op1 = ix86_compare_op1;
-      if_true = operands[2];
-      if_false = operands[3];
-
-      /* Since we've no cmove for sse registers, don't force bad register
-         allocation just to gain access to it.  Deny movcc when the
-         comparison mode doesn't match the move mode.  */
-      cmode = GET_MODE (cmp_op0);
-      if (cmode == VOIDmode)
-        cmode = GET_MODE (cmp_op1);
-      if (cmode != mode)
-        return 0;
-
-      /* We have no LTGT as an operator.  We could implement it with
-         NE & ORDERED, but this requires an extra temporary.  It's
-         not clear that it's worth it.  */
-      if (code == LTGT || code == UNEQ)
-        return 0;
-
-      /* Massage condition to satisfy sse_comparison_operator.  Try
-         to canonicalize the destination operand to be first in the
-         comparison - this helps reload to avoid extra moves.  */
-      if (!sse_comparison_operator (operands[1], VOIDmode)
-          || (COMMUTATIVE_P (operands[1])
-              && rtx_equal_p (operands[0], cmp_op1)))
-        {
-          tmp = cmp_op0;
-          cmp_op0 = cmp_op1;
-          cmp_op1 = tmp;
-          code = swap_condition (code);
-        }
-
-      /* Detect conditional moves that exactly match min/max operational
-         semantics.  Note that this is IEEE safe, as long as we don't
-         interchange the operands.  Which is why we keep this in the form
-         if an IF_THEN_ELSE instead of reducing to SMIN/SMAX.  */
-      if ((code == LT || code == UNGE) && REG_P (cmp_op0) && REG_P (cmp_op1))
-        {
-          if (((cmp_op0 == if_true && cmp_op1 == if_false)
-               || (cmp_op0 == if_false && cmp_op1 == if_true)))
-            {
-              is_minmax = true;
-              if (code == UNGE)
-                {
-                  code = LT;
-                  tmp = if_true;
-                  if_true = if_false;
-                  if_false = tmp;
-                }
-            }
-        }
-
-      if (mode == SFmode)
-        vmode = V4SFmode;
-      else if (mode == DFmode)
-        vmode = V2DFmode;
-      else
-        gcc_unreachable ();
-
-      cmp_op0 = force_reg (mode, cmp_op0);
-      if (!nonimmediate_operand (cmp_op1, mode))
-        cmp_op1 = force_reg (mode, cmp_op1);
-
-      tmp = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
-      gcc_assert (sse_comparison_operator (tmp, VOIDmode));
-
-      tmp = gen_rtx_IF_THEN_ELSE (mode, tmp, if_true, if_false);
-      tmp = gen_rtx_SET (VOIDmode, operands[0], tmp);
-
-      if (!is_minmax)
-        {
-          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (vmode));
-          tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
-        }
-
-      emit_insn (tmp);
+/* Swap, force into registers, or otherwise massage the two operands
+   to an sse comparison with a mask result.  Thus we differ a bit from
+   ix86_prepare_fp_compare_args which expects to produce a flags result.
+
+   The DEST operand exists to help determine whether to commute commutative
+   operators.  The POP0/POP1 operands are updated in place.  The new
+   comparison code is returned, or UNKNOWN if not implementable.  */
+
+static enum rtx_code
+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
+                                  rtx *pop0, rtx *pop1)
+{
+  rtx tmp;
+
+  switch (code)
+    {
+    case LTGT:
+    case UNEQ:
+      /* We have no LTGT as an operator.  We could implement it with
+         NE & ORDERED, but this requires an extra temporary.  It's
+         not clear that it's worth it.  */
+      return UNKNOWN;
+
+    case LT:
+    case LE:
+    case UNGT:
+    case UNGE:
+      /* These are supported directly.  */
+      break;
+
+    case EQ:
+    case NE:
+    case UNORDERED:
+    case ORDERED:
+      /* For commutative operators, try to canonicalize the destination
+         operand to be first in the comparison - this helps reload to
+         avoid extra moves.  */
+      if (!dest || !rtx_equal_p (dest, *pop1))
+        break;
+      /* FALLTHRU */
+
+    case GE:
+    case GT:
+    case UNLE:
+    case UNLT:
+      /* These are not supported directly.  Swap the comparison operands
+         to transform into something that is supported.  */
+      tmp = *pop0;
+      *pop0 = *pop1;
+      *pop1 = tmp;
+      code = swap_condition (code);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  return code;
+}
+
+/* Detect conditional moves that exactly match min/max operational
+   semantics.  Note that this is IEEE safe, as long as we don't
+   interchange the operands.
+
+   Returns FALSE if this conditional move doesn't match a MIN/MAX,
+   and TRUE if the operation is successful and instructions are emitted.  */
+
+static bool
+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
+                           rtx cmp_op1, rtx if_true, rtx if_false)
+{
+  enum machine_mode mode;
+  bool is_min;
+  rtx tmp;
+
+  if (code == LT)
+    ;
+  else if (code == UNGE)
+    {
+      code = LT;
+      tmp = if_true;
+      if_true = if_false;
+      if_false = tmp;
+    }
+  else
+    return false;
+
+  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
+    is_min = true;
+  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
+    is_min = false;
+  else
+    return false;
+
+  mode = GET_MODE (dest);
+
+  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
+     but MODE may be a vector mode and thus not appropriate.  */
+  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
+    {
+      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
+      rtvec v;
+
+      if_true = force_reg (mode, if_true);
+      v = gen_rtvec (2, if_true, if_false);
+      tmp = gen_rtx_UNSPEC (mode, v, u);
+    }
+  else
+    {
+      code = is_min ? SMIN : SMAX;
+      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
+    }
+
+  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
+  return true;
+}
+
+static void
+ix86_expand_sse_movcc (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
+                       rtx op_true, rtx op_false)
+{
+  enum machine_mode mode = GET_MODE (dest);
+  rtx t1, t2, t3, x;
+
+  cmp_op0 = force_reg (mode, cmp_op0);
+  if (!nonimmediate_operand (cmp_op1, mode))
+    cmp_op1 = force_reg (mode, cmp_op1);
+
+  if (optimize
+      || reg_overlap_mentioned_p (dest, op_true)
+      || reg_overlap_mentioned_p (dest, op_false))
+    t1 = gen_reg_rtx (mode);
+  else
+    t1 = dest;
+
+  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
+  gcc_assert (sse_comparison_operator (x, VOIDmode));
+  emit_insn (gen_rtx_SET (VOIDmode, t1, x));
+
+  if (op_false == CONST0_RTX (mode))
+    {
+      op_true = force_reg (mode, op_true);
+      x = gen_rtx_AND (mode, t1, op_true);
+      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+    }
+  else if (op_true == CONST0_RTX (mode))
+    {
+      op_false = force_reg (mode, op_false);
+      x = gen_rtx_NOT (mode, t1);
+      x = gen_rtx_AND (mode, x, op_false);
+      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+    }
+  else
+    {
+      op_true = force_reg (mode, op_true);
+      op_false = force_reg (mode, op_false);
+
+      t2 = gen_reg_rtx (mode);
+      if (optimize)
+        t3 = gen_reg_rtx (mode);
+      else
+        t3 = dest;
+
+      x = gen_rtx_AND (mode, op_true, t1);
+      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
+
+      x = gen_rtx_NOT (mode, t1);
+      x = gen_rtx_AND (mode, x, op_false);
+      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
+
+      x = gen_rtx_IOR (mode, t3, t2);
+      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
+    }
+}
+
+int
+ix86_expand_fp_movcc (rtx operands[])
+{
+  enum machine_mode mode = GET_MODE (operands[0]);
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx tmp, compare_op, second_test, bypass_test;
+
+  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
+    {
+      enum machine_mode cmode;
+
+      /* Since we've no cmove for sse registers, don't force bad register
+         allocation just to gain access to it.  Deny movcc when the
+         comparison mode doesn't match the move mode.  */
+      cmode = GET_MODE (ix86_compare_op0);
+      if (cmode == VOIDmode)
+        cmode = GET_MODE (ix86_compare_op1);
+      if (cmode != mode)
+        return 0;
+
+      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
+                                               &ix86_compare_op0,
+                                               &ix86_compare_op1);
+      if (code == UNKNOWN)
+        return 0;
+
+      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
+                                     ix86_compare_op1, operands[2],
+                                     operands[3]))
+        return 1;
+
+      ix86_expand_sse_movcc (operands[0], code, ix86_compare_op0,
+                             ix86_compare_op1, operands[2], operands[3]);
 
       return 1;
     }
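The general case of ix86_expand_sse_movcc above computes the select entirely with mask arithmetic: the compare produces an all-ones or all-zeros mask, and AND/ANDN/IOR then pick one arm. A minimal scalar C model of that lowering (the helper names are illustrative only, not GCC code):

```c
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static uint32_t float_bits (float x) { uint32_t b; memcpy (&b, &x, sizeof b); return b; }
static float bits_float (uint32_t b) { float x; memcpy (&x, &b, sizeof x); return x; }

/* r = (a < b) ? t : f, lowered the way the expander's general case
   does it: mask from the compare, then t2 = t & mask,
   t3 = f & ~mask, r = t3 | t2.  */
static float
sse_movcc_model (float a, float b, float t, float f)
{
  uint32_t mask = (a < b) ? 0xffffffffu : 0u;   /* cmpltss */
  uint32_t t2 = float_bits (t) & mask;          /* andps   */
  uint32_t t3 = float_bits (f) & ~mask;         /* andnps  */
  return bits_float (t3 | t2);                  /* orps    */
}

int
main (void)
{
  /* 2 < 3 holds, so the true arm (1.0) is selected.  */
  printf ("%g\n", sse_movcc_model (2.0f, 3.0f, 1.0f, -1.0f));
  return 0;
}
```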
@@ -10166,100 +10282,6 @@ ix86_expand_fp_movcc (rtx operands[])
   return 1;
 }
 
-void
-ix86_split_sse_movcc (rtx operands[])
-{
-  rtx dest, scratch, cmp, op_true, op_false, x;
-  enum machine_mode mode, vmode;
-
-  /* Note that the operator CMP has been set up with matching constraints
-     such that dest is valid for the comparison.  Unless one of the true
-     or false operands are zero, the true operand has already been placed
-     in SCRATCH.  */
-  dest = operands[0];
-  scratch = operands[1];
-  op_true = operands[2];
-  op_false = operands[3];
-  cmp = operands[4];
-
-  mode = GET_MODE (dest);
-  vmode = GET_MODE (scratch);
-
-  /* We need to make sure that the TRUE and FALSE operands are out of the
-     way of the destination.  Marking the destination earlyclobber doesn't
-     work, since we want matching constraints for the actual comparison, so
-     at some point we always wind up having to do a copy ourselves here.
-     We very much prefer the TRUE value to be in SCRATCH.  If it turns out
-     that FALSE overlaps DEST, then we invert the comparison so that we
-     still only have to do one move.  */
-  if (rtx_equal_p (op_false, dest))
-    {
-      enum rtx_code code;
-
-      if (rtx_equal_p (op_true, dest))
-	{
-	  /* ??? Really ought not happen.  It means some optimizer managed
-	     to prove the operands were identical, but failed to fold the
-	     conditional move to a straight move.  Do so here, because
-	     otherwise we'll generate incorrect code.  And since they're
-	     both already in the destination register, nothing to do.  */
-	  return;
-	}
-
-      x = gen_rtx_REG (mode, REGNO (scratch));
-      emit_move_insn (x, op_false);
-      op_false = op_true;
-      op_true = x;
-
-      code = GET_CODE (cmp);
-      code = reverse_condition_maybe_unordered (code);
-      cmp = gen_rtx_fmt_ee (code, mode, XEXP (cmp, 0), XEXP (cmp, 1));
-    }
-  else if (op_true == CONST0_RTX (mode))
-    ;
-  else if (op_false == CONST0_RTX (mode) && !rtx_equal_p (op_true, dest))
-    ;
-  else
-    {
-      x = gen_rtx_REG (mode, REGNO (scratch));
-      emit_move_insn (x, op_true);
-      op_true = x;
-    }
-
-  emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
-  dest = simplify_gen_subreg (vmode, dest, mode, 0);
-
-  if (op_false == CONST0_RTX (mode))
-    {
-      op_true = simplify_gen_subreg (vmode, op_true, mode, 0);
-      x = gen_rtx_AND (vmode, dest, op_true);
-      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-    }
-  else
-    {
-      op_false = simplify_gen_subreg (vmode, op_false, mode, 0);
-
-      if (op_true == CONST0_RTX (mode))
-	{
-	  x = gen_rtx_NOT (vmode, dest);
-	  x = gen_rtx_AND (vmode, x, op_false);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-	}
-      else
-	{
-	  x = gen_rtx_AND (vmode, scratch, dest);
-	  emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
-
-	  x = gen_rtx_NOT (vmode, dest);
-	  x = gen_rtx_AND (vmode, x, op_false);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-
-	  x = gen_rtx_IOR (vmode, dest, scratch);
-	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
-	}
-    }
-}
-
 /* Expand conditional increment or decrement using adb/sbb instructions.
    The default case using setcc followed by the conditional move can be
    done by generic code.  */
--- gcc/config/i386/i386.md
+++ gcc/config/i386/i386.md
@@ -104,6 +104,8 @@
    ; Generic math support
    (UNSPEC_COPYSIGN		50)
+   (UNSPEC_IEEE_MIN		51)	; not commutative
+   (UNSPEC_IEEE_MAX		52)	; not commutative
 
    ; x87 Floating point
    (UNSPEC_SIN			60)
@@ -12462,17 +12464,14 @@
 ;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
 ;; subsequent logical operations are used to imitate conditional moves.
 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
-;; it directly.  Further holding this value in pseudo register might bring
-;; problem in implicit normalization in spill code.
-;; So we don't define FLOAT_STORE_FLAG_VALUE and create these
-;; instructions after reload by splitting the conditional move patterns.
+;; it directly.
 
 (define_insn "*sse_setccsf"
   [(set (match_operand:SF 0 "register_operand" "=x")
	(match_operator:SF 1 "sse_comparison_operator"
	  [(match_operand:SF 2 "register_operand" "0")
	   (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE && reload_completed"
+  "TARGET_SSE"
   "cmp%D1ss\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "SF")])
@@ -12482,7 +12481,7 @@
	(match_operator:DF 1 "sse_comparison_operator"
	  [(match_operand:DF 2 "register_operand" "0")
	   (match_operand:DF 3 "nonimmediate_operand" "Ym")]))]
-  "TARGET_SSE2 && reload_completed"
+  "TARGET_SSE2"
   "cmp%D1sd\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "DF")])
@@ -17707,51 +17706,6 @@
"(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
;; These versions of min/max are aware of the instruction's behavior
;; wrt -0.0 and NaN inputs. If we don't care about either, then we
;; should have used the smin/smax expanders in the first place.
(define_insn "*movsfcc_1_sse_min"
[(set (match_operand:SF 0 "register_operand" "=x")
(if_then_else:SF
(lt:SF (match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(match_dup 2)))]
"TARGET_SSE_MATH"
"minss\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "SF")])
(define_insn "*movsfcc_1_sse_max"
[(set (match_operand:SF 0 "register_operand" "=x")
(if_then_else:SF
(lt:SF (match_operand:SF 2 "nonimmediate_operand" "xm")
(match_operand:SF 1 "nonimmediate_operand" "0"))
(match_dup 1)
(match_dup 2)))]
"TARGET_SSE_MATH"
"maxss\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "SF")])
(define_insn_and_split "*movsfcc_1_sse"
[(set (match_operand:SF 0 "register_operand" "=x,x,x")
(if_then_else:SF
(match_operator:SF 4 "sse_comparison_operator"
[(match_operand:SF 5 "register_operand" "0,0,0")
(match_operand:SF 6 "nonimmediate_operand" "xm,xm,xm")])
(match_operand:SF 2 "reg_or_0_operand" "C,x,x")
(match_operand:SF 3 "reg_or_0_operand" "x,C,x")))
(clobber (match_scratch:V4SF 1 "=&x,&x,&x"))]
"TARGET_SSE_MATH"
"#"
"&& reload_completed"
[(const_int 0)]
{
ix86_split_sse_movcc (operands);
DONE;
})
(define_insn "*movsfcc_1_387"
[(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f")
(if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
@@ -17776,51 +17730,6 @@
"(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
"if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
;; These versions of min/max are aware of the instruction's behavior
;; wrt -0.0 and NaN inputs. If we don't care about either, then we
;; should have used the smin/smax expanders in the first place.
(define_insn "*movdfcc_1_sse_min"
[(set (match_operand:DF 0 "register_operand" "=x")
(if_then_else:DF
(lt:DF (match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(match_dup 2)))]
"TARGET_SSE2 && TARGET_SSE_MATH"
"minsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
(define_insn "*movdfcc_1_sse_max"
[(set (match_operand:DF 0 "register_operand" "=x")
(if_then_else:DF
(lt:DF (match_operand:DF 2 "nonimmediate_operand" "xm")
(match_operand:DF 1 "nonimmediate_operand" "0"))
(match_dup 1)
(match_dup 2)))]
"TARGET_SSE2 && TARGET_SSE_MATH"
"maxsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
(define_insn_and_split "*movdfcc_1_sse"
[(set (match_operand:DF 0 "register_operand" "=x,x,x")
(if_then_else:DF
(match_operator:DF 4 "sse_comparison_operator"
[(match_operand:DF 5 "register_operand" "0,0,0")
(match_operand:DF 6 "nonimmediate_operand" "xm,xm,xm")])
(match_operand:DF 2 "reg_or_0_operand" "C,x,x")
(match_operand:DF 3 "reg_or_0_operand" "x,C,x")))
(clobber (match_scratch:V2DF 1 "=&x,&x,&x"))]
"TARGET_SSE2 && TARGET_SSE_MATH"
"#"
"&& reload_completed"
[(const_int 0)]
{
ix86_split_sse_movcc (operands);
DONE;
})
(define_insn "*movdfcc_1"
[(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f")
(if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
@@ -17935,6 +17844,52 @@
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
;; These versions of the min/max patterns implement exactly the operations
;; min = (op1 < op2 ? op1 : op2)
;; max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
(define_insn "*ieee_sminsf3"
[(set (match_operand:SF 0 "register_operand" "=x")
(unspec:SF [(match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MIN))]
"TARGET_SSE_MATH"
"minss\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "SF")])
(define_insn "*ieee_smaxsf3"
[(set (match_operand:SF 0 "register_operand" "=x")
(unspec:SF [(match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MAX))]
"TARGET_SSE_MATH"
"maxss\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "SF")])
(define_insn "*ieee_smindf3"
[(set (match_operand:DF 0 "register_operand" "=x")
(unspec:DF [(match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MIN))]
"TARGET_SSE2 && TARGET_SSE_MATH"
"minsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
(define_insn "*ieee_smaxdf3"
[(set (match_operand:DF 0 "register_operand" "=x")
(unspec:DF [(match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "nonimmediate_operand" "xm")]
UNSPEC_IEEE_MAX))]
"TARGET_SSE2 && TARGET_SSE_MATH"
"maxsd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
(set_attr "mode" "DF")])
;; Conditional addition patterns
(define_expand "addqicc"
[(match_operand:QI 0 "register_operand" "")
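Per the comment in the hunk above, these unspec patterns pin down the hardware semantics min = (op1 < op2 ? op1 : op2): operand order matters for -0.0 and NaN, which is exactly why UNSPEC_IEEE_MIN/MAX are marked not commutative. A scalar C sketch of that behavior (model code, not part of the compiler):

```c
#include <math.h>
#include <stdio.h>

/* Model of minss: the second operand wins whenever the compare is
   false or unordered.  */
static float
minss_model (float op1, float op2)
{
  return op1 < op2 ? op1 : op2;
}

int
main (void)
{
  /* -0.0 and +0.0 compare equal, so operand order picks the sign.  */
  printf ("%g %g\n",
          minss_model (-0.0f, 0.0f),    /* 0  */
          minss_model (0.0f, -0.0f));   /* -0 */

  /* NaN < x is false, so a NaN first operand yields op2, not NaN.  */
  printf ("%d\n", isnan (minss_model (nanf (""), 1.0f)));  /* 0 */
  return 0;
}
```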
--- gcc/config/i386/sse.md
+++ gcc/config/i386/sse.md
@@ -773,6 +773,47 @@
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
;; Also define scalar versions. These are used for abs, neg, and
;; conditional move. Using subregs into vector modes causes regiser
;; allocation lossage. These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
(define_insn "*andsf3"
[(set (match_operand:SF 0 "register_operand" "=x")
(and:SF (match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "register_operand" "x")))]
"TARGET_SSE"
"andps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
(define_insn "*nandsf3"
[(set (match_operand:SF 0 "register_operand" "=x")
(and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
(match_operand:SF 2 "register_operand" "x")))]
"TARGET_SSE"
"andnps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
(define_insn "*iorsf3"
[(set (match_operand:SF 0 "register_operand" "=x")
(ior:SF (match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "register_operand" "x")))]
"TARGET_SSE"
"orps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
(define_insn "*xorsf3"
[(set (match_operand:SF 0 "register_operand" "=x")
(xor:SF (match_operand:SF 1 "register_operand" "0")
(match_operand:SF 2 "register_operand" "x")))]
"TARGET_SSE"
"xorps\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
@@ -1624,7 +1665,7 @@
   [(set (match_operand:V2DF 0 "register_operand" "=x")
	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V4SFmode, operands)"
+  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
   "andpd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sselog")
    (set_attr "mode" "V2DF")])
@@ -1670,6 +1711,47 @@
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
;; Also define scalar versions. These are used for abs, neg, and
;; conditional move. Using subregs into vector modes causes regiser
;; allocation lossage. These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
(define_insn "*anddf3"
[(set (match_operand:DF 0 "register_operand" "=x")
(and:DF (match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "register_operand" "x")))]
"TARGET_SSE2"
"andpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
(define_insn "*nanddf3"
[(set (match_operand:DF 0 "register_operand" "=x")
(and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
(match_operand:DF 2 "register_operand" "x")))]
"TARGET_SSE2"
"andnpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
(define_insn "*iordf3"
[(set (match_operand:DF 0 "register_operand" "=x")
(ior:DF (match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "register_operand" "x")))]
"TARGET_SSE2"
"orpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
(define_insn "*xordf3"
[(set (match_operand:DF 0 "register_operand" "=x")
(xor:DF (match_operand:DF 1 "register_operand" "0")
(match_operand:DF 2 "register_operand" "x")))]
"TARGET_SSE2"
"xorpd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
(set_attr "mode" "V2DF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
......
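Taken together, the new expander path and these scalar logic patterns mean a float conditional move is now fully expressed before reload rather than split afterward. As a usage-level sketch (expected shape of the output, not verified assembly): with -mfpmath=sse, the first function below should expand through ix86_expand_sse_movcc into a cmpss/andps/andnps/orps sequence, while the second matches ix86_expand_sse_fp_minmax and becomes a single minss:

```c
/* r = a < b ? t : f: masked select via ix86_expand_sse_movcc.  */
float
select_sf (float a, float b, float t, float f)
{
  return a < b ? t : f;
}

/* Exactly the LT min shape detected by ix86_expand_sse_fp_minmax;
   without -ffast-math it stays the IEEE-safe UNSPEC_IEEE_MIN (minss).  */
float
min_sf (float a, float b)
{
  return a < b ? a : b;
}
```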