ia64-protos.h (ia64_dconst_0_5): New.

* config/ia64/ia64-protos.h (ia64_dconst_0_5): New. (ia64_dconst_0_375): New. * config/ia64/ia64.c (ia64_override_options): Remove -minline-sqrt-min-latency warning. (ia64_dconst_0_5_rtx, ia64_dconst_0_5): New. (ia64_dconst_0_375_rtx, ia64_dconst_0_375): New. * config/ia64/ia64.md (*sqrt_approx): Remove. (sqrtsf2): Remove #if 0. (sqrtsf2_internal_thr): Rewrite and move to div.md. (sqrtdf): Remove assert. (sqrtdf2_internal_thr): Rewrite and move to div.md. (sqrtxf2): Remove #if 0. (sqrtxf2_internal_thr): Rewrite and move to div.md. * config/ia64/div.md (sqrt_approx_rf): New. (sqrtsf2_internal_thr): New implementation. (sqrtsf2_internal_lat): New. (sqrtdf2_internal_thr): New implementation. (sqrtxf2_internal): New implementation. From-SVN: r147713

ia64-protos.h (ia64_dconst_0_5): New.
* config/ia64/ia64-protos.h (ia64_dconst_0_5): New. (ia64_dconst_0_375): New. * config/ia64/ia64.c (ia64_override_options): Remove -minline-sqrt-min-latency warning. (ia64_dconst_0_5_rtx, ia64_dconst_0_5): New. (ia64_dconst_0_375_rtx, ia64_dconst_0_375): New. * config/ia64/ia64.md (*sqrt_approx): Remove. (sqrtsf2): Remove #if 0. (sqrtsf2_internal_thr): Rewrite and move to div.md. (sqrtdf): Remove assert. (sqrtdf2_internal_thr): Rewrite and move to div.md. (sqrtxf2): Remove #if 0. (sqrtxf2_internal_thr): Rewrite and move to div.md. * config/ia64/div.md (sqrt_approx_rf): New. (sqrtsf2_internal_thr): New implementation. (sqrtsf2_internal_lat): New. (sqrtdf2_internal_thr): New implementation. (sqrtxf2_internal): New implementation. From-SVN: r147713
f3a83111 · Steve Ellcey · Steve Ellcey · 1ffc7157 · f3a83111 · f3a83111
Commit f3a83111 authored May 19, 2009 by Steve Ellcey Committed by Steve Ellcey May 19, 2009
Show whitespace changes
Inline Side-by-side

Showing with 335 additions and 420 deletions

gcc/ChangeLog
+21 -0

gcc/config/ia64/div.md
+278 -0

gcc/config/ia64/ia64-protos.h
+3 -0

gcc/config/ia64/ia64.c
+29 -6

gcc/config/ia64/ia64.md
+4 -414

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2009-05-19  Steve Ellcey  <sje@cup.hp.com>
+
+	* config/ia64/ia64-protos.h (ia64_dconst_0_5): New.
+	(ia64_dconst_0_375): New.
+	* config/ia64/ia64.c (ia64_override_options): Remove
+	-minline-sqrt-min-latency warning.
+	(ia64_dconst_0_5_rtx, ia64_dconst_0_5): New.
+	(ia64_dconst_0_375_rtx, ia64_dconst_0_375): New.
+	* config/ia64/ia64.md (*sqrt_approx): Remove.
+	(sqrtsf2): Remove #if 0.
+	(sqrtsf2_internal_thr): Rewrite and move to div.md.
+	(sqrtdf): Remove assert.
+	(sqrtdf2_internal_thr): Rewrite and move to div.md.
+	(sqrtxf2): Remove #if 0.
+	(sqrtxf2_internal_thr): Rewrite and move to div.md.
+	* config/ia64/div.md (sqrt_approx_rf): New.
+	(sqrtsf2_internal_thr): New implementation.
+	(sqrtsf2_internal_lat): New.
+	(sqrtdf2_internal_thr): New implementation.
+	(sqrtxf2_internal): New implementation.
+
 2009-05-19  Francois-Xavier Coudert  <fxcoudert@gmail.com>
 	    Hans-Peter Nilsson  <hp@axis.com>


--- a/gcc/config/ia64/div.md
+++ b/gcc/config/ia64/div.md
@@ -518,3 +518,281 @@
  emit_insn (gen_truncrfxf2 (operands[0], q_res));
  DONE;
 })
+
+
+;; SQRT operations
+
+
+(define_insn "sqrt_approx_rf"  ;; Emits frsqrta on RFmode operands.
+  [(set (match_operand:RF 0 "fr_register_operand" "=f")
+                (unspec:RF [(match_operand:RF 1 "fr_reg_or_fp01_operand" "fG")]
+			   UNSPEC_FR_SQRT_RECIP_APPROX_RES))
+   (set (match_operand:BI 2 "register_operand" "=c")  ;; Operand 2: BImode result set alongside operand 0.
+        (unspec:BI [(match_dup 1)] UNSPEC_FR_SQRT_RECIP_APPROX))
+   (use (match_operand:SI 3 "const_int_operand" ""))]  ;; Operand 3: constant printed as the .s%3 suffix.
+  ""
+  "frsqrta.s%3 %0, %2 = %F1"
+  [(set_attr "itanium_class" "fmisc")
+   (set_attr "predicable" "no")])
+
+(define_expand "sqrtsf2_internal_thr"
+  [(set (match_operand:SF 0 "fr_register_operand" "")
+        (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
+  "TARGET_INLINE_SQRT"
+{
+  rtx y         = gen_reg_rtx (RFmode);
+  rtx b         = gen_reg_rtx (RFmode);
+  rtx g         = gen_reg_rtx (RFmode);
+  rtx e         = gen_reg_rtx (RFmode);
+  rtx s         = gen_reg_rtx (RFmode);
+  rtx f         = gen_reg_rtx (RFmode);
+  rtx y1        = gen_reg_rtx (RFmode);
+  rtx g1        = gen_reg_rtx (RFmode);
+  rtx h         = gen_reg_rtx (RFmode);
+  rtx d         = gen_reg_rtx (RFmode);
+  rtx g2        = gen_reg_rtx (RFmode);
+  rtx cond      = gen_reg_rtx (BImode);
+  rtx zero      = CONST0_RTX (RFmode);
+  rtx one       = CONST1_RTX (RFmode);
+  rtx c1        = ia64_dconst_0_5();
+  rtx c2        = ia64_dconst_0_375();
+  rtx reg_df_c1	= gen_reg_rtx (DFmode);
+  rtx reg_df_c2	= gen_reg_rtx (DFmode);
+  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+  rtx reg_rf_c2 = gen_reg_rtx (RFmode);
+  rtx status0   = CONST0_RTX (SImode);
+  rtx status1   = CONST1_RTX (SImode);
+  rtx trunc_sgl = CONST0_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+
+  /* Load 0.5 (c1) and 0.375 (c2) as DFmode constants, then extend them to RFmode.  */
+  emit_insn (gen_movdf (reg_df_c1, c1));
+  emit_insn (gen_movdf (reg_df_c2, c2));
+  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+  emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
+  /* Empty conversion to put the SFmode input into RFmode (b).  */
+  emit_insn (gen_extendsfrf2 (b, operands[1]));
+  /* y = approximation of 1 / sqrt (b) from sqrt_approx_rf (frsqrta, status0); also sets cond  */
+  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+  /* g = b * y				*/
+  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+  /* e = 1 - (g * y)			*/
+  emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
+  /* s = 0.5 + (0.375 * e)		*/
+  emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
+  /* f = y * e				*/
+  emit_insn (gen_mulrf3_cond (f, cond, y, e, zero, status1, trunc_off));
+  /* y1 = y + (f * s)			*/
+  emit_insn (gen_m2addrf4_cond (y1, cond, y, f, s, zero, status1, trunc_off));
+  /* g1 = single (b * y1)		*/
+  emit_insn (gen_mulrf3_cond (g1, cond, b, y1, zero, status1, trunc_sgl));
+  /* h = 0.5 * y1			*/
+  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y1, zero, status1, trunc_off));
+  /* d = b - g1 * g1			*/
+  emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
+  /* g2 = single(g1 + (d * h)); this final step passes y and status0 where the earlier steps pass zero and status1  */
+  emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h, y, status0, trunc_sgl));
+  /* Conversion of the RFmode result back into SFmode.  */
+  emit_insn (gen_truncrfsf2 (operands[0], g2));
+  DONE;
+})
+
+(define_expand "sqrtsf2_internal_lat"
+  [(set (match_operand:SF 0 "fr_register_operand" "")
+        (sqrt:SF (match_operand:SF 1 "fr_register_operand" "")))]
+  "TARGET_INLINE_SQRT"
+{
+  rtx y         = gen_reg_rtx (RFmode);
+  rtx b         = gen_reg_rtx (RFmode);
+  rtx g         = gen_reg_rtx (RFmode);
+  rtx g1        = gen_reg_rtx (RFmode);
+  rtx g2        = gen_reg_rtx (RFmode);
+  rtx e         = gen_reg_rtx (RFmode);
+  rtx s         = gen_reg_rtx (RFmode);
+  rtx f         = gen_reg_rtx (RFmode);
+  rtx f1        = gen_reg_rtx (RFmode);
+  rtx h         = gen_reg_rtx (RFmode);
+  rtx h1        = gen_reg_rtx (RFmode);
+  rtx d         = gen_reg_rtx (RFmode);
+  rtx cond      = gen_reg_rtx (BImode);
+  rtx zero      = CONST0_RTX (RFmode);
+  rtx one       = CONST1_RTX (RFmode);
+  rtx c1        = ia64_dconst_0_5();
+  rtx c2        = ia64_dconst_0_375();
+  rtx reg_df_c1	= gen_reg_rtx (DFmode);
+  rtx reg_df_c2	= gen_reg_rtx (DFmode);
+  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+  rtx reg_rf_c2 = gen_reg_rtx (RFmode);
+  rtx status0   = CONST0_RTX (SImode);
+  rtx status1   = CONST1_RTX (SImode);
+  rtx trunc_sgl = CONST0_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+
+  /* Load 0.5 (c1) and 0.375 (c2) as DFmode constants, then extend them to RFmode.  */
+  emit_insn (gen_movdf (reg_df_c1, c1));
+  emit_insn (gen_movdf (reg_df_c2, c2));
+  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+  emit_insn (gen_extenddfrf2 (reg_rf_c2, reg_df_c2));
+  /* Empty conversion to put the SFmode input into RFmode (b).  */
+  emit_insn (gen_extendsfrf2 (b, operands[1]));
+  /* y = approximation of 1 / sqrt (b) from sqrt_approx_rf (frsqrta, status0); also sets cond  */
+  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+  /* g = b * y				*/
+  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+  /* e = 1 - (g * y)			*/
+  emit_insn (gen_m2subrf4_cond (e, cond, one, g, y, zero, status1, trunc_off));
+  /* h = 0.5 * y			*/
+  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
+  /* s = 0.5 + (0.375 * e)		*/
+  emit_insn (gen_m2addrf4_cond (s, cond, reg_rf_c1, reg_rf_c2, e, zero, status1, trunc_off));
+  /* f = e * g				*/
+  emit_insn (gen_mulrf3_cond (f, cond, e, g, zero, status1, trunc_off));
+  /* g1 = single (g + (f * s))		*/
+  emit_insn (gen_m2addrf4_cond (g1, cond, g, f, s, zero, status1, trunc_sgl));
+  /* f1 = e * h				*/
+  emit_insn (gen_mulrf3_cond (f1, cond, e, h, zero, status1, trunc_off));
+  /* d = b - g1 * g1			*/
+  emit_insn (gen_m2subrf4_cond (d, cond, b, g1, g1, zero, status1, trunc_off));
+  /* h1 = h + (f1 * s)			*/
+  emit_insn (gen_m2addrf4_cond (h1, cond, h, f1, s, zero, status1, trunc_off));
+  /* g2 = single(g1 + (d * h1)); this final step passes y and status0 where the earlier steps pass zero and status1  */
+  emit_insn (gen_m2addrf4_cond (g2, cond, g1, d, h1, y, status0, trunc_sgl));
+  /* Conversion of the RFmode result back into SFmode.  */
+  emit_insn (gen_truncrfsf2 (operands[0], g2));
+  DONE;
+})
+
+(define_expand "sqrtdf2_internal_thr"
+  [(set (match_operand:DF 0 "fr_register_operand" "")
+        (sqrt:DF (match_operand:DF 1 "fr_register_operand" "")))]
+  "TARGET_INLINE_SQRT"
+{
+  rtx y         = gen_reg_rtx (RFmode);
+  rtx b         = gen_reg_rtx (RFmode);
+  rtx g         = gen_reg_rtx (RFmode);
+  rtx g1        = gen_reg_rtx (RFmode);
+  rtx g2        = gen_reg_rtx (RFmode);
+  rtx g3        = gen_reg_rtx (RFmode);
+  rtx g4        = gen_reg_rtx (RFmode);
+  rtx r         = gen_reg_rtx (RFmode);
+  rtx r1        = gen_reg_rtx (RFmode);
+  rtx h         = gen_reg_rtx (RFmode);
+  rtx h1        = gen_reg_rtx (RFmode);
+  rtx h2        = gen_reg_rtx (RFmode);
+  rtx d         = gen_reg_rtx (RFmode);
+  rtx d1        = gen_reg_rtx (RFmode);
+  rtx cond      = gen_reg_rtx (BImode);
+  rtx zero      = CONST0_RTX (RFmode);
+  rtx c1        = ia64_dconst_0_5();
+  rtx reg_df_c1	= gen_reg_rtx (DFmode);
+  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+  rtx status0   = CONST0_RTX (SImode);
+  rtx status1   = CONST1_RTX (SImode);
+  rtx trunc_dbl = CONST1_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+
+  /* Load 0.5 (c1) as a DFmode constant, then extend it to RFmode.  */
+  emit_insn (gen_movdf (reg_df_c1, c1));
+  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+  /* Empty conversion to put the DFmode input into RFmode (b).  */
+  emit_insn (gen_extenddfrf2 (b, operands[1]));
+  /* y = approximation of 1 / sqrt (b) from sqrt_approx_rf (frsqrta, status0); also sets cond  */
+  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+  /* g = b * y				*/
+  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+  /* h = 0.5 * y			*/
+  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
+  /* r = 0.5 - (g * h)			*/
+  emit_insn (gen_m2subrf4_cond (r, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
+  /* g1 = g + (g * r)			*/
+  emit_insn (gen_m2addrf4_cond (g1, cond, g, g, r, zero, status1, trunc_off));
+  /* h1 = h + (h * r)			*/
+  emit_insn (gen_m2addrf4_cond (h1, cond, h, h, r, zero, status1, trunc_off));
+  /* r1 = 0.5 - (g1 * h1)		*/
+  emit_insn (gen_m2subrf4_cond (r1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
+  /* g2 = g1 + (g1 * r1)		*/
+  emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, r1, zero, status1, trunc_off));
+  /* h2 = h1 + (h1 * r1)		*/
+  emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, r1, zero, status1, trunc_off));
+  /* d = b - (g2 * g2)			*/
+  emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
+  /* g3 = g2 + (d * h2)			*/
+  emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
+  /* d1 = b - (g3 * g3)			*/
+  emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
+  /* g4 = g3 + (d1 * h2), with trunc_dbl.  NOTE(review): this final step passes status1, while the SFmode expanders pass status0 on their final step -- confirm this is intended.  */
+  emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h2, y, status1, trunc_dbl));
+  /* Conversion of the RFmode result back into DFmode.  */
+  emit_insn (gen_truncrfdf2 (operands[0], g4));
+  DONE;
+})
+
+(define_expand "sqrtxf2_internal"
+  [(set (match_operand:XF 0 "fr_register_operand" "")
+        (sqrt:XF (match_operand:XF 1 "fr_register_operand" "")))]
+  "TARGET_INLINE_SQRT"
+{
+  rtx y         = gen_reg_rtx (RFmode);
+  rtx b         = gen_reg_rtx (RFmode);
+  rtx g         = gen_reg_rtx (RFmode);
+  rtx g1        = gen_reg_rtx (RFmode);
+  rtx g2        = gen_reg_rtx (RFmode);
+  rtx g3        = gen_reg_rtx (RFmode);
+  rtx g4        = gen_reg_rtx (RFmode);
+  rtx e         = gen_reg_rtx (RFmode);
+  rtx e1        = gen_reg_rtx (RFmode);
+  rtx e2        = gen_reg_rtx (RFmode);
+  rtx h         = gen_reg_rtx (RFmode);
+  rtx h1        = gen_reg_rtx (RFmode);
+  rtx h2        = gen_reg_rtx (RFmode);
+  rtx h3        = gen_reg_rtx (RFmode);
+  rtx d         = gen_reg_rtx (RFmode);
+  rtx d1        = gen_reg_rtx (RFmode);
+  rtx cond      = gen_reg_rtx (BImode);
+  rtx zero      = CONST0_RTX (RFmode);
+  rtx c1        = ia64_dconst_0_5();
+  rtx reg_df_c1	= gen_reg_rtx (DFmode);
+  rtx reg_rf_c1 = gen_reg_rtx (RFmode);
+  rtx status0   = CONST0_RTX (SImode);
+  rtx status1   = CONST1_RTX (SImode);
+  rtx trunc_off = CONST2_RTX (SImode);
+
+  /* Load 0.5 (c1) as a DFmode constant, then extend it to RFmode.  */
+  emit_insn (gen_movdf (reg_df_c1, c1));
+  emit_insn (gen_extenddfrf2 (reg_rf_c1, reg_df_c1));
+  /* Empty conversion to put the XFmode input into RFmode (b).  */
+  emit_insn (gen_extendxfrf2 (b, operands[1]));
+  /* y = approximation of 1 / sqrt (b) from sqrt_approx_rf (frsqrta, status0); also sets cond  */
+  emit_insn (gen_sqrt_approx_rf (y, b, cond, status0));
+  /* g = b * y				*/
+  emit_insn (gen_mulrf3_cond (g, cond, b, y, zero, status1, trunc_off));
+  /* h = 0.5 * y			*/
+  emit_insn (gen_mulrf3_cond (h, cond, reg_rf_c1, y, zero, status1, trunc_off));
+  /* e = 0.5 - (g * h)			*/
+  emit_insn (gen_m2subrf4_cond (e, cond, reg_rf_c1, g, h, zero, status1, trunc_off));
+  /* g1 = g + (g * e)			*/
+  emit_insn (gen_m2addrf4_cond (g1, cond, g, g, e, zero, status1, trunc_off));
+  /* h1 = h + (h * e)			*/
+  emit_insn (gen_m2addrf4_cond (h1, cond, h, h, e, zero, status1, trunc_off));
+  /* e1 = 0.5 - (g1 * h1)		*/
+  emit_insn (gen_m2subrf4_cond (e1, cond, reg_rf_c1, g1, h1, zero, status1, trunc_off));
+  /* g2 = g1 + (g1 * e1)		*/
+  emit_insn (gen_m2addrf4_cond (g2, cond, g1, g1, e1, zero, status1, trunc_off));
+  /* h2 = h1 + (h1 * e1)		*/
+  emit_insn (gen_m2addrf4_cond (h2, cond, h1, h1, e1, zero, status1, trunc_off));
+  /* d = b - (g2 * g2)			*/
+  emit_insn (gen_m2subrf4_cond (d, cond, b, g2, g2, zero, status1, trunc_off));
+  /* e2 = 0.5 - (g2 * h2)		*/
+  emit_insn (gen_m2subrf4_cond (e2, cond, reg_rf_c1, g2, h2, zero, status1, trunc_off));
+  /* g3 = g2 + (d * h2)			*/
+  emit_insn (gen_m2addrf4_cond (g3, cond, g2, d, h2, zero, status1, trunc_off));
+  /* h3 = h2 + (e2 * h2)		*/
+  emit_insn (gen_m2addrf4_cond (h3, cond, h2, e2, h2, zero, status1, trunc_off));
+  /* d1 = b - (g3 * g3)			*/
+  emit_insn (gen_m2subrf4_cond (d1, cond, b, g3, g3, zero, status1, trunc_off));
+  /* g4 = g3 + (d1 * h3).  NOTE(review): this final step passes status1, while the SFmode expanders pass status0 on their final step -- confirm this is intended.  */
+  emit_insn (gen_m2addrf4_cond (g4, cond, g3, d1, h3, y, status1, trunc_off));
+  /* Conversion of the RFmode result back into XFmode.  */
+  emit_insn (gen_truncrfxf2 (operands[0], g4));
+  DONE;
+})
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -102,3 +102,6 @@ extern void ia64_profile_hook (int);

 extern void ia64_optimization_options (int, int);
 extern void ia64_init_expanders (void);
+
+extern rtx ia64_dconst_0_5 (void);
+extern rtx ia64_dconst_0_375 (void);
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -5280,12 +5280,6 @@ ia64_override_options (void)
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

-  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
-    {
-      warning (0, "not yet implemented: latency-optimized inline square root");
-      TARGET_INLINE_SQRT = INL_MAX_THR;
-    }
-
  ia64_flag_schedule_insns2 = flag_schedule_insns_after_reload;
  flag_schedule_insns_after_reload = 0;

@@ -10571,4 +10565,33 @@ ia64_c_mode_for_suffix (char suffix)
  return VOIDmode;
 }

+static GTY(()) rtx ia64_dconst_0_5_rtx;  /* Cached result of ia64_dconst_0_5; set on its first call.  */
+
+rtx
+ia64_dconst_0_5 (void)  /* Return the DFmode constant rtx for 0.5.  */
+{
+  if (! ia64_dconst_0_5_rtx)  /* Not built yet: parse "0.5" and cache the rtx.  */
+    {
+      REAL_VALUE_TYPE rv;
+      real_from_string (&rv, "0.5");
+      ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
+    }
+  return ia64_dconst_0_5_rtx;
+}
+
+static GTY(()) rtx ia64_dconst_0_375_rtx;  /* Cached result of ia64_dconst_0_375; set on its first call.  */
+
+rtx
+ia64_dconst_0_375 (void)  /* Return the DFmode constant rtx for 0.375.  */
+{
+  if (! ia64_dconst_0_375_rtx)  /* Not built yet: parse "0.375" and cache the rtx.  */
+    {
+      REAL_VALUE_TYPE rv;
+      real_from_string (&rv, "0.375");
+      ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
+    }
+  return ia64_dconst_0_375_rtx;
+}
+
+
 #include "gt-ia64.h"
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -3161,21 +3161,6 @@
  DONE;
 })

-;; Inline square root.
-
-(define_insn "*sqrt_approx"
-  [(set (match_operand:XF 0 "fr_register_operand" "=f")
-        (div:XF (const_int 1)
-		(unspec:XF [(match_operand:XF 2 "fr_reg_or_fp01_operand" "fG")]
-			   UNSPEC_FR_SQRT_RECIP_APPROX_RES)))
-   (set (match_operand:BI 1 "register_operand" "=c")
-        (unspec:BI [(match_dup 2)] UNSPEC_FR_SQRT_RECIP_APPROX))
-   (use (match_operand:SI 3 "const_int_operand" "")) ]
-  ""
-  "frsqrta.s%3 %0, %1 = %2"
-  [(set_attr "itanium_class" "fmisc")
-   (set_attr "predicable" "no")])
-
 (define_insn "setf_exp_xf"
  [(set (match_operand:XF 0 "fr_register_operand" "=f")
        (unspec:XF [(match_operand:DI 1 "register_operand" "r")]
@@ -3184,134 +3169,23 @@
  "setf.exp %0 = %1"
  [(set_attr "itanium_class" "frfr")])

+
+;; Inline square root.
+
 (define_expand "sqrtsf2"
  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
 	(sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))]
  "TARGET_INLINE_SQRT"
 {
  rtx insn;
-#if 0
  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
    insn = gen_sqrtsf2_internal_lat (operands[0], operands[1]);
  else
-#else
-  gcc_assert (TARGET_INLINE_SQRT != INL_MIN_LAT);
-#endif
    insn = gen_sqrtsf2_internal_thr (operands[0], operands[1]);
  emit_insn (insn);
  DONE;
 })

-;; Latency-optimized square root.
-;; FIXME: Implement.
-
-;; Throughput-optimized square root.
-
-(define_insn_and_split "sqrtsf2_internal_thr"
-  [(set (match_operand:SF 0 "fr_register_operand" "=&f")
-	(sqrt:SF (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")))
-   ;; Register r2 in optimization guide.
-   (clobber (match_scratch:DI 2 "=r"))
-   ;; Register f8 in optimization guide
-   (clobber (match_scratch:XF 3 "=&f"))
-   ;; Register f9 in optimization guide
-   (clobber (match_scratch:XF 4 "=&f"))
-   ;; Register f10 in optimization guide
-   (clobber (match_scratch:XF 5 "=&f"))
-   ;; Register p6 in optimization guide.
-   (clobber (match_scratch:BI 6 "=c"))]
-  "TARGET_INLINE_SQRT == INL_MAX_THR"
-  "#"
-  "&& reload_completed"
-  [ ;; exponent of +1/2 in r2
-    (set (match_dup 2) (const_int 65534))
-    ;; +1/2 in f8
-    (set (match_dup 3) 
-         (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
-    ;; Step 1
-    ;; y0 = 1/sqrt(a) in f7
-    (parallel [(set (match_dup 7)
-                    (div:XF (const_int 1)
-			    (unspec:XF [(match_dup 8)]
-				       UNSPEC_FR_SQRT_RECIP_APPROX_RES)))
-               (set (match_dup 6)
-                    (unspec:BI [(match_dup 8)]
-			       UNSPEC_FR_SQRT_RECIP_APPROX))
-               (use (const_int 0))])
-    ;; Step 2
-    ;; H0 = 1/2 * y0 in f9
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 4)
-                      (plus:XF (mult:XF (match_dup 3) (match_dup 7))
-                               (match_dup 9)))
-                 (use (const_int 1))]))
-    ;; Step 3
-    ;; S0 = a * y0 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 7)
-                      (plus:XF (mult:XF (match_dup 8) (match_dup 7))
-                               (match_dup 9)))
-                 (use (const_int 1))]))
-    ;; Step 4
-    ;; d = 1/2 - S0 * H0 in f10
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 5)
-                      (minus:XF (match_dup 3)
-				(mult:XF (match_dup 7) (match_dup 4))))
-                 (use (const_int 1))]))
-    ;; Step 5
-    ;; d' = d + 1/2 * d in f8
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 3)
-                       (plus:XF (mult:XF (match_dup 3) (match_dup 5))
-                                (match_dup 5)))
-                  (use (const_int 1))]))
-    ;; Step 6
-    ;; e = d + d * d' in f8
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 3)
-                       (plus:XF (mult:XF (match_dup 5) (match_dup 3))
-                                (match_dup 5)))
-                  (use (const_int 1))]))
-    ;; Step 7
-    ;; S1 = S0 + e * S0 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 0)
-		      (float_truncate:SF
-                        (plus:XF (mult:XF (match_dup 3) (match_dup 7))
-                                 (match_dup 7))))
-                 (use (const_int 1))]))
-    ;; Step 8
-    ;; H1 = H0 + e * H0 in f8
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 3)
-                       (plus:XF (mult:XF (match_dup 3) (match_dup 4))
-                                (match_dup 4)))
-                  (use (const_int 1))]))
-    ;; Step 9 
-    ;; d1 = a - S1 * S1 in f9
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 4)
-                       (minus:XF (match_dup 8)
-				 (mult:XF (match_dup 7) (match_dup 7))))
-                  (use (const_int 1))]))
-    ;; Step 10
-    ;; S = S1 + d1 * H1 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 0)
-                       (float_truncate:SF
-                         (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-                                  (match_dup 7))))
-                  (use (const_int 0))]))]
-{
-  /* Generate 82-bit versions of the input and output operands.  */
-  operands[7] = gen_rtx_REG (XFmode, REGNO (operands[0]));
-  operands[8] = gen_rtx_REG (XFmode, REGNO (operands[1]));
-  /* Generate required floating-point constants.  */
-  operands[9] = CONST0_RTX (XFmode);
-}
-  [(set_attr "predicable" "no")])
-
 ;; ::::::::::::::::::::
 ;; ::
 ;; :: 64-bit floating point arithmetic
@@ -3546,144 +3420,12 @@
  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
    insn = gen_sqrtdf2_internal_lat (operands[0], operands[1]);
  else
-#else
-  gcc_assert (TARGET_INLINE_SQRT != INL_MIN_LAT);
 #endif
  insn = gen_sqrtdf2_internal_thr (operands[0], operands[1]);
  emit_insn (insn);
  DONE;
 })

-;; Latency-optimized square root.
-;; FIXME: Implement.
-
-;; Throughput-optimized square root.
-
-(define_insn_and_split "sqrtdf2_internal_thr"
-  [(set (match_operand:DF 0 "fr_register_operand" "=&f")
-	(sqrt:DF (match_operand:DF 1 "fr_reg_or_fp01_operand" "fG")))
-   ;; Register r2 in optimization guide.
-   (clobber (match_scratch:DI 2 "=r"))
-   ;; Register f8 in optimization guide
-   (clobber (match_scratch:XF 3 "=&f"))
-   ;; Register f9 in optimization guide
-   (clobber (match_scratch:XF 4 "=&f"))
-   ;; Register f10 in optimization guide
-   (clobber (match_scratch:XF 5 "=&f"))
-   ;; Register p6 in optimization guide.
-   (clobber (match_scratch:BI 6 "=c"))]
-  "TARGET_INLINE_SQRT == INL_MAX_THR"
-  "#"
-  "&& reload_completed"
-  [ ;; exponent of +1/2 in r2
-    (set (match_dup 2) (const_int 65534))
-    ;; +1/2 in f10
-    (set (match_dup 5) 
-         (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
-    ;; Step 1
-    ;; y0 = 1/sqrt(a) in f7
-    (parallel [(set (match_dup 7)
-                    (div:XF (const_int 1)
-			    (unspec:XF [(match_dup 8)]
-				       UNSPEC_FR_SQRT_RECIP_APPROX_RES)))
-               (set (match_dup 6)
-                    (unspec:BI [(match_dup 8)]
-			       UNSPEC_FR_SQRT_RECIP_APPROX))
-               (use (const_int 0))])
-    ;; Step 2
-    ;; H0 = 1/2 * y0 in f8
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 3)
-                      (plus:XF (mult:XF (match_dup 5) (match_dup 7))
-                               (match_dup 9)))
-                 (use (const_int 1))]))
-    ;; Step 3
-    ;; G0 = a * y0 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 7)
-                      (plus:XF (mult:XF (match_dup 8) (match_dup 7))
-                               (match_dup 9)))
-                 (use (const_int 1))]))
-    ;; Step 4
-    ;; r0 = 1/2 - G0 * H0 in f9
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 4)
-                      (minus:XF (match_dup 5)
-				(mult:XF (match_dup 7) (match_dup 3))))
-                 (use (const_int 1))]))
-    ;; Step 5
-    ;; H1 = H0 + r0 * H0 in f8
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 3)
-                       (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-                                (match_dup 3)))
-                  (use (const_int 1))]))
-    ;; Step 6
-    ;; G1 = G0 + r0 * G0 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 7)
-                       (plus:XF (mult:XF (match_dup 4) (match_dup 7))
-                                (match_dup 7)))
-                  (use (const_int 1))]))
-    ;; Step 7
-    ;; r1 = 1/2 - G1 * H1 in f9
-    (cond_exec (ne (match_dup 6) (const_int 0))
-      (parallel [(set (match_dup 4)
-                      (minus:XF (match_dup 5)
-				(mult:XF (match_dup 7) (match_dup 3))))
-                 (use (const_int 1))]))
-    ;; Step 8
-    ;; H2 = H1 + r1 * H1 in f8
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 3)
-                       (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-                                (match_dup 3)))
-                  (use (const_int 1))]))
-    ;; Step 9 
-    ;; G2 = G1 + r1 * G1 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 7)
-                       (plus:XF (mult:XF (match_dup 4) (match_dup 7))
-                                (match_dup 7)))
-                  (use (const_int 1))]))
-    ;; Step 10
-    ;; d2 = a - G2 * G2 in f9
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 4)
-                       (minus:XF (match_dup 8)
-				 (mult:XF (match_dup 7) (match_dup 7))))
-                  (use (const_int 1))]))
-    ;; Step 11
-    ;; G3 = G2 + d2 * H2 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 7)
-                       (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-                                (match_dup 7)))
-                  (use (const_int 1))]))
-    ;; Step 12
-    ;; d3 = a - G3 * G3 in f9
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 4)
-                       (minus:XF (match_dup 8)
-				 (mult:XF (match_dup 7) (match_dup 7))))
-                  (use (const_int 1))]))
-    ;; Step 13
-    ;; S = G3 + d3 * H2 in f7
-    (cond_exec (ne (match_dup 6) (const_int 0))
-       (parallel [(set (match_dup 0)
-                       (float_truncate:DF
-                         (plus:XF (mult:XF (match_dup 4) (match_dup 3))
-                                  (match_dup 7))))
-                  (use (const_int 0))]))]
-{
-  /* Generate 82-bit versions of the input and output operands.  */
-  operands[7] = gen_rtx_REG (XFmode, REGNO (operands[0]));
-  operands[8] = gen_rtx_REG (XFmode, REGNO (operands[1]));
-  /* Generate required floating-point constants.  */
-  operands[9] = CONST0_RTX (XFmode);
-}
-  [(set_attr "predicable" "no")])
-
 ;; ::::::::::::::::::::
 ;; ::
 ;; :: 80-bit floating point arithmetic
@@ -4056,163 +3798,11 @@
  "TARGET_INLINE_SQRT"
 {
  rtx insn;
-#if 0
-  if (TARGET_INLINE_SQRT == INL_MIN_LAT)
-    insn = gen_sqrtxf2_internal_lat (operands[0], operands[1]);
-  else
-#else
-  gcc_assert (TARGET_INLINE_SQRT != INL_MIN_LAT);
-#endif
-  insn = gen_sqrtxf2_internal_thr (operands[0], operands[1]);
+  insn = gen_sqrtxf2_internal (operands[0], operands[1]);
  emit_insn (insn);
  DONE;
 })

-;; Latency-optimized square root.
-;; FIXME: Implement.
-
-;; Throughput-optimized square root.
-
-(define_insn_and_split "sqrtxf2_internal_thr"
-  [(set (match_operand:XF 0 "fr_register_operand" "=&f")
-	(sqrt:XF (match_operand:XF 1 "fr_reg_or_fp01_operand" "fG")))
-   ;; Register r2 in optimization guide.
-   (clobber (match_scratch:DI 2 "=r"))
-   ;; Register f8 in optimization guide
-   (clobber (match_scratch:XF 3 "=&f"))
-   ;; Register f9 in optimization guide
-   (clobber (match_scratch:XF 4 "=&f"))
-   ;; Register f10 in optimization guide
-   (clobber (match_scratch:XF 5 "=&f"))
-   ;; Register f11 in optimization guide
-   (clobber (match_scratch:XF 6 "=&f"))
-   ;; Register p6 in optimization guide.
-   (clobber (match_scratch:BI 7 "=c"))]
-  "TARGET_INLINE_SQRT == INL_MAX_THR"
-  "#"
-  "&& reload_completed"
-  [ ;; exponent of +1/2 in r2
-    (set (match_dup 2) (const_int 65534))
-    ;; +1/2 in f8.  The Intel manual mistakenly specifies f10.
-    (set (match_dup 3) 
-         (unspec:XF [(match_dup 2)] UNSPEC_SETF_EXP))
-    ;; Step 1
-    ;; y0 = 1/sqrt(a) in f7
-    (parallel [(set (match_dup 8)
-                    (div:XF (const_int 1)
-			    (unspec:XF [(match_dup 9)]
-				       UNSPEC_FR_SQRT_RECIP_APPROX_RES)))
-               (set (match_dup 7)
-                    (unspec:BI [(match_dup 9)]
-			       UNSPEC_FR_SQRT_RECIP_APPROX))
-               (use (const_int 0))])
-    ;; Step 2
-    ;; H0 = 1/2 * y0 in f9
-    (cond_exec (ne (match_dup 7) (const_int 0))
-      (parallel [(set (match_dup 4)
-                      (plus:XF (mult:XF (match_dup 3) (match_dup 8))
-                               (match_dup 10)))
-                 (use (const_int 1))]))
-    ;; Step 3
-    ;; S0 = a * y0 in f7
-    (cond_exec (ne (match_dup 7) (const_int 0))
-      (parallel [(set (match_dup 8)
-                      (plus:XF (mult:XF (match_dup 9) (match_dup 8))
-                               (match_dup 10)))
-                 (use (const_int 1))]))
-    ;; Step 4
-    ;; d0 = 1/2 - S0 * H0 in f10
-    (cond_exec (ne (match_dup 7) (const_int 0))
-      (parallel [(set (match_dup 5)
-                      (minus:XF (match_dup 3)
-				(mult:XF (match_dup 8) (match_dup 4))))
-                 (use (const_int 1))]))
-    ;; Step 5
-    ;; H1 = H0 + d0 * H0 in f9
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 4)
-                       (plus:XF (mult:XF (match_dup 5) (match_dup 4))
-                                (match_dup 4)))
-                  (use (const_int 1))]))
-    ;; Step 6
-    ;; S1 = S0 + d0 * S0 in f7
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 8)
-                       (plus:XF (mult:XF (match_dup 5) (match_dup 8))
-                                (match_dup 8)))
-                  (use (const_int 1))]))
-    ;; Step 7
-    ;; d1 = 1/2 - S1 * H1 in f10
-    (cond_exec (ne (match_dup 7) (const_int 0))
-      (parallel [(set (match_dup 5)
-                      (minus:XF (match_dup 3)
-				(mult:XF (match_dup 8) (match_dup 4))))
-                 (use (const_int 1))]))
-    ;; Step 8
-    ;; H2 = H1 + d1 * H1 in f9
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 4)
-                       (plus:XF (mult:XF (match_dup 5) (match_dup 4))
-                                (match_dup 4)))
-                  (use (const_int 1))]))
-    ;; Step 9 
-    ;; S2 = S1 + d1 * S1 in f7
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 8)
-                       (plus:XF (mult:XF (match_dup 5) (match_dup 8))
-                                (match_dup 8)))
-                  (use (const_int 1))]))
-    ;; Step 10
-    ;; d2 = 1/2 - S2 * H2 in f10
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 5)
-                       (minus:XF (match_dup 3)
-				 (mult:XF (match_dup 8) (match_dup 4))))
-                  (use (const_int 1))]))
-    ;; Step 11
-    ;; e2 = a - S2 * S2 in f8
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 3)
-                       (minus:XF (match_dup 9)
-				 (mult:XF (match_dup 8) (match_dup 8))))
-                  (use (const_int 1))]))
-    ;; Step 12
-    ;; S3 = S2 + e2 * H2 in f7
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 8)
-                       (plus:XF (mult:XF (match_dup 3) (match_dup 4))
-                                (match_dup 8)))
-                  (use (const_int 1))]))
-    ;; Step 13
-    ;; H3 = H2 + d2 * H2 in f9
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 4)
-                       (plus:XF (mult:XF (match_dup 5) (match_dup 4))
-                                (match_dup 4)))
-                  (use (const_int 1))]))
-    ;; Step 14
-    ;; e3 = a - S3 * S3 in f8
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 3)
-                       (minus:XF (match_dup 9)
-				 (mult:XF (match_dup 8) (match_dup 8))))
-                  (use (const_int 1))]))
-    ;; Step 15
-    ;; S = S3 + e3 * H3 in f7
-    (cond_exec (ne (match_dup 7) (const_int 0))
-       (parallel [(set (match_dup 0)
-                       (plus:XF (mult:XF (match_dup 3) (match_dup 4))
-                                (match_dup 8)))
-                  (use (const_int 0))]))]
-{
-  /* Generate 82-bit versions of the input and output operands.  */
-  operands[8] = gen_rtx_REG (XFmode, REGNO (operands[0]));
-  operands[9] = gen_rtx_REG (XFmode, REGNO (operands[1]));
-  /* Generate required floating-point constants.  */
-  operands[10] = CONST0_RTX (XFmode);
-}
-  [(set_attr "predicable" "no")])
-
 ;; ??? frcpa works like cmp.foo.unc.

 (define_insn "*recip_approx"