Commit ad45ac43 by David Edelsohn Committed by David Edelsohn

re PR target/68609 (PowerPC reciprocal estimate missed opportunities)

        PR target/68609
        * config/rs6000/rs6000.c (rs6000_emit_msub): Delete.
        (rs6000_emit_swsqrt): Convert to Goldschmidt's Algorithm.
        * config/rs6000/rs6000.md (sqrt<mode>2): Limit swsqrt to high
        precision estimate.

From-SVN: r232439
parent c5e3b6fc
2016-01-15 David Edelsohn <dje.gcc@gmail.com>
PR target/68609
* config/rs6000/rs6000.c (rs6000_emit_msub): Delete.
(rs6000_emit_swsqrt): Convert to Goldschmidt's Algorithm.
* config/rs6000/rs6000.md (sqrt<mode>2): Limit swsqrt to high
precision estimate.
2016-01-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/66856
......
...@@ -32769,29 +32769,6 @@ rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a) ...@@ -32769,29 +32769,6 @@ rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
emit_move_insn (target, dst); emit_move_insn (target, dst);
} }
/* Emit a fused multiply-subtract into TARGET: target = fma (m1, m2, -a).
   The operation is expanded in the mode of TARGET.  */
static void
rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
{
  machine_mode mode = GET_MODE (target);
  rtx result;

  if (optab_handler (fms_optab, mode) == CODE_FOR_nothing)
    {
      /* No direct fms pattern for this mode (e.g. Altivec): negate the
         addend first and expand through fma instead.  */
      rtx neg_a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
      result = expand_ternary_op (mode, fma_optab, m1, m2, neg_a, target, 0);
    }
  else
    result = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);

  gcc_assert (result != NULL);

  /* The expander may have placed the value somewhere other than TARGET;
     copy it over only in that case.  */
  if (result == target)
    return;
  emit_move_insn (target, result);
}
/* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */ /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
static void static void
...@@ -32890,15 +32867,16 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) ...@@ -32890,15 +32867,16 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d)); add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
} }
/* Newton-Raphson approximation of single/double-precision floating point /* Goldschmidt's Algorithm for single/double-precision floating point
rsqrt. Assumes no trapping math and finite arguments. */ sqrt and rsqrt. Assumes no trapping math and finite arguments. */
void void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip) rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{ {
machine_mode mode = GET_MODE (src); machine_mode mode = GET_MODE (src);
rtx x0 = gen_reg_rtx (mode); rtx e = gen_reg_rtx (mode);
rtx y = gen_reg_rtx (mode); rtx g = gen_reg_rtx (mode);
rtx h = gen_reg_rtx (mode);
/* Low precision estimates guarantee 5 bits of accuracy. High /* Low precision estimates guarantee 5 bits of accuracy. High
precision estimates guarantee 14 bits of accuracy. SFmode precision estimates guarantee 14 bits of accuracy. SFmode
...@@ -32909,55 +32887,68 @@ rs6000_emit_swsqrt (rtx dst, rtx src, bool recip) ...@@ -32909,55 +32887,68 @@ rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
if (mode == DFmode || mode == V2DFmode) if (mode == DFmode || mode == V2DFmode)
passes++; passes++;
REAL_VALUE_TYPE dconst3_2;
int i; int i;
rtx halfthree; rtx mhalf;
enum insn_code code = optab_handler (smul_optab, mode); enum insn_code code = optab_handler (smul_optab, mode);
insn_gen_fn gen_mul = GEN_FCN (code); insn_gen_fn gen_mul = GEN_FCN (code);
gcc_assert (code != CODE_FOR_nothing); gcc_assert (code != CODE_FOR_nothing);
/* Load up the constant 1.5 either as a scalar, or as a vector. */ mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
halfthree = rs6000_load_constant_and_splat (mode, dconst3_2); /* e = rsqrt estimate */
emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
/* x0 = rsqrt estimate */ UNSPEC_RSQRT)));
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
UNSPEC_RSQRT)));
/* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */ /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
if (!recip) if (!recip)
{ {
rtx zero = force_reg (mode, CONST0_RTX (mode)); rtx zero = force_reg (mode, CONST0_RTX (mode));
rtx target = emit_conditional_move (x0, GT, src, zero, mode, rtx target = emit_conditional_move (e, GT, src, zero, mode,
x0, zero, mode, 0); e, zero, mode, 0);
if (target != x0) if (target != e)
emit_move_insn (x0, target); emit_move_insn (e, target);
} }
/* y = 0.5 * src = 1.5 * src - src -> fewer constants */ /* g = sqrt estimate. */
rs6000_emit_msub (y, src, halfthree, src); emit_insn (gen_mul (g, e, src));
/* h = 1/(2*sqrt) estimate. */
emit_insn (gen_mul (h, e, mhalf));
for (i = 0; i < passes; i++) if (recip)
{ {
rtx x1 = gen_reg_rtx (mode); if (passes == 1)
rtx u = gen_reg_rtx (mode); {
rtx v = gen_reg_rtx (mode); rtx t = gen_reg_rtx (mode);
rs6000_emit_nmsub (t, g, h, mhalf);
/* Apply correction directly to 1/rsqrt estimate. */
rs6000_emit_madd (dst, e, t, e);
}
else
{
for (i = 0; i < passes; i++)
{
rtx t1 = gen_reg_rtx (mode);
rtx g1 = gen_reg_rtx (mode);
rtx h1 = gen_reg_rtx (mode);
/* x1 = x0 * (1.5 - y * (x0 * x0)) */ rs6000_emit_nmsub (t1, g, h, mhalf);
emit_insn (gen_mul (u, x0, x0)); rs6000_emit_madd (g1, g, t1, g);
rs6000_emit_nmsub (v, y, u, halfthree); rs6000_emit_madd (h1, h, t1, h);
emit_insn (gen_mul (x1, x0, v));
x0 = x1;
}
/* If not reciprocal, multiply by src to produce sqrt. */ g = g1;
if (!recip) h = h1;
emit_insn (gen_mul (dst, src, x0)); }
/* Multiply by 2 for 1/rsqrt. */
emit_insn (gen_add3_insn (dst, h, h));
}
}
else else
emit_move_insn (dst, x0); {
rtx t = gen_reg_rtx (mode);
rs6000_emit_nmsub (t, g, h, mhalf);
rs6000_emit_madd (dst, g, t, g);
}
return; return;
} }
...@@ -4444,6 +4444,7 @@ ...@@ -4444,6 +4444,7 @@
&& (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))" && (TARGET_PPC_GPOPT || (<MODE>mode == SFmode && TARGET_XILINX_FPU))"
{ {
if (<MODE>mode == SFmode if (<MODE>mode == SFmode
&& TARGET_RECIP_PRECISION
&& RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode) && RS6000_RECIP_HAVE_RSQRTE_P (<MODE>mode)
&& !optimize_function_for_size_p (cfun) && !optimize_function_for_size_p (cfun)
&& flag_finite_math_only && !flag_trapping_math && flag_finite_math_only && !flag_trapping_math
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment