Commit ef765ea9 by David Edelsohn Committed by David Edelsohn

rs6000.c (rs6000_emit_swdivsf): New function.

        * config/rs6000/rs6000.c (rs6000_emit_swdivsf): New function.
        (rs6000_emit_swdivdf): New function.
        * config/rs6000/rs6000.md (fres): New pattern.
        (divsf3): Add approximation through rs6000_emit_swdivsf.
        (fred): New pattern.
        (divdf3): Add approximation through rs6000_emit_swdivdf.
        * config/rs6000/rs6000-protos.h (rs6000_emit_swdivsf): Declare.
        (rs6000_emit_swdivdf): Declare.
        * config/rs6000/rs6000.opt (mswdiv): New option.
        * doc/invoke.texi (RS/6000 and PowerPC Options): Document mswdiv.

From-SVN: r100645
parent 6c4b13a9
2005-06-05 David Edelsohn <edelsohn@gnu.org>
* config/rs6000/rs6000.c (rs6000_emit_swdivsf): New function.
(rs6000_emit_swdivdf): New function.
* config/rs6000/rs6000.md (fres): New pattern.
(divsf3): Add approximation through rs6000_emit_swdivsf.
(fred): New pattern.
(divdf3): Add approximation through rs6000_emit_swdivdf.
* config/rs6000/rs6000-protos.h (rs6000_emit_swdivsf): Declare.
(rs6000_emit_swdivdf): Declare.
* config/rs6000/rs6000.opt (mswdiv): New option.
* doc/invoke.texi (RS/6000 and PowerPC Options): Document mswdiv.
2005-06-05 Zdenek Dvorak <dvorakz@suse.cz>
PR tree-optimization/21846
......
......@@ -82,6 +82,8 @@ extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
extern void rs6000_emit_sync (enum rtx_code, enum machine_mode,
rtx, rtx, rtx, rtx, bool);
/* Emit Newton-Raphson software division sequences for SFmode and DFmode.
   The rtx arguments are, in order: result, numerator, denominator.  */
extern void rs6000_emit_swdivsf (rtx, rtx, rtx);
extern void rs6000_emit_swdivdf (rtx, rtx, rtx);
extern void output_toc (FILE *, rtx, int, enum machine_mode);
extern void rs6000_initialize_trampoline (rtx, rtx, rtx);
extern rtx rs6000_longcall_ref (rtx);
......
......@@ -17955,6 +17955,109 @@ rs6000_memory_move_cost (enum machine_mode mode, enum reg_class class,
return 4 + rs6000_register_move_cost (mode, class, GENERAL_REGS);
}
/* Expand the single-precision quotient N / D into RES as a reciprocal
   estimate of D refined by Newton-Raphson iteration, rather than as a
   divide.  Only valid when math is non-trapping and the arguments are
   finite.  */
void
rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
{
  /* One pseudo per intermediate value, allocated in the order in which
     the values are computed below.  */
  rtx recip = gen_reg_rtx (SFmode);
  rtx err = gen_reg_rtx (SFmode);
  rtx err_poly = gen_reg_rtx (SFmode);
  rtx refined = gen_reg_rtx (SFmode);
  rtx quot = gen_reg_rtx (SFmode);
  rtx resid = gen_reg_rtx (SFmode);
  rtx fp_one = force_reg (SFmode,
			  CONST_DOUBLE_FROM_REAL_VALUE (dconst1, SFmode));
  rtx expr;

  /* recip = estimate of 1/d.  */
  expr = gen_rtx_UNSPEC (SFmode, gen_rtvec (1, d), UNSPEC_FRES);
  emit_insn (gen_rtx_SET (VOIDmode, recip, expr));

  /* err = 1 - d * recip.  */
  expr = gen_rtx_MULT (SFmode, d, recip);
  expr = gen_rtx_MINUS (SFmode, fp_one, expr);
  emit_insn (gen_rtx_SET (VOIDmode, err, expr));

  /* err_poly = err + err * err.  */
  expr = gen_rtx_MULT (SFmode, err, err);
  expr = gen_rtx_PLUS (SFmode, expr, err);
  emit_insn (gen_rtx_SET (VOIDmode, err_poly, expr));

  /* refined = recip + err_poly * recip.  */
  expr = gen_rtx_MULT (SFmode, err_poly, recip);
  expr = gen_rtx_PLUS (SFmode, expr, recip);
  emit_insn (gen_rtx_SET (VOIDmode, refined, expr));

  /* quot = n * refined.  */
  expr = gen_rtx_MULT (SFmode, n, refined);
  emit_insn (gen_rtx_SET (VOIDmode, quot, expr));

  /* resid = n - d * quot.  */
  expr = gen_rtx_MULT (SFmode, d, quot);
  expr = gen_rtx_MINUS (SFmode, n, expr);
  emit_insn (gen_rtx_SET (VOIDmode, resid, expr));

  /* res = quot + resid * refined.  */
  expr = gen_rtx_MULT (SFmode, resid, refined);
  expr = gen_rtx_PLUS (SFmode, expr, quot);
  emit_insn (gen_rtx_SET (VOIDmode, res, expr));
}
/* Newton-Raphson approximation of double-precision floating point divide n/d.
   Assumes no trapping math and finite arguments.

   RES receives the quotient, N is the numerator and D the denominator.
   Starting from the reciprocal estimate x0 ~= 1/d with error
   e0 = 1 - d * x0, the estimate is refined three times by multiplying it
   by (1 + e0), (1 + e0^2) and (1 + e0^4).  The refined reciprocal y3 is
   then multiplied by N, and the quotient is corrected once using the
   residual n - d * u0.  Every emitted operation is in DFmode.  */
void
rs6000_emit_swdivdf (rtx res, rtx n, rtx d)
{
  rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;

  x0 = gen_reg_rtx (DFmode);
  e0 = gen_reg_rtx (DFmode);
  e1 = gen_reg_rtx (DFmode);
  e2 = gen_reg_rtx (DFmode);
  y1 = gen_reg_rtx (DFmode);
  y2 = gen_reg_rtx (DFmode);
  y3 = gen_reg_rtx (DFmode);
  u0 = gen_reg_rtx (DFmode);
  v0 = gen_reg_rtx (DFmode);
  one = force_reg (DFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, DFmode));

  /* x0 = 1./d estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
			  gen_rtx_UNSPEC (DFmode, gen_rtvec (1, d),
					  UNSPEC_FRES)));
  /* e0 = 1. - d * x0
     The product must be DFmode, matching its operands and the enclosing
     MINUS; an SFmode MULT here would be mode-inconsistent RTL.  */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
			  gen_rtx_MINUS (DFmode, one,
					 gen_rtx_MULT (DFmode, d, x0))));
  /* y1 = x0 + e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, y1,
			  gen_rtx_PLUS (DFmode,
					gen_rtx_MULT (DFmode, e0, x0), x0)));
  /* e1 = e0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
			  gen_rtx_MULT (DFmode, e0, e0)));
  /* y2 = y1 + e1 * y1 */
  emit_insn (gen_rtx_SET (VOIDmode, y2,
			  gen_rtx_PLUS (DFmode,
					gen_rtx_MULT (DFmode, e1, y1), y1)));
  /* e2 = e1 * e1 */
  emit_insn (gen_rtx_SET (VOIDmode, e2,
			  gen_rtx_MULT (DFmode, e1, e1)));
  /* y3 = y2 + e2 * y2 */
  emit_insn (gen_rtx_SET (VOIDmode, y3,
			  gen_rtx_PLUS (DFmode,
					gen_rtx_MULT (DFmode, e2, y2), y2)));
  /* u0 = n * y3 */
  emit_insn (gen_rtx_SET (VOIDmode, u0,
			  gen_rtx_MULT (DFmode, n, y3)));
  /* v0 = n - d * u0 */
  emit_insn (gen_rtx_SET (VOIDmode, v0,
			  gen_rtx_MINUS (DFmode, n,
					 gen_rtx_MULT (DFmode, d, u0))));
  /* res = u0 + v0 * y3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
			  gen_rtx_PLUS (DFmode,
					gen_rtx_MULT (DFmode, v0, y3), u0)));
}
/* Return an RTX representing where to find the function value of a
function returning MODE. */
static rtx
......
......@@ -58,6 +58,7 @@
(UNSPEC_LWSYNC 36)
(UNSPEC_ISYNC 37)
(UNSPEC_POPCNTB 38)
(UNSPEC_FRES 39)
])
;;
......@@ -4640,12 +4641,26 @@
"{fm|fmul} %0,%1,%2"
[(set_attr "type" "dmul")])
;; Single-precision reciprocal estimate: operand 0 is set to the
;; UNSPEC_FRES of operand 1, output as the "fres" instruction.  The pattern
;; is enabled only when TARGET_PPC_GFXOPT and flag_finite_math_only hold.
(define_insn "fres"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
"TARGET_PPC_GFXOPT && flag_finite_math_only"
"fres %0,%1"
[(set_attr "type" "fp")])
;; Single-precision divide expander.  When swdiv is set, we are not
;; optimizing for size, TARGET_PPC_GFXOPT is available, and math is
;; finite-only and non-trapping, the division is emitted by
;; rs6000_emit_swdivsf (result, numerator, denominator) and the expansion
;; is DONE.
(define_expand "divsf3"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(div:SF (match_operand:SF 1 "gpc_reg_operand" "")
(match_operand:SF 2 "gpc_reg_operand" "")))]
"TARGET_HARD_FLOAT"
"")
{
if (swdiv && !optimize_size && TARGET_PPC_GFXOPT
&& flag_finite_math_only && !flag_trapping_math)
{
rs6000_emit_swdivsf (operands[0], operands[1], operands[2]);
DONE;
}
})
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
......@@ -5028,12 +5043,26 @@
"{fm|fmul} %0,%1,%2"
[(set_attr "type" "dmul")])
;; Double-precision reciprocal estimate: operand 0 is set to the
;; UNSPEC_FRES of operand 1 in DFmode, output as the "fre" instruction
;; (the pattern itself is named "fred").  The pattern is enabled only
;; when TARGET_POPCNTB and flag_finite_math_only hold.
(define_insn "fred"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
"TARGET_POPCNTB && flag_finite_math_only"
"fre %0,%1"
[(set_attr "type" "fp")])
;; Double-precision divide expander.  When swdiv is set, we are not
;; optimizing for size, TARGET_POPCNTB is available, and math is
;; finite-only and non-trapping, the division is emitted by
;; rs6000_emit_swdivdf (result, numerator, denominator) and the expansion
;; is DONE.
(define_expand "divdf3"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(div:DF (match_operand:DF 1 "gpc_reg_operand" "")
(match_operand:DF 2 "gpc_reg_operand" "")))]
"TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)"
"")
{
if (swdiv && !optimize_size && TARGET_POPCNTB
&& flag_finite_math_only && !flag_trapping_math)
{
rs6000_emit_swdivdf (operands[0], operands[1], operands[2]);
DONE;
}
})
(define_insn "*divdf3_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
......
......@@ -123,6 +123,10 @@ mxl-compat
Target Report Var(TARGET_XL_COMPAT)
Conform more closely to IBM XLC semantics
mswdiv
Target Report Var(swdiv)
Generate software floating point divide for better throughput
mno-fp-in-toc
Target Report RejectNegative Mask(NO_FP_IN_TOC)
Do not place floating point constants in TOC
......
......@@ -631,7 +631,7 @@ See RS/6000 and PowerPC Options.
-mstrict-align -mno-strict-align -mrelocatable @gol
-mno-relocatable -mrelocatable-lib -mno-relocatable-lib @gol
-mtoc -mno-toc -mlittle -mlittle-endian -mbig -mbig-endian @gol
-mdynamic-no-pic @gol
-mdynamic-no-pic -maltivec -mswdiv @gol
-mprioritize-restricted-insns=@var{priority} @gol
-msched-costly-dep=@var{dependence_type} @gol
-minsert-sched-nops=@var{scheme} @gol
......@@ -10720,6 +10720,17 @@ values for @var{cpu_type} are used for @option{-mtune} as for
architecture, registers, and mnemonics set by @option{-mcpu}, but the
scheduling parameters set by @option{-mtune}.
@item -mswdiv
@itemx -mno-swdiv
@opindex mswdiv
@opindex mno-swdiv
Generate code to compute division as reciprocal estimate and iterative
refinement, creating opportunities for increased throughput.  This
feature requires the optional PowerPC Graphics instruction set for single
precision and the FRE instruction for double precision.  It assumes that
divides cannot generate user-visible traps and that the domain values do
not include infinities, denormals, or a zero denominator.
@item -maltivec
@itemx -mno-altivec
@opindex maltivec
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment