Commit 9c78b944 by David Edelsohn Committed by David Edelsohn

rs6000-protos.h (rs6000_emit_swrsqrtsf): Declare.

	* config/rs6000/rs6000-protos.h (rs6000_emit_swrsqrtsf): Declare.
	* config/rs6000/rs6000.opt (swdiv): Change option to ...
	(recip): this.
	* config/rs6000/rs6000.c (rs6000_builtin_reciprocal): New
	function.
	(TARGET_BUILTIN_RECIPROCAL): Use it.
	(rs6000_expand_builtin): Expand recip, recipf, and rsqrtf.
	(rs6000_init_builtins): Initialize recip, recipf, and rsqrtf.
	(rs6000_emit_swrsqrtsf): New.
	* config/rs6000/rs6000.h (rs6000_builtins): Add recip, recipf, and
	rsqrtf. 
	* config/rs6000/rs6000.md (UNSPEC_RSQRT): Define.
	(divsf3): Remove swdiv support.
	(recipsf3): New.
	(rsqrtsf2): New.
	(rsqrt_internal1): New.
	(divdf3): Remove swdiv support.
	(recipdf3): New.

From-SVN: r128719
parent 2dff8956
2007-09-24 David Edelsohn <edelsohn@gnu.org>
* config/rs6000/rs6000-protos.h (rs6000_emit_swrsqrtsf): Declare.
* config/rs6000/rs6000.opt (swdiv): Change option to ...
(recip): this.
* config/rs6000/rs6000.c (rs6000_builtin_reciprocal): New
function.
(TARGET_BUILTIN_RECIPROCAL): Use it.
(rs6000_expand_builtin): Expand recip, recipf, and rsqrtf.
(rs6000_init_builtins): Initialize recip, recipf, and rsqrtf.
(rs6000_emit_swrsqrtsf): New.
* config/rs6000/rs6000.h (rs6000_builtins): Add recip, recipf, and
rsqrtf.
* config/rs6000/rs6000.md (UNSPEC_RSQRT): Define.
(divsf3): Remove swdiv support.
(recipsf3): New.
(rsqrtsf2): New.
(rsqrt_internal1): New.
(divdf3): Remove swdiv support.
(recipdf3): New.
2007-09-24 Jakub Jelinek <jakub@redhat.com>
PR c++/33506
......@@ -89,6 +89,7 @@ extern void rs6000_split_compare_and_swapqhi (rtx, rtx, rtx, rtx, rtx, rtx);
extern void rs6000_split_lock_test_and_set (rtx, rtx, rtx, rtx);
extern void rs6000_emit_swdivsf (rtx, rtx, rtx);
extern void rs6000_emit_swdivdf (rtx, rtx, rtx);
extern void rs6000_emit_swrsqrtsf (rtx, rtx);
extern void output_toc (FILE *, rtx, int, enum machine_mode);
extern void rs6000_initialize_trampoline (rtx, rtx, rtx);
extern rtx rs6000_longcall_ref (rtx);
......
......@@ -799,6 +799,7 @@ static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int rs6000_use_sched_lookahead (void);
static int rs6000_use_sched_lookahead_guard (rtx);
static tree rs6000_builtin_reciprocal (unsigned int, bool, bool);
static tree rs6000_builtin_mask_for_load (void);
static tree rs6000_builtin_mul_widen_even (tree);
static tree rs6000_builtin_mul_widen_odd (tree);
......@@ -1213,6 +1214,9 @@ static const char alt_reg_names[][8] =
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
struct gcc_target targetm = TARGET_INITIALIZER;
......@@ -8652,6 +8656,15 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
rtx ret;
bool success;
if (fcode == RS6000_BUILTIN_RECIP)
return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
if (fcode == RS6000_BUILTIN_RECIPF)
return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
if (fcode == RS6000_BUILTIN_RSQRTF)
return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
if (fcode == ALTIVEC_BUILTIN_MASK_FOR_LOAD
|| fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
{
......@@ -8858,6 +8871,31 @@ rs6000_init_builtins (void)
altivec_init_builtins ();
if (TARGET_ALTIVEC || TARGET_SPE || TARGET_PAIRED_FLOAT)
rs6000_common_init_builtins ();
if (TARGET_PPC_GFXOPT)
{
tree ftype = build_function_type_list (float_type_node,
float_type_node,
float_type_node,
NULL_TREE);
def_builtin (MASK_PPC_GFXOPT, "__builtin_recipdivf", ftype,
RS6000_BUILTIN_RECIPF);
ftype = build_function_type_list (float_type_node,
float_type_node,
NULL_TREE);
def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrtf", ftype,
RS6000_BUILTIN_RSQRTF);
}
if (TARGET_POPCNTB)
{
tree ftype = build_function_type_list (double_type_node,
double_type_node,
double_type_node,
NULL_TREE);
def_builtin (MASK_POPCNTB, "__builtin_recipdiv", ftype,
RS6000_BUILTIN_RECIP);
}
#if TARGET_XCOFF
/* AIX libm provides clog as __clog. */
......@@ -20874,11 +20912,36 @@ rs6000_memory_move_cost (enum machine_mode mode, enum reg_class class,
return 4 + rs6000_register_move_cost (mode, class, GENERAL_REGS);
}
/* Implement TARGET_BUILTIN_RECIPROCAL.  Return the decl of the
   target-specific builtin that computes the reciprocal of builtin FN,
   or NULL_TREE when no such builtin is available.  MD_FN is true when
   FN names a machine-dependent builtin; none of those has a reciprocal
   form here.  SQRT is not consulted.  */

static tree
rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
                           bool sqrt ATTRIBUTE_UNUSED)
{
  /* The substitution is only offered when -mrecip is on, the target has
     the graphics-option instructions, we are not optimizing for size,
     and the math flags permit an inexact, non-trapping, finite-only
     result.  */
  if (!TARGET_RECIP || !TARGET_PPC_GFXOPT || optimize_size
      || !flag_finite_math_only || flag_trapping_math
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* No machine-dependent builtin is mapped to a reciprocal.  */
  if (md_fn)
    return NULL_TREE;

  /* Among the normal builtins only sqrtf has a reciprocal counterpart.  */
  if (fn == BUILT_IN_SQRTF)
    return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];

  return NULL_TREE;
}
/* Newton-Raphson approximation of single-precision floating point divide n/d.
Assumes no trapping math and finite arguments. */
void
rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
rs6000_emit_swdivsf (rtx dst, rtx n, rtx d)
{
rtx x0, e0, e1, y1, u0, v0, one;
......@@ -20913,8 +20976,8 @@ rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
emit_insn (gen_rtx_SET (VOIDmode, v0,
gen_rtx_MINUS (SFmode, n,
gen_rtx_MULT (SFmode, d, u0))));
/* res = u0 + v0 * y1 */
emit_insn (gen_rtx_SET (VOIDmode, res,
/* dst = u0 + v0 * y1 */
emit_insn (gen_rtx_SET (VOIDmode, dst,
gen_rtx_PLUS (SFmode,
gen_rtx_MULT (SFmode, v0, y1), u0)));
}
......@@ -20923,7 +20986,7 @@ rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
Assumes no trapping math and finite arguments. */
void
rs6000_emit_swdivdf (rtx res, rtx n, rtx d)
rs6000_emit_swdivdf (rtx dst, rtx n, rtx d)
{
rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
......@@ -20971,13 +21034,97 @@ rs6000_emit_swdivdf (rtx res, rtx n, rtx d)
emit_insn (gen_rtx_SET (VOIDmode, v0,
gen_rtx_MINUS (DFmode, n,
gen_rtx_MULT (DFmode, d, u0))));
/* res = u0 + v0 * y3 */
emit_insn (gen_rtx_SET (VOIDmode, res,
/* dst = u0 + v0 * y3 */
emit_insn (gen_rtx_SET (VOIDmode, dst,
gen_rtx_PLUS (DFmode,
gen_rtx_MULT (DFmode, v0, y3), u0)));
}
/* Newton-Raphson approximation of single-precision floating point rsqrt.
   Emit RTL that stores an approximation of 1/sqrt (SRC) into DST: the
   UNSPEC_RSQRT estimate of SRC is refined by three steps of
       x' = x * (1.5 - (0.5 * SRC) * (x * x)).
   When SRC * SRC == SRC the refinement is skipped and DST receives SRC
   itself.
   Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swrsqrtsf (rtx dst, rtx src)
{
  rtx x0, x1, x2, y1, u0, u1, u2, v0, v1, v2, t0,
    half, one, halfthree, c1, cond, label;

  x0 = gen_reg_rtx (SFmode);
  x1 = gen_reg_rtx (SFmode);
  x2 = gen_reg_rtx (SFmode);
  y1 = gen_reg_rtx (SFmode);
  u0 = gen_reg_rtx (SFmode);
  u1 = gen_reg_rtx (SFmode);
  u2 = gen_reg_rtx (SFmode);
  v0 = gen_reg_rtx (SFmode);
  v1 = gen_reg_rtx (SFmode);
  v2 = gen_reg_rtx (SFmode);
  t0 = gen_reg_rtx (SFmode);
  halfthree = gen_reg_rtx (SFmode);
  cond = gen_rtx_REG (CCFPmode, CR1_REGNO);
  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  /* The early exit below jumps straight to the final label, past every
     other store to DST.  Give DST a defined value first; without this it
     would be left unset on that path (e.g. for SRC == 1.0, where
     returning SRC is also the exact answer).  */
  if (dst != src)
    emit_insn (gen_rtx_SET (VOIDmode, dst, src));

  /* check 0.0, 1.0, Inf by testing src * src = src (a NaN would compare
     unordered rather than equal, so it is not caught here).
     NOTE(review): presumably these inputs are skipped because the
     estimate for 0.0 / Inf would turn the refinement into NaN -- confirm.  */
  emit_insn (gen_rtx_SET (VOIDmode, t0,
                          gen_rtx_MULT (SFmode, src, src)));

  emit_insn (gen_rtx_SET (VOIDmode, cond,
                          gen_rtx_COMPARE (CCFPmode, t0, src)));
  c1 = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (c1, label);

  half = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode));
  one = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, SFmode));

  /* halfthree = 1.5 = 1.0 + 0.5 */
  emit_insn (gen_rtx_SET (VOIDmode, halfthree,
                          gen_rtx_PLUS (SFmode, one, half)));

  /* x0 = rsqrt estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (SFmode, gen_rtvec (1, src),
                                          UNSPEC_RSQRT)));

  /* y1 = 0.5 * src = 1.5 * src - src -> fewer constants */
  emit_insn (gen_rtx_SET (VOIDmode, y1,
                          gen_rtx_MINUS (SFmode,
                                         gen_rtx_MULT (SFmode, src, halfthree),
                                         src)));

  /* x1 = x0 * (1.5 - y1 * (x0 * x0)) */
  emit_insn (gen_rtx_SET (VOIDmode, u0,
                          gen_rtx_MULT (SFmode, x0, x0)));
  emit_insn (gen_rtx_SET (VOIDmode, v0,
                          gen_rtx_MINUS (SFmode,
                                         halfthree,
                                         gen_rtx_MULT (SFmode, y1, u0))));
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MULT (SFmode, x0, v0)));

  /* x2 = x1 * (1.5 - y1 * (x1 * x1)) */
  emit_insn (gen_rtx_SET (VOIDmode, u1,
                          gen_rtx_MULT (SFmode, x1, x1)));
  emit_insn (gen_rtx_SET (VOIDmode, v1,
                          gen_rtx_MINUS (SFmode,
                                         halfthree,
                                         gen_rtx_MULT (SFmode, y1, u1))));
  emit_insn (gen_rtx_SET (VOIDmode, x2,
                          gen_rtx_MULT (SFmode, x1, v1)));

  /* dst = x2 * (1.5 - y1 * (x2 * x2)) */
  emit_insn (gen_rtx_SET (VOIDmode, u2,
                          gen_rtx_MULT (SFmode, x2, x2)));
  emit_insn (gen_rtx_SET (VOIDmode, v2,
                          gen_rtx_MINUS (SFmode,
                                         halfthree,
                                         gen_rtx_MULT (SFmode, y1, u2))));
  emit_insn (gen_rtx_SET (VOIDmode, dst,
                          gen_rtx_MULT (SFmode, x2, v2)));

  /* Target of the early exit taken when src * src == src.  */
  emit_label (XEXP (label, 0));
}
/* Emit popcount intrinsic on TARGET_POPCNTB targets. DST is the
target, and SRC is the argument operand. */
......
......@@ -2993,6 +2993,10 @@ enum rs6000_builtins
PAIRED_BUILTIN_CMPU0,
PAIRED_BUILTIN_CMPU1,
RS6000_BUILTIN_RECIP,
RS6000_BUILTIN_RECIPF,
RS6000_BUILTIN_RSQRTF,
RS6000_BUILTIN_COUNT
};
......
......@@ -98,6 +98,7 @@
(UNSPEC_DLMZB 45)
(UNSPEC_DLMZB_CR 46)
(UNSPEC_DLMZB_STRLEN 47)
(UNSPEC_RSQRT 48)
])
;;
......@@ -5197,26 +5198,12 @@
"{fm|fmul} %0,%1,%2"
[(set_attr "type" "dmul")])
(define_insn "fres"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
"TARGET_PPC_GFXOPT && flag_finite_math_only"
"fres %0,%1"
[(set_attr "type" "fp")])
(define_expand "divsf3"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(div:SF (match_operand:SF 1 "gpc_reg_operand" "")
(match_operand:SF 2 "gpc_reg_operand" "")))]
"TARGET_HARD_FLOAT"
{
if (swdiv && !optimize_size && TARGET_PPC_GFXOPT
&& flag_finite_math_only && !flag_trapping_math)
{
rs6000_emit_swdivsf (operands[0], operands[1], operands[2]);
DONE;
}
})
"")
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
......@@ -5234,6 +5221,25 @@
"{fd|fdiv} %0,%1,%2"
[(set_attr "type" "ddiv")])
;; Single-precision software divide.  Operand 0 is the result; operands 1
;; and 2 are handed to rs6000_emit_swdivsf as the numerator and the
;; denominator.  Enabled only with TARGET_RECIP, hard float and
;; TARGET_PPC_GFXOPT, when not optimizing for size and with finite,
;; non-trapping math.  The preparation code emits the whole sequence and
;; ends with DONE.
(define_expand "recipsf3"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")
(match_operand:SF 2 "gpc_reg_operand" "f")]
UNSPEC_FRES))]
"TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT && !optimize_size
&& flag_finite_math_only && !flag_trapping_math"
{
rs6000_emit_swdivsf (operands[0], operands[1], operands[2]);
DONE;
})
;; The fres instruction on an SFmode register, represented as a
;; one-operand UNSPEC_FRES.  Available with TARGET_PPC_GFXOPT when
;; flag_finite_math_only is set.
;; NOTE(review): presumably the reciprocal estimate that seeds
;; rs6000_emit_swdivsf -- that part of the function is not shown here.
(define_insn "fres"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
"TARGET_PPC_GFXOPT && flag_finite_math_only"
"fres %0,%1"
[(set_attr "type" "fp")])
(define_insn ""
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "gpc_reg_operand" "%f")
......@@ -5368,6 +5374,25 @@
"fsqrt %0,%1"
[(set_attr "type" "dsqrt")])
;; Single-precision software reciprocal square root.  Operand 0 is the
;; result and operand 1 the argument; both are passed to
;; rs6000_emit_swrsqrtsf, which emits the whole sequence before DONE.
;; Enabled under the same conditions as recipsf3: TARGET_RECIP, hard
;; float, TARGET_PPC_GFXOPT, not optimizing for size, and finite,
;; non-trapping math.
(define_expand "rsqrtsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
UNSPEC_RSQRT))]
"TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT && !optimize_size
&& flag_finite_math_only && !flag_trapping_math"
{
rs6000_emit_swrsqrtsf (operands[0], operands[1]);
DONE;
})
;; The frsqrte instruction on an SFmode register, represented as a
;; one-operand UNSPEC_RSQRT.  This is the form rs6000_emit_swrsqrtsf
;; generates for its initial estimate.  Requires hard float and
;; TARGET_PPC_GFXOPT.
(define_insn "*rsqrt_internal1"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
UNSPEC_RSQRT))]
"TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT"
"frsqrte %0,%1"
[(set_attr "type" "fp")])
(define_expand "copysignsf3"
[(set (match_dup 3)
(abs:SF (match_operand:SF 1 "gpc_reg_operand" "")))
......@@ -5599,26 +5624,12 @@
"{fm|fmul} %0,%1,%2"
[(set_attr "type" "dmul")])
(define_insn "fred"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
"TARGET_POPCNTB && flag_finite_math_only"
"fre %0,%1"
[(set_attr "type" "fp")])
(define_expand "divdf3"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(div:DF (match_operand:DF 1 "gpc_reg_operand" "")
(match_operand:DF 2 "gpc_reg_operand" "")))]
"TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)"
{
if (swdiv && !optimize_size && TARGET_POPCNTB
&& flag_finite_math_only && !flag_trapping_math)
{
rs6000_emit_swdivdf (operands[0], operands[1], operands[2]);
DONE;
}
})
"")
(define_insn "*divdf3_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
......@@ -5628,6 +5639,25 @@
"{fd|fdiv} %0,%1,%2"
[(set_attr "type" "ddiv")])
;; Double-precision software divide.  Operand 0 is the result; operands 1
;; and 2 are handed to rs6000_emit_swdivdf as the numerator and the
;; denominator.  Enabled only with TARGET_RECIP, hard float and
;; TARGET_POPCNTB, when not optimizing for size and with finite,
;; non-trapping math.  The preparation code emits the whole sequence and
;; ends with DONE.
(define_expand "recipdf3"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")
(match_operand:DF 2 "gpc_reg_operand" "f")]
UNSPEC_FRES))]
"TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_POPCNTB && !optimize_size
&& flag_finite_math_only && !flag_trapping_math"
{
rs6000_emit_swdivdf (operands[0], operands[1], operands[2]);
DONE;
})
;; The fre instruction on a DFmode register, represented as a
;; one-operand UNSPEC_FRES (the same unspec code the SFmode fres pattern
;; uses).  Available with TARGET_POPCNTB when flag_finite_math_only is set.
;; NOTE(review): presumably the reciprocal estimate that seeds
;; rs6000_emit_swdivdf -- that part of the function is not shown here.
(define_insn "fred"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
"TARGET_POPCNTB && flag_finite_math_only"
"fre %0,%1"
[(set_attr "type" "fp")])
(define_insn ""
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(plus:DF (mult:DF (match_operand:DF 1 "gpc_reg_operand" "%f")
......
......@@ -146,9 +146,9 @@ mxl-compat
Target Report Var(TARGET_XL_COMPAT)
Conform more closely to IBM XLC semantics
mswdiv
Target Report Var(swdiv)
Generate software floating point divide for better throughput
; -mrecip gates the software divide expanders (recipsf3, recipdf3) as well
; as the software reciprocal square root expander (rsqrtsf2).
mrecip
Target Report Var(TARGET_RECIP)
Generate software reciprocal divide and square root for better throughput
mno-fp-in-toc
Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment