Commit 04782385 by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/85466 (Performance is slow when doing 'branchless'…

re PR tree-optimization/85466 (Performance is slow when doing 'branchless' conditional style math operations)

	PR libstdc++/85466
	* real.h (real_nextafter): Declare.
	* real.c (real_nextafter): New function.
	* fold-const-call.c (fold_const_nextafter): New function.
	(fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and
	CASE_CFN_NEXTTOWARD.
	(fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss
	even when arg1_mode is different from arg0_mode.

	* gcc.dg/nextafter-1.c: New test.
	* gcc.dg/nextafter-2.c: New test.
	* gcc.dg/nextafter-3.c: New test.
	* gcc.dg/nextafter-4.c: New test.

From-SVN: r259921
parent 105073e1
2018-05-04 Jakub Jelinek <jakub@redhat.com>
PR libstdc++/85466
* real.h (real_nextafter): Declare.
* real.c (real_nextafter): New function.
* fold-const-call.c (fold_const_nextafter): New function.
(fold_const_call_sss): Call it for CASE_CFN_NEXTAFTER and
CASE_CFN_NEXTTOWARD.
(fold_const_call_1): For CASE_CFN_NEXTTOWARD call fold_const_call_sss
even when arg1_mode is different from arg0_mode.
2018-05-03 Nathan Sidwell <nathan@acm.org>
* doc/extend.texi (Deprecated Features): Remove
......
...@@ -529,6 +529,48 @@ fold_const_pow (real_value *result, const real_value *arg0, ...@@ -529,6 +529,48 @@ fold_const_pow (real_value *result, const real_value *arg0,
/* Constant-fold
     *RESULT = nextafter (*ARG0, *ARG1)
   or
     *RESULT = nexttoward (*ARG0, *ARG1)
   for floating-point format FORMAT.  Returns true when *RESULT holds the
   folded value, false when the call has to be left alone.  */

static bool
fold_const_nextafter (real_value *result, const real_value *arg0,
                      const real_value *arg1, const real_format *format)
{
  /* A signaling NaN in either operand is never folded.  */
  if (REAL_VALUE_ISSIGNALING_NAN (*arg0))
    return false;
  if (REAL_VALUE_ISSIGNALING_NAN (*arg1))
    return false;

  /* For now only non-composite, non-decimal formats that have both
     infinities and denormals are handled.  */
  const bool supported_format = (format->pnan >= format->p
                                 && format->b != 10
                                 && format->has_inf
                                 && format->has_denorm);
  if (!supported_format)
    return false;

  /* real_nextafter stores the value in *RESULT and reports whether
     underflow or overflow would be raised (with errno set to ERANGE).
     Give up if we care about those side-effects.  */
  const bool range_error = real_nextafter (result, format, arg0, arg1);
  if (range_error && (flag_trapping_math || flag_errno_math))
    return false;

  /* nextafter (0, 1) raises underflow as well, which matters only when
     trapping math is enabled.  */
  if (flag_trapping_math
      && arg0->cl == rvc_zero
      && result->cl != rvc_zero)
    return false;

  real_convert (result, format, result);
  return true;
}
/* Try to evaluate:
*RESULT = ldexp (*ARG0, ARG1)
in format FORMAT. Return true on success. */
...@@ -1260,6 +1302,10 @@ fold_const_call_sss (real_value *result, combined_fn fn, ...@@ -1260,6 +1302,10 @@ fold_const_call_sss (real_value *result, combined_fn fn,
CASE_CFN_POW: CASE_CFN_POW:
return fold_const_pow (result, arg0, arg1, format); return fold_const_pow (result, arg0, arg1, format);
CASE_CFN_NEXTAFTER:
CASE_CFN_NEXTTOWARD:
return fold_const_nextafter (result, arg0, arg1, format);
default: default:
return false; return false;
} }
...@@ -1365,20 +1411,33 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1) ...@@ -1365,20 +1411,33 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1)
machine_mode arg0_mode = TYPE_MODE (TREE_TYPE (arg0)); machine_mode arg0_mode = TYPE_MODE (TREE_TYPE (arg0));
machine_mode arg1_mode = TYPE_MODE (TREE_TYPE (arg1)); machine_mode arg1_mode = TYPE_MODE (TREE_TYPE (arg1));
if (arg0_mode == arg1_mode if (mode == arg0_mode
&& real_cst_p (arg0) && real_cst_p (arg0)
&& real_cst_p (arg1)) && real_cst_p (arg1))
{ {
gcc_checking_assert (SCALAR_FLOAT_MODE_P (arg0_mode)); gcc_checking_assert (SCALAR_FLOAT_MODE_P (arg0_mode));
if (mode == arg0_mode) REAL_VALUE_TYPE result;
if (arg0_mode == arg1_mode)
{ {
/* real, real -> real. */ /* real, real -> real. */
REAL_VALUE_TYPE result;
if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0), if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
TREE_REAL_CST_PTR (arg1), TREE_REAL_CST_PTR (arg1),
REAL_MODE_FORMAT (mode))) REAL_MODE_FORMAT (mode)))
return build_real (type, result); return build_real (type, result);
} }
else if (arg1_mode == TYPE_MODE (long_double_type_node))
switch (fn)
{
CASE_CFN_NEXTTOWARD:
/* real, long double -> real. */
if (fold_const_call_sss (&result, fn, TREE_REAL_CST_PTR (arg0),
TREE_REAL_CST_PTR (arg1),
REAL_MODE_FORMAT (mode)))
return build_real (type, result);
break;
default:
break;
}
return NULL_TREE; return NULL_TREE;
} }
......
...@@ -5048,6 +5048,102 @@ real_isinteger (const REAL_VALUE_TYPE *c, HOST_WIDE_INT *int_out) ...@@ -5048,6 +5048,102 @@ real_isinteger (const REAL_VALUE_TYPE *c, HOST_WIDE_INT *int_out)
return false; return false;
} }
/* Calculate nextafter (X, Y) or nexttoward (X, Y) and store the result
   in *R, using FMT for the precision and exponent range.  Return true if
   underflow or overflow needs to be raised, false otherwise.

   NOTE(review): a nonzero denormal result makes this return false.  Some
   libm implementations report underflow / ERANGE for subnormal results
   too -- confirm whether callers need that case flagged.  */
bool
real_nextafter (REAL_VALUE_TYPE *r, format_helper fmt,
const REAL_VALUE_TYPE *x, const REAL_VALUE_TYPE *y)
{
int cmp = do_compare (x, y, 2);
/* If either operand is NaN, return qNaN.  (Presumably the 2 passed to
   do_compare above is the value it hands back for unordered operands --
   confirm against do_compare.)  */
if (cmp == 2)
{
get_canonical_qnan (r, 0);
return false;
}
/* If x == y, return y cast to target type.  */
if (cmp == 0)
{
real_convert (r, fmt, y);
return false;
}
/* X is zero and Y compares unequal to it: the result has only the top
   significand bit set, exponent emin - p + 1 and Y's sign.  nextafter-1.c
   expects this to equal the type's DENORM_MIN.  No range error is
   reported from here; fold_const_nextafter checks the zero-argument case
   itself when trapping math is enabled.  */
if (x->cl == rvc_zero)
{
get_zero (r, y->sign);
r->cl = rvc_normal;
SET_REAL_EXP (r, fmt->emin - fmt->p + 1);
r->sig[SIGSZ - 1] = SIG_MSB;
return false;
}
/* NP2 is the index of the significand bit that gets stepped; U below
   holds just that one bit.  */
int np2 = SIGNIFICAND_BITS - fmt->p;
/* For denormals adjust np2 correspondingly.  */
if (x->cl == rvc_normal && REAL_EXP (x) < fmt->emin)
np2 += fmt->emin - REAL_EXP (x);
REAL_VALUE_TYPE u;
/* Start R as a normal value with X's sign and exponent and an all-zero
   significand; U is the step.  */
get_zero (r, x->sign);
get_zero (&u, 0);
set_significand_bit (&u, np2);
r->cl = rvc_normal;
SET_REAL_EXP (r, REAL_EXP (x));
if (x->cl == rvc_inf)
{
/* X is infinite: subtracting U from the zero significand has to borrow.
   With the exponent forced to emax the result is meant to be the largest
   finite value (nextafter-1.c checks nextafter (inf, 0) == MAX).  */
bool borrow = sub_significands (r, r, &u, 0);
gcc_assert (borrow);
SET_REAL_EXP (r, fmt->emax);
}
/* Taken when X is negative and CMP is 1, or X is non-negative and CMP
   is -1, i.e. (assuming do_compare yields 1 / -1 for greater / less) the
   step increases the magnitude of X.  */
else if (cmp == (x->sign ? 1 : -1))
{
if (add_significands (r, x, &u))
{
/* Overflow.  Means the significand had been all ones, and
   is now all zeros.  Need to increase the exponent, and
   possibly re-normalize it.  */
SET_REAL_EXP (r, REAL_EXP (r) + 1);
if (REAL_EXP (r) > fmt->emax)
{
/* Stepped past the largest finite value: infinity with X's sign, and
   the caller is told a range error occurred.  */
get_inf (r, x->sign);
return true;
}
r->sig[SIGSZ - 1] = SIG_MSB;
}
}
else
{
/* Otherwise the step decreases the magnitude of X.  First detect a
   significand with only its top bit set on a value whose exponent is
   above emin.  */
if (REAL_EXP (x) > fmt->emin && x->sig[SIGSZ - 1] == SIG_MSB)
{
int i;
for (i = SIGSZ - 2; i >= 0; i--)
if (x->sig[i])
break;
if (i < 0)
{
/* When mantissa is 1.0, we need to subtract only
   half of u: nextafter (1.0, 0.0) is 1.0 - __DBL_EPSILON__ / 2
   rather than 1.0 - __DBL_EPSILON__.  */
clear_significand_bit (&u, np2);
np2--;
set_significand_bit (&u, np2);
}
}
sub_significands (r, x, &u, 0);
}
/* Clear out trailing garbage.  */
clear_significand_below (r, np2);
normalize (r);
/* An exponent at or below emin - p is replaced by a zero carrying X's
   sign, and a range error is reported.  */
if (REAL_EXP (r) <= fmt->emin - fmt->p)
{
get_zero (r, x->sign);
return true;
}
/* A result that normalize turned into zero is reported as a range
   error as well.  */
return r->cl == rvc_zero;
}
/* Write into BUF the maximum representable finite floating-point /* Write into BUF the maximum representable finite floating-point
number, (1 - b**-p) * b**emax for a given FP format FMT as a hex number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
float string. LEN is the size of BUF, and the buffer must be large float string. LEN is the size of BUF, and the buffer must be large
......
...@@ -507,6 +507,10 @@ extern void real_copysign (REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *); ...@@ -507,6 +507,10 @@ extern void real_copysign (REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
extern bool real_isinteger (const REAL_VALUE_TYPE *, format_helper); extern bool real_isinteger (const REAL_VALUE_TYPE *, format_helper);
extern bool real_isinteger (const REAL_VALUE_TYPE *, HOST_WIDE_INT *); extern bool real_isinteger (const REAL_VALUE_TYPE *, HOST_WIDE_INT *);
/* Calculate nextafter (X, Y) or nexttoward (X, Y) in format FMT and
   store the result through the first argument.  Returns true if
   underflow or overflow needs to be raised.  */
extern bool real_nextafter (REAL_VALUE_TYPE *, format_helper,
const REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *);
/* Write into BUF the maximum representable finite floating-point /* Write into BUF the maximum representable finite floating-point
number, (1 - b**-p) * b**emax for a given FP format FMT as a hex number, (1 - b**-p) * b**emax for a given FP format FMT as a hex
float string. BUF must be large enough to contain the result. */ float string. BUF must be large enough to contain the result. */
......
2018-05-04 Jakub Jelinek <jakub@redhat.com>
PR libstdc++/85466
* gcc.dg/nextafter-1.c: New test.
* gcc.dg/nextafter-2.c: New test.
* gcc.dg/nextafter-3.c: New test.
* gcc.dg/nextafter-4.c: New test.
2018-05-03 Nathan Sidwell <nathan@acm.org>
Remove -ffriend-injection.
......
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fno-math-errno -fno-trapping-math -fdump-tree-optimized" } */
/* { dg-add-options ieee } */
/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
/* The functions under test are declared by hand; no header is
   included.  */
float nextafterf (float, float);
double nextafter (double, double);
long double nextafterl (long double, long double);
float nexttowardf (float, long double);
double nexttoward (double, long double);
long double nexttowardl (long double, long double);
/* Abort the test when condition X does not hold.  */
#define CHECK(x) if (!(x)) __builtin_abort ()
/* NEED_ERRNO and NEED_EXC default to 0.  The tests that include this
   file (nextafter-3.c, nextafter-4.c) predefine them to 1; TEST then
   replaces some calls by their expected values instead of evaluating
   them.  */
#ifndef NEED_ERRNO
#define NEED_ERRNO 0
#endif
#ifndef NEED_EXC
#define NEED_EXC 0
#endif
/* Define a function NAME that checks FN, a nextafter or nexttoward
   variant returning TYPE, on constant arguments and aborts through CHECK
   on any mismatch.
   L1 and L2 are the literal suffixes for the first and second argument.
   l1 and l2 are the suffixes pasted onto __builtin_nan, __builtin_isnan,
   __builtin_isinf and __builtin_inf.
   MIN1, MAX1, DENORM_MIN1 and EPSILON1 are the limits of the first
   argument's (and result's) type; MIN2, MAX2 and DENORM_MIN2 those of the
   second argument's type.
   When NEED_EXC is nonzero the call for e is replaced by its expected
   value; when NEED_EXC or NEED_ERRNO is nonzero the same is done for l
   and m.  */
#define TEST(name, fn, type, L1, L2, l1, l2, MIN1, \
MAX1, DENORM_MIN1, EPSILON1, MIN2, MAX2, DENORM_MIN2) \
void \
name (void) \
{ \
const type a = fn (0.0##L1, 0.0##L2); \
CHECK (a == 0.0##L1 && !__builtin_signbit (a)); \
const type b = fn (0.0##L1, -0.0##L2); \
CHECK (b == 0.0##L1 && __builtin_signbit (b)); \
const type c = fn (__builtin_nan##l1 (""), 0.0##L2); \
CHECK (__builtin_isnan##l1 (c)); \
const type d = fn (2.0##L1, __builtin_nan##l2 ("")); \
CHECK (__builtin_isnan##l1 (d)); \
const type e = NEED_EXC ? DENORM_MIN1 : fn (0.0##L1, 8.0##L2); \
CHECK (e == DENORM_MIN1); \
const type f = fn (1.0##L1, 8.0##L2); \
CHECK (f == 1.0##L1 + EPSILON1); \
const type g = fn (1.0##L1, -8.0##L2); \
CHECK (g == 1.0##L1 - EPSILON1 / 2.0##L1); \
const type h = fn (__builtin_inf (), 0.0##L2); \
CHECK (h == MAX1); \
const type i = fn (-1.0##L1, -__builtin_inf ()); \
CHECK (i == -1.0##L1 - EPSILON1); \
const type j = fn (1.5##L1, __builtin_inf ()); \
CHECK (j == 1.5##L1 + EPSILON1); \
const type k = fn (1.5##L1 - EPSILON1, 100.0##L2); \
CHECK (k == 1.5##L1); \
const type l \
= (NEED_EXC || NEED_ERRNO) ? 0.0##L1 : fn (DENORM_MIN1, 0.0##L2); \
CHECK (l == 0.0##L1 && !__builtin_signbit (l)); \
const type m \
= (NEED_EXC || NEED_ERRNO) ? __builtin_inf##l1 () \
: fn (MAX1, __builtin_inf ()); \
CHECK (__builtin_isinf##l1 (m) && !__builtin_signbit (m)); \
const type n = fn (DENORM_MIN1, 12.0##L2); \
CHECK (n == 2.0##L1 * DENORM_MIN1); \
const type o = fn (n, 24.0##L2); \
CHECK (o == 3.0##L1 * DENORM_MIN1); \
const type p = fn (o, 132.0##L2); \
CHECK (p == 4.0##L1 * DENORM_MIN1); \
const type q = fn (2.0##L1 * DENORM_MIN1, -__builtin_inf ()); \
CHECK (q == DENORM_MIN1); \
const type r = fn (3.0##L1 * DENORM_MIN1, DENORM_MIN2); \
CHECK (r == 2.0##L1 * DENORM_MIN1); \
const type s = fn (4.0##L1 * DENORM_MIN1, 2.0##L2 * DENORM_MIN2); \
CHECK (s == 3.0##L1 * DENORM_MIN1); \
const type t = fn (MIN1, 0.0##L2); \
CHECK (t == MIN1 - DENORM_MIN1); \
const type u = fn (MIN1 - DENORM_MIN1, -MIN2); \
CHECK (u == MIN1 - 2.0##L1 * DENORM_MIN1); \
const type v = fn (MIN1 - 2.0##L1 * DENORM_MIN1, 100.0##L2); \
CHECK (v == MIN1 - DENORM_MIN1); \
const type w = fn (MIN1 - DENORM_MIN1, MAX2); \
CHECK (w == MIN1); \
const type x = fn (MIN1, 17.0##L2); \
CHECK (x == MIN1 + DENORM_MIN1); \
const type y = fn (MIN1 + DENORM_MIN1, __builtin_inf##l2 ()); \
CHECK (y == MIN1 + 2.0##L1 * DENORM_MIN1); \
const type z = fn (MIN1 / 2.0##L1, -MIN2); \
CHECK (z == MIN1 / 2.0##L1 - DENORM_MIN1); \
const type aa = fn (-MIN1 / 4.0##L1, MIN2); \
CHECK (aa == -MIN1 / 4.0##L1 + DENORM_MIN1); \
const type ab = fn (MIN1 * 2.0##L1, -MIN2); \
CHECK (ab == MIN1 * 2.0##L1 - DENORM_MIN1); \
const type ac = fn (MIN1 * 4.0##L1, MIN2); \
CHECK (ac == MIN1 * 4.0##L1 - DENORM_MIN1 * 2.0##L1); \
const type ad = fn (MIN1 * 64.0##L1, MIN2); \
CHECK (ad == MIN1 * 64.0##L1 - DENORM_MIN1 * 32.0##L1); \
const type ae = fn (MIN1 / 2.0##L1 - DENORM_MIN1, 100.0##L2); \
CHECK (ae == MIN1 / 2.0##L1); \
const type af = fn (-MIN1 / 4 + DENORM_MIN1, -100.0##L2); \
CHECK (af == -MIN1 / 4.0##L1); \
const type ag = fn (MIN1 * 2.0##L1 - DENORM_MIN1, 100.0##L2); \
CHECK (ag == MIN1 * 2.0##L1); \
const type ah = fn (MIN1 * 4.0##L1 - 2.0##L1 * DENORM_MIN1, 100.0##L2); \
CHECK (ah == MIN1 * 4.0##L1); \
const type ai = fn (MIN1 * 64.0##L1 - 32.0##L1 * DENORM_MIN1, 100.0##L2); \
CHECK (ai == MIN1 * 64.0##L1); \
const type aj = fn (MIN1 * 64.0##L1, 100.0##L2); \
CHECK (aj == MIN1 * 64.0##L1 + 64.0##L1 * DENORM_MIN1); \
const type ak = fn (MIN1 * 64.0##L1 + DENORM_MIN1 * 64.0##L1, 1024.0##L2); \
CHECK (ak == MIN1 * 64.0##L1 + 128.0##L1 * DENORM_MIN1); \
const type al = fn (128.0##L1, 128.0##L2); \
CHECK (al == 128.0##L1); \
const type am = fn (128.0##L1, 129.0##L2); \
CHECK (am == 128.0##L1 + 128.0##L1 * EPSILON1); \
const type an = fn (-128.0##L1 + -128.0##L1 * EPSILON1, -130.0##L2); \
CHECK (an == -128.0##L1 - 256.0##L1 * EPSILON1); \
const type ao = fn (128.0##L1 + 256.0##L1 * EPSILON1, 256.0##L2); \
CHECK (ao == 128.0##L1 + 384.0##L1 * EPSILON1); \
const type ap = fn (128.0##L1 + 384.0##L1 * EPSILON1, -0.0##L2); \
CHECK (ap == 128.0##L1 + 256.0##L1 * EPSILON1); \
const type aq = fn (128.0##L1 + 256.0##L1 * EPSILON1, 1.0##L2); \
CHECK (aq == 128.0##L1 + 128.0##L1 * EPSILON1); \
const type ar = fn (128.0##L1 + 128.0##L1 * EPSILON1, 0.0##L2); \
CHECK (ar == 128.0##L1); \
const type as = fn (128.0##L1, 0.0##L2); \
CHECK (as == 128.0##L1 - 64.0##L1 * EPSILON1); \
const type at = fn (128.0##L1 - 64.0##L1 * EPSILON1, 5.0##L2); \
CHECK (at == 128.0##L1 - 128.0##L1 * EPSILON1); \
}
/* float and double nextafter checks are always instantiated.  */
TEST (test1, nextafterf, float, F, F, f, f, __FLT_MIN__, __FLT_MAX__,
__FLT_DENORM_MIN__, __FLT_EPSILON__, __FLT_MIN__, __FLT_MAX__,
__FLT_DENORM_MIN__)
TEST (test2, nextafter, double, , , , , __DBL_MIN__, __DBL_MAX__,
__DBL_DENORM_MIN__, __DBL_EPSILON__, __DBL_MIN__, __DBL_MAX__,
__DBL_DENORM_MIN__)
/* Every check with a long double operand is left out when long double
   has a 106-bit mantissa -- presumably the IBM double-double format,
   which fold_const_nextafter would reject as a composite mode; confirm.
   For the nexttoward variants the second-argument limits are the long
   double ones.  */
#if __LDBL_MANT_DIG__ != 106
TEST (test3, nextafterl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
TEST (test4, nexttowardf, float, F, L, f, l, __FLT_MIN__, __FLT_MAX__,
__FLT_DENORM_MIN__, __FLT_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
TEST (test5, nexttoward, double, , L, , l, __DBL_MIN__, __DBL_MAX__,
__DBL_DENORM_MIN__, __DBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
TEST (test6, nexttowardl, long double, L, L, l, l, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__, __LDBL_EPSILON__, __LDBL_MIN__, __LDBL_MAX__,
__LDBL_DENORM_MIN__)
#endif
/* Run every check function generated by TEST.  Each one aborts on a
   mismatch, so returning 0 means all of them passed.  */
int
main ()
{
  test1 ();
  test2 ();
#if __LDBL_MANT_DIG__ != 106
  /* These exist only under the same guard as their definitions.  */
  test3 ();
  test4 ();
  test5 ();
  test6 ();
#endif
  return 0;
}
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fno-builtin" } */
/* { dg-add-options ieee } */
#include "nextafter-1.c"
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fmath-errno -fno-trapping-math -fdump-tree-optimized" } */
/* { dg-add-options ieee } */
/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
#define NEED_ERRNO 1
#include "nextafter-1.c"
/* PR libstdc++/85466 */
/* { dg-do run } */
/* { dg-options "-O2 -fmath-errno -ftrapping-math -fdump-tree-optimized" } */
/* { dg-add-options ieee } */
/* { dg-final { scan-tree-dump-not "nextafter" "optimized" } } */
/* { dg-final { scan-tree-dump-not "nexttoward" "optimized" } } */
#define NEED_ERRNO 1
#define NEED_EXC 1
#include "nextafter-1.c"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment