Commit 5f606431 by Joseph Myers Committed by Joseph Myers

Update soft-fp from glibc.

This patch updates libgcc's copy of soft-fp from glibc, adding a
testcase for a bug fix this brings in.

Bootstrapped with no regressions on x86_64-unknown-linux-gnu.

libgcc:
	* soft-fp/double.h: Update from glibc.
	* soft-fp/eqdf2.c: Likewise.
	* soft-fp/eqsf2.c: Likewise.
	* soft-fp/eqtf2.c: Likewise.
	* soft-fp/extenddftf2.c: Likewise.
	* soft-fp/extended.h: Likewise.
	* soft-fp/extendsfdf2.c: Likewise.
	* soft-fp/extendsftf2.c: Likewise.
	* soft-fp/extendxftf2.c: Likewise.
	* soft-fp/gedf2.c: Likewise.
	* soft-fp/gesf2.c: Likewise.
	* soft-fp/getf2.c: Likewise.
	* soft-fp/ledf2.c: Likewise.
	* soft-fp/lesf2.c: Likewise.
	* soft-fp/letf2.c: Likewise.
	* soft-fp/op-1.h: Likewise.
	* soft-fp/op-2.h: Likewise.
	* soft-fp/op-4.h: Likewise.
	* soft-fp/op-8.h: Likewise.
	* soft-fp/op-common.h: Likewise.
	* soft-fp/quad.h: Likewise.
	* soft-fp/single.h: Likewise.
	* soft-fp/soft-fp.h: Likewise.
	* soft-fp/unorddf2.c: Likewise.
	* soft-fp/unordsf2.c: Likewise.
	* soft-fp/unordtf2.c: Likewise.
	* config/c6x/eqd.c (__c6xabi_eqd): Update call to FP_CMP_EQ_D.
	* config/c6x/eqf.c (__c6xabi_eqf): Update call to FP_CMP_EQ_S.
	* config/c6x/ged.c (__c6xabi_ged): Update call to FP_CMP_D.
	* config/c6x/gef.c (__c6xabi_gef): Update call to FP_CMP_S.
	* config/c6x/gtd.c (__c6xabi_gtd): Update call to FP_CMP_D.
	* config/c6x/gtf.c (__c6xabi_gtf): Update call to FP_CMP_S.
	* config/c6x/led.c (__c6xabi_led): Update call to FP_CMP_D.
	* config/c6x/lef.c (__c6xabi_lef): Update call to FP_CMP_S.
	* config/c6x/ltd.c (__c6xabi_ltd): Update call to FP_CMP_D.
	* config/c6x/ltf.c (__c6xabi_ltf): Update call to FP_CMP_S.

gcc/testsuite:
	* gcc.dg/torture/float128-extendxf-underflow.c: New test.

From-SVN: r216048
parent c3866869
2014-10-09 Joseph Myers <joseph@codesourcery.com>
* gcc.dg/torture/float128-extendxf-underflow.c: New test.
2014-10-09 Markus Trippelsdorf <markus@trippelsdorf.de>
* g++.dg/ipa/polymorphic-call-1.C: New testcase.
......
/* Test that extension from XFmode to __float128 raises underflow for
exact tiny values, if trapping on underflow is enabled. */
/* { dg-do run { target i?86-*-*gnu* x86_64-*-*gnu* ia64-*-*gnu* } } */
/* { dg-options "-D_GNU_SOURCE" } */
/* { dg-require-effective-target fenv_exceptions } */
#include <fenv.h>
#include <setjmp.h>
#include <signal.h>
#include <stdlib.h>
/* Set to 1 by handle_sigfpe when SIGFPE is delivered; main checks it to
   confirm that the signal was actually caught.  */
volatile sig_atomic_t caught_sigfpe;
/* Jump context saved by sigsetjmp in main; handle_sigfpe returns control
   to it with siglongjmp.  */
sigjmp_buf buf;
/* SIGFPE handler: record that the signal arrived, then jump back to the
   sigsetjmp point saved in BUF instead of returning normally.  SIG is
   not used.  */
static void
handle_sigfpe (int sig)
{
  (void) sig;
  caught_sigfpe = 1;
  siglongjmp (buf, 1);
}
/* Convert a tiny long double value to __float128 twice.  The first
   conversion runs with default exception handling and must neither set
   the underflow flag nor change the value.  The second runs after the
   underflow trap has been enabled and must deliver SIGFPE.  Any
   deviation aborts; success exits with status 0.  */
int
main (void)
{
  volatile long double tiny = 0x1p-16384L;
  volatile __float128 widened;

  /* Untrapped conversion: no underflow flag, value preserved exactly.  */
  widened = tiny;
  if (fetestexcept (FE_UNDERFLOW) != 0)
    abort ();
  if (!(widened == 0x1p-16384q))
    abort ();

  /* Enable trapping on underflow and install the handler before
     repeating the same conversion.  */
  feenableexcept (FE_UNDERFLOW);
  signal (SIGFPE, handle_sigfpe);
  if (!sigsetjmp (buf, 1))
    widened = tiny;

  /* The handler must have run during the trapped conversion.  */
  if (caught_sigfpe == 0)
    abort ();
  exit (0);
}
2014-10-09 Joseph Myers <joseph@codesourcery.com>
* soft-fp/double.h: Update from glibc.
* soft-fp/eqdf2.c: Likewise.
* soft-fp/eqsf2.c: Likewise.
* soft-fp/eqtf2.c: Likewise.
* soft-fp/extenddftf2.c: Likewise.
* soft-fp/extended.h: Likewise.
* soft-fp/extendsfdf2.c: Likewise.
* soft-fp/extendsftf2.c: Likewise.
* soft-fp/extendxftf2.c: Likewise.
* soft-fp/gedf2.c: Likewise.
* soft-fp/gesf2.c: Likewise.
* soft-fp/getf2.c: Likewise.
* soft-fp/ledf2.c: Likewise.
* soft-fp/lesf2.c: Likewise.
* soft-fp/letf2.c: Likewise.
* soft-fp/op-1.h: Likewise.
* soft-fp/op-2.h: Likewise.
* soft-fp/op-4.h: Likewise.
* soft-fp/op-8.h: Likewise.
* soft-fp/op-common.h: Likewise.
* soft-fp/quad.h: Likewise.
* soft-fp/single.h: Likewise.
* soft-fp/soft-fp.h: Likewise.
* soft-fp/unorddf2.c: Likewise.
* soft-fp/unordsf2.c: Likewise.
* soft-fp/unordtf2.c: Likewise.
* config/c6x/eqd.c (__c6xabi_eqd): Update call to FP_CMP_EQ_D.
* config/c6x/eqf.c (__c6xabi_eqf): Update call to FP_CMP_EQ_S.
* config/c6x/ged.c (__c6xabi_ged): Update call to FP_CMP_D.
* config/c6x/gef.c (__c6xabi_gef): Update call to FP_CMP_S.
* config/c6x/gtd.c (__c6xabi_gtd): Update call to FP_CMP_D.
* config/c6x/gtf.c (__c6xabi_gtf): Update call to FP_CMP_S.
* config/c6x/led.c (__c6xabi_led): Update call to FP_CMP_D.
* config/c6x/lef.c (__c6xabi_lef): Update call to FP_CMP_S.
* config/c6x/ltd.c (__c6xabi_ltd): Update call to FP_CMP_D.
* config/c6x/ltf.c (__c6xabi_ltf): Update call to FP_CMP_S.
2014-10-08 Rong Xu <xur@google.com>
* libgcov-util.c (read_gcda_file): Fix format.
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_eqd(DFtype a, DFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_eqd(DFtype a, DFtype b)
FP_UNPACK_RAW_D(A, a); FP_UNPACK_RAW_D(A, a);
FP_UNPACK_RAW_D(B, b); FP_UNPACK_RAW_D(B, b);
FP_CMP_EQ_D(r, A, B); FP_CMP_EQ_D(r, A, B, 1);
if (r && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return !r; return !r;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_eqf(SFtype a, SFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_eqf(SFtype a, SFtype b)
FP_UNPACK_RAW_S(A, a); FP_UNPACK_RAW_S(A, a);
FP_UNPACK_RAW_S(B, b); FP_UNPACK_RAW_S(B, b);
FP_CMP_EQ_S(r, A, B); FP_CMP_EQ_S(r, A, B, 1);
if (r && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return !r; return !r;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_ged(DFtype a, DFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_ged(DFtype a, DFtype b)
FP_UNPACK_RAW_D(A, a); FP_UNPACK_RAW_D(A, a);
FP_UNPACK_RAW_D(B, b); FP_UNPACK_RAW_D(B, b);
FP_CMP_D(r, A, B, -2); FP_CMP_D(r, A, B, -2, 2);
if (r == -2 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r >= 0; return r >= 0;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_gef(SFtype a, SFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_gef(SFtype a, SFtype b)
FP_UNPACK_RAW_S(A, a); FP_UNPACK_RAW_S(A, a);
FP_UNPACK_RAW_S(B, b); FP_UNPACK_RAW_S(B, b);
FP_CMP_S(r, A, B, -2); FP_CMP_S(r, A, B, -2, 2);
if (r == -2 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r >= 0; return r >= 0;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_gtd(DFtype a, DFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_gtd(DFtype a, DFtype b)
FP_UNPACK_RAW_D(A, a); FP_UNPACK_RAW_D(A, a);
FP_UNPACK_RAW_D(B, b); FP_UNPACK_RAW_D(B, b);
FP_CMP_D(r, A, B, -2); FP_CMP_D(r, A, B, -2, 2);
if (r == -2 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r > 0; return r > 0;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_gtf(SFtype a, SFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_gtf(SFtype a, SFtype b)
FP_UNPACK_RAW_S(A, a); FP_UNPACK_RAW_S(A, a);
FP_UNPACK_RAW_S(B, b); FP_UNPACK_RAW_S(B, b);
FP_CMP_S(r, A, B, -2); FP_CMP_S(r, A, B, -2, 2);
if (r == -2 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r > 0; return r > 0;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_led(DFtype a, DFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_led(DFtype a, DFtype b)
FP_UNPACK_RAW_D(A, a); FP_UNPACK_RAW_D(A, a);
FP_UNPACK_RAW_D(B, b); FP_UNPACK_RAW_D(B, b);
FP_CMP_D(r, A, B, 2); FP_CMP_D(r, A, B, 2, 2);
if (r == 2 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r <= 0; return r <= 0;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_lef(SFtype a, SFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_lef(SFtype a, SFtype b)
FP_UNPACK_RAW_S(A, a); FP_UNPACK_RAW_S(A, a);
FP_UNPACK_RAW_S(B, b); FP_UNPACK_RAW_S(B, b);
FP_CMP_S(r, A, B, 2); FP_CMP_S(r, A, B, 2, 2);
if (r == 2 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r <= 0; return r <= 0;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_ltd(DFtype a, DFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_ltd(DFtype a, DFtype b)
FP_UNPACK_RAW_D(A, a); FP_UNPACK_RAW_D(A, a);
FP_UNPACK_RAW_D(B, b); FP_UNPACK_RAW_D(B, b);
FP_CMP_D(r, A, B, 2); FP_CMP_D(r, A, B, 2, 2);
if (r == 2 && (FP_ISSIGNAN_D(A) || FP_ISSIGNAN_D(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r < 0; return r < 0;
......
...@@ -38,9 +38,7 @@ CMPtype __c6xabi_ltf(SFtype a, SFtype b) ...@@ -38,9 +38,7 @@ CMPtype __c6xabi_ltf(SFtype a, SFtype b)
FP_UNPACK_RAW_S(A, a); FP_UNPACK_RAW_S(A, a);
FP_UNPACK_RAW_S(B, b); FP_UNPACK_RAW_S(B, b);
FP_CMP_S(r, A, B, 2); FP_CMP_S(r, A, B, 2, 2);
if (r == 2 && (FP_ISSIGNAN_S(A) || FP_ISSIGNAN_S(B)))
FP_SET_EXCEPTION(FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r < 0; return r < 0;
......
...@@ -90,21 +90,21 @@ union _FP_UNION_D ...@@ -90,21 +90,21 @@ union _FP_UNION_D
}; };
# define FP_DECL_D(X) _FP_DECL (2, X) # define FP_DECL_D(X) _FP_DECL (2, X)
# define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_2 (D, X, val) # define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_2 (D, X, (val))
# define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_2_P (D, X, val) # define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_2_P (D, X, (val))
# define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_2 (D, val, X) # define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_2 (D, (val), X)
# define FP_PACK_RAW_DP(val, X) \ # define FP_PACK_RAW_DP(val, X) \
do \ do \
{ \ { \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_2_P (D, val, X); \ _FP_PACK_RAW_2_P (D, (val), X); \
} \ } \
while (0) while (0)
# define FP_UNPACK_D(X, val) \ # define FP_UNPACK_D(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2 (D, X, val); \ _FP_UNPACK_RAW_2 (D, X, (val)); \
_FP_UNPACK_CANONICAL (D, 2, X); \ _FP_UNPACK_CANONICAL (D, 2, X); \
} \ } \
while (0) while (0)
...@@ -112,7 +112,7 @@ union _FP_UNION_D ...@@ -112,7 +112,7 @@ union _FP_UNION_D
# define FP_UNPACK_DP(X, val) \ # define FP_UNPACK_DP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2_P (D, X, val); \ _FP_UNPACK_RAW_2_P (D, X, (val)); \
_FP_UNPACK_CANONICAL (D, 2, X); \ _FP_UNPACK_CANONICAL (D, 2, X); \
} \ } \
while (0) while (0)
...@@ -120,7 +120,7 @@ union _FP_UNION_D ...@@ -120,7 +120,7 @@ union _FP_UNION_D
# define FP_UNPACK_SEMIRAW_D(X, val) \ # define FP_UNPACK_SEMIRAW_D(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2 (D, X, val); \ _FP_UNPACK_RAW_2 (D, X, (val)); \
_FP_UNPACK_SEMIRAW (D, 2, X); \ _FP_UNPACK_SEMIRAW (D, 2, X); \
} \ } \
while (0) while (0)
...@@ -128,7 +128,7 @@ union _FP_UNION_D ...@@ -128,7 +128,7 @@ union _FP_UNION_D
# define FP_UNPACK_SEMIRAW_DP(X, val) \ # define FP_UNPACK_SEMIRAW_DP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2_P (D, X, val); \ _FP_UNPACK_RAW_2_P (D, X, (val)); \
_FP_UNPACK_SEMIRAW (D, 2, X); \ _FP_UNPACK_SEMIRAW (D, 2, X); \
} \ } \
while (0) while (0)
...@@ -137,7 +137,7 @@ union _FP_UNION_D ...@@ -137,7 +137,7 @@ union _FP_UNION_D
do \ do \
{ \ { \
_FP_PACK_CANONICAL (D, 2, X); \ _FP_PACK_CANONICAL (D, 2, X); \
_FP_PACK_RAW_2 (D, val, X); \ _FP_PACK_RAW_2 (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -146,7 +146,7 @@ union _FP_UNION_D ...@@ -146,7 +146,7 @@ union _FP_UNION_D
{ \ { \
_FP_PACK_CANONICAL (D, 2, X); \ _FP_PACK_CANONICAL (D, 2, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_2_P (D, val, X); \ _FP_PACK_RAW_2_P (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -154,7 +154,7 @@ union _FP_UNION_D ...@@ -154,7 +154,7 @@ union _FP_UNION_D
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (D, 2, X); \ _FP_PACK_SEMIRAW (D, 2, X); \
_FP_PACK_RAW_2 (D, val, X); \ _FP_PACK_RAW_2 (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -163,7 +163,7 @@ union _FP_UNION_D ...@@ -163,7 +163,7 @@ union _FP_UNION_D
{ \ { \
_FP_PACK_SEMIRAW (D, 2, X); \ _FP_PACK_SEMIRAW (D, 2, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_2_P (D, val, X); \ _FP_PACK_RAW_2_P (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -174,15 +174,15 @@ union _FP_UNION_D ...@@ -174,15 +174,15 @@ union _FP_UNION_D
# define FP_MUL_D(R, X, Y) _FP_MUL (D, 2, R, X, Y) # define FP_MUL_D(R, X, Y) _FP_MUL (D, 2, R, X, Y)
# define FP_DIV_D(R, X, Y) _FP_DIV (D, 2, R, X, Y) # define FP_DIV_D(R, X, Y) _FP_DIV (D, 2, R, X, Y)
# define FP_SQRT_D(R, X) _FP_SQRT (D, 2, R, X) # define FP_SQRT_D(R, X) _FP_SQRT (D, 2, R, X)
# define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_2 (R, S, T, X, Q) # define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_2 (R, S, T, X, (Q))
# define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 2, 4, R, X, Y, Z) # define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 2, 4, R, X, Y, Z)
# define FP_CMP_D(r, X, Y, un) _FP_CMP (D, 2, r, X, Y, un) # define FP_CMP_D(r, X, Y, un, ex) _FP_CMP (D, 2, (r), X, Y, (un), (ex))
# define FP_CMP_EQ_D(r, X, Y) _FP_CMP_EQ (D, 2, r, X, Y) # define FP_CMP_EQ_D(r, X, Y, ex) _FP_CMP_EQ (D, 2, (r), X, Y, (ex))
# define FP_CMP_UNORD_D(r, X, Y) _FP_CMP_UNORD (D, 2, r, X, Y) # define FP_CMP_UNORD_D(r, X, Y, ex) _FP_CMP_UNORD (D, 2, (r), X, Y, (ex))
# define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 2, r, X, rsz, rsg) # define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 2, (r), X, (rsz), (rsg))
# define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 2, X, r, rs, rt) # define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 2, X, (r), (rs), rt)
# define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_2 (X) # define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_2 (X)
# define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_2 (X) # define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_2 (X)
...@@ -209,21 +209,21 @@ union _FP_UNION_D ...@@ -209,21 +209,21 @@ union _FP_UNION_D
}; };
# define FP_DECL_D(X) _FP_DECL (1, X) # define FP_DECL_D(X) _FP_DECL (1, X)
# define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_1 (D, X, val) # define FP_UNPACK_RAW_D(X, val) _FP_UNPACK_RAW_1 (D, X, (val))
# define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_1_P (D, X, val) # define FP_UNPACK_RAW_DP(X, val) _FP_UNPACK_RAW_1_P (D, X, (val))
# define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_1 (D, val, X) # define FP_PACK_RAW_D(val, X) _FP_PACK_RAW_1 (D, (val), X)
# define FP_PACK_RAW_DP(val, X) \ # define FP_PACK_RAW_DP(val, X) \
do \ do \
{ \ { \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_1_P (D, val, X); \ _FP_PACK_RAW_1_P (D, (val), X); \
} \ } \
while (0) while (0)
# define FP_UNPACK_D(X, val) \ # define FP_UNPACK_D(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1 (D, X, val); \ _FP_UNPACK_RAW_1 (D, X, (val)); \
_FP_UNPACK_CANONICAL (D, 1, X); \ _FP_UNPACK_CANONICAL (D, 1, X); \
} \ } \
while (0) while (0)
...@@ -231,7 +231,7 @@ union _FP_UNION_D ...@@ -231,7 +231,7 @@ union _FP_UNION_D
# define FP_UNPACK_DP(X, val) \ # define FP_UNPACK_DP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1_P (D, X, val); \ _FP_UNPACK_RAW_1_P (D, X, (val)); \
_FP_UNPACK_CANONICAL (D, 1, X); \ _FP_UNPACK_CANONICAL (D, 1, X); \
} \ } \
while (0) while (0)
...@@ -239,7 +239,7 @@ union _FP_UNION_D ...@@ -239,7 +239,7 @@ union _FP_UNION_D
# define FP_UNPACK_SEMIRAW_D(X, val) \ # define FP_UNPACK_SEMIRAW_D(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1 (D, X, val); \ _FP_UNPACK_RAW_1 (D, X, (val)); \
_FP_UNPACK_SEMIRAW (D, 1, X); \ _FP_UNPACK_SEMIRAW (D, 1, X); \
} \ } \
while (0) while (0)
...@@ -247,7 +247,7 @@ union _FP_UNION_D ...@@ -247,7 +247,7 @@ union _FP_UNION_D
# define FP_UNPACK_SEMIRAW_DP(X, val) \ # define FP_UNPACK_SEMIRAW_DP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1_P (D, X, val); \ _FP_UNPACK_RAW_1_P (D, X, (val)); \
_FP_UNPACK_SEMIRAW (D, 1, X); \ _FP_UNPACK_SEMIRAW (D, 1, X); \
} \ } \
while (0) while (0)
...@@ -256,7 +256,7 @@ union _FP_UNION_D ...@@ -256,7 +256,7 @@ union _FP_UNION_D
do \ do \
{ \ { \
_FP_PACK_CANONICAL (D, 1, X); \ _FP_PACK_CANONICAL (D, 1, X); \
_FP_PACK_RAW_1 (D, val, X); \ _FP_PACK_RAW_1 (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -265,7 +265,7 @@ union _FP_UNION_D ...@@ -265,7 +265,7 @@ union _FP_UNION_D
{ \ { \
_FP_PACK_CANONICAL (D, 1, X); \ _FP_PACK_CANONICAL (D, 1, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_1_P (D, val, X); \ _FP_PACK_RAW_1_P (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -273,7 +273,7 @@ union _FP_UNION_D ...@@ -273,7 +273,7 @@ union _FP_UNION_D
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (D, 1, X); \ _FP_PACK_SEMIRAW (D, 1, X); \
_FP_PACK_RAW_1 (D, val, X); \ _FP_PACK_RAW_1 (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -282,7 +282,7 @@ union _FP_UNION_D ...@@ -282,7 +282,7 @@ union _FP_UNION_D
{ \ { \
_FP_PACK_SEMIRAW (D, 1, X); \ _FP_PACK_SEMIRAW (D, 1, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_1_P (D, val, X); \ _FP_PACK_RAW_1_P (D, (val), X); \
} \ } \
while (0) while (0)
...@@ -293,18 +293,18 @@ union _FP_UNION_D ...@@ -293,18 +293,18 @@ union _FP_UNION_D
# define FP_MUL_D(R, X, Y) _FP_MUL (D, 1, R, X, Y) # define FP_MUL_D(R, X, Y) _FP_MUL (D, 1, R, X, Y)
# define FP_DIV_D(R, X, Y) _FP_DIV (D, 1, R, X, Y) # define FP_DIV_D(R, X, Y) _FP_DIV (D, 1, R, X, Y)
# define FP_SQRT_D(R, X) _FP_SQRT (D, 1, R, X) # define FP_SQRT_D(R, X) _FP_SQRT (D, 1, R, X)
# define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, Q) # define _FP_SQRT_MEAT_D(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, (Q))
# define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 1, 2, R, X, Y, Z) # define FP_FMA_D(R, X, Y, Z) _FP_FMA (D, 1, 2, R, X, Y, Z)
/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by /* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
the target machine. */ the target machine. */
# define FP_CMP_D(r, X, Y, un) _FP_CMP (D, 1, r, X, Y, un) # define FP_CMP_D(r, X, Y, un, ex) _FP_CMP (D, 1, (r), X, Y, (un), (ex))
# define FP_CMP_EQ_D(r, X, Y) _FP_CMP_EQ (D, 1, r, X, Y) # define FP_CMP_EQ_D(r, X, Y, ex) _FP_CMP_EQ (D, 1, (r), X, Y, (ex))
# define FP_CMP_UNORD_D(r, X, Y) _FP_CMP_UNORD (D, 1, r, X, Y) # define FP_CMP_UNORD_D(r, X, Y, ex) _FP_CMP_UNORD (D, 1, (r), X, Y, (ex))
# define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 1, r, X, rsz, rsg) # define FP_TO_INT_D(r, X, rsz, rsg) _FP_TO_INT (D, 1, (r), X, (rsz), (rsg))
# define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 1, X, r, rs, rt) # define FP_FROM_INT_D(X, r, rs, rt) _FP_FROM_INT (D, 1, X, (r), (rs), rt)
# define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_1 (X) # define _FP_FRAC_HIGH_D(X) _FP_FRAC_HIGH_1 (X)
# define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_1 (X) # define _FP_FRAC_HIGH_RAW_D(X) _FP_FRAC_HIGH_1 (X)
......
...@@ -42,9 +42,7 @@ __eqdf2 (DFtype a, DFtype b) ...@@ -42,9 +42,7 @@ __eqdf2 (DFtype a, DFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_D (A, a); FP_UNPACK_RAW_D (A, a);
FP_UNPACK_RAW_D (B, b); FP_UNPACK_RAW_D (B, b);
FP_CMP_EQ_D (r, A, B); FP_CMP_EQ_D (r, A, B, 1);
if (r && (FP_ISSIGNAN_D (A) || FP_ISSIGNAN_D (B)))
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -42,9 +42,7 @@ __eqsf2 (SFtype a, SFtype b) ...@@ -42,9 +42,7 @@ __eqsf2 (SFtype a, SFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_S (A, a); FP_UNPACK_RAW_S (A, a);
FP_UNPACK_RAW_S (B, b); FP_UNPACK_RAW_S (B, b);
FP_CMP_EQ_S (r, A, B); FP_CMP_EQ_S (r, A, B, 1);
if (r && (FP_ISSIGNAN_S (A) || FP_ISSIGNAN_S (B)))
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -42,9 +42,7 @@ __eqtf2 (TFtype a, TFtype b) ...@@ -42,9 +42,7 @@ __eqtf2 (TFtype a, TFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_Q (A, a); FP_UNPACK_RAW_Q (A, a);
FP_UNPACK_RAW_Q (B, b); FP_UNPACK_RAW_Q (B, b);
FP_CMP_EQ_Q (r, A, B); FP_CMP_EQ_Q (r, A, B, 1);
if (r && (FP_ISSIGNAN_Q (A) || FP_ISSIGNAN_Q (B)))
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
License along with the GNU C Library; if not, see License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */ <http://www.gnu.org/licenses/>. */
#define FP_NO_EXACT_UNDERFLOW
#include "soft-fp.h" #include "soft-fp.h"
#include "double.h" #include "double.h"
#include "quad.h" #include "quad.h"
......
...@@ -91,76 +91,78 @@ union _FP_UNION_E ...@@ -91,76 +91,78 @@ union _FP_UNION_E
# define FP_DECL_E(X) _FP_DECL (4, X) # define FP_DECL_E(X) _FP_DECL (4, X)
# define FP_UNPACK_RAW_E(X, val) \ # define FP_UNPACK_RAW_E(X, val) \
do \ do \
{ \ { \
union _FP_UNION_E _flo; \ union _FP_UNION_E FP_UNPACK_RAW_E_flo; \
_flo.flt = (val); \ FP_UNPACK_RAW_E_flo.flt = (val); \
\ \
X##_f[2] = 0; \ X##_f[2] = 0; \
X##_f[3] = 0; \ X##_f[3] = 0; \
X##_f[0] = _flo.bits.frac0; \ X##_f[0] = FP_UNPACK_RAW_E_flo.bits.frac0; \
X##_f[1] = _flo.bits.frac1; \ X##_f[1] = FP_UNPACK_RAW_E_flo.bits.frac1; \
X##_e = _flo.bits.exp; \ X##_e = FP_UNPACK_RAW_E_flo.bits.exp; \
X##_s = _flo.bits.sign; \ X##_s = FP_UNPACK_RAW_E_flo.bits.sign; \
} \ } \
while (0) while (0)
# define FP_UNPACK_RAW_EP(X, val) \ # define FP_UNPACK_RAW_EP(X, val) \
do \ do \
{ \ { \
union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val); \ union _FP_UNION_E *FP_UNPACK_RAW_EP_flo \
\ = (union _FP_UNION_E *) (val); \
X##_f[2] = 0; \ \
X##_f[3] = 0; \ X##_f[2] = 0; \
X##_f[0] = _flo->bits.frac0; \ X##_f[3] = 0; \
X##_f[1] = _flo->bits.frac1; \ X##_f[0] = FP_UNPACK_RAW_EP_flo->bits.frac0; \
X##_e = _flo->bits.exp; \ X##_f[1] = FP_UNPACK_RAW_EP_flo->bits.frac1; \
X##_s = _flo->bits.sign; \ X##_e = FP_UNPACK_RAW_EP_flo->bits.exp; \
} \ X##_s = FP_UNPACK_RAW_EP_flo->bits.sign; \
} \
while (0) while (0)
# define FP_PACK_RAW_E(val, X) \ # define FP_PACK_RAW_E(val, X) \
do \ do \
{ \ { \
union _FP_UNION_E _flo; \ union _FP_UNION_E FP_PACK_RAW_E_flo; \
\ \
if (X##_e) \ if (X##_e) \
X##_f[1] |= _FP_IMPLBIT_E; \ X##_f[1] |= _FP_IMPLBIT_E; \
else \ else \
X##_f[1] &= ~(_FP_IMPLBIT_E); \ X##_f[1] &= ~(_FP_IMPLBIT_E); \
_flo.bits.frac0 = X##_f[0]; \ FP_PACK_RAW_E_flo.bits.frac0 = X##_f[0]; \
_flo.bits.frac1 = X##_f[1]; \ FP_PACK_RAW_E_flo.bits.frac1 = X##_f[1]; \
_flo.bits.exp = X##_e; \ FP_PACK_RAW_E_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \ FP_PACK_RAW_E_flo.bits.sign = X##_s; \
\ \
(val) = _flo.flt; \ (val) = FP_PACK_RAW_E_flo.flt; \
} \ } \
while (0) while (0)
# define FP_PACK_RAW_EP(val, X) \ # define FP_PACK_RAW_EP(val, X) \
do \ do \
{ \ { \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
{ \ { \
union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val); \ union _FP_UNION_E *FP_PACK_RAW_EP_flo \
\ = (union _FP_UNION_E *) (val); \
if (X##_e) \ \
X##_f[1] |= _FP_IMPLBIT_E; \ if (X##_e) \
else \ X##_f[1] |= _FP_IMPLBIT_E; \
X##_f[1] &= ~(_FP_IMPLBIT_E); \ else \
_flo->bits.frac0 = X##_f[0]; \ X##_f[1] &= ~(_FP_IMPLBIT_E); \
_flo->bits.frac1 = X##_f[1]; \ FP_PACK_RAW_EP_flo->bits.frac0 = X##_f[0]; \
_flo->bits.exp = X##_e; \ FP_PACK_RAW_EP_flo->bits.frac1 = X##_f[1]; \
_flo->bits.sign = X##_s; \ FP_PACK_RAW_EP_flo->bits.exp = X##_e; \
} \ FP_PACK_RAW_EP_flo->bits.sign = X##_s; \
} \ } \
} \
while (0) while (0)
# define FP_UNPACK_E(X, val) \ # define FP_UNPACK_E(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_E (X, val); \ FP_UNPACK_RAW_E (X, (val)); \
_FP_UNPACK_CANONICAL (E, 4, X); \ _FP_UNPACK_CANONICAL (E, 4, X); \
} \ } \
while (0) while (0)
...@@ -168,7 +170,7 @@ union _FP_UNION_E ...@@ -168,7 +170,7 @@ union _FP_UNION_E
# define FP_UNPACK_EP(X, val) \ # define FP_UNPACK_EP(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_EP (X, val); \ FP_UNPACK_RAW_EP (X, (val)); \
_FP_UNPACK_CANONICAL (E, 4, X); \ _FP_UNPACK_CANONICAL (E, 4, X); \
} \ } \
while (0) while (0)
...@@ -176,7 +178,7 @@ union _FP_UNION_E ...@@ -176,7 +178,7 @@ union _FP_UNION_E
# define FP_UNPACK_SEMIRAW_E(X, val) \ # define FP_UNPACK_SEMIRAW_E(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_E (X, val); \ FP_UNPACK_RAW_E (X, (val)); \
_FP_UNPACK_SEMIRAW (E, 4, X); \ _FP_UNPACK_SEMIRAW (E, 4, X); \
} \ } \
while (0) while (0)
...@@ -184,7 +186,7 @@ union _FP_UNION_E ...@@ -184,7 +186,7 @@ union _FP_UNION_E
# define FP_UNPACK_SEMIRAW_EP(X, val) \ # define FP_UNPACK_SEMIRAW_EP(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_EP (X, val); \ FP_UNPACK_RAW_EP (X, (val)); \
_FP_UNPACK_SEMIRAW (E, 4, X); \ _FP_UNPACK_SEMIRAW (E, 4, X); \
} \ } \
while (0) while (0)
...@@ -193,7 +195,7 @@ union _FP_UNION_E ...@@ -193,7 +195,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_CANONICAL (E, 4, X); \ _FP_PACK_CANONICAL (E, 4, X); \
FP_PACK_RAW_E (val, X); \ FP_PACK_RAW_E ((val), X); \
} \ } \
while (0) while (0)
...@@ -201,7 +203,7 @@ union _FP_UNION_E ...@@ -201,7 +203,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_CANONICAL (E, 4, X); \ _FP_PACK_CANONICAL (E, 4, X); \
FP_PACK_RAW_EP (val, X); \ FP_PACK_RAW_EP ((val), X); \
} \ } \
while (0) while (0)
...@@ -209,7 +211,7 @@ union _FP_UNION_E ...@@ -209,7 +211,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (E, 4, X); \ _FP_PACK_SEMIRAW (E, 4, X); \
FP_PACK_RAW_E (val, X); \ FP_PACK_RAW_E ((val), X); \
} \ } \
while (0) while (0)
...@@ -217,7 +219,7 @@ union _FP_UNION_E ...@@ -217,7 +219,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (E, 4, X); \ _FP_PACK_SEMIRAW (E, 4, X); \
FP_PACK_RAW_EP (val, X); \ FP_PACK_RAW_EP ((val), X); \
} \ } \
while (0) while (0)
...@@ -230,50 +232,48 @@ union _FP_UNION_E ...@@ -230,50 +232,48 @@ union _FP_UNION_E
# define FP_SQRT_E(R, X) _FP_SQRT (E, 4, R, X) # define FP_SQRT_E(R, X) _FP_SQRT (E, 4, R, X)
# define FP_FMA_E(R, X, Y, Z) _FP_FMA (E, 4, 8, R, X, Y, Z) # define FP_FMA_E(R, X, Y, Z) _FP_FMA (E, 4, 8, R, X, Y, Z)
/* /* Square root algorithms:
* Square root algorithms: We have just one right now, maybe Newton approximation
* We have just one right now, maybe Newton approximation should be added for those machines where division is fast.
* should be added for those machines where division is fast. This has special _E version because standard _4 square
* This has special _E version because standard _4 square root would not work (it has to start normally with the
* root would not work (it has to start normally with the second word and not the first), but as we have to do it
* second word and not the first), but as we have to do it anyway, we optimize it by doing most of the calculations
* anyway, we optimize it by doing most of the calculations in two UWtype registers instead of four. */
* in two UWtype registers instead of four.
*/
# define _FP_SQRT_MEAT_E(R, S, T, X, q) \ # define _FP_SQRT_MEAT_E(R, S, T, X, q) \
do \ do \
{ \ { \
q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \ (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
_FP_FRAC_SRL_4 (X, (_FP_WORKBITS)); \ _FP_FRAC_SRL_4 (X, (_FP_WORKBITS)); \
while (q) \ while (q) \
{ \ { \
T##_f[1] = S##_f[1] + q; \ T##_f[1] = S##_f[1] + (q); \
if (T##_f[1] <= X##_f[1]) \ if (T##_f[1] <= X##_f[1]) \
{ \ { \
S##_f[1] = T##_f[1] + q; \ S##_f[1] = T##_f[1] + (q); \
X##_f[1] -= T##_f[1]; \ X##_f[1] -= T##_f[1]; \
R##_f[1] += q; \ R##_f[1] += (q); \
} \ } \
_FP_FRAC_SLL_2 (X, 1); \ _FP_FRAC_SLL_2 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \ (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
while (q) \ while (q) \
{ \ { \
T##_f[0] = S##_f[0] + q; \ T##_f[0] = S##_f[0] + (q); \
T##_f[1] = S##_f[1]; \ T##_f[1] = S##_f[1]; \
if (T##_f[1] < X##_f[1] \ if (T##_f[1] < X##_f[1] \
|| (T##_f[1] == X##_f[1] \ || (T##_f[1] == X##_f[1] \
&& T##_f[0] <= X##_f[0])) \ && T##_f[0] <= X##_f[0])) \
{ \ { \
S##_f[0] = T##_f[0] + q; \ S##_f[0] = T##_f[0] + (q); \
S##_f[1] += (T##_f[0] > S##_f[0]); \ S##_f[1] += (T##_f[0] > S##_f[0]); \
_FP_FRAC_DEC_2 (X, T); \ _FP_FRAC_DEC_2 (X, T); \
R##_f[0] += q; \ R##_f[0] += (q); \
} \ } \
_FP_FRAC_SLL_2 (X, 1); \ _FP_FRAC_SLL_2 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
_FP_FRAC_SLL_4 (R, (_FP_WORKBITS)); \ _FP_FRAC_SLL_4 (R, (_FP_WORKBITS)); \
if (X##_f[0] | X##_f[1]) \ if (X##_f[0] | X##_f[1]) \
...@@ -287,12 +287,12 @@ union _FP_UNION_E ...@@ -287,12 +287,12 @@ union _FP_UNION_E
} \ } \
while (0) while (0)
# define FP_CMP_E(r, X, Y, un) _FP_CMP (E, 4, r, X, Y, un) # define FP_CMP_E(r, X, Y, un, ex) _FP_CMP (E, 4, (r), X, Y, (un), (ex))
# define FP_CMP_EQ_E(r, X, Y) _FP_CMP_EQ (E, 4, r, X, Y) # define FP_CMP_EQ_E(r, X, Y, ex) _FP_CMP_EQ (E, 4, (r), X, Y, (ex))
# define FP_CMP_UNORD_E(r, X, Y) _FP_CMP_UNORD (E, 4, r, X, Y) # define FP_CMP_UNORD_E(r, X, Y, ex) _FP_CMP_UNORD (E, 4, (r), X, Y, (ex))
# define FP_TO_INT_E(r, X, rsz, rsg) _FP_TO_INT (E, 4, r, X, rsz, rsg) # define FP_TO_INT_E(r, X, rsz, rsg) _FP_TO_INT (E, 4, (r), X, (rsz), (rsg))
# define FP_FROM_INT_E(X, r, rs, rt) _FP_FROM_INT (E, 4, X, r, rs, rt) # define FP_FROM_INT_E(X, r, rs, rt) _FP_FROM_INT (E, 4, X, (r), (rs), rt)
# define _FP_FRAC_HIGH_E(X) (X##_f[2]) # define _FP_FRAC_HIGH_E(X) (X##_f[2])
# define _FP_FRAC_HIGH_RAW_E(X) (X##_f[1]) # define _FP_FRAC_HIGH_RAW_E(X) (X##_f[1])
...@@ -323,68 +323,70 @@ union _FP_UNION_E ...@@ -323,68 +323,70 @@ union _FP_UNION_E
# define FP_UNPACK_RAW_E(X, val) \ # define FP_UNPACK_RAW_E(X, val) \
do \ do \
{ \ { \
union _FP_UNION_E _flo; \ union _FP_UNION_E FP_UNPACK_RAW_E_flo; \
_flo.flt = (val); \ FP_UNPACK_RAW_E_flo.flt = (val); \
\ \
X##_f0 = _flo.bits.frac; \ X##_f0 = FP_UNPACK_RAW_E_flo.bits.frac; \
X##_f1 = 0; \ X##_f1 = 0; \
X##_e = _flo.bits.exp; \ X##_e = FP_UNPACK_RAW_E_flo.bits.exp; \
X##_s = _flo.bits.sign; \ X##_s = FP_UNPACK_RAW_E_flo.bits.sign; \
} \ } \
while (0) while (0)
# define FP_UNPACK_RAW_EP(X, val) \ # define FP_UNPACK_RAW_EP(X, val) \
do \ do \
{ \ { \
union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val); \ union _FP_UNION_E *FP_UNPACK_RAW_EP_flo \
\ = (union _FP_UNION_E *) (val); \
X##_f0 = _flo->bits.frac; \ \
X##_f1 = 0; \ X##_f0 = FP_UNPACK_RAW_EP_flo->bits.frac; \
X##_e = _flo->bits.exp; \ X##_f1 = 0; \
X##_s = _flo->bits.sign; \ X##_e = FP_UNPACK_RAW_EP_flo->bits.exp; \
} \ X##_s = FP_UNPACK_RAW_EP_flo->bits.sign; \
} \
while (0) while (0)
# define FP_PACK_RAW_E(val, X) \ # define FP_PACK_RAW_E(val, X) \
do \ do \
{ \ { \
union _FP_UNION_E _flo; \ union _FP_UNION_E FP_PACK_RAW_E_flo; \
\ \
if (X##_e) \ if (X##_e) \
X##_f0 |= _FP_IMPLBIT_E; \ X##_f0 |= _FP_IMPLBIT_E; \
else \ else \
X##_f0 &= ~(_FP_IMPLBIT_E); \ X##_f0 &= ~(_FP_IMPLBIT_E); \
_flo.bits.frac = X##_f0; \ FP_PACK_RAW_E_flo.bits.frac = X##_f0; \
_flo.bits.exp = X##_e; \ FP_PACK_RAW_E_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \ FP_PACK_RAW_E_flo.bits.sign = X##_s; \
\ \
(val) = _flo.flt; \ (val) = FP_PACK_RAW_E_flo.flt; \
} \ } \
while (0) while (0)
# define FP_PACK_RAW_EP(fs, val, X) \ # define FP_PACK_RAW_EP(fs, val, X) \
do \ do \
{ \ { \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
{ \ { \
union _FP_UNION_E *_flo = (union _FP_UNION_E *) (val); \ union _FP_UNION_E *FP_PACK_RAW_EP_flo \
\ = (union _FP_UNION_E *) (val); \
if (X##_e) \ \
X##_f0 |= _FP_IMPLBIT_E; \ if (X##_e) \
else \ X##_f0 |= _FP_IMPLBIT_E; \
X##_f0 &= ~(_FP_IMPLBIT_E); \ else \
_flo->bits.frac = X##_f0; \ X##_f0 &= ~(_FP_IMPLBIT_E); \
_flo->bits.exp = X##_e; \ FP_PACK_RAW_EP_flo->bits.frac = X##_f0; \
_flo->bits.sign = X##_s; \ FP_PACK_RAW_EP_flo->bits.exp = X##_e; \
} \ FP_PACK_RAW_EP_flo->bits.sign = X##_s; \
} \ } \
} \
while (0) while (0)
# define FP_UNPACK_E(X, val) \ # define FP_UNPACK_E(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_E (X, val); \ FP_UNPACK_RAW_E (X, (val)); \
_FP_UNPACK_CANONICAL (E, 2, X); \ _FP_UNPACK_CANONICAL (E, 2, X); \
} \ } \
while (0) while (0)
...@@ -392,7 +394,7 @@ union _FP_UNION_E ...@@ -392,7 +394,7 @@ union _FP_UNION_E
# define FP_UNPACK_EP(X, val) \ # define FP_UNPACK_EP(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_EP (X, val); \ FP_UNPACK_RAW_EP (X, (val)); \
_FP_UNPACK_CANONICAL (E, 2, X); \ _FP_UNPACK_CANONICAL (E, 2, X); \
} \ } \
while (0) while (0)
...@@ -400,7 +402,7 @@ union _FP_UNION_E ...@@ -400,7 +402,7 @@ union _FP_UNION_E
# define FP_UNPACK_SEMIRAW_E(X, val) \ # define FP_UNPACK_SEMIRAW_E(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_E (X, val); \ FP_UNPACK_RAW_E (X, (val)); \
_FP_UNPACK_SEMIRAW (E, 2, X); \ _FP_UNPACK_SEMIRAW (E, 2, X); \
} \ } \
while (0) while (0)
...@@ -408,7 +410,7 @@ union _FP_UNION_E ...@@ -408,7 +410,7 @@ union _FP_UNION_E
# define FP_UNPACK_SEMIRAW_EP(X, val) \ # define FP_UNPACK_SEMIRAW_EP(X, val) \
do \ do \
{ \ { \
FP_UNPACK_RAW_EP (X, val); \ FP_UNPACK_RAW_EP (X, (val)); \
_FP_UNPACK_SEMIRAW (E, 2, X); \ _FP_UNPACK_SEMIRAW (E, 2, X); \
} \ } \
while (0) while (0)
...@@ -417,7 +419,7 @@ union _FP_UNION_E ...@@ -417,7 +419,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_CANONICAL (E, 2, X); \ _FP_PACK_CANONICAL (E, 2, X); \
FP_PACK_RAW_E (val, X); \ FP_PACK_RAW_E ((val), X); \
} \ } \
while (0) while (0)
...@@ -425,7 +427,7 @@ union _FP_UNION_E ...@@ -425,7 +427,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_CANONICAL (E, 2, X); \ _FP_PACK_CANONICAL (E, 2, X); \
FP_PACK_RAW_EP (val, X); \ FP_PACK_RAW_EP ((val), X); \
} \ } \
while (0) while (0)
...@@ -433,7 +435,7 @@ union _FP_UNION_E ...@@ -433,7 +435,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (E, 2, X); \ _FP_PACK_SEMIRAW (E, 2, X); \
FP_PACK_RAW_E (val, X); \ FP_PACK_RAW_E ((val), X); \
} \ } \
while (0) while (0)
...@@ -441,7 +443,7 @@ union _FP_UNION_E ...@@ -441,7 +443,7 @@ union _FP_UNION_E
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (E, 2, X); \ _FP_PACK_SEMIRAW (E, 2, X); \
FP_PACK_RAW_EP (val, X); \ FP_PACK_RAW_EP ((val), X); \
} \ } \
while (0) while (0)
...@@ -454,30 +456,28 @@ union _FP_UNION_E ...@@ -454,30 +456,28 @@ union _FP_UNION_E
# define FP_SQRT_E(R, X) _FP_SQRT (E, 2, R, X) # define FP_SQRT_E(R, X) _FP_SQRT (E, 2, R, X)
# define FP_FMA_E(R, X, Y, Z) _FP_FMA (E, 2, 4, R, X, Y, Z) # define FP_FMA_E(R, X, Y, Z) _FP_FMA (E, 2, 4, R, X, Y, Z)
/* /* Square root algorithms:
* Square root algorithms: We have just one right now, maybe Newton approximation
* We have just one right now, maybe Newton approximation should be added for those machines where division is fast.
* should be added for those machines where division is fast. We optimize it by doing most of the calculations
* We optimize it by doing most of the calculations in one UWtype registers instead of two, although we don't
* in one UWtype registers instead of two, although we don't have to. */
* have to.
*/
# define _FP_SQRT_MEAT_E(R, S, T, X, q) \ # define _FP_SQRT_MEAT_E(R, S, T, X, q) \
do \ do \
{ \ { \
q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \ (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
_FP_FRAC_SRL_2 (X, (_FP_WORKBITS)); \ _FP_FRAC_SRL_2 (X, (_FP_WORKBITS)); \
while (q) \ while (q) \
{ \ { \
T##_f0 = S##_f0 + q; \ T##_f0 = S##_f0 + (q); \
if (T##_f0 <= X##_f0) \ if (T##_f0 <= X##_f0) \
{ \ { \
S##_f0 = T##_f0 + q; \ S##_f0 = T##_f0 + (q); \
X##_f0 -= T##_f0; \ X##_f0 -= T##_f0; \
R##_f0 += q; \ R##_f0 += (q); \
} \ } \
_FP_FRAC_SLL_1 (X, 1); \ _FP_FRAC_SLL_1 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
_FP_FRAC_SLL_2 (R, (_FP_WORKBITS)); \ _FP_FRAC_SLL_2 (R, (_FP_WORKBITS)); \
if (X##_f0) \ if (X##_f0) \
...@@ -489,12 +489,12 @@ union _FP_UNION_E ...@@ -489,12 +489,12 @@ union _FP_UNION_E
} \ } \
while (0) while (0)
# define FP_CMP_E(r, X, Y, un) _FP_CMP (E, 2, r, X, Y, un) # define FP_CMP_E(r, X, Y, un, ex) _FP_CMP (E, 2, (r), X, Y, (un), (ex))
# define FP_CMP_EQ_E(r, X, Y) _FP_CMP_EQ (E, 2, r, X, Y) # define FP_CMP_EQ_E(r, X, Y, ex) _FP_CMP_EQ (E, 2, (r), X, Y, (ex))
# define FP_CMP_UNORD_E(r, X, Y) _FP_CMP_UNORD (E, 2, r, X, Y) # define FP_CMP_UNORD_E(r, X, Y, ex) _FP_CMP_UNORD (E, 2, (r), X, Y, (ex))
# define FP_TO_INT_E(r, X, rsz, rsg) _FP_TO_INT (E, 2, r, X, rsz, rsg) # define FP_TO_INT_E(r, X, rsz, rsg) _FP_TO_INT (E, 2, (r), X, (rsz), (rsg))
# define FP_FROM_INT_E(X, r, rs, rt) _FP_FROM_INT (E, 2, X, r, rs, rt) # define FP_FROM_INT_E(X, r, rs, rt) _FP_FROM_INT (E, 2, X, (r), (rs), rt)
# define _FP_FRAC_HIGH_E(X) (X##_f1) # define _FP_FRAC_HIGH_E(X) (X##_f1)
# define _FP_FRAC_HIGH_RAW_E(X) (X##_f0) # define _FP_FRAC_HIGH_RAW_E(X) (X##_f0)
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
License along with the GNU C Library; if not, see License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */ <http://www.gnu.org/licenses/>. */
#define FP_NO_EXACT_UNDERFLOW
#include "soft-fp.h" #include "soft-fp.h"
#include "single.h" #include "single.h"
#include "double.h" #include "double.h"
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
License along with the GNU C Library; if not, see License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */ <http://www.gnu.org/licenses/>. */
#define FP_NO_EXACT_UNDERFLOW
#include "soft-fp.h" #include "soft-fp.h"
#include "single.h" #include "single.h"
#include "quad.h" #include "quad.h"
......
...@@ -39,7 +39,7 @@ __extendxftf2 (XFtype a) ...@@ -39,7 +39,7 @@ __extendxftf2 (XFtype a)
FP_DECL_Q (R); FP_DECL_Q (R);
TFtype r; TFtype r;
FP_INIT_ROUNDMODE; FP_INIT_TRAPPING_EXCEPTIONS;
FP_UNPACK_RAW_E (A, a); FP_UNPACK_RAW_E (A, a);
#if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q #if (2 * _FP_W_TYPE_SIZE) < _FP_FRACBITS_Q
FP_EXTEND (Q, E, 4, 4, R, A); FP_EXTEND (Q, E, 4, 4, R, A);
......
...@@ -42,9 +42,7 @@ __gedf2 (DFtype a, DFtype b) ...@@ -42,9 +42,7 @@ __gedf2 (DFtype a, DFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_D (A, a); FP_UNPACK_RAW_D (A, a);
FP_UNPACK_RAW_D (B, b); FP_UNPACK_RAW_D (B, b);
FP_CMP_D (r, A, B, -2); FP_CMP_D (r, A, B, -2, 2);
if (r == -2)
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -42,9 +42,7 @@ __gesf2 (SFtype a, SFtype b) ...@@ -42,9 +42,7 @@ __gesf2 (SFtype a, SFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_S (A, a); FP_UNPACK_RAW_S (A, a);
FP_UNPACK_RAW_S (B, b); FP_UNPACK_RAW_S (B, b);
FP_CMP_S (r, A, B, -2); FP_CMP_S (r, A, B, -2, 2);
if (r == -2)
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -42,9 +42,7 @@ __getf2 (TFtype a, TFtype b) ...@@ -42,9 +42,7 @@ __getf2 (TFtype a, TFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_Q (A, a); FP_UNPACK_RAW_Q (A, a);
FP_UNPACK_RAW_Q (B, b); FP_UNPACK_RAW_Q (B, b);
FP_CMP_Q (r, A, B, -2); FP_CMP_Q (r, A, B, -2, 2);
if (r == -2)
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -42,9 +42,7 @@ __ledf2 (DFtype a, DFtype b) ...@@ -42,9 +42,7 @@ __ledf2 (DFtype a, DFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_D (A, a); FP_UNPACK_RAW_D (A, a);
FP_UNPACK_RAW_D (B, b); FP_UNPACK_RAW_D (B, b);
FP_CMP_D (r, A, B, 2); FP_CMP_D (r, A, B, 2, 2);
if (r == 2)
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -42,9 +42,7 @@ __lesf2 (SFtype a, SFtype b) ...@@ -42,9 +42,7 @@ __lesf2 (SFtype a, SFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_S (A, a); FP_UNPACK_RAW_S (A, a);
FP_UNPACK_RAW_S (B, b); FP_UNPACK_RAW_S (B, b);
FP_CMP_S (r, A, B, 2); FP_CMP_S (r, A, B, 2, 2);
if (r == 2)
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -42,9 +42,7 @@ __letf2 (TFtype a, TFtype b) ...@@ -42,9 +42,7 @@ __letf2 (TFtype a, TFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_Q (A, a); FP_UNPACK_RAW_Q (A, a);
FP_UNPACK_RAW_Q (B, b); FP_UNPACK_RAW_Q (B, b);
FP_CMP_Q (r, A, B, 2); FP_CMP_Q (r, A, B, 2, 2);
if (r == 2)
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -50,8 +50,8 @@ ...@@ -50,8 +50,8 @@
#define _FP_FRAC_SRL_1(X, N) (X##_f >>= N) #define _FP_FRAC_SRL_1(X, N) (X##_f >>= N)
/* Right shift with sticky-lsb. */ /* Right shift with sticky-lsb. */
#define _FP_FRAC_SRST_1(X, S, N, sz) __FP_FRAC_SRST_1 (X##_f, S, N, sz) #define _FP_FRAC_SRST_1(X, S, N, sz) __FP_FRAC_SRST_1 (X##_f, S, (N), (sz))
#define _FP_FRAC_SRS_1(X, N, sz) __FP_FRAC_SRS_1 (X##_f, N, sz) #define _FP_FRAC_SRS_1(X, N, sz) __FP_FRAC_SRS_1 (X##_f, (N), (sz))
#define __FP_FRAC_SRST_1(X, S, N, sz) \ #define __FP_FRAC_SRST_1(X, S, N, sz) \
do \ do \
...@@ -71,9 +71,9 @@ ...@@ -71,9 +71,9 @@
#define _FP_FRAC_ADD_1(R, X, Y) (R##_f = X##_f + Y##_f) #define _FP_FRAC_ADD_1(R, X, Y) (R##_f = X##_f + Y##_f)
#define _FP_FRAC_SUB_1(R, X, Y) (R##_f = X##_f - Y##_f) #define _FP_FRAC_SUB_1(R, X, Y) (R##_f = X##_f - Y##_f)
#define _FP_FRAC_DEC_1(X, Y) (X##_f -= Y##_f) #define _FP_FRAC_DEC_1(X, Y) (X##_f -= Y##_f)
#define _FP_FRAC_CLZ_1(z, X) __FP_CLZ (z, X##_f) #define _FP_FRAC_CLZ_1(z, X) __FP_CLZ ((z), X##_f)
/* Predicates */ /* Predicates. */
#define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE) X##_f < 0) #define _FP_FRAC_NEGP_1(X) ((_FP_WS_TYPE) X##_f < 0)
#define _FP_FRAC_ZEROP_1(X) (X##_f == 0) #define _FP_FRAC_ZEROP_1(X) (X##_f == 0)
#define _FP_FRAC_OVERP_1(fs, X) (X##_f & _FP_OVERFLOW_##fs) #define _FP_FRAC_OVERP_1(fs, X) (X##_f & _FP_OVERFLOW_##fs)
...@@ -87,66 +87,62 @@ ...@@ -87,66 +87,62 @@
#define _FP_MINFRAC_1 1 #define _FP_MINFRAC_1 1
#define _FP_MAXFRAC_1 (~(_FP_WS_TYPE) 0) #define _FP_MAXFRAC_1 (~(_FP_WS_TYPE) 0)
/* /* Unpack the raw bits of a native fp value. Do not classify or
* Unpack the raw bits of a native fp value. Do not classify or normalize the data. */
* normalize the data.
*/
#define _FP_UNPACK_RAW_1(fs, X, val) \ #define _FP_UNPACK_RAW_1(fs, X, val) \
do \ do \
{ \ { \
union _FP_UNION_##fs _flo; \ union _FP_UNION_##fs _FP_UNPACK_RAW_1_flo; \
_flo.flt = (val); \ _FP_UNPACK_RAW_1_flo.flt = (val); \
\ \
X##_f = _flo.bits.frac; \ X##_f = _FP_UNPACK_RAW_1_flo.bits.frac; \
X##_e = _flo.bits.exp; \ X##_e = _FP_UNPACK_RAW_1_flo.bits.exp; \
X##_s = _flo.bits.sign; \ X##_s = _FP_UNPACK_RAW_1_flo.bits.sign; \
} \ } \
while (0) while (0)
#define _FP_UNPACK_RAW_1_P(fs, X, val) \ #define _FP_UNPACK_RAW_1_P(fs, X, val) \
do \ do \
{ \ { \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ union _FP_UNION_##fs *_FP_UNPACK_RAW_1_P_flo \
\ = (union _FP_UNION_##fs *) (val); \
X##_f = _flo->bits.frac; \ \
X##_e = _flo->bits.exp; \ X##_f = _FP_UNPACK_RAW_1_P_flo->bits.frac; \
X##_s = _flo->bits.sign; \ X##_e = _FP_UNPACK_RAW_1_P_flo->bits.exp; \
} \ X##_s = _FP_UNPACK_RAW_1_P_flo->bits.sign; \
} \
while (0) while (0)
/* /* Repack the raw bits of a native fp value. */
* Repack the raw bits of a native fp value.
*/
#define _FP_PACK_RAW_1(fs, val, X) \ #define _FP_PACK_RAW_1(fs, val, X) \
do \ do \
{ \ { \
union _FP_UNION_##fs _flo; \ union _FP_UNION_##fs _FP_PACK_RAW_1_flo; \
\ \
_flo.bits.frac = X##_f; \ _FP_PACK_RAW_1_flo.bits.frac = X##_f; \
_flo.bits.exp = X##_e; \ _FP_PACK_RAW_1_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \ _FP_PACK_RAW_1_flo.bits.sign = X##_s; \
\ \
(val) = _flo.flt; \ (val) = _FP_PACK_RAW_1_flo.flt; \
} \ } \
while (0) while (0)
#define _FP_PACK_RAW_1_P(fs, val, X) \ #define _FP_PACK_RAW_1_P(fs, val, X) \
do \ do \
{ \ { \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ union _FP_UNION_##fs *_FP_PACK_RAW_1_P_flo \
\ = (union _FP_UNION_##fs *) (val); \
_flo->bits.frac = X##_f; \ \
_flo->bits.exp = X##_e; \ _FP_PACK_RAW_1_P_flo->bits.frac = X##_f; \
_flo->bits.sign = X##_s; \ _FP_PACK_RAW_1_P_flo->bits.exp = X##_e; \
} \ _FP_PACK_RAW_1_P_flo->bits.sign = X##_s; \
} \
while (0) while (0)
/* /* Multiplication algorithms: */
* Multiplication algorithms:
*/
/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
multiplication immediately. */ multiplication immediately. */
...@@ -161,11 +157,11 @@ ...@@ -161,11 +157,11 @@
#define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y) \ #define _FP_MUL_MEAT_1_imm(wfracbits, R, X, Y) \
do \ do \
{ \ { \
_FP_MUL_MEAT_DW_1_imm (wfracbits, R, X, Y); \ _FP_MUL_MEAT_DW_1_imm ((wfracbits), R, X, Y); \
/* Normalize since we know where the msb of the multiplicands \ /* Normalize since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \ were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \ at either 2B or 2B-1. */ \
_FP_FRAC_SRS_1 (R, wfracbits-1, 2*wfracbits); \ _FP_FRAC_SRS_1 (R, (wfracbits)-1, 2*(wfracbits)); \
} \ } \
while (0) while (0)
...@@ -181,13 +177,15 @@ ...@@ -181,13 +177,15 @@
#define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \ #define _FP_MUL_MEAT_1_wide(wfracbits, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_FRAC_DECL_2 (_Z); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_wide_Z); \
_FP_MUL_MEAT_DW_1_wide (wfracbits, _Z, X, Y, doit); \ _FP_MUL_MEAT_DW_1_wide ((wfracbits), _FP_MUL_MEAT_1_wide_Z, \
X, Y, doit); \
/* Normalize since we know where the msb of the multiplicands \ /* Normalize since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \ were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \ at either 2B or 2B-1. */ \
_FP_FRAC_SRS_2 (_Z, wfracbits-1, 2*wfracbits); \ _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_wide_Z, (wfracbits)-1, \
R##_f = _Z_f0; \ 2*(wfracbits)); \
R##_f = _FP_MUL_MEAT_1_wide_Z_f0; \
} \ } \
while (0) while (0)
...@@ -196,62 +194,70 @@ ...@@ -196,62 +194,70 @@
#define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y) \ #define _FP_MUL_MEAT_DW_1_hard(wfracbits, R, X, Y) \
do \ do \
{ \ { \
_FP_W_TYPE _xh, _xl, _yh, _yl; \ _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_xh, _FP_MUL_MEAT_DW_1_hard_xl; \
_FP_FRAC_DECL_2 (_a); \ _FP_W_TYPE _FP_MUL_MEAT_DW_1_hard_yh, _FP_MUL_MEAT_DW_1_hard_yl; \
_FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_1_hard_a); \
\ \
/* split the words in half */ \ /* Split the words in half. */ \
_xh = X##_f >> (_FP_W_TYPE_SIZE/2); \ _FP_MUL_MEAT_DW_1_hard_xh = X##_f >> (_FP_W_TYPE_SIZE/2); \
_xl = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \ _FP_MUL_MEAT_DW_1_hard_xl \
_yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \ = X##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
_yl = Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \ _FP_MUL_MEAT_DW_1_hard_yh = Y##_f >> (_FP_W_TYPE_SIZE/2); \
_FP_MUL_MEAT_DW_1_hard_yl \
= Y##_f & (((_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2)) - 1); \
\ \
/* multiply the pieces */ \ /* Multiply the pieces. */ \
R##_f0 = _xl * _yl; \ R##_f0 = _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yl; \
_a_f0 = _xh * _yl; \ _FP_MUL_MEAT_DW_1_hard_a_f0 \
_a_f1 = _xl * _yh; \ = _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yl; \
R##_f1 = _xh * _yh; \ _FP_MUL_MEAT_DW_1_hard_a_f1 \
= _FP_MUL_MEAT_DW_1_hard_xl * _FP_MUL_MEAT_DW_1_hard_yh; \
R##_f1 = _FP_MUL_MEAT_DW_1_hard_xh * _FP_MUL_MEAT_DW_1_hard_yh; \
\ \
/* reassemble into two full words */ \ /* Reassemble into two full words. */ \
if ((_a_f0 += _a_f1) < _a_f1) \ if ((_FP_MUL_MEAT_DW_1_hard_a_f0 += _FP_MUL_MEAT_DW_1_hard_a_f1) \
< _FP_MUL_MEAT_DW_1_hard_a_f1) \
R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2); \ R##_f1 += (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE/2); \
_a_f1 = _a_f0 >> (_FP_W_TYPE_SIZE/2); \ _FP_MUL_MEAT_DW_1_hard_a_f1 \
_a_f0 = _a_f0 << (_FP_W_TYPE_SIZE/2); \ = _FP_MUL_MEAT_DW_1_hard_a_f0 >> (_FP_W_TYPE_SIZE/2); \
_FP_FRAC_ADD_2 (R, R, _a); \ _FP_MUL_MEAT_DW_1_hard_a_f0 \
= _FP_MUL_MEAT_DW_1_hard_a_f0 << (_FP_W_TYPE_SIZE/2); \
_FP_FRAC_ADD_2 (R, R, _FP_MUL_MEAT_DW_1_hard_a); \
} \ } \
while (0) while (0)
#define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \ #define _FP_MUL_MEAT_1_hard(wfracbits, R, X, Y) \
do \ do \
{ \ { \
_FP_FRAC_DECL_2 (_z); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_1_hard_z); \
_FP_MUL_MEAT_DW_1_hard (wfracbits, _z, X, Y); \ _FP_MUL_MEAT_DW_1_hard ((wfracbits), \
\ _FP_MUL_MEAT_1_hard_z, X, Y); \
/* normalize */ \ \
_FP_FRAC_SRS_2 (_z, wfracbits - 1, 2*wfracbits); \ /* Normalize. */ \
R##_f = _z_f0; \ _FP_FRAC_SRS_2 (_FP_MUL_MEAT_1_hard_z, \
} \ (wfracbits) - 1, 2*(wfracbits)); \
R##_f = _FP_MUL_MEAT_1_hard_z_f0; \
} \
while (0) while (0)
/* /* Division algorithms: */
* Division algorithms:
*/
/* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the /* Basic. Assuming the host word size is >= 2*FRACBITS, we can do the
division immediately. Give this macro either _FP_DIV_HELP_imm for division immediately. Give this macro either _FP_DIV_HELP_imm for
C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you C primitives or _FP_DIV_HELP_ldiv for the ISO function. Which you
choose will depend on what the compiler does with divrem4. */ choose will depend on what the compiler does with divrem4. */
#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \ #define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_W_TYPE _q, _r; \ _FP_W_TYPE _FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r; \
X##_f <<= (X##_f < Y##_f \ X##_f <<= (X##_f < Y##_f \
? R##_e--, _FP_WFRACBITS_##fs \ ? R##_e--, _FP_WFRACBITS_##fs \
: _FP_WFRACBITS_##fs - 1); \ : _FP_WFRACBITS_##fs - 1); \
doit (_q, _r, X##_f, Y##_f); \ doit (_FP_DIV_MEAT_1_imm_q, _FP_DIV_MEAT_1_imm_r, X##_f, Y##_f); \
R##_f = _q | (_r != 0); \ R##_f = _FP_DIV_MEAT_1_imm_q | (_FP_DIV_MEAT_1_imm_r != 0); \
} \ } \
while (0) while (0)
/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd /* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
...@@ -262,70 +268,80 @@ ...@@ -262,70 +268,80 @@
#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \ #define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y) \
do \ do \
{ \ { \
_FP_W_TYPE _nh, _nl, _q, _r, _y; \ _FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nh; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_nl; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_q; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_r; \
_FP_W_TYPE _FP_DIV_MEAT_1_udiv_norm_y; \
\ \
/* Normalize Y -- i.e. make the most significant bit set. */ \ /* Normalize Y -- i.e. make the most significant bit set. */ \
_y = Y##_f << _FP_WFRACXBITS_##fs; \ _FP_DIV_MEAT_1_udiv_norm_y = Y##_f << _FP_WFRACXBITS_##fs; \
\ \
/* Shift X op correspondingly high, that is, up one full word. */ \ /* Shift X op correspondingly high, that is, up one full word. */ \
if (X##_f < Y##_f) \ if (X##_f < Y##_f) \
{ \ { \
R##_e--; \ R##_e--; \
_nl = 0; \ _FP_DIV_MEAT_1_udiv_norm_nl = 0; \
_nh = X##_f; \ _FP_DIV_MEAT_1_udiv_norm_nh = X##_f; \
} \ } \
else \ else \
{ \ { \
_nl = X##_f << (_FP_W_TYPE_SIZE - 1); \ _FP_DIV_MEAT_1_udiv_norm_nl = X##_f << (_FP_W_TYPE_SIZE - 1); \
_nh = X##_f >> 1; \ _FP_DIV_MEAT_1_udiv_norm_nh = X##_f >> 1; \
} \ } \
\ \
udiv_qrnnd (_q, _r, _nh, _nl, _y); \ udiv_qrnnd (_FP_DIV_MEAT_1_udiv_norm_q, \
R##_f = _q | (_r != 0); \ _FP_DIV_MEAT_1_udiv_norm_r, \
_FP_DIV_MEAT_1_udiv_norm_nh, \
_FP_DIV_MEAT_1_udiv_norm_nl, \
_FP_DIV_MEAT_1_udiv_norm_y); \
R##_f = (_FP_DIV_MEAT_1_udiv_norm_q \
| (_FP_DIV_MEAT_1_udiv_norm_r != 0)); \
} \ } \
while (0) while (0)
#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \ #define _FP_DIV_MEAT_1_udiv(fs, R, X, Y) \
do \ do \
{ \ { \
_FP_W_TYPE _nh, _nl, _q, _r; \ _FP_W_TYPE _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl; \
if (X##_f < Y##_f) \ _FP_W_TYPE _FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r; \
{ \ if (X##_f < Y##_f) \
R##_e--; \ { \
_nl = X##_f << _FP_WFRACBITS_##fs; \ R##_e--; \
_nh = X##_f >> _FP_WFRACXBITS_##fs; \ _FP_DIV_MEAT_1_udiv_nl = X##_f << _FP_WFRACBITS_##fs; \
} \ _FP_DIV_MEAT_1_udiv_nh = X##_f >> _FP_WFRACXBITS_##fs; \
else \ } \
{ \ else \
_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \ { \
_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \ _FP_DIV_MEAT_1_udiv_nl = X##_f << (_FP_WFRACBITS_##fs - 1); \
} \ _FP_DIV_MEAT_1_udiv_nh = X##_f >> (_FP_WFRACXBITS_##fs + 1); \
udiv_qrnnd (_q, _r, _nh, _nl, Y##_f); \ } \
R##_f = _q | (_r != 0); \ udiv_qrnnd (_FP_DIV_MEAT_1_udiv_q, _FP_DIV_MEAT_1_udiv_r, \
} \ _FP_DIV_MEAT_1_udiv_nh, _FP_DIV_MEAT_1_udiv_nl, \
Y##_f); \
R##_f = _FP_DIV_MEAT_1_udiv_q | (_FP_DIV_MEAT_1_udiv_r != 0); \
} \
while (0) while (0)
/* /* Square root algorithms:
* Square root algorithms: We have just one right now, maybe Newton approximation
* We have just one right now, maybe Newton approximation should be added for those machines where division is fast. */
* should be added for those machines where division is fast.
*/
#define _FP_SQRT_MEAT_1(R, S, T, X, q) \ #define _FP_SQRT_MEAT_1(R, S, T, X, q) \
do \ do \
{ \ { \
while (q != _FP_WORK_ROUND) \ while ((q) != _FP_WORK_ROUND) \
{ \ { \
T##_f = S##_f + q; \ T##_f = S##_f + (q); \
if (T##_f <= X##_f) \ if (T##_f <= X##_f) \
{ \ { \
S##_f = T##_f + q; \ S##_f = T##_f + (q); \
X##_f -= T##_f; \ X##_f -= T##_f; \
R##_f += q; \ R##_f += (q); \
} \ } \
_FP_FRAC_SLL_1 (X, 1); \ _FP_FRAC_SLL_1 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
if (X##_f) \ if (X##_f) \
{ \ { \
...@@ -336,17 +352,13 @@ ...@@ -336,17 +352,13 @@
} \ } \
while (0) while (0)
/* /* Assembly/disassembly for converting to/from integral types.
* Assembly/disassembly for converting to/from integral types. No shifting or overflow handled here. */
* No shifting or overflow handled here.
*/
#define _FP_FRAC_ASSEMBLE_1(r, X, rsize) (r = X##_f) #define _FP_FRAC_ASSEMBLE_1(r, X, rsize) ((r) = X##_f)
#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = r) #define _FP_FRAC_DISASSEMBLE_1(X, r, rsize) (X##_f = (r))
/* /* Convert FP values between word sizes. */
* Convert FP values between word sizes
*/
#define _FP_FRAC_COPY_1_1(D, S) (D##_f = S##_f) #define _FP_FRAC_COPY_1_1(D, S) (D##_f = S##_f)
...@@ -122,16 +122,16 @@ ...@@ -122,16 +122,16 @@
do \ do \
{ \ { \
if (X##_f1) \ if (X##_f1) \
__FP_CLZ (R, X##_f1); \ __FP_CLZ ((R), X##_f1); \
else \ else \
{ \ { \
__FP_CLZ (R, X##_f0); \ __FP_CLZ ((R), X##_f0); \
R += _FP_W_TYPE_SIZE; \ (R) += _FP_W_TYPE_SIZE; \
} \ } \
} \ } \
while (0) while (0)
/* Predicates */ /* Predicates. */
#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0) #define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE) X##_f1 < 0)
#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0) #define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
#define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs) #define _FP_FRAC_OVERP_2(fs, X) (_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
...@@ -148,9 +148,7 @@ ...@@ -148,9 +148,7 @@
#define _FP_MINFRAC_2 0, 1 #define _FP_MINFRAC_2 0, 1
#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0) #define _FP_MAXFRAC_2 (~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
/* /* Internals. */
* Internals
*/
#define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1) #define __FP_FRAC_SET_2(X, I1, I0) (X##_f0 = I0, X##_f1 = I1)
...@@ -158,11 +156,11 @@ ...@@ -158,11 +156,11 @@
do \ do \
{ \ { \
if (xh) \ if (xh) \
__FP_CLZ (R, xh); \ __FP_CLZ ((R), xh); \
else \ else \
{ \ { \
__FP_CLZ (R, xl); \ __FP_CLZ ((R), xl); \
R += _FP_W_TYPE_SIZE; \ (R) += _FP_W_TYPE_SIZE; \
} \ } \
} \ } \
while (0) while (0)
...@@ -182,12 +180,12 @@ ...@@ -182,12 +180,12 @@
(rh = xh - yh - ((rl = xl - yl) > xl)) (rh = xh - yh - ((rl = xl - yl) > xl))
# endif # endif
# ifndef __FP_FRAC_DEC_2 # ifndef __FP_FRAC_DEC_2
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \ # define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
do \ do \
{ \ { \
UWtype _t = xl; \ UWtype __FP_FRAC_DEC_2_t = xl; \
xh -= yh + ((xl -= yl) > _t); \ xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
} \ } \
while (0) while (0)
# endif # endif
...@@ -205,91 +203,95 @@ ...@@ -205,91 +203,95 @@
#endif #endif
/* /* Unpack the raw bits of a native fp value. Do not classify or
* Unpack the raw bits of a native fp value. Do not classify or normalize the data. */
* normalize the data.
*/
#define _FP_UNPACK_RAW_2(fs, X, val) \ #define _FP_UNPACK_RAW_2(fs, X, val) \
do \ do \
{ \ { \
union _FP_UNION_##fs _flo; \ union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
_flo.flt = (val); \ _FP_UNPACK_RAW_2_flo.flt = (val); \
\ \
X##_f0 = _flo.bits.frac0; \ X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
X##_f1 = _flo.bits.frac1; \ X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
X##_e = _flo.bits.exp; \ X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
X##_s = _flo.bits.sign; \ X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
} \ } \
while (0) while (0)
#define _FP_UNPACK_RAW_2_P(fs, X, val) \ #define _FP_UNPACK_RAW_2_P(fs, X, val) \
do \ do \
{ \ { \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
\ = (union _FP_UNION_##fs *) (val); \
X##_f0 = _flo->bits.frac0; \ \
X##_f1 = _flo->bits.frac1; \ X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
X##_e = _flo->bits.exp; \ X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
X##_s = _flo->bits.sign; \ X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
} \ X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
} \
while (0) while (0)
/* /* Repack the raw bits of a native fp value. */
* Repack the raw bits of a native fp value.
*/
#define _FP_PACK_RAW_2(fs, val, X) \ #define _FP_PACK_RAW_2(fs, val, X) \
do \ do \
{ \ { \
union _FP_UNION_##fs _flo; \ union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
\ \
_flo.bits.frac0 = X##_f0; \ _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
_flo.bits.frac1 = X##_f1; \ _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
_flo.bits.exp = X##_e; \ _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \ _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
\ \
(val) = _flo.flt; \ (val) = _FP_PACK_RAW_2_flo.flt; \
} \ } \
while (0) while (0)
#define _FP_PACK_RAW_2_P(fs, val, X) \ #define _FP_PACK_RAW_2_P(fs, val, X) \
do \ do \
{ \ { \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
\ = (union _FP_UNION_##fs *) (val); \
_flo->bits.frac0 = X##_f0; \ \
_flo->bits.frac1 = X##_f1; \ _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
_flo->bits.exp = X##_e; \ _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
_flo->bits.sign = X##_s; \ _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
} \ _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
} \
while (0) while (0)
/* /* Multiplication algorithms: */
* Multiplication algorithms:
*/
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \ #define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_FRAC_DECL_2 (_b); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
_FP_FRAC_DECL_2 (_c); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
\ \
doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \ doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
doit (_b_f1, _b_f0, X##_f0, Y##_f1); \ X##_f0, Y##_f0); \
doit (_c_f1, _c_f0, X##_f1, Y##_f0); \ doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), X##_f1, Y##_f1); \ X##_f0, Y##_f1); \
doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
X##_f1, Y##_f0); \
doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
X##_f1, Y##_f1); \
\ \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), 0, _b_f1, _b_f0, \ _FP_FRAC_WORD_4 (R, 1), 0, \
_FP_MUL_MEAT_DW_2_wide_b_f1, \
_FP_MUL_MEAT_DW_2_wide_b_f0, \
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1)); \ _FP_FRAC_WORD_4 (R, 1)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0, \ _FP_FRAC_WORD_4 (R, 1), 0, \
_FP_MUL_MEAT_DW_2_wide_c_f1, \
_FP_MUL_MEAT_DW_2_wide_c_f0, \
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1)); \ _FP_FRAC_WORD_4 (R, 1)); \
} \ } \
...@@ -298,16 +300,18 @@ ...@@ -298,16 +300,18 @@
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \ #define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_FRAC_DECL_4 (_z); \ _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
\ \
_FP_MUL_MEAT_DW_2_wide (wfracbits, _z, X, Y, doit); \ _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
X, Y, doit); \
\ \
/* Normalize since we know where the msb of the multiplicands \ /* Normalize since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \ were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \ at either 2B or 2B-1. */ \
_FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \ _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \ 2*(wfracbits)); \
R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \ R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
} \ } \
while (0) while (0)
...@@ -318,35 +322,51 @@ ...@@ -318,35 +322,51 @@
#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \ #define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_FRAC_DECL_2 (_b); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
_FP_FRAC_DECL_2 (_c); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
_FP_W_TYPE _d; \ _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
int _c1, _c2; \ int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
\ \
_b_f0 = X##_f0 + X##_f1; \ _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
_c1 = _b_f0 < X##_f0; \ _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
_b_f1 = Y##_f0 + Y##_f1; \ = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
_c2 = _b_f1 < Y##_f0; \ _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
doit (_d, _FP_FRAC_WORD_4 (R, 0), X##_f0, Y##_f0); \ _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), _b_f0, _b_f1); \ = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
doit (_c_f1, _c_f0, X##_f1, Y##_f1); \ doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
X##_f0, Y##_f0); \
doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
\ \
_b_f0 &= -_c2; \ _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
_b_f1 &= -_c1; \ &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
_FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
&= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), (_c1 & _c2), 0, _d, \ _FP_FRAC_WORD_4 (R, 1), \
(_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
& _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
_FP_MUL_MEAT_DW_2_wide_3mul_d, \
0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \ 0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_b_f0); \ _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
__FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_b_f1); \ _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), \ _FP_FRAC_WORD_4 (R, 1), \
0, _d, _FP_FRAC_WORD_4 (R, 0)); \ 0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
_FP_FRAC_WORD_4 (R, 0)); \
__FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_FP_FRAC_WORD_4 (R, 1), 0, _c_f1, _c_f0); \ _FP_FRAC_WORD_4 (R, 1), 0, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
__FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \ __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
_c_f1, _c_f0, \ _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
_FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \ _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
} \ } \
while (0) while (0)
...@@ -354,45 +374,51 @@ ...@@ -354,45 +374,51 @@
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \ #define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_FRAC_DECL_4 (_z); \ _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
\ \
_FP_MUL_MEAT_DW_2_wide_3mul (wfracbits, _z, X, Y, doit); \ _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
_FP_MUL_MEAT_2_wide_3mul_z, \
X, Y, doit); \
\ \
/* Normalize since we know where the msb of the multiplicands \ /* Normalize since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \ were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \ at either 2B or 2B-1. */ \
_FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \ _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
R##_f0 = _FP_FRAC_WORD_4 (_z, 0); \ (wfracbits)-1, 2*(wfracbits)); \
R##_f1 = _FP_FRAC_WORD_4 (_z, 1); \ R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
} \ } \
while (0) while (0)
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \ #define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
do \ do \
{ \ { \
_FP_W_TYPE _x[2], _y[2]; \ _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
_x[0] = X##_f0; \ _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
_x[1] = X##_f1; \ _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
_y[0] = Y##_f0; \ _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
_y[1] = Y##_f1; \ _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
_FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
\ \
mpn_mul_n (R##_f, _x, _y, 2); \ mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
_FP_MUL_MEAT_DW_2_gmp_y, 2); \
} \ } \
while (0) while (0)
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \ #define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
do \ do \
{ \ { \
_FP_FRAC_DECL_4 (_z); \ _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
\ \
_FP_MUL_MEAT_DW_2_gmp (wfracbits, _z, X, Y); \ _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
\ \
/* Normalize since we know where the msb of the multiplicands \ /* Normalize since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the product is \ were (bit B), we know that the msb of the product is \
at either 2B or 2B-1. */ \ at either 2B or 2B-1. */ \
_FP_FRAC_SRS_4 (_z, wfracbits-1, 2*wfracbits); \ _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
R##_f0 = _z_f[0]; \ 2*(wfracbits)); \
R##_f1 = _z_f[1]; \ R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
} \ } \
while (0) while (0)
...@@ -428,7 +454,7 @@ ...@@ -428,7 +454,7 @@
_p240, _q240, _r240, _s240; \ _p240, _q240, _r240, _s240; \
UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \ UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
\ \
if (wfracbits < 106 || wfracbits > 120) \ if ((wfracbits) < 106 || (wfracbits) > 120) \
abort (); \ abort (); \
\ \
setfetz; \ setfetz; \
...@@ -500,116 +526,136 @@ ...@@ -500,116 +526,136 @@
} \ } \
while (0) while (0)
/* /* Division algorithms: */
* Division algorithms:
*/
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \ #define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
do \ do \
{ \ { \
_FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \ _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
_FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
if (_FP_FRAC_GE_2 (X, Y)) \ if (_FP_FRAC_GE_2 (X, Y)) \
{ \ { \
_n_f2 = X##_f1 >> 1; \ _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \ _FP_DIV_MEAT_2_udiv_n_f1 \
_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \ = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
_FP_DIV_MEAT_2_udiv_n_f0 \
= X##_f0 << (_FP_W_TYPE_SIZE - 1); \
} \ } \
else \ else \
{ \ { \
R##_e--; \ R##_e--; \
_n_f2 = X##_f1; \ _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
_n_f1 = X##_f0; \ _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
_n_f0 = 0; \ _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
} \ } \
\ \
/* Normalize, i.e. make the most significant bit of the \ /* Normalize, i.e. make the most significant bit of the \
denominator set. */ \ denominator set. */ \
_FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \ _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
\ \
udiv_qrnnd (R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \ udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
umul_ppmm (_m_f1, _m_f0, R##_f1, Y##_f0); \ _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
_r_f0 = _n_f0; \ Y##_f1); \
if (_FP_FRAC_GT_2 (_m, _r)) \ umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
R##_f1, Y##_f0); \
_FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
{ \ { \
R##_f1--; \ R##_f1--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \ _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \ _FP_DIV_MEAT_2_udiv_r); \
if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
&& _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
_FP_DIV_MEAT_2_udiv_r)) \
{ \ { \
R##_f1--; \ R##_f1--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \ _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
_FP_DIV_MEAT_2_udiv_r); \
} \ } \
} \ } \
_FP_FRAC_DEC_2 (_r, _m); \ _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
\ \
if (_r_f1 == Y##_f1) \ if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
{ \ { \
/* This is a special case, not an optimization \ /* This is a special case, not an optimization \
(_r/Y##_f1 would not fit into UWtype). \ (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
As _r is guaranteed to be < Y, R##_f0 can be either \ As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
(UWtype)-1 or (UWtype)-2. But as we know what kind \ R##_f0 can be either (UWtype)-1 or (UWtype)-2. But as we \
of bits it is (sticky, guard, round), we don't care. \ know what kind of bits it is (sticky, guard, round), \
We also don't care what the reminder is, because the \ we don't care. We also don't care what the reminder is, \
guard bit will be set anyway. -jj */ \ because the guard bit will be set anyway. -jj */ \
R##_f0 = -1; \ R##_f0 = -1; \
} \ } \
else \ else \
{ \ { \
udiv_qrnnd (R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \ udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
umul_ppmm (_m_f1, _m_f0, R##_f0, Y##_f0); \ _FP_DIV_MEAT_2_udiv_r_f1, \
_r_f0 = 0; \ _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
if (_FP_FRAC_GT_2 (_m, _r)) \ umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
_FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
_FP_DIV_MEAT_2_udiv_r_f0 = 0; \
if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
_FP_DIV_MEAT_2_udiv_r)) \
{ \ { \
R##_f0--; \ R##_f0--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \ _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
if (_FP_FRAC_GE_2 (_r, Y) && _FP_FRAC_GT_2 (_m, _r)) \ _FP_DIV_MEAT_2_udiv_r); \
if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
&& _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
_FP_DIV_MEAT_2_udiv_r)) \
{ \ { \
R##_f0--; \ R##_f0--; \
_FP_FRAC_ADD_2 (_r, Y, _r); \ _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
_FP_DIV_MEAT_2_udiv_r); \
} \ } \
} \ } \
if (!_FP_FRAC_EQ_2 (_r, _m)) \ if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
_FP_DIV_MEAT_2_udiv_m)) \
R##_f0 |= _FP_WORK_STICKY; \ R##_f0 |= _FP_WORK_STICKY; \
} \ } \
} \ } \
while (0) while (0)
/* /* Square root algorithms:
* Square root algorithms: We have just one right now, maybe Newton approximation
* We have just one right now, maybe Newton approximation should be added for those machines where division is fast. */
* should be added for those machines where division is fast.
*/
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \ #define _FP_SQRT_MEAT_2(R, S, T, X, q) \
do \ do \
{ \ { \
while (q) \ while (q) \
{ \ { \
T##_f1 = S##_f1 + q; \ T##_f1 = S##_f1 + (q); \
if (T##_f1 <= X##_f1) \ if (T##_f1 <= X##_f1) \
{ \ { \
S##_f1 = T##_f1 + q; \ S##_f1 = T##_f1 + (q); \
X##_f1 -= T##_f1; \ X##_f1 -= T##_f1; \
R##_f1 += q; \ R##_f1 += (q); \
} \ } \
_FP_FRAC_SLL_2 (X, 1); \ _FP_FRAC_SLL_2 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \ (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
while (q != _FP_WORK_ROUND) \ while ((q) != _FP_WORK_ROUND) \
{ \ { \
T##_f0 = S##_f0 + q; \ T##_f0 = S##_f0 + (q); \
T##_f1 = S##_f1; \ T##_f1 = S##_f1; \
if (T##_f1 < X##_f1 \ if (T##_f1 < X##_f1 \
|| (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \ || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
{ \ { \
S##_f0 = T##_f0 + q; \ S##_f0 = T##_f0 + (q); \
S##_f1 += (T##_f0 > S##_f0); \ S##_f1 += (T##_f0 > S##_f0); \
_FP_FRAC_DEC_2 (X, T); \ _FP_FRAC_DEC_2 (X, T); \
R##_f0 += q; \ R##_f0 += (q); \
} \ } \
_FP_FRAC_SLL_2 (X, 1); \ _FP_FRAC_SLL_2 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
if (X##_f0 | X##_f1) \ if (X##_f0 | X##_f1) \
{ \ { \
...@@ -622,31 +668,29 @@ ...@@ -622,31 +668,29 @@
while (0) while (0)
/* /* Assembly/disassembly for converting to/from integral types.
* Assembly/disassembly for converting to/from integral types. No shifting or overflow handled here. */
* No shifting or overflow handled here.
*/
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \ #define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
(void) ((rsize <= _FP_W_TYPE_SIZE) \ (void) (((rsize) <= _FP_W_TYPE_SIZE) \
? ({ r = X##_f0; }) \ ? ({ (r) = X##_f0; }) \
: ({ \ : ({ \
r = X##_f1; \ (r) = X##_f1; \
r <<= _FP_W_TYPE_SIZE; \ (r) <<= _FP_W_TYPE_SIZE; \
r += X##_f0; \ (r) += X##_f0; \
})) }))
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \ #define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
do \ do \
{ \ { \
X##_f0 = r; \ X##_f0 = (r); \
X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
} \ ? 0 \
: (r) >> _FP_W_TYPE_SIZE); \
} \
while (0) while (0)
/* /* Convert FP values between word sizes. */
* Convert FP values between word sizes
*/
#define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0) #define _FP_FRAC_COPY_1_2(D, S) (D##_f = S##_f0)
......
...@@ -39,91 +39,121 @@ ...@@ -39,91 +39,121 @@
#define _FP_FRAC_LOW_4(X) (X##_f[0]) #define _FP_FRAC_LOW_4(X) (X##_f[0])
#define _FP_FRAC_WORD_4(X, w) (X##_f[w]) #define _FP_FRAC_WORD_4(X, w) (X##_f[w])
#define _FP_FRAC_SLL_4(X, N) \ #define _FP_FRAC_SLL_4(X, N) \
do \ do \
{ \ { \
_FP_I_TYPE _up, _down, _skip, _i; \ _FP_I_TYPE _FP_FRAC_SLL_4_up, _FP_FRAC_SLL_4_down; \
_skip = (N) / _FP_W_TYPE_SIZE; \ _FP_I_TYPE _FP_FRAC_SLL_4_skip, _FP_FRAC_SLL_4_i; \
_up = (N) % _FP_W_TYPE_SIZE; \ _FP_FRAC_SLL_4_skip = (N) / _FP_W_TYPE_SIZE; \
_down = _FP_W_TYPE_SIZE - _up; \ _FP_FRAC_SLL_4_up = (N) % _FP_W_TYPE_SIZE; \
if (!_up) \ _FP_FRAC_SLL_4_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_4_up; \
for (_i = 3; _i >= _skip; --_i) \ if (!_FP_FRAC_SLL_4_up) \
X##_f[_i] = X##_f[_i-_skip]; \ for (_FP_FRAC_SLL_4_i = 3; \
else \ _FP_FRAC_SLL_4_i >= _FP_FRAC_SLL_4_skip; \
{ \ --_FP_FRAC_SLL_4_i) \
for (_i = 3; _i > _skip; --_i) \ X##_f[_FP_FRAC_SLL_4_i] \
X##_f[_i] = (X##_f[_i-_skip] << _up \ = X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip]; \
| X##_f[_i-_skip-1] >> _down); \ else \
X##_f[_i--] = X##_f[0] << _up; \ { \
} \ for (_FP_FRAC_SLL_4_i = 3; \
for (; _i >= 0; --_i) \ _FP_FRAC_SLL_4_i > _FP_FRAC_SLL_4_skip; \
X##_f[_i] = 0; \ --_FP_FRAC_SLL_4_i) \
} \ X##_f[_FP_FRAC_SLL_4_i] \
= ((X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip] \
<< _FP_FRAC_SLL_4_up) \
| (X##_f[_FP_FRAC_SLL_4_i-_FP_FRAC_SLL_4_skip-1] \
>> _FP_FRAC_SLL_4_down)); \
X##_f[_FP_FRAC_SLL_4_i--] = X##_f[0] << _FP_FRAC_SLL_4_up; \
} \
for (; _FP_FRAC_SLL_4_i >= 0; --_FP_FRAC_SLL_4_i) \
X##_f[_FP_FRAC_SLL_4_i] = 0; \
} \
while (0) while (0)
/* This one was broken too */ /* This one was broken too. */
#define _FP_FRAC_SRL_4(X, N) \ #define _FP_FRAC_SRL_4(X, N) \
do \ do \
{ \ { \
_FP_I_TYPE _up, _down, _skip, _i; \ _FP_I_TYPE _FP_FRAC_SRL_4_up, _FP_FRAC_SRL_4_down; \
_skip = (N) / _FP_W_TYPE_SIZE; \ _FP_I_TYPE _FP_FRAC_SRL_4_skip, _FP_FRAC_SRL_4_i; \
_down = (N) % _FP_W_TYPE_SIZE; \ _FP_FRAC_SRL_4_skip = (N) / _FP_W_TYPE_SIZE; \
_up = _FP_W_TYPE_SIZE - _down; \ _FP_FRAC_SRL_4_down = (N) % _FP_W_TYPE_SIZE; \
if (!_down) \ _FP_FRAC_SRL_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_4_down; \
for (_i = 0; _i <= 3-_skip; ++_i) \ if (!_FP_FRAC_SRL_4_down) \
X##_f[_i] = X##_f[_i+_skip]; \ for (_FP_FRAC_SRL_4_i = 0; \
else \ _FP_FRAC_SRL_4_i <= 3-_FP_FRAC_SRL_4_skip; \
{ \ ++_FP_FRAC_SRL_4_i) \
for (_i = 0; _i < 3-_skip; ++_i) \ X##_f[_FP_FRAC_SRL_4_i] \
X##_f[_i] = (X##_f[_i+_skip] >> _down \ = X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip]; \
| X##_f[_i+_skip+1] << _up); \ else \
X##_f[_i++] = X##_f[3] >> _down; \ { \
} \ for (_FP_FRAC_SRL_4_i = 0; \
for (; _i < 4; ++_i) \ _FP_FRAC_SRL_4_i < 3-_FP_FRAC_SRL_4_skip; \
X##_f[_i] = 0; \ ++_FP_FRAC_SRL_4_i) \
} \ X##_f[_FP_FRAC_SRL_4_i] \
= ((X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip] \
>> _FP_FRAC_SRL_4_down) \
| (X##_f[_FP_FRAC_SRL_4_i+_FP_FRAC_SRL_4_skip+1] \
<< _FP_FRAC_SRL_4_up)); \
X##_f[_FP_FRAC_SRL_4_i++] = X##_f[3] >> _FP_FRAC_SRL_4_down; \
} \
for (; _FP_FRAC_SRL_4_i < 4; ++_FP_FRAC_SRL_4_i) \
X##_f[_FP_FRAC_SRL_4_i] = 0; \
} \
while (0) while (0)
/* Right shift with sticky-lsb. /* Right shift with sticky-lsb.
* What this actually means is that we do a standard right-shift, What this actually means is that we do a standard right-shift,
* but that if any of the bits that fall off the right hand side but that if any of the bits that fall off the right hand side
* were one then we always set the LSbit. were one then we always set the LSbit. */
*/ #define _FP_FRAC_SRST_4(X, S, N, size) \
#define _FP_FRAC_SRST_4(X, S, N, size) \ do \
do \ { \
{ \ _FP_I_TYPE _FP_FRAC_SRST_4_up, _FP_FRAC_SRST_4_down; \
_FP_I_TYPE _up, _down, _skip, _i; \ _FP_I_TYPE _FP_FRAC_SRST_4_skip, _FP_FRAC_SRST_4_i; \
_FP_W_TYPE _s; \ _FP_W_TYPE _FP_FRAC_SRST_4_s; \
_skip = (N) / _FP_W_TYPE_SIZE; \ _FP_FRAC_SRST_4_skip = (N) / _FP_W_TYPE_SIZE; \
_down = (N) % _FP_W_TYPE_SIZE; \ _FP_FRAC_SRST_4_down = (N) % _FP_W_TYPE_SIZE; \
_up = _FP_W_TYPE_SIZE - _down; \ _FP_FRAC_SRST_4_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRST_4_down; \
for (_s = _i = 0; _i < _skip; ++_i) \ for (_FP_FRAC_SRST_4_s = _FP_FRAC_SRST_4_i = 0; \
_s |= X##_f[_i]; \ _FP_FRAC_SRST_4_i < _FP_FRAC_SRST_4_skip; \
if (!_down) \ ++_FP_FRAC_SRST_4_i) \
for (_i = 0; _i <= 3-_skip; ++_i) \ _FP_FRAC_SRST_4_s |= X##_f[_FP_FRAC_SRST_4_i]; \
X##_f[_i] = X##_f[_i+_skip]; \ if (!_FP_FRAC_SRST_4_down) \
else \ for (_FP_FRAC_SRST_4_i = 0; \
{ \ _FP_FRAC_SRST_4_i <= 3-_FP_FRAC_SRST_4_skip; \
_s |= X##_f[_i] << _up; \ ++_FP_FRAC_SRST_4_i) \
for (_i = 0; _i < 3-_skip; ++_i) \ X##_f[_FP_FRAC_SRST_4_i] \
X##_f[_i] = (X##_f[_i+_skip] >> _down \ = X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip]; \
| X##_f[_i+_skip+1] << _up); \ else \
X##_f[_i++] = X##_f[3] >> _down; \ { \
} \ _FP_FRAC_SRST_4_s \
for (; _i < 4; ++_i) \ |= X##_f[_FP_FRAC_SRST_4_i] << _FP_FRAC_SRST_4_up; \
X##_f[_i] = 0; \ for (_FP_FRAC_SRST_4_i = 0; \
S = (_s != 0); \ _FP_FRAC_SRST_4_i < 3-_FP_FRAC_SRST_4_skip; \
} \ ++_FP_FRAC_SRST_4_i) \
X##_f[_FP_FRAC_SRST_4_i] \
= ((X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip] \
>> _FP_FRAC_SRST_4_down) \
| (X##_f[_FP_FRAC_SRST_4_i+_FP_FRAC_SRST_4_skip+1] \
<< _FP_FRAC_SRST_4_up)); \
X##_f[_FP_FRAC_SRST_4_i++] \
= X##_f[3] >> _FP_FRAC_SRST_4_down; \
} \
for (; _FP_FRAC_SRST_4_i < 4; ++_FP_FRAC_SRST_4_i) \
X##_f[_FP_FRAC_SRST_4_i] = 0; \
S = (_FP_FRAC_SRST_4_s != 0); \
} \
while (0) while (0)
#define _FP_FRAC_SRS_4(X, N, size) \ #define _FP_FRAC_SRS_4(X, N, size) \
do \ do \
{ \ { \
int _sticky; \ int _FP_FRAC_SRS_4_sticky; \
_FP_FRAC_SRST_4 (X, _sticky, N, size); \ _FP_FRAC_SRST_4 (X, _FP_FRAC_SRS_4_sticky, (N), (size)); \
X##_f[0] |= _sticky; \ X##_f[0] |= _FP_FRAC_SRS_4_sticky; \
} \ } \
while (0) while (0)
#define _FP_FRAC_ADD_4(R, X, Y) \ #define _FP_FRAC_ADD_4(R, X, Y) \
...@@ -181,166 +211,211 @@ ...@@ -181,166 +211,211 @@
do \ do \
{ \ { \
if (X##_f[3]) \ if (X##_f[3]) \
__FP_CLZ (R, X##_f[3]); \ __FP_CLZ ((R), X##_f[3]); \
else if (X##_f[2]) \ else if (X##_f[2]) \
{ \ { \
__FP_CLZ (R, X##_f[2]); \ __FP_CLZ ((R), X##_f[2]); \
R += _FP_W_TYPE_SIZE; \ (R) += _FP_W_TYPE_SIZE; \
} \ } \
else if (X##_f[1]) \ else if (X##_f[1]) \
{ \ { \
__FP_CLZ (R, X##_f[1]); \ __FP_CLZ ((R), X##_f[1]); \
R += _FP_W_TYPE_SIZE*2; \ (R) += _FP_W_TYPE_SIZE*2; \
} \ } \
else \ else \
{ \ { \
__FP_CLZ (R, X##_f[0]); \ __FP_CLZ ((R), X##_f[0]); \
R += _FP_W_TYPE_SIZE*3; \ (R) += _FP_W_TYPE_SIZE*3; \
} \ } \
} \ } \
while (0) while (0)
#define _FP_UNPACK_RAW_4(fs, X, val) \ #define _FP_UNPACK_RAW_4(fs, X, val) \
do \ do \
{ \ { \
union _FP_UNION_##fs _flo; \ union _FP_UNION_##fs _FP_UNPACK_RAW_4_flo; \
_flo.flt = (val); \ _FP_UNPACK_RAW_4_flo.flt = (val); \
X##_f[0] = _flo.bits.frac0; \ X##_f[0] = _FP_UNPACK_RAW_4_flo.bits.frac0; \
X##_f[1] = _flo.bits.frac1; \ X##_f[1] = _FP_UNPACK_RAW_4_flo.bits.frac1; \
X##_f[2] = _flo.bits.frac2; \ X##_f[2] = _FP_UNPACK_RAW_4_flo.bits.frac2; \
X##_f[3] = _flo.bits.frac3; \ X##_f[3] = _FP_UNPACK_RAW_4_flo.bits.frac3; \
X##_e = _flo.bits.exp; \ X##_e = _FP_UNPACK_RAW_4_flo.bits.exp; \
X##_s = _flo.bits.sign; \ X##_s = _FP_UNPACK_RAW_4_flo.bits.sign; \
} \ } \
while (0) while (0)
#define _FP_UNPACK_RAW_4_P(fs, X, val) \ #define _FP_UNPACK_RAW_4_P(fs, X, val) \
do \ do \
{ \ { \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ union _FP_UNION_##fs *_FP_UNPACK_RAW_4_P_flo \
\ = (union _FP_UNION_##fs *) (val); \
X##_f[0] = _flo->bits.frac0; \ \
X##_f[1] = _flo->bits.frac1; \ X##_f[0] = _FP_UNPACK_RAW_4_P_flo->bits.frac0; \
X##_f[2] = _flo->bits.frac2; \ X##_f[1] = _FP_UNPACK_RAW_4_P_flo->bits.frac1; \
X##_f[3] = _flo->bits.frac3; \ X##_f[2] = _FP_UNPACK_RAW_4_P_flo->bits.frac2; \
X##_e = _flo->bits.exp; \ X##_f[3] = _FP_UNPACK_RAW_4_P_flo->bits.frac3; \
X##_s = _flo->bits.sign; \ X##_e = _FP_UNPACK_RAW_4_P_flo->bits.exp; \
} \ X##_s = _FP_UNPACK_RAW_4_P_flo->bits.sign; \
} \
while (0) while (0)
#define _FP_PACK_RAW_4(fs, val, X) \ #define _FP_PACK_RAW_4(fs, val, X) \
do \ do \
{ \ { \
union _FP_UNION_##fs _flo; \ union _FP_UNION_##fs _FP_PACK_RAW_4_flo; \
_flo.bits.frac0 = X##_f[0]; \ _FP_PACK_RAW_4_flo.bits.frac0 = X##_f[0]; \
_flo.bits.frac1 = X##_f[1]; \ _FP_PACK_RAW_4_flo.bits.frac1 = X##_f[1]; \
_flo.bits.frac2 = X##_f[2]; \ _FP_PACK_RAW_4_flo.bits.frac2 = X##_f[2]; \
_flo.bits.frac3 = X##_f[3]; \ _FP_PACK_RAW_4_flo.bits.frac3 = X##_f[3]; \
_flo.bits.exp = X##_e; \ _FP_PACK_RAW_4_flo.bits.exp = X##_e; \
_flo.bits.sign = X##_s; \ _FP_PACK_RAW_4_flo.bits.sign = X##_s; \
(val) = _flo.flt; \ (val) = _FP_PACK_RAW_4_flo.flt; \
} \ } \
while (0) while (0)
#define _FP_PACK_RAW_4_P(fs, val, X) \ #define _FP_PACK_RAW_4_P(fs, val, X) \
do \ do \
{ \ { \
union _FP_UNION_##fs *_flo = (union _FP_UNION_##fs *) (val); \ union _FP_UNION_##fs *_FP_PACK_RAW_4_P_flo \
\ = (union _FP_UNION_##fs *) (val); \
_flo->bits.frac0 = X##_f[0]; \ \
_flo->bits.frac1 = X##_f[1]; \ _FP_PACK_RAW_4_P_flo->bits.frac0 = X##_f[0]; \
_flo->bits.frac2 = X##_f[2]; \ _FP_PACK_RAW_4_P_flo->bits.frac1 = X##_f[1]; \
_flo->bits.frac3 = X##_f[3]; \ _FP_PACK_RAW_4_P_flo->bits.frac2 = X##_f[2]; \
_flo->bits.exp = X##_e; \ _FP_PACK_RAW_4_P_flo->bits.frac3 = X##_f[3]; \
_flo->bits.sign = X##_s; \ _FP_PACK_RAW_4_P_flo->bits.exp = X##_e; \
} \ _FP_PACK_RAW_4_P_flo->bits.sign = X##_s; \
} \
while (0) while (0)
/* /* Multiplication algorithms: */
* Multiplication algorithms:
*/
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
#define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \ #define _FP_MUL_MEAT_DW_4_wide(wfracbits, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_FRAC_DECL_2 (_b); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_b); \
_FP_FRAC_DECL_2 (_c); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_c); \
_FP_FRAC_DECL_2 (_d); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_d); \
_FP_FRAC_DECL_2 (_e); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_e); \
_FP_FRAC_DECL_2 (_f); \ _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_4_wide_f); \
\ \
doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), X##_f[0], Y##_f[0]); \ doit (_FP_FRAC_WORD_8 (R, 1), _FP_FRAC_WORD_8 (R, 0), \
doit (_b_f1, _b_f0, X##_f[0], Y##_f[1]); \ X##_f[0], Y##_f[0]); \
doit (_c_f1, _c_f0, X##_f[1], Y##_f[0]); \ doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
doit (_d_f1, _d_f0, X##_f[1], Y##_f[1]); \ X##_f[0], Y##_f[1]); \
doit (_e_f1, _e_f0, X##_f[0], Y##_f[2]); \ doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
doit (_f_f1, _f_f0, X##_f[2], Y##_f[0]); \ X##_f[1], Y##_f[0]); \
doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
X##_f[1], Y##_f[1]); \
doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
X##_f[0], Y##_f[2]); \
doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
X##_f[2], Y##_f[0]); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
_FP_FRAC_WORD_8 (R, 1), 0, _b_f1, _b_f0, \ _FP_FRAC_WORD_8 (R, 1), 0, \
_FP_MUL_MEAT_DW_4_wide_b_f1, \
_FP_MUL_MEAT_DW_4_wide_b_f0, \
0, 0, _FP_FRAC_WORD_8 (R, 1)); \ 0, 0, _FP_FRAC_WORD_8 (R, 1)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
_FP_FRAC_WORD_8 (R, 1), 0, _c_f1, _c_f0, \ _FP_FRAC_WORD_8 (R, 1), 0, \
_FP_MUL_MEAT_DW_4_wide_c_f1, \
_FP_MUL_MEAT_DW_4_wide_c_f0, \
_FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \ _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2), \
_FP_FRAC_WORD_8 (R, 1)); \ _FP_FRAC_WORD_8 (R, 1)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
_FP_FRAC_WORD_8 (R, 2), 0, _d_f1, _d_f0, \ _FP_FRAC_WORD_8 (R, 2), 0, \
_FP_MUL_MEAT_DW_4_wide_d_f1, \
_FP_MUL_MEAT_DW_4_wide_d_f0, \
0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \ 0, _FP_FRAC_WORD_8 (R, 3), _FP_FRAC_WORD_8 (R, 2)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
_FP_FRAC_WORD_8 (R, 2), 0, _e_f1, _e_f0, \ _FP_FRAC_WORD_8 (R, 2), 0, \
_FP_MUL_MEAT_DW_4_wide_e_f1, \
_FP_MUL_MEAT_DW_4_wide_e_f0, \
_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
_FP_FRAC_WORD_8 (R, 2)); \ _FP_FRAC_WORD_8 (R, 2)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
_FP_FRAC_WORD_8 (R, 2), 0, _f_f1, _f_f0, \ _FP_FRAC_WORD_8 (R, 2), 0, \
_FP_MUL_MEAT_DW_4_wide_f_f1, \
_FP_MUL_MEAT_DW_4_wide_f_f0, \
_FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \ _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3), \
_FP_FRAC_WORD_8 (R, 2)); \ _FP_FRAC_WORD_8 (R, 2)); \
doit (_b_f1, _b_f0, X##_f[0], Y##_f[3]); \ doit (_FP_MUL_MEAT_DW_4_wide_b_f1, \
doit (_c_f1, _c_f0, X##_f[3], Y##_f[0]); \ _FP_MUL_MEAT_DW_4_wide_b_f0, X##_f[0], Y##_f[3]); \
doit (_d_f1, _d_f0, X##_f[1], Y##_f[2]); \ doit (_FP_MUL_MEAT_DW_4_wide_c_f1, \
doit (_e_f1, _e_f0, X##_f[2], Y##_f[1]); \ _FP_MUL_MEAT_DW_4_wide_c_f0, X##_f[3], Y##_f[0]); \
doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
X##_f[1], Y##_f[2]); \
doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
X##_f[2], Y##_f[1]); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
_FP_FRAC_WORD_8 (R, 3), 0, _b_f1, _b_f0, \ _FP_FRAC_WORD_8 (R, 3), 0, \
_FP_MUL_MEAT_DW_4_wide_b_f1, \
_FP_MUL_MEAT_DW_4_wide_b_f0, \
0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \ 0, _FP_FRAC_WORD_8 (R, 4), _FP_FRAC_WORD_8 (R, 3)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
_FP_FRAC_WORD_8 (R, 3), 0, _c_f1, _c_f0, \ _FP_FRAC_WORD_8 (R, 3), 0, \
_FP_MUL_MEAT_DW_4_wide_c_f1, \
_FP_MUL_MEAT_DW_4_wide_c_f0, \
_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
_FP_FRAC_WORD_8 (R, 3)); \ _FP_FRAC_WORD_8 (R, 3)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
_FP_FRAC_WORD_8 (R, 3), 0, _d_f1, _d_f0, \ _FP_FRAC_WORD_8 (R, 3), 0, \
_FP_MUL_MEAT_DW_4_wide_d_f1, \
_FP_MUL_MEAT_DW_4_wide_d_f0, \
_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
_FP_FRAC_WORD_8 (R, 3)); \ _FP_FRAC_WORD_8 (R, 3)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
_FP_FRAC_WORD_8 (R, 3), 0, _e_f1, _e_f0, \ _FP_FRAC_WORD_8 (R, 3), 0, \
_FP_MUL_MEAT_DW_4_wide_e_f1, \
_FP_MUL_MEAT_DW_4_wide_e_f0, \
_FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \ _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4), \
_FP_FRAC_WORD_8 (R, 3)); \ _FP_FRAC_WORD_8 (R, 3)); \
doit (_b_f1, _b_f0, X##_f[2], Y##_f[2]); \ doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
doit (_c_f1, _c_f0, X##_f[1], Y##_f[3]); \ X##_f[2], Y##_f[2]); \
doit (_d_f1, _d_f0, X##_f[3], Y##_f[1]); \ doit (_FP_MUL_MEAT_DW_4_wide_c_f1, _FP_MUL_MEAT_DW_4_wide_c_f0, \
doit (_e_f1, _e_f0, X##_f[2], Y##_f[3]); \ X##_f[1], Y##_f[3]); \
doit (_f_f1, _f_f0, X##_f[3], Y##_f[2]); \ doit (_FP_MUL_MEAT_DW_4_wide_d_f1, _FP_MUL_MEAT_DW_4_wide_d_f0, \
X##_f[3], Y##_f[1]); \
doit (_FP_MUL_MEAT_DW_4_wide_e_f1, _FP_MUL_MEAT_DW_4_wide_e_f0, \
X##_f[2], Y##_f[3]); \
doit (_FP_MUL_MEAT_DW_4_wide_f_f1, _FP_MUL_MEAT_DW_4_wide_f_f0, \
X##_f[3], Y##_f[2]); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
_FP_FRAC_WORD_8 (R, 4), 0, _b_f1, _b_f0, \ _FP_FRAC_WORD_8 (R, 4), 0, \
_FP_MUL_MEAT_DW_4_wide_b_f1, \
_FP_MUL_MEAT_DW_4_wide_b_f0, \
0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \ 0, _FP_FRAC_WORD_8 (R, 5), _FP_FRAC_WORD_8 (R, 4)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
_FP_FRAC_WORD_8 (R, 4), 0, _c_f1, _c_f0, \ _FP_FRAC_WORD_8 (R, 4), 0, \
_FP_MUL_MEAT_DW_4_wide_c_f1, \
_FP_MUL_MEAT_DW_4_wide_c_f0, \
_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
_FP_FRAC_WORD_8 (R, 4)); \ _FP_FRAC_WORD_8 (R, 4)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
_FP_FRAC_WORD_8 (R, 4), 0, _d_f1, _d_f0, \ _FP_FRAC_WORD_8 (R, 4), 0, \
_FP_MUL_MEAT_DW_4_wide_d_f1, \
_FP_MUL_MEAT_DW_4_wide_d_f0, \
_FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \ _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5), \
_FP_FRAC_WORD_8 (R, 4)); \ _FP_FRAC_WORD_8 (R, 4)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
_FP_FRAC_WORD_8 (R, 5), 0, _e_f1, _e_f0, \ _FP_FRAC_WORD_8 (R, 5), 0, \
_FP_MUL_MEAT_DW_4_wide_e_f1, \
_FP_MUL_MEAT_DW_4_wide_e_f0, \
0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \ 0, _FP_FRAC_WORD_8 (R, 6), _FP_FRAC_WORD_8 (R, 5)); \
__FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ __FP_FRAC_ADD_3 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
_FP_FRAC_WORD_8 (R, 5), 0, _f_f1, _f_f0, \ _FP_FRAC_WORD_8 (R, 5), 0, \
_FP_MUL_MEAT_DW_4_wide_f_f1, \
_FP_MUL_MEAT_DW_4_wide_f_f0, \
_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
_FP_FRAC_WORD_8 (R, 5)); \ _FP_FRAC_WORD_8 (R, 5)); \
doit (_b_f1, _b_f0, X##_f[3], Y##_f[3]); \ doit (_FP_MUL_MEAT_DW_4_wide_b_f1, _FP_MUL_MEAT_DW_4_wide_b_f0, \
X##_f[3], Y##_f[3]); \
__FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \ __FP_FRAC_ADD_2 (_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6), \
_b_f1, _b_f0, \ _FP_MUL_MEAT_DW_4_wide_b_f1, \
_FP_MUL_MEAT_DW_4_wide_b_f0, \
_FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \ _FP_FRAC_WORD_8 (R, 7), _FP_FRAC_WORD_8 (R, 6)); \
} \ } \
while (0) while (0)
...@@ -348,16 +423,20 @@ ...@@ -348,16 +423,20 @@
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \ #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \
do \ do \
{ \ { \
_FP_FRAC_DECL_8 (_z); \ _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_wide_z); \
\ \
_FP_MUL_MEAT_DW_4_wide (wfracbits, _z, X, Y, doit); \ _FP_MUL_MEAT_DW_4_wide ((wfracbits), _FP_MUL_MEAT_4_wide_z, \
X, Y, doit); \
\ \
/* Normalize since we know where the msb of the multiplicands \ /* Normalize since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the of the product is \ were (bit B), we know that the msb of the of the product is \
at either 2B or 2B-1. */ \ at either 2B or 2B-1. */ \
_FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits); \ _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_wide_z, (wfracbits)-1, \
__FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \ 2*(wfracbits)); \
_FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \ __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 3), \
_FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 2), \
_FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 1), \
_FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_wide_z, 0)); \
} \ } \
while (0) while (0)
...@@ -371,101 +450,110 @@ ...@@ -371,101 +450,110 @@
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \ #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \
do \ do \
{ \ { \
_FP_FRAC_DECL_8 (_z); \ _FP_FRAC_DECL_8 (_FP_MUL_MEAT_4_gmp_z); \
\ \
_FP_MUL_MEAT_DW_4_gmp (wfracbits, _z, X, Y); \ _FP_MUL_MEAT_DW_4_gmp ((wfracbits), _FP_MUL_MEAT_4_gmp_z, X, Y); \
\ \
/* Normalize since we know where the msb of the multiplicands \ /* Normalize since we know where the msb of the multiplicands \
were (bit B), we know that the msb of the of the product is \ were (bit B), we know that the msb of the of the product is \
at either 2B or 2B-1. */ \ at either 2B or 2B-1. */ \
_FP_FRAC_SRS_8 (_z, wfracbits-1, 2*wfracbits); \ _FP_FRAC_SRS_8 (_FP_MUL_MEAT_4_gmp_z, (wfracbits)-1, \
__FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_z, 3), _FP_FRAC_WORD_8 (_z, 2), \ 2*(wfracbits)); \
_FP_FRAC_WORD_8 (_z, 1), _FP_FRAC_WORD_8 (_z, 0)); \ __FP_FRAC_SET_4 (R, _FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 3), \
_FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 2), \
_FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 1), \
_FP_FRAC_WORD_8 (_FP_MUL_MEAT_4_gmp_z, 0)); \
} \ } \
while (0) while (0)
/* /* Helper utility for _FP_DIV_MEAT_4_udiv:
* Helper utility for _FP_DIV_MEAT_4_udiv: * pppp = m * nnn. */
* pppp = m * nnn
*/
#define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \ #define umul_ppppmnnn(p3, p2, p1, p0, m, n2, n1, n0) \
do \ do \
{ \ { \
UWtype _t; \ UWtype umul_ppppmnnn_t; \
umul_ppmm (p1, p0, m, n0); \ umul_ppmm (p1, p0, m, n0); \
umul_ppmm (p2, _t, m, n1); \ umul_ppmm (p2, umul_ppppmnnn_t, m, n1); \
__FP_FRAC_ADDI_2 (p2, p1, _t); \ __FP_FRAC_ADDI_2 (p2, p1, umul_ppppmnnn_t); \
umul_ppmm (p3, _t, m, n2); \ umul_ppmm (p3, umul_ppppmnnn_t, m, n2); \
__FP_FRAC_ADDI_2 (p3, p2, _t); \ __FP_FRAC_ADDI_2 (p3, p2, umul_ppppmnnn_t); \
} \ } \
while (0) while (0)
/* /* Division algorithms: */
* Division algorithms:
*/
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \ #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \
do \ do \
{ \ { \
int _i; \ int _FP_DIV_MEAT_4_udiv_i; \
_FP_FRAC_DECL_4 (_n); \ _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_n); \
_FP_FRAC_DECL_4 (_m); \ _FP_FRAC_DECL_4 (_FP_DIV_MEAT_4_udiv_m); \
_FP_FRAC_SET_4 (_n, _FP_ZEROFRAC_4); \ _FP_FRAC_SET_4 (_FP_DIV_MEAT_4_udiv_n, _FP_ZEROFRAC_4); \
if (_FP_FRAC_GE_4 (X, Y)) \ if (_FP_FRAC_GE_4 (X, Y)) \
{ \ { \
_n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \ _FP_DIV_MEAT_4_udiv_n_f[3] \
= X##_f[0] << (_FP_W_TYPE_SIZE - 1); \
_FP_FRAC_SRL_4 (X, 1); \ _FP_FRAC_SRL_4 (X, 1); \
} \ } \
else \ else \
R##_e--; \ R##_e--; \
\ \
/* Normalize, i.e. make the most significant bit of the \ /* Normalize, i.e. make the most significant bit of the \
denominator set. */ \ denominator set. */ \
_FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \ _FP_FRAC_SLL_4 (Y, _FP_WFRACXBITS_##fs); \
\ \
for (_i = 3; ; _i--) \ for (_FP_DIV_MEAT_4_udiv_i = 3; ; _FP_DIV_MEAT_4_udiv_i--) \
{ \ { \
if (X##_f[3] == Y##_f[3]) \ if (X##_f[3] == Y##_f[3]) \
{ \ { \
/* This is a special case, not an optimization \ /* This is a special case, not an optimization \
(X##_f[3]/Y##_f[3] would not fit into UWtype). \ (X##_f[3]/Y##_f[3] would not fit into UWtype). \
As X## is guaranteed to be < Y, R##_f[_i] can be either \ As X## is guaranteed to be < Y, \
R##_f[_FP_DIV_MEAT_4_udiv_i] can be either \
(UWtype)-1 or (UWtype)-2. */ \ (UWtype)-1 or (UWtype)-2. */ \
R##_f[_i] = -1; \ R##_f[_FP_DIV_MEAT_4_udiv_i] = -1; \
if (!_i) \ if (!_FP_DIV_MEAT_4_udiv_i) \
break; \ break; \
__FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ __FP_FRAC_SUB_4 (X##_f[3], X##_f[2], X##_f[1], X##_f[0], \
Y##_f[2], Y##_f[1], Y##_f[0], 0, \ Y##_f[2], Y##_f[1], Y##_f[0], 0, \
X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \ X##_f[2], X##_f[1], X##_f[0], \
_FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]); \
_FP_FRAC_SUB_4 (X, Y, X); \ _FP_FRAC_SUB_4 (X, Y, X); \
if (X##_f[3] > Y##_f[3]) \ if (X##_f[3] > Y##_f[3]) \
{ \ { \
R##_f[_i] = -2; \ R##_f[_FP_DIV_MEAT_4_udiv_i] = -2; \
_FP_FRAC_ADD_4 (X, Y, X); \ _FP_FRAC_ADD_4 (X, Y, X); \
} \ } \
} \ } \
else \ else \
{ \ { \
udiv_qrnnd (R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \ udiv_qrnnd (R##_f[_FP_DIV_MEAT_4_udiv_i], \
umul_ppppmnnn (_m_f[3], _m_f[2], _m_f[1], _m_f[0], \ X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \
R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \ umul_ppppmnnn (_FP_DIV_MEAT_4_udiv_m_f[3], \
_FP_DIV_MEAT_4_udiv_m_f[2], \
_FP_DIV_MEAT_4_udiv_m_f[1], \
_FP_DIV_MEAT_4_udiv_m_f[0], \
R##_f[_FP_DIV_MEAT_4_udiv_i], \
Y##_f[2], Y##_f[1], Y##_f[0]); \
X##_f[2] = X##_f[1]; \ X##_f[2] = X##_f[1]; \
X##_f[1] = X##_f[0]; \ X##_f[1] = X##_f[0]; \
X##_f[0] = _n_f[_i]; \ X##_f[0] \
if (_FP_FRAC_GT_4 (_m, X)) \ = _FP_DIV_MEAT_4_udiv_n_f[_FP_DIV_MEAT_4_udiv_i]; \
if (_FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
{ \ { \
R##_f[_i]--; \ R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
_FP_FRAC_ADD_4 (X, Y, X); \ _FP_FRAC_ADD_4 (X, Y, X); \
if (_FP_FRAC_GE_4 (X, Y) && _FP_FRAC_GT_4 (_m, X)) \ if (_FP_FRAC_GE_4 (X, Y) \
&& _FP_FRAC_GT_4 (_FP_DIV_MEAT_4_udiv_m, X)) \
{ \ { \
R##_f[_i]--; \ R##_f[_FP_DIV_MEAT_4_udiv_i]--; \
_FP_FRAC_ADD_4 (X, Y, X); \ _FP_FRAC_ADD_4 (X, Y, X); \
} \ } \
} \ } \
_FP_FRAC_DEC_4 (X, _m); \ _FP_FRAC_DEC_4 (X, _FP_DIV_MEAT_4_udiv_m); \
if (!_i) \ if (!_FP_DIV_MEAT_4_udiv_i) \
{ \ { \
if (!_FP_FRAC_EQ_4 (X, _m)) \ if (!_FP_FRAC_EQ_4 (X, _FP_DIV_MEAT_4_udiv_m)) \
R##_f[0] |= _FP_WORK_STICKY; \ R##_f[0] |= _FP_WORK_STICKY; \
break; \ break; \
} \ } \
...@@ -475,48 +563,46 @@ ...@@ -475,48 +563,46 @@
while (0) while (0)
/* /* Square root algorithms:
* Square root algorithms: We have just one right now, maybe Newton approximation
* We have just one right now, maybe Newton approximation should be added for those machines where division is fast. */
* should be added for those machines where division is fast.
*/
#define _FP_SQRT_MEAT_4(R, S, T, X, q) \ #define _FP_SQRT_MEAT_4(R, S, T, X, q) \
do \ do \
{ \ { \
while (q) \ while (q) \
{ \ { \
T##_f[3] = S##_f[3] + q; \ T##_f[3] = S##_f[3] + (q); \
if (T##_f[3] <= X##_f[3]) \ if (T##_f[3] <= X##_f[3]) \
{ \ { \
S##_f[3] = T##_f[3] + q; \ S##_f[3] = T##_f[3] + (q); \
X##_f[3] -= T##_f[3]; \ X##_f[3] -= T##_f[3]; \
R##_f[3] += q; \ R##_f[3] += (q); \
} \ } \
_FP_FRAC_SLL_4 (X, 1); \ _FP_FRAC_SLL_4 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \ (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
while (q) \ while (q) \
{ \ { \
T##_f[2] = S##_f[2] + q; \ T##_f[2] = S##_f[2] + (q); \
T##_f[3] = S##_f[3]; \ T##_f[3] = S##_f[3]; \
if (T##_f[3] < X##_f[3] \ if (T##_f[3] < X##_f[3] \
|| (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \ || (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \
{ \ { \
S##_f[2] = T##_f[2] + q; \ S##_f[2] = T##_f[2] + (q); \
S##_f[3] += (T##_f[2] > S##_f[2]); \ S##_f[3] += (T##_f[2] > S##_f[2]); \
__FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \ __FP_FRAC_DEC_2 (X##_f[3], X##_f[2], \
T##_f[3], T##_f[2]); \ T##_f[3], T##_f[2]); \
R##_f[2] += q; \ R##_f[2] += (q); \
} \ } \
_FP_FRAC_SLL_4 (X, 1); \ _FP_FRAC_SLL_4 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \ (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
while (q) \ while (q) \
{ \ { \
T##_f[1] = S##_f[1] + q; \ T##_f[1] = S##_f[1] + (q); \
T##_f[2] = S##_f[2]; \ T##_f[2] = S##_f[2]; \
T##_f[3] = S##_f[3]; \ T##_f[3] = S##_f[3]; \
if (T##_f[3] < X##_f[3] \ if (T##_f[3] < X##_f[3] \
...@@ -525,34 +611,34 @@ ...@@ -525,34 +611,34 @@
|| (T##_f[2] == X##_f[2] \ || (T##_f[2] == X##_f[2] \
&& T##_f[1] <= X##_f[1])))) \ && T##_f[1] <= X##_f[1])))) \
{ \ { \
S##_f[1] = T##_f[1] + q; \ S##_f[1] = T##_f[1] + (q); \
S##_f[2] += (T##_f[1] > S##_f[1]); \ S##_f[2] += (T##_f[1] > S##_f[1]); \
S##_f[3] += (T##_f[2] > S##_f[2]); \ S##_f[3] += (T##_f[2] > S##_f[2]); \
__FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \ __FP_FRAC_DEC_3 (X##_f[3], X##_f[2], X##_f[1], \
T##_f[3], T##_f[2], T##_f[1]); \ T##_f[3], T##_f[2], T##_f[1]); \
R##_f[1] += q; \ R##_f[1] += (q); \
} \ } \
_FP_FRAC_SLL_4 (X, 1); \ _FP_FRAC_SLL_4 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
q = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \ (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
while (q != _FP_WORK_ROUND) \ while ((q) != _FP_WORK_ROUND) \
{ \ { \
T##_f[0] = S##_f[0] + q; \ T##_f[0] = S##_f[0] + (q); \
T##_f[1] = S##_f[1]; \ T##_f[1] = S##_f[1]; \
T##_f[2] = S##_f[2]; \ T##_f[2] = S##_f[2]; \
T##_f[3] = S##_f[3]; \ T##_f[3] = S##_f[3]; \
if (_FP_FRAC_GE_4 (X, T)) \ if (_FP_FRAC_GE_4 (X, T)) \
{ \ { \
S##_f[0] = T##_f[0] + q; \ S##_f[0] = T##_f[0] + (q); \
S##_f[1] += (T##_f[0] > S##_f[0]); \ S##_f[1] += (T##_f[0] > S##_f[0]); \
S##_f[2] += (T##_f[1] > S##_f[1]); \ S##_f[2] += (T##_f[1] > S##_f[1]); \
S##_f[3] += (T##_f[2] > S##_f[2]); \ S##_f[3] += (T##_f[2] > S##_f[2]); \
_FP_FRAC_DEC_4 (X, T); \ _FP_FRAC_DEC_4 (X, T); \
R##_f[0] += q; \ R##_f[0] += (q); \
} \ } \
_FP_FRAC_SLL_4 (X, 1); \ _FP_FRAC_SLL_4 (X, 1); \
q >>= 1; \ (q) >>= 1; \
} \ } \
if (!_FP_FRAC_ZEROP_4 (X)) \ if (!_FP_FRAC_ZEROP_4 (X)) \
{ \ { \
...@@ -564,9 +650,7 @@ ...@@ -564,9 +650,7 @@
while (0) while (0)
/* /* Internals. */
* Internals
*/
#define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \ #define __FP_FRAC_SET_4(X, I3, I2, I1, I0) \
(X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0) (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
...@@ -591,18 +675,19 @@ ...@@ -591,18 +675,19 @@
# define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \ # define __FP_FRAC_ADD_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
do \ do \
{ \ { \
_FP_W_TYPE _c1, _c2, _c3; \ _FP_W_TYPE __FP_FRAC_ADD_4_c1, __FP_FRAC_ADD_4_c2; \
_FP_W_TYPE __FP_FRAC_ADD_4_c3; \
r0 = x0 + y0; \ r0 = x0 + y0; \
_c1 = r0 < x0; \ __FP_FRAC_ADD_4_c1 = r0 < x0; \
r1 = x1 + y1; \ r1 = x1 + y1; \
_c2 = r1 < x1; \ __FP_FRAC_ADD_4_c2 = r1 < x1; \
r1 += _c1; \ r1 += __FP_FRAC_ADD_4_c1; \
_c2 |= r1 < _c1; \ __FP_FRAC_ADD_4_c2 |= r1 < __FP_FRAC_ADD_4_c1; \
r2 = x2 + y2; \ r2 = x2 + y2; \
_c3 = r2 < x2; \ __FP_FRAC_ADD_4_c3 = r2 < x2; \
r2 += _c2; \ r2 += __FP_FRAC_ADD_4_c2; \
_c3 |= r2 < _c2; \ __FP_FRAC_ADD_4_c3 |= r2 < __FP_FRAC_ADD_4_c2; \
r3 = x3 + y3 + _c3; \ r3 = x3 + y3 + __FP_FRAC_ADD_4_c3; \
} \ } \
while (0) while (0)
#endif #endif
...@@ -611,14 +696,14 @@ ...@@ -611,14 +696,14 @@
# define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \ # define __FP_FRAC_SUB_3(r2, r1, r0, x2, x1, x0, y2, y1, y0) \
do \ do \
{ \ { \
_FP_W_TYPE _c1, _c2; \ _FP_W_TYPE __FP_FRAC_SUB_3_c1, __FP_FRAC_SUB_3_c2; \
r0 = x0 - y0; \ r0 = x0 - y0; \
_c1 = r0 > x0; \ __FP_FRAC_SUB_3_c1 = r0 > x0; \
r1 = x1 - y1; \ r1 = x1 - y1; \
_c2 = r1 > x1; \ __FP_FRAC_SUB_3_c2 = r1 > x1; \
r1 -= _c1; \ r1 -= __FP_FRAC_SUB_3_c1; \
_c2 |= _c1 && (y1 == x1); \ __FP_FRAC_SUB_3_c2 |= __FP_FRAC_SUB_3_c1 && (y1 == x1); \
r2 = x2 - y2 - _c2; \ r2 = x2 - y2 - __FP_FRAC_SUB_3_c2; \
} \ } \
while (0) while (0)
#endif #endif
...@@ -627,18 +712,19 @@ ...@@ -627,18 +712,19 @@
# define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \ # define __FP_FRAC_SUB_4(r3, r2, r1, r0, x3, x2, x1, x0, y3, y2, y1, y0) \
do \ do \
{ \ { \
_FP_W_TYPE _c1, _c2, _c3; \ _FP_W_TYPE __FP_FRAC_SUB_4_c1, __FP_FRAC_SUB_4_c2; \
_FP_W_TYPE __FP_FRAC_SUB_4_c3; \
r0 = x0 - y0; \ r0 = x0 - y0; \
_c1 = r0 > x0; \ __FP_FRAC_SUB_4_c1 = r0 > x0; \
r1 = x1 - y1; \ r1 = x1 - y1; \
_c2 = r1 > x1; \ __FP_FRAC_SUB_4_c2 = r1 > x1; \
r1 -= _c1; \ r1 -= __FP_FRAC_SUB_4_c1; \
_c2 |= _c1 && (y1 == x1); \ __FP_FRAC_SUB_4_c2 |= __FP_FRAC_SUB_4_c1 && (y1 == x1); \
r2 = x2 - y2; \ r2 = x2 - y2; \
_c3 = r2 > x2; \ __FP_FRAC_SUB_4_c3 = r2 > x2; \
r2 -= _c2; \ r2 -= __FP_FRAC_SUB_4_c2; \
_c3 |= _c2 && (y2 == x2); \ __FP_FRAC_SUB_4_c3 |= __FP_FRAC_SUB_4_c2 && (y2 == x2); \
r3 = x3 - y3 - _c3; \ r3 = x3 - y3 - __FP_FRAC_SUB_4_c3; \
} \ } \
while (0) while (0)
#endif #endif
...@@ -647,46 +733,56 @@ ...@@ -647,46 +733,56 @@
# define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \ # define __FP_FRAC_DEC_3(x2, x1, x0, y2, y1, y0) \
do \ do \
{ \ { \
UWtype _t0, _t1, _t2; \ UWtype __FP_FRAC_DEC_3_t0, __FP_FRAC_DEC_3_t1; \
_t0 = x0, _t1 = x1, _t2 = x2; \ UWtype __FP_FRAC_DEC_3_t2; \
__FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \ __FP_FRAC_DEC_3_t0 = x0; \
__FP_FRAC_DEC_3_t1 = x1; \
__FP_FRAC_DEC_3_t2 = x2; \
__FP_FRAC_SUB_3 (x2, x1, x0, __FP_FRAC_DEC_3_t2, \
__FP_FRAC_DEC_3_t1, __FP_FRAC_DEC_3_t0, \
y2, y1, y0); \
} \ } \
while (0) while (0)
#endif #endif
#ifndef __FP_FRAC_DEC_4 #ifndef __FP_FRAC_DEC_4
# define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \ # define __FP_FRAC_DEC_4(x3, x2, x1, x0, y3, y2, y1, y0) \
do \ do \
{ \ { \
UWtype _t0, _t1, _t2, _t3; \ UWtype __FP_FRAC_DEC_4_t0, __FP_FRAC_DEC_4_t1; \
_t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \ UWtype __FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t3; \
__FP_FRAC_SUB_4 (x3, x2, x1, x0, _t3, _t2, _t1, _t0, y3, y2, y1, y0); \ __FP_FRAC_DEC_4_t0 = x0; \
} \ __FP_FRAC_DEC_4_t1 = x1; \
__FP_FRAC_DEC_4_t2 = x2; \
__FP_FRAC_DEC_4_t3 = x3; \
__FP_FRAC_SUB_4 (x3, x2, x1, x0, __FP_FRAC_DEC_4_t3, \
__FP_FRAC_DEC_4_t2, __FP_FRAC_DEC_4_t1, \
__FP_FRAC_DEC_4_t0, y3, y2, y1, y0); \
} \
while (0) while (0)
#endif #endif
#ifndef __FP_FRAC_ADDI_4 #ifndef __FP_FRAC_ADDI_4
# define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \ # define __FP_FRAC_ADDI_4(x3, x2, x1, x0, i) \
do \ do \
{ \ { \
UWtype _t; \ UWtype __FP_FRAC_ADDI_4_t; \
_t = ((x0 += i) < i); \ __FP_FRAC_ADDI_4_t = ((x0 += i) < i); \
x1 += _t; \ x1 += __FP_FRAC_ADDI_4_t; \
_t = (x1 < _t); \ __FP_FRAC_ADDI_4_t = (x1 < __FP_FRAC_ADDI_4_t); \
x2 += _t; \ x2 += __FP_FRAC_ADDI_4_t; \
_t = (x2 < _t); \ __FP_FRAC_ADDI_4_t = (x2 < __FP_FRAC_ADDI_4_t); \
x3 += _t; \ x3 += __FP_FRAC_ADDI_4_t; \
} \ } \
while (0) while (0)
#endif #endif
/* Convert FP values between word sizes. This appears to be more /* Convert FP values between word sizes. This appears to be more
* complicated than I'd have expected it to be, so these might be complicated than I'd have expected it to be, so these might be
* wrong... These macros are in any case somewhat bogus because they wrong... These macros are in any case somewhat bogus because they
* use information about what various FRAC_n variables look like use information about what various FRAC_n variables look like
* internally [eg, that 2 word vars are X_f0 and x_f1]. But so do internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
* the ones in op-2.h and op-1.h. the ones in op-2.h and op-1.h. */
*/
#define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0]) #define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0])
#define _FP_FRAC_COPY_2_4(D, S) \ #define _FP_FRAC_COPY_2_4(D, S) \
...@@ -698,48 +794,60 @@ ...@@ -698,48 +794,60 @@
while (0) while (0)
/* Assembly/disassembly for converting to/from integral types. /* Assembly/disassembly for converting to/from integral types.
* No shifting or overflow handled here. No shifting or overflow handled here. */
*/ /* Put the FP value X into r, which is an integer of size rsize. */
/* Put the FP value X into r, which is an integer of size rsize. */
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \ #define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \
do \ do \
{ \ { \
if (rsize <= _FP_W_TYPE_SIZE) \ if ((rsize) <= _FP_W_TYPE_SIZE) \
r = X##_f[0]; \ (r) = X##_f[0]; \
else if (rsize <= 2*_FP_W_TYPE_SIZE) \ else if ((rsize) <= 2*_FP_W_TYPE_SIZE) \
{ \ { \
r = X##_f[1]; \ (r) = X##_f[1]; \
r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \ (r) = ((rsize) <= _FP_W_TYPE_SIZE \
r += X##_f[0]; \ ? 0 \
: (r) << _FP_W_TYPE_SIZE); \
(r) += X##_f[0]; \
} \ } \
else \ else \
{ \ { \
/* I'm feeling lazy so we deal with int == 3words (implausible)*/ \ /* I'm feeling lazy so we deal with int == 3words \
/* and int == 4words as a single case. */ \ (implausible) and int == 4words as a single case. */ \
r = X##_f[3]; \ (r) = X##_f[3]; \
r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \ (r) = ((rsize) <= _FP_W_TYPE_SIZE \
r += X##_f[2]; \ ? 0 \
r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \ : (r) << _FP_W_TYPE_SIZE); \
r += X##_f[1]; \ (r) += X##_f[2]; \
r = (rsize <= _FP_W_TYPE_SIZE ? 0 : r << _FP_W_TYPE_SIZE); \ (r) = ((rsize) <= _FP_W_TYPE_SIZE \
r += X##_f[0]; \ ? 0 \
: (r) << _FP_W_TYPE_SIZE); \
(r) += X##_f[1]; \
(r) = ((rsize) <= _FP_W_TYPE_SIZE \
? 0 \
: (r) << _FP_W_TYPE_SIZE); \
(r) += X##_f[0]; \
} \ } \
} \ } \
while (0) while (0)
/* "No disassemble Number Five!" */ /* "No disassemble Number Five!" */
/* move an integer of size rsize into X's fractional part. We rely on /* Move an integer of size rsize into X's fractional part. We rely on
* the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
* having to mask the values we store into it. having to mask the values we store into it. */
*/ #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \ do \
do \ { \
{ \ X##_f[0] = (r); \
X##_f[0] = r; \ X##_f[1] = ((rsize) <= _FP_W_TYPE_SIZE \
X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ ? 0 \
X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \ : (r) >> _FP_W_TYPE_SIZE); \
X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \ X##_f[2] = ((rsize) <= 2*_FP_W_TYPE_SIZE \
} \ ? 0 \
: (r) >> 2*_FP_W_TYPE_SIZE); \
X##_f[3] = ((rsize) <= 3*_FP_W_TYPE_SIZE \
? 0 \
: (r) >> 3*_FP_W_TYPE_SIZE); \
} \
while (0) while (0)
#define _FP_FRAC_COPY_4_1(D, S) \ #define _FP_FRAC_COPY_4_1(D, S) \
......
...@@ -30,87 +30,116 @@ ...@@ -30,87 +30,116 @@
<http://www.gnu.org/licenses/>. */ <http://www.gnu.org/licenses/>. */
/* We need just a few things from here for op-4, if we ever need some /* We need just a few things from here for op-4, if we ever need some
other macros, they can be added. */ other macros, they can be added. */
#define _FP_FRAC_DECL_8(X) _FP_W_TYPE X##_f[8] #define _FP_FRAC_DECL_8(X) _FP_W_TYPE X##_f[8]
#define _FP_FRAC_HIGH_8(X) (X##_f[7]) #define _FP_FRAC_HIGH_8(X) (X##_f[7])
#define _FP_FRAC_LOW_8(X) (X##_f[0]) #define _FP_FRAC_LOW_8(X) (X##_f[0])
#define _FP_FRAC_WORD_8(X, w) (X##_f[w]) #define _FP_FRAC_WORD_8(X, w) (X##_f[w])
#define _FP_FRAC_SLL_8(X, N) \ #define _FP_FRAC_SLL_8(X, N) \
do \ do \
{ \ { \
_FP_I_TYPE _up, _down, _skip, _i; \ _FP_I_TYPE _FP_FRAC_SLL_8_up, _FP_FRAC_SLL_8_down; \
_skip = (N) / _FP_W_TYPE_SIZE; \ _FP_I_TYPE _FP_FRAC_SLL_8_skip, _FP_FRAC_SLL_8_i; \
_up = (N) % _FP_W_TYPE_SIZE; \ _FP_FRAC_SLL_8_skip = (N) / _FP_W_TYPE_SIZE; \
_down = _FP_W_TYPE_SIZE - _up; \ _FP_FRAC_SLL_8_up = (N) % _FP_W_TYPE_SIZE; \
if (!_up) \ _FP_FRAC_SLL_8_down = _FP_W_TYPE_SIZE - _FP_FRAC_SLL_8_up; \
for (_i = 7; _i >= _skip; --_i) \ if (!_FP_FRAC_SLL_8_up) \
X##_f[_i] = X##_f[_i-_skip]; \ for (_FP_FRAC_SLL_8_i = 7; \
else \ _FP_FRAC_SLL_8_i >= _FP_FRAC_SLL_8_skip; \
{ \ --_FP_FRAC_SLL_8_i) \
for (_i = 7; _i > _skip; --_i) \ X##_f[_FP_FRAC_SLL_8_i] \
X##_f[_i] = (X##_f[_i-_skip] << _up \ = X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip]; \
| X##_f[_i-_skip-1] >> _down); \ else \
X##_f[_i--] = X##_f[0] << _up; \ { \
} \ for (_FP_FRAC_SLL_8_i = 7; \
for (; _i >= 0; --_i) \ _FP_FRAC_SLL_8_i > _FP_FRAC_SLL_8_skip; \
X##_f[_i] = 0; \ --_FP_FRAC_SLL_8_i) \
} \ X##_f[_FP_FRAC_SLL_8_i] \
= ((X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip] \
<< _FP_FRAC_SLL_8_up) \
| (X##_f[_FP_FRAC_SLL_8_i-_FP_FRAC_SLL_8_skip-1] \
>> _FP_FRAC_SLL_8_down)); \
X##_f[_FP_FRAC_SLL_8_i--] = X##_f[0] << _FP_FRAC_SLL_8_up; \
} \
for (; _FP_FRAC_SLL_8_i >= 0; --_FP_FRAC_SLL_8_i) \
X##_f[_FP_FRAC_SLL_8_i] = 0; \
} \
while (0) while (0)
#define _FP_FRAC_SRL_8(X, N) \ #define _FP_FRAC_SRL_8(X, N) \
do \ do \
{ \ { \
_FP_I_TYPE _up, _down, _skip, _i; \ _FP_I_TYPE _FP_FRAC_SRL_8_up, _FP_FRAC_SRL_8_down; \
_skip = (N) / _FP_W_TYPE_SIZE; \ _FP_I_TYPE _FP_FRAC_SRL_8_skip, _FP_FRAC_SRL_8_i; \
_down = (N) % _FP_W_TYPE_SIZE; \ _FP_FRAC_SRL_8_skip = (N) / _FP_W_TYPE_SIZE; \
_up = _FP_W_TYPE_SIZE - _down; \ _FP_FRAC_SRL_8_down = (N) % _FP_W_TYPE_SIZE; \
if (!_down) \ _FP_FRAC_SRL_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRL_8_down; \
for (_i = 0; _i <= 7-_skip; ++_i) \ if (!_FP_FRAC_SRL_8_down) \
X##_f[_i] = X##_f[_i+_skip]; \ for (_FP_FRAC_SRL_8_i = 0; \
else \ _FP_FRAC_SRL_8_i <= 7-_FP_FRAC_SRL_8_skip; \
{ \ ++_FP_FRAC_SRL_8_i) \
for (_i = 0; _i < 7-_skip; ++_i) \ X##_f[_FP_FRAC_SRL_8_i] \
X##_f[_i] = (X##_f[_i+_skip] >> _down \ = X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip]; \
| X##_f[_i+_skip+1] << _up); \ else \
X##_f[_i++] = X##_f[7] >> _down; \ { \
} \ for (_FP_FRAC_SRL_8_i = 0; \
for (; _i < 8; ++_i) \ _FP_FRAC_SRL_8_i < 7-_FP_FRAC_SRL_8_skip; \
X##_f[_i] = 0; \ ++_FP_FRAC_SRL_8_i) \
} \ X##_f[_FP_FRAC_SRL_8_i] \
= ((X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip] \
>> _FP_FRAC_SRL_8_down) \
| (X##_f[_FP_FRAC_SRL_8_i+_FP_FRAC_SRL_8_skip+1] \
<< _FP_FRAC_SRL_8_up)); \
X##_f[_FP_FRAC_SRL_8_i++] = X##_f[7] >> _FP_FRAC_SRL_8_down; \
} \
for (; _FP_FRAC_SRL_8_i < 8; ++_FP_FRAC_SRL_8_i) \
X##_f[_FP_FRAC_SRL_8_i] = 0; \
} \
while (0) while (0)
/* Right shift with sticky-lsb. /* Right shift with sticky-lsb.
* What this actually means is that we do a standard right-shift, What this actually means is that we do a standard right-shift,
* but that if any of the bits that fall off the right hand side but that if any of the bits that fall off the right hand side
* were one then we always set the LSbit. were one then we always set the LSbit. */
*/
#define _FP_FRAC_SRS_8(X, N, size) \ #define _FP_FRAC_SRS_8(X, N, size) \
do \ do \
{ \ { \
_FP_I_TYPE _up, _down, _skip, _i; \ _FP_I_TYPE _FP_FRAC_SRS_8_up, _FP_FRAC_SRS_8_down; \
_FP_W_TYPE _s; \ _FP_I_TYPE _FP_FRAC_SRS_8_skip, _FP_FRAC_SRS_8_i; \
_skip = (N) / _FP_W_TYPE_SIZE; \ _FP_W_TYPE _FP_FRAC_SRS_8_s; \
_down = (N) % _FP_W_TYPE_SIZE; \ _FP_FRAC_SRS_8_skip = (N) / _FP_W_TYPE_SIZE; \
_up = _FP_W_TYPE_SIZE - _down; \ _FP_FRAC_SRS_8_down = (N) % _FP_W_TYPE_SIZE; \
for (_s = _i = 0; _i < _skip; ++_i) \ _FP_FRAC_SRS_8_up = _FP_W_TYPE_SIZE - _FP_FRAC_SRS_8_down; \
_s |= X##_f[_i]; \ for (_FP_FRAC_SRS_8_s = _FP_FRAC_SRS_8_i = 0; \
if (!_down) \ _FP_FRAC_SRS_8_i < _FP_FRAC_SRS_8_skip; \
for (_i = 0; _i <= 7-_skip; ++_i) \ ++_FP_FRAC_SRS_8_i) \
X##_f[_i] = X##_f[_i+_skip]; \ _FP_FRAC_SRS_8_s |= X##_f[_FP_FRAC_SRS_8_i]; \
if (!_FP_FRAC_SRS_8_down) \
for (_FP_FRAC_SRS_8_i = 0; \
_FP_FRAC_SRS_8_i <= 7-_FP_FRAC_SRS_8_skip; \
++_FP_FRAC_SRS_8_i) \
X##_f[_FP_FRAC_SRS_8_i] \
= X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip]; \
else \ else \
{ \ { \
_s |= X##_f[_i] << _up; \ _FP_FRAC_SRS_8_s \
for (_i = 0; _i < 7-_skip; ++_i) \ |= X##_f[_FP_FRAC_SRS_8_i] << _FP_FRAC_SRS_8_up; \
X##_f[_i] = (X##_f[_i+_skip] >> _down \ for (_FP_FRAC_SRS_8_i = 0; \
| X##_f[_i+_skip+1] << _up); \ _FP_FRAC_SRS_8_i < 7-_FP_FRAC_SRS_8_skip; \
X##_f[_i++] = X##_f[7] >> _down; \ ++_FP_FRAC_SRS_8_i) \
X##_f[_FP_FRAC_SRS_8_i] \
= ((X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip] \
>> _FP_FRAC_SRS_8_down) \
| (X##_f[_FP_FRAC_SRS_8_i+_FP_FRAC_SRS_8_skip+1] \
<< _FP_FRAC_SRS_8_up)); \
X##_f[_FP_FRAC_SRS_8_i++] = X##_f[7] >> _FP_FRAC_SRS_8_down; \
} \ } \
for (; _i < 8; ++_i) \ for (; _FP_FRAC_SRS_8_i < 8; ++_FP_FRAC_SRS_8_i) \
X##_f[_i] = 0; \ X##_f[_FP_FRAC_SRS_8_i] = 0; \
/* don't fix the LSB until the very end when we're sure f[0] is \ /* Don't fix the LSB until the very end when we're sure f[0] is \
stable */ \ stable. */ \
X##_f[0] |= (_s != 0); \ X##_f[0] |= (_FP_FRAC_SRS_8_s != 0); \
} \ } \
while (0) while (0)
...@@ -45,10 +45,8 @@ ...@@ -45,10 +45,8 @@
? (_FP_FRAC_HIGH_##fs (X) & _FP_QNANBIT_SH_##fs) \ ? (_FP_FRAC_HIGH_##fs (X) & _FP_QNANBIT_SH_##fs) \
: !(_FP_FRAC_HIGH_##fs (X) & _FP_QNANBIT_SH_##fs)) : !(_FP_FRAC_HIGH_##fs (X) & _FP_QNANBIT_SH_##fs))
/* /* Finish truly unpacking a native fp value by classifying the kind
* Finish truly unpacking a native fp value by classifying the kind of fp value and normalizing both the exponent and the fraction. */
* of fp value and normalizing both the exponent and the fraction.
*/
#define _FP_UNPACK_CANONICAL(fs, wc, X) \ #define _FP_UNPACK_CANONICAL(fs, wc, X) \
do \ do \
...@@ -65,14 +63,23 @@ ...@@ -65,14 +63,23 @@
case 0: \ case 0: \
if (_FP_FRAC_ZEROP_##wc (X)) \ if (_FP_FRAC_ZEROP_##wc (X)) \
X##_c = FP_CLS_ZERO; \ X##_c = FP_CLS_ZERO; \
else if (FP_DENORM_ZERO) \
{ \
X##_c = FP_CLS_ZERO; \
_FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc); \
FP_SET_EXCEPTION (FP_EX_DENORM); \
} \
else \ else \
{ \ { \
/* a denormalized number */ \ /* A denormalized number. */ \
_FP_I_TYPE _shift; \ _FP_I_TYPE _FP_UNPACK_CANONICAL_shift; \
_FP_FRAC_CLZ_##wc (_shift, X); \ _FP_FRAC_CLZ_##wc (_FP_UNPACK_CANONICAL_shift, \
_shift -= _FP_FRACXBITS_##fs; \ X); \
_FP_FRAC_SLL_##wc (X, (_shift+_FP_WORKBITS)); \ _FP_UNPACK_CANONICAL_shift -= _FP_FRACXBITS_##fs; \
X##_e -= _FP_EXPBIAS_##fs - 1 + _shift; \ _FP_FRAC_SLL_##wc (X, (_FP_UNPACK_CANONICAL_shift \
+ _FP_WORKBITS)); \
X##_e -= (_FP_EXPBIAS_##fs - 1 \
+ _FP_UNPACK_CANONICAL_shift); \
X##_c = FP_CLS_NORMAL; \ X##_c = FP_CLS_NORMAL; \
FP_SET_EXCEPTION (FP_EX_DENORM); \ FP_SET_EXCEPTION (FP_EX_DENORM); \
} \ } \
...@@ -84,9 +91,10 @@ ...@@ -84,9 +91,10 @@
else \ else \
{ \ { \
X##_c = FP_CLS_NAN; \ X##_c = FP_CLS_NAN; \
/* Check for signaling NaN */ \ /* Check for signaling NaN. */ \
if (_FP_FRAC_SNANP (fs, X)) \ if (_FP_FRAC_SNANP (fs, X)) \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID \
| FP_EX_INVALID_SNAN); \
} \ } \
break; \ break; \
} \ } \
...@@ -98,6 +106,21 @@ ...@@ -98,6 +106,21 @@
other classification is not done. */ other classification is not done. */
#define _FP_UNPACK_SEMIRAW(fs, wc, X) _FP_FRAC_SLL_##wc (X, _FP_WORKBITS) #define _FP_UNPACK_SEMIRAW(fs, wc, X) _FP_FRAC_SLL_##wc (X, _FP_WORKBITS)
/* Check whether a raw or semi-raw input value should be flushed to
zero, and flush it to zero if so. */
#define _FP_CHECK_FLUSH_ZERO(fs, wc, X) \
do \
{ \
if (FP_DENORM_ZERO \
&& X##_e == 0 \
&& !_FP_FRAC_ZEROP_##wc (X)) \
{ \
_FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc); \
FP_SET_EXCEPTION (FP_EX_DENORM); \
} \
} \
while (0)
/* A semi-raw value has overflowed to infinity. Adjust the mantissa /* A semi-raw value has overflowed to infinity. Adjust the mantissa
and exponent appropriately. */ and exponent appropriately. */
#define _FP_OVERFLOW_SEMIRAW(fs, wc, X) \ #define _FP_OVERFLOW_SEMIRAW(fs, wc, X) \
...@@ -122,14 +145,14 @@ ...@@ -122,14 +145,14 @@
/* Check for a semi-raw value being a signaling NaN and raise the /* Check for a semi-raw value being a signaling NaN and raise the
invalid exception if so. */ invalid exception if so. */
#define _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X) \ #define _FP_CHECK_SIGNAN_SEMIRAW(fs, wc, X) \
do \ do \
{ \ { \
if (X##_e == _FP_EXPMAX_##fs \ if (X##_e == _FP_EXPMAX_##fs \
&& !_FP_FRAC_ZEROP_##wc (X) \ && !_FP_FRAC_ZEROP_##wc (X) \
&& _FP_FRAC_SNANP_SEMIRAW (fs, X)) \ && _FP_FRAC_SNANP_SEMIRAW (fs, X)) \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SNAN); \
} \ } \
while (0) while (0)
/* Choose a NaN result from an operation on two semi-raw NaN /* Choose a NaN result from an operation on two semi-raw NaN
...@@ -234,12 +257,10 @@ ...@@ -234,12 +257,10 @@
} \ } \
while (0) while (0)
/* /* Before packing the bits back into the native fp result, take care
* Before packing the bits back into the native fp result, take care of such mundane things as rounding and overflow. Also, for some
* of such mundane things as rounding and overflow. Also, for some kinds of fp values, the original parts may not have been fully
* kinds of fp values, the original parts may not have been fully extracted -- but that is ok, we can regenerate them now. */
* extracted -- but that is ok, we can regenerate them now.
*/
#define _FP_PACK_CANONICAL(fs, wc, X) \ #define _FP_PACK_CANONICAL(fs, wc, X) \
do \ do \
...@@ -259,7 +280,7 @@ ...@@ -259,7 +280,7 @@
_FP_FRAC_SRL_##wc (X, _FP_WORKBITS); \ _FP_FRAC_SRL_##wc (X, _FP_WORKBITS); \
if (X##_e >= _FP_EXPMAX_##fs) \ if (X##_e >= _FP_EXPMAX_##fs) \
{ \ { \
/* overflow */ \ /* Overflow. */ \
switch (FP_ROUNDMODE) \ switch (FP_ROUNDMODE) \
{ \ { \
case FP_RND_NEAREST: \ case FP_RND_NEAREST: \
...@@ -276,13 +297,13 @@ ...@@ -276,13 +297,13 @@
} \ } \
if (X##_c == FP_CLS_INF) \ if (X##_c == FP_CLS_INF) \
{ \ { \
/* Overflow to infinity */ \ /* Overflow to infinity. */ \
X##_e = _FP_EXPMAX_##fs; \ X##_e = _FP_EXPMAX_##fs; \
_FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc); \ _FP_FRAC_SET_##wc (X, _FP_ZEROFRAC_##wc); \
} \ } \
else \ else \
{ \ { \
/* Overflow to maximum normal */ \ /* Overflow to maximum normal. */ \
X##_e = _FP_EXPMAX_##fs - 1; \ X##_e = _FP_EXPMAX_##fs - 1; \
_FP_FRAC_SET_##wc (X, _FP_MAXFRAC_##wc); \ _FP_FRAC_SET_##wc (X, _FP_MAXFRAC_##wc); \
} \ } \
...@@ -292,7 +313,7 @@ ...@@ -292,7 +313,7 @@
} \ } \
else \ else \
{ \ { \
/* we've got a denormalized number */ \ /* We've got a denormalized number. */ \
int _FP_PACK_CANONICAL_is_tiny = 1; \ int _FP_PACK_CANONICAL_is_tiny = 1; \
if (_FP_TININESS_AFTER_ROUNDING && X##_e == 0) \ if (_FP_TININESS_AFTER_ROUNDING && X##_e == 0) \
{ \ { \
...@@ -329,7 +350,7 @@ ...@@ -329,7 +350,7 @@
} \ } \
else \ else \
{ \ { \
/* underflow to zero */ \ /* Underflow to zero. */ \
X##_e = 0; \ X##_e = 0; \
if (!_FP_FRAC_ZEROP_##wc (X)) \ if (!_FP_FRAC_ZEROP_##wc (X)) \
{ \ { \
...@@ -367,18 +388,17 @@ ...@@ -367,18 +388,17 @@
while (0) while (0)
/* This one accepts raw argument and not cooked, returns /* This one accepts raw argument and not cooked, returns
* 1 if X is a signaling NaN. 1 if X is a signaling NaN. */
*/
#define _FP_ISSIGNAN(fs, wc, X) \ #define _FP_ISSIGNAN(fs, wc, X) \
({ \ ({ \
int __ret = 0; \ int _FP_ISSIGNAN_ret = 0; \
if (X##_e == _FP_EXPMAX_##fs) \ if (X##_e == _FP_EXPMAX_##fs) \
{ \ { \
if (!_FP_FRAC_ZEROP_##wc (X) \ if (!_FP_FRAC_ZEROP_##wc (X) \
&& _FP_FRAC_SNANP (fs, X)) \ && _FP_FRAC_SNANP (fs, X)) \
__ret = 1; \ _FP_ISSIGNAN_ret = 1; \
} \ } \
__ret; \ _FP_ISSIGNAN_ret; \
}) })
...@@ -389,12 +409,14 @@ ...@@ -389,12 +409,14 @@
#define _FP_ADD_INTERNAL(fs, wc, R, X, Y, OP) \ #define _FP_ADD_INTERNAL(fs, wc, R, X, Y, OP) \
do \ do \
{ \ { \
_FP_CHECK_FLUSH_ZERO (fs, wc, X); \
_FP_CHECK_FLUSH_ZERO (fs, wc, Y); \
if (X##_s == Y##_s) \ if (X##_s == Y##_s) \
{ \ { \
/* Addition. */ \ /* Addition. */ \
R##_s = X##_s; \ R##_s = X##_s; \
int ediff = X##_e - Y##_e; \ int _FP_ADD_INTERNAL_ediff = X##_e - Y##_e; \
if (ediff > 0) \ if (_FP_ADD_INTERNAL_ediff > 0) \
{ \ { \
R##_e = X##_e; \ R##_e = X##_e; \
if (Y##_e == 0) \ if (Y##_e == 0) \
...@@ -409,8 +431,8 @@ ...@@ -409,8 +431,8 @@
else \ else \
{ \ { \
FP_SET_EXCEPTION (FP_EX_DENORM); \ FP_SET_EXCEPTION (FP_EX_DENORM); \
ediff--; \ _FP_ADD_INTERNAL_ediff--; \
if (ediff == 0) \ if (_FP_ADD_INTERNAL_ediff == 0) \
{ \ { \
_FP_FRAC_ADD_##wc (R, X, Y); \ _FP_FRAC_ADD_##wc (R, X, Y); \
goto add3; \ goto add3; \
...@@ -436,17 +458,19 @@ ...@@ -436,17 +458,19 @@
_FP_FRAC_HIGH_##fs (Y) |= _FP_IMPLBIT_SH_##fs; \ _FP_FRAC_HIGH_##fs (Y) |= _FP_IMPLBIT_SH_##fs; \
\ \
add1: \ add1: \
/* Shift the mantissa of Y to the right EDIFF steps; \ /* Shift the mantissa of Y to the right \
remember to account later for the implicit MSB of X. */ \ _FP_ADD_INTERNAL_EDIFF steps; remember to account \
if (ediff <= _FP_WFRACBITS_##fs) \ later for the implicit MSB of X. */ \
_FP_FRAC_SRS_##wc (Y, ediff, _FP_WFRACBITS_##fs); \ if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs) \
_FP_FRAC_SRS_##wc (Y, _FP_ADD_INTERNAL_ediff, \
_FP_WFRACBITS_##fs); \
else if (!_FP_FRAC_ZEROP_##wc (Y)) \ else if (!_FP_FRAC_ZEROP_##wc (Y)) \
_FP_FRAC_SET_##wc (Y, _FP_MINFRAC_##wc); \ _FP_FRAC_SET_##wc (Y, _FP_MINFRAC_##wc); \
_FP_FRAC_ADD_##wc (R, X, Y); \ _FP_FRAC_ADD_##wc (R, X, Y); \
} \ } \
else if (ediff < 0) \ else if (_FP_ADD_INTERNAL_ediff < 0) \
{ \ { \
ediff = -ediff; \ _FP_ADD_INTERNAL_ediff = -_FP_ADD_INTERNAL_ediff; \
R##_e = Y##_e; \ R##_e = Y##_e; \
if (X##_e == 0) \ if (X##_e == 0) \
{ \ { \
...@@ -460,8 +484,8 @@ ...@@ -460,8 +484,8 @@
else \ else \
{ \ { \
FP_SET_EXCEPTION (FP_EX_DENORM); \ FP_SET_EXCEPTION (FP_EX_DENORM); \
ediff--; \ _FP_ADD_INTERNAL_ediff--; \
if (ediff == 0) \ if (_FP_ADD_INTERNAL_ediff == 0) \
{ \ { \
_FP_FRAC_ADD_##wc (R, Y, X); \ _FP_FRAC_ADD_##wc (R, Y, X); \
goto add3; \ goto add3; \
...@@ -487,17 +511,19 @@ ...@@ -487,17 +511,19 @@
_FP_FRAC_HIGH_##fs (X) |= _FP_IMPLBIT_SH_##fs; \ _FP_FRAC_HIGH_##fs (X) |= _FP_IMPLBIT_SH_##fs; \
\ \
add2: \ add2: \
/* Shift the mantissa of X to the right EDIFF steps; \ /* Shift the mantissa of X to the right \
remember to account later for the implicit MSB of Y. */ \ _FP_ADD_INTERNAL_EDIFF steps; remember to account \
if (ediff <= _FP_WFRACBITS_##fs) \ later for the implicit MSB of Y. */ \
_FP_FRAC_SRS_##wc (X, ediff, _FP_WFRACBITS_##fs); \ if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs) \
_FP_FRAC_SRS_##wc (X, _FP_ADD_INTERNAL_ediff, \
_FP_WFRACBITS_##fs); \
else if (!_FP_FRAC_ZEROP_##wc (X)) \ else if (!_FP_FRAC_ZEROP_##wc (X)) \
_FP_FRAC_SET_##wc (X, _FP_MINFRAC_##wc); \ _FP_FRAC_SET_##wc (X, _FP_MINFRAC_##wc); \
_FP_FRAC_ADD_##wc (R, Y, X); \ _FP_FRAC_ADD_##wc (R, Y, X); \
} \ } \
else \ else \
{ \ { \
/* ediff == 0. */ \ /* _FP_ADD_INTERNAL_ediff == 0. */ \
if (!_FP_EXP_NORMAL (fs, wc, X)) \ if (!_FP_EXP_NORMAL (fs, wc, X)) \
{ \ { \
if (X##_e == 0) \ if (X##_e == 0) \
...@@ -573,8 +599,8 @@ ...@@ -573,8 +599,8 @@
else \ else \
{ \ { \
/* Subtraction. */ \ /* Subtraction. */ \
int ediff = X##_e - Y##_e; \ int _FP_ADD_INTERNAL_ediff = X##_e - Y##_e; \
if (ediff > 0) \ if (_FP_ADD_INTERNAL_ediff > 0) \
{ \ { \
R##_e = X##_e; \ R##_e = X##_e; \
R##_s = X##_s; \ R##_s = X##_s; \
...@@ -590,8 +616,8 @@ ...@@ -590,8 +616,8 @@
else \ else \
{ \ { \
FP_SET_EXCEPTION (FP_EX_DENORM); \ FP_SET_EXCEPTION (FP_EX_DENORM); \
ediff--; \ _FP_ADD_INTERNAL_ediff--; \
if (ediff == 0) \ if (_FP_ADD_INTERNAL_ediff == 0) \
{ \ { \
_FP_FRAC_SUB_##wc (R, X, Y); \ _FP_FRAC_SUB_##wc (R, X, Y); \
goto sub3; \ goto sub3; \
...@@ -617,17 +643,19 @@ ...@@ -617,17 +643,19 @@
_FP_FRAC_HIGH_##fs (Y) |= _FP_IMPLBIT_SH_##fs; \ _FP_FRAC_HIGH_##fs (Y) |= _FP_IMPLBIT_SH_##fs; \
\ \
sub1: \ sub1: \
/* Shift the mantissa of Y to the right EDIFF steps; \ /* Shift the mantissa of Y to the right \
remember to account later for the implicit MSB of X. */ \ _FP_ADD_INTERNAL_EDIFF steps; remember to account \
if (ediff <= _FP_WFRACBITS_##fs) \ later for the implicit MSB of X. */ \
_FP_FRAC_SRS_##wc (Y, ediff, _FP_WFRACBITS_##fs); \ if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs) \
_FP_FRAC_SRS_##wc (Y, _FP_ADD_INTERNAL_ediff, \
_FP_WFRACBITS_##fs); \
else if (!_FP_FRAC_ZEROP_##wc (Y)) \ else if (!_FP_FRAC_ZEROP_##wc (Y)) \
_FP_FRAC_SET_##wc (Y, _FP_MINFRAC_##wc); \ _FP_FRAC_SET_##wc (Y, _FP_MINFRAC_##wc); \
_FP_FRAC_SUB_##wc (R, X, Y); \ _FP_FRAC_SUB_##wc (R, X, Y); \
} \ } \
else if (ediff < 0) \ else if (_FP_ADD_INTERNAL_ediff < 0) \
{ \ { \
ediff = -ediff; \ _FP_ADD_INTERNAL_ediff = -_FP_ADD_INTERNAL_ediff; \
R##_e = Y##_e; \ R##_e = Y##_e; \
R##_s = Y##_s; \ R##_s = Y##_s; \
if (X##_e == 0) \ if (X##_e == 0) \
...@@ -642,8 +670,8 @@ ...@@ -642,8 +670,8 @@
else \ else \
{ \ { \
FP_SET_EXCEPTION (FP_EX_DENORM); \ FP_SET_EXCEPTION (FP_EX_DENORM); \
ediff--; \ _FP_ADD_INTERNAL_ediff--; \
if (ediff == 0) \ if (_FP_ADD_INTERNAL_ediff == 0) \
{ \ { \
_FP_FRAC_SUB_##wc (R, Y, X); \ _FP_FRAC_SUB_##wc (R, Y, X); \
goto sub3; \ goto sub3; \
...@@ -669,10 +697,12 @@ ...@@ -669,10 +697,12 @@
_FP_FRAC_HIGH_##fs (X) |= _FP_IMPLBIT_SH_##fs; \ _FP_FRAC_HIGH_##fs (X) |= _FP_IMPLBIT_SH_##fs; \
\ \
sub2: \ sub2: \
/* Shift the mantissa of X to the right EDIFF steps; \ /* Shift the mantissa of X to the right \
remember to account later for the implicit MSB of Y. */ \ _FP_ADD_INTERNAL_EDIFF steps; remember to account \
if (ediff <= _FP_WFRACBITS_##fs) \ later for the implicit MSB of Y. */ \
_FP_FRAC_SRS_##wc (X, ediff, _FP_WFRACBITS_##fs); \ if (_FP_ADD_INTERNAL_ediff <= _FP_WFRACBITS_##fs) \
_FP_FRAC_SRS_##wc (X, _FP_ADD_INTERNAL_ediff, \
_FP_WFRACBITS_##fs); \
else if (!_FP_FRAC_ZEROP_##wc (X)) \ else if (!_FP_FRAC_ZEROP_##wc (X)) \
_FP_FRAC_SET_##wc (X, _FP_MINFRAC_##wc); \ _FP_FRAC_SET_##wc (X, _FP_MINFRAC_##wc); \
_FP_FRAC_SUB_##wc (R, Y, X); \ _FP_FRAC_SUB_##wc (R, Y, X); \
...@@ -735,7 +765,8 @@ ...@@ -735,7 +765,8 @@
R##_s = _FP_NANSIGN_##fs; \ R##_s = _FP_NANSIGN_##fs; \
_FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \ _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \
_FP_FRAC_SLL_##wc (R, _FP_WORKBITS); \ _FP_FRAC_SLL_##wc (R, _FP_WORKBITS); \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID \
| FP_EX_INVALID_ISI); \
} \ } \
else \ else \
{ \ { \
...@@ -783,24 +814,26 @@ ...@@ -783,24 +814,26 @@
sub3: \ sub3: \
if (_FP_FRAC_HIGH_##fs (R) & _FP_IMPLBIT_SH_##fs) \ if (_FP_FRAC_HIGH_##fs (R) & _FP_IMPLBIT_SH_##fs) \
{ \ { \
int diff; \ int _FP_ADD_INTERNAL_diff; \
/* Carry into most significant bit of larger one of X and Y, \ /* Carry into most significant bit of larger one of X and Y, \
canceling it; renormalize. */ \ canceling it; renormalize. */ \
_FP_FRAC_HIGH_##fs (R) &= _FP_IMPLBIT_SH_##fs - 1; \ _FP_FRAC_HIGH_##fs (R) &= _FP_IMPLBIT_SH_##fs - 1; \
norm: \ norm: \
_FP_FRAC_CLZ_##wc (diff, R); \ _FP_FRAC_CLZ_##wc (_FP_ADD_INTERNAL_diff, R); \
diff -= _FP_WFRACXBITS_##fs; \ _FP_ADD_INTERNAL_diff -= _FP_WFRACXBITS_##fs; \
_FP_FRAC_SLL_##wc (R, diff); \ _FP_FRAC_SLL_##wc (R, _FP_ADD_INTERNAL_diff); \
if (R##_e <= diff) \ if (R##_e <= _FP_ADD_INTERNAL_diff) \
{ \ { \
/* R is denormalized. */ \ /* R is denormalized. */ \
diff = diff - R##_e + 1; \ _FP_ADD_INTERNAL_diff \
_FP_FRAC_SRS_##wc (R, diff, _FP_WFRACBITS_##fs); \ = _FP_ADD_INTERNAL_diff - R##_e + 1; \
_FP_FRAC_SRS_##wc (R, _FP_ADD_INTERNAL_diff, \
_FP_WFRACBITS_##fs); \
R##_e = 0; \ R##_e = 0; \
} \ } \
else \ else \
{ \ { \
R##_e -= diff; \ R##_e -= _FP_ADD_INTERNAL_diff; \
_FP_FRAC_HIGH_##fs (R) &= ~(_FP_W_TYPE) _FP_IMPLBIT_SH_##fs; \ _FP_FRAC_HIGH_##fs (R) &= ~(_FP_W_TYPE) _FP_IMPLBIT_SH_##fs; \
} \ } \
} \ } \
...@@ -820,9 +853,7 @@ ...@@ -820,9 +853,7 @@
while (0) while (0)
/* /* Main negation routine. The input value is raw. */
* Main negation routine. The input value is raw.
*/
#define _FP_NEG(fs, wc, R, X) \ #define _FP_NEG(fs, wc, R, X) \
do \ do \
...@@ -834,9 +865,7 @@ ...@@ -834,9 +865,7 @@
while (0) while (0)
/* /* Main multiplication routine. The input values should be cooked. */
* Main multiplication routine. The input values should be cooked.
*/
#define _FP_MUL(fs, wc, R, X, Y) \ #define _FP_MUL(fs, wc, R, X, Y) \
do \ do \
...@@ -889,7 +918,7 @@ ...@@ -889,7 +918,7 @@
R##_s = _FP_NANSIGN_##fs; \ R##_s = _FP_NANSIGN_##fs; \
R##_c = FP_CLS_NAN; \ R##_c = FP_CLS_NAN; \
_FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \ _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_IMZ); \
break; \ break; \
\ \
default: \ default: \
...@@ -904,9 +933,9 @@ ...@@ -904,9 +933,9 @@
#define _FP_FMA(fs, wc, dwc, R, X, Y, Z) \ #define _FP_FMA(fs, wc, dwc, R, X, Y, Z) \
do \ do \
{ \ { \
FP_DECL_##fs (T); \ FP_DECL_##fs (_FP_FMA_T); \
T##_s = X##_s ^ Y##_s; \ _FP_FMA_T##_s = X##_s ^ Y##_s; \
T##_e = X##_e + Y##_e + 1; \ _FP_FMA_T##_e = X##_e + Y##_e + 1; \
switch (_FP_CLS_COMBINE (X##_c, Y##_c)) \ switch (_FP_CLS_COMBINE (X##_c, Y##_c)) \
{ \ { \
case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NORMAL): \ case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NORMAL): \
...@@ -921,8 +950,8 @@ ...@@ -921,8 +950,8 @@
\ \
case FP_CLS_ZERO: \ case FP_CLS_ZERO: \
R##_c = FP_CLS_NORMAL; \ R##_c = FP_CLS_NORMAL; \
R##_s = T##_s; \ R##_s = _FP_FMA_T##_s; \
R##_e = T##_e; \ R##_e = _FP_FMA_T##_e; \
\ \
_FP_MUL_MEAT_##fs (R, X, Y); \ _FP_MUL_MEAT_##fs (R, X, Y); \
\ \
...@@ -933,38 +962,43 @@ ...@@ -933,38 +962,43 @@
break; \ break; \
\ \
case FP_CLS_NORMAL:; \ case FP_CLS_NORMAL:; \
_FP_FRAC_DECL_##dwc (TD); \ _FP_FRAC_DECL_##dwc (_FP_FMA_TD); \
_FP_FRAC_DECL_##dwc (ZD); \ _FP_FRAC_DECL_##dwc (_FP_FMA_ZD); \
_FP_FRAC_DECL_##dwc (RD); \ _FP_FRAC_DECL_##dwc (_FP_FMA_RD); \
_FP_MUL_MEAT_DW_##fs (TD, X, Y); \ _FP_MUL_MEAT_DW_##fs (_FP_FMA_TD, X, Y); \
R##_e = T##_e; \ R##_e = _FP_FMA_T##_e; \
int tsh = _FP_FRAC_HIGHBIT_DW_##dwc (fs, TD) == 0; \ int _FP_FMA_tsh \
T##_e -= tsh; \ = _FP_FRAC_HIGHBIT_DW_##dwc (fs, _FP_FMA_TD) == 0; \
int ediff = T##_e - Z##_e; \ _FP_FMA_T##_e -= _FP_FMA_tsh; \
if (ediff >= 0) \ int _FP_FMA_ediff = _FP_FMA_T##_e - Z##_e; \
if (_FP_FMA_ediff >= 0) \
{ \ { \
int shift = _FP_WFRACBITS_##fs - tsh - ediff; \ int _FP_FMA_shift \
if (shift <= -_FP_WFRACBITS_##fs) \ = _FP_WFRACBITS_##fs - _FP_FMA_tsh - _FP_FMA_ediff; \
_FP_FRAC_SET_##dwc (ZD, _FP_MINFRAC_##dwc); \ if (_FP_FMA_shift <= -_FP_WFRACBITS_##fs) \
_FP_FRAC_SET_##dwc (_FP_FMA_ZD, _FP_MINFRAC_##dwc); \
else \ else \
{ \ { \
_FP_FRAC_COPY_##dwc##_##wc (ZD, Z); \ _FP_FRAC_COPY_##dwc##_##wc (_FP_FMA_ZD, Z); \
if (shift < 0) \ if (_FP_FMA_shift < 0) \
_FP_FRAC_SRS_##dwc (ZD, -shift, \ _FP_FRAC_SRS_##dwc (_FP_FMA_ZD, -_FP_FMA_shift, \
_FP_WFRACBITS_DW_##fs); \ _FP_WFRACBITS_DW_##fs); \
else if (shift > 0) \ else if (_FP_FMA_shift > 0) \
_FP_FRAC_SLL_##dwc (ZD, shift); \ _FP_FRAC_SLL_##dwc (_FP_FMA_ZD, _FP_FMA_shift); \
} \ } \
R##_s = T##_s; \ R##_s = _FP_FMA_T##_s; \
if (T##_s == Z##_s) \ if (_FP_FMA_T##_s == Z##_s) \
_FP_FRAC_ADD_##dwc (RD, TD, ZD); \ _FP_FRAC_ADD_##dwc (_FP_FMA_RD, _FP_FMA_TD, \
_FP_FMA_ZD); \
else \ else \
{ \ { \
_FP_FRAC_SUB_##dwc (RD, TD, ZD); \ _FP_FRAC_SUB_##dwc (_FP_FMA_RD, _FP_FMA_TD, \
if (_FP_FRAC_NEGP_##dwc (RD)) \ _FP_FMA_ZD); \
if (_FP_FRAC_NEGP_##dwc (_FP_FMA_RD)) \
{ \ { \
R##_s = Z##_s; \ R##_s = Z##_s; \
_FP_FRAC_SUB_##dwc (RD, ZD, TD); \ _FP_FRAC_SUB_##dwc (_FP_FMA_RD, _FP_FMA_ZD, \
_FP_FMA_TD); \
} \ } \
} \ } \
} \ } \
...@@ -972,22 +1006,24 @@ ...@@ -972,22 +1006,24 @@
{ \ { \
R##_e = Z##_e; \ R##_e = Z##_e; \
R##_s = Z##_s; \ R##_s = Z##_s; \
_FP_FRAC_COPY_##dwc##_##wc (ZD, Z); \ _FP_FRAC_COPY_##dwc##_##wc (_FP_FMA_ZD, Z); \
_FP_FRAC_SLL_##dwc (ZD, _FP_WFRACBITS_##fs); \ _FP_FRAC_SLL_##dwc (_FP_FMA_ZD, _FP_WFRACBITS_##fs); \
int shift = -ediff - tsh; \ int _FP_FMA_shift = -_FP_FMA_ediff - _FP_FMA_tsh; \
if (shift >= _FP_WFRACBITS_DW_##fs) \ if (_FP_FMA_shift >= _FP_WFRACBITS_DW_##fs) \
_FP_FRAC_SET_##dwc (TD, _FP_MINFRAC_##dwc); \ _FP_FRAC_SET_##dwc (_FP_FMA_TD, _FP_MINFRAC_##dwc); \
else if (shift > 0) \ else if (_FP_FMA_shift > 0) \
_FP_FRAC_SRS_##dwc (TD, shift, \ _FP_FRAC_SRS_##dwc (_FP_FMA_TD, _FP_FMA_shift, \
_FP_WFRACBITS_DW_##fs); \ _FP_WFRACBITS_DW_##fs); \
if (Z##_s == T##_s) \ if (Z##_s == _FP_FMA_T##_s) \
_FP_FRAC_ADD_##dwc (RD, ZD, TD); \ _FP_FRAC_ADD_##dwc (_FP_FMA_RD, _FP_FMA_ZD, \
_FP_FMA_TD); \
else \ else \
_FP_FRAC_SUB_##dwc (RD, ZD, TD); \ _FP_FRAC_SUB_##dwc (_FP_FMA_RD, _FP_FMA_ZD, \
_FP_FMA_TD); \
} \ } \
if (_FP_FRAC_ZEROP_##dwc (RD)) \ if (_FP_FRAC_ZEROP_##dwc (_FP_FMA_RD)) \
{ \ { \
if (T##_s == Z##_s) \ if (_FP_FMA_T##_s == Z##_s) \
R##_s = Z##_s; \ R##_s = Z##_s; \
else \ else \
R##_s = (FP_ROUNDMODE == FP_RND_MINF); \ R##_s = (FP_ROUNDMODE == FP_RND_MINF); \
...@@ -996,17 +1032,17 @@ ...@@ -996,17 +1032,17 @@
} \ } \
else \ else \
{ \ { \
int rlz; \ int _FP_FMA_rlz; \
_FP_FRAC_CLZ_##dwc (rlz, RD); \ _FP_FRAC_CLZ_##dwc (_FP_FMA_rlz, _FP_FMA_RD); \
rlz -= _FP_WFRACXBITS_DW_##fs; \ _FP_FMA_rlz -= _FP_WFRACXBITS_DW_##fs; \
R##_e -= rlz; \ R##_e -= _FP_FMA_rlz; \
int shift = _FP_WFRACBITS_##fs - rlz; \ int _FP_FMA_shift = _FP_WFRACBITS_##fs - _FP_FMA_rlz; \
if (shift > 0) \ if (_FP_FMA_shift > 0) \
_FP_FRAC_SRS_##dwc (RD, shift, \ _FP_FRAC_SRS_##dwc (_FP_FMA_RD, _FP_FMA_shift, \
_FP_WFRACBITS_DW_##fs); \ _FP_WFRACBITS_DW_##fs); \
else if (shift < 0) \ else if (_FP_FMA_shift < 0) \
_FP_FRAC_SLL_##dwc (RD, -shift); \ _FP_FRAC_SLL_##dwc (_FP_FMA_RD, -_FP_FMA_shift); \
_FP_FRAC_COPY_##wc##_##dwc (R, RD); \ _FP_FRAC_COPY_##wc##_##dwc (R, _FP_FMA_RD); \
R##_c = FP_CLS_NORMAL; \ R##_c = FP_CLS_NORMAL; \
} \ } \
break; \ break; \
...@@ -1014,39 +1050,39 @@ ...@@ -1014,39 +1050,39 @@
goto done_fma; \ goto done_fma; \
\ \
case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN): \ case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN): \
_FP_CHOOSENAN (fs, wc, T, X, Y, '*'); \ _FP_CHOOSENAN (fs, wc, _FP_FMA_T, X, Y, '*'); \
break; \ break; \
\ \
case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL): \ case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL): \
case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_INF): \ case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_INF): \
case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO): \ case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO): \
T##_s = X##_s; \ _FP_FMA_T##_s = X##_s; \
\ \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF): \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL): \
case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NORMAL): \ case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NORMAL): \
case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO): \ case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO): \
_FP_FRAC_COPY_##wc (T, X); \ _FP_FRAC_COPY_##wc (_FP_FMA_T, X); \
T##_c = X##_c; \ _FP_FMA_T##_c = X##_c; \
break; \ break; \
\ \
case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NAN): \ case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_NAN): \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN): \
case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NAN): \ case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_NAN): \
T##_s = Y##_s; \ _FP_FMA_T##_s = Y##_s; \
\ \
case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_INF): \ case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_INF): \
case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_ZERO): \ case _FP_CLS_COMBINE (FP_CLS_NORMAL, FP_CLS_ZERO): \
_FP_FRAC_COPY_##wc (T, Y); \ _FP_FRAC_COPY_##wc (_FP_FMA_T, Y); \
T##_c = Y##_c; \ _FP_FMA_T##_c = Y##_c; \
break; \ break; \
\ \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO): \
case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_INF): \ case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_INF): \
T##_s = _FP_NANSIGN_##fs; \ _FP_FMA_T##_s = _FP_NANSIGN_##fs; \
T##_c = FP_CLS_NAN; \ _FP_FMA_T##_c = FP_CLS_NAN; \
_FP_FRAC_SET_##wc (T, _FP_NANFRAC_##fs); \ _FP_FRAC_SET_##wc (_FP_FMA_T, _FP_NANFRAC_##fs); \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_IMZ_FMA); \
break; \ break; \
\ \
default: \ default: \
...@@ -1054,10 +1090,10 @@ ...@@ -1054,10 +1090,10 @@
} \ } \
\ \
/* T = X * Y is zero, infinity or NaN. */ \ /* T = X * Y is zero, infinity or NaN. */ \
switch (_FP_CLS_COMBINE (T##_c, Z##_c)) \ switch (_FP_CLS_COMBINE (_FP_FMA_T##_c, Z##_c)) \
{ \ { \
case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN): \ case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NAN): \
_FP_CHOOSENAN (fs, wc, R, T, Z, '+'); \ _FP_CHOOSENAN (fs, wc, R, _FP_FMA_T, Z, '+'); \
break; \ break; \
\ \
case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL): \ case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_NORMAL): \
...@@ -1065,9 +1101,9 @@ ...@@ -1065,9 +1101,9 @@
case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO): \ case _FP_CLS_COMBINE (FP_CLS_NAN, FP_CLS_ZERO): \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NORMAL): \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_ZERO): \
R##_s = T##_s; \ R##_s = _FP_FMA_T##_s; \
_FP_FRAC_COPY_##wc (R, T); \ _FP_FRAC_COPY_##wc (R, _FP_FMA_T); \
R##_c = T##_c; \ R##_c = _FP_FMA_T##_c; \
break; \ break; \
\ \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_NAN): \
...@@ -1080,7 +1116,7 @@ ...@@ -1080,7 +1116,7 @@
break; \ break; \
\ \
case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF): \ case _FP_CLS_COMBINE (FP_CLS_INF, FP_CLS_INF): \
if (T##_s == Z##_s) \ if (_FP_FMA_T##_s == Z##_s) \
{ \ { \
R##_s = Z##_s; \ R##_s = Z##_s; \
_FP_FRAC_COPY_##wc (R, Z); \ _FP_FRAC_COPY_##wc (R, Z); \
...@@ -1091,12 +1127,12 @@ ...@@ -1091,12 +1127,12 @@
R##_s = _FP_NANSIGN_##fs; \ R##_s = _FP_NANSIGN_##fs; \
R##_c = FP_CLS_NAN; \ R##_c = FP_CLS_NAN; \
_FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \ _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_ISI); \
} \ } \
break; \ break; \
\ \
case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO): \ case _FP_CLS_COMBINE (FP_CLS_ZERO, FP_CLS_ZERO): \
if (T##_s == Z##_s) \ if (_FP_FMA_T##_s == Z##_s) \
R##_s = Z##_s; \ R##_s = Z##_s; \
else \ else \
R##_s = (FP_ROUNDMODE == FP_RND_MINF); \ R##_s = (FP_ROUNDMODE == FP_RND_MINF); \
...@@ -1112,9 +1148,7 @@ ...@@ -1112,9 +1148,7 @@
while (0) while (0)
/* /* Main division routine. The input values should be cooked. */
* Main division routine. The input values should be cooked.
*/
#define _FP_DIV(fs, wc, R, X, Y) \ #define _FP_DIV(fs, wc, R, X, Y) \
do \ do \
...@@ -1167,7 +1201,10 @@ ...@@ -1167,7 +1201,10 @@
R##_s = _FP_NANSIGN_##fs; \ R##_s = _FP_NANSIGN_##fs; \
R##_c = FP_CLS_NAN; \ R##_c = FP_CLS_NAN; \
_FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \ _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID \
| (X##_c == FP_CLS_INF \
? FP_EX_INVALID_IDI \
: FP_EX_INVALID_ZDZ)); \
break; \ break; \
\ \
default: \ default: \
...@@ -1177,46 +1214,85 @@ ...@@ -1177,46 +1214,85 @@
while (0) while (0)
/* /* Helper for comparisons. EX is 0 not to raise exceptions, 1 to
* Main differential comparison routine. The inputs should be raw not raise exceptions for signaling NaN operands, 2 to raise exceptions
* cooked. The return is -1,0,1 for normal values, 2 otherwise. for all NaN operands. Conditionals are organized to allow the
*/ compiler to optimize away code based on the value of EX. */
#define _FP_CMP(fs, wc, ret, X, Y, un) \ #define _FP_CMP_CHECK_NAN(fs, wc, X, Y, ex) \
do \ do \
{ \ { \
/* NANs are unordered */ \ /* The arguments are unordered, which may or may not result in \
an exception. */ \
if (ex) \
{ \
/* At least some cases of unordered arguments result in \
exceptions; check whether this is one. */ \
if (FP_EX_INVALID_SNAN || FP_EX_INVALID_VC) \
{ \
/* Check separately for each case of "invalid" \
exceptions. */ \
if ((ex) == 2) \
FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_VC); \
if (_FP_ISSIGNAN (fs, wc, X) \
|| _FP_ISSIGNAN (fs, wc, Y)) \
FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SNAN); \
} \
/* Otherwise, we only need to check whether to raise an \
exception, not which case or cases it is. */ \
else if ((ex) == 2 \
|| _FP_ISSIGNAN (fs, wc, X) \
|| _FP_ISSIGNAN (fs, wc, Y)) \
FP_SET_EXCEPTION (FP_EX_INVALID); \
} \
} \
while (0)
/* Main differential comparison routine. The inputs should be raw not
cooked. The return is -1, 0, 1 for normal values, UN
otherwise. */
#define _FP_CMP(fs, wc, ret, X, Y, un, ex) \
do \
{ \
/* NANs are unordered. */ \
if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X)) \ if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X)) \
|| (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y))) \ || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y))) \
{ \ { \
ret = un; \ (ret) = (un); \
_FP_CMP_CHECK_NAN (fs, wc, X, Y, (ex)); \
} \ } \
else \ else \
{ \ { \
int __is_zero_x; \ int _FP_CMP_is_zero_x; \
int __is_zero_y; \ int _FP_CMP_is_zero_y; \
\
_FP_CHECK_FLUSH_ZERO (fs, wc, X); \
_FP_CHECK_FLUSH_ZERO (fs, wc, Y); \
\ \
__is_zero_x = (!X##_e && _FP_FRAC_ZEROP_##wc (X)) ? 1 : 0; \ _FP_CMP_is_zero_x \
__is_zero_y = (!Y##_e && _FP_FRAC_ZEROP_##wc (Y)) ? 1 : 0; \ = (!X##_e && _FP_FRAC_ZEROP_##wc (X)) ? 1 : 0; \
_FP_CMP_is_zero_y \
= (!Y##_e && _FP_FRAC_ZEROP_##wc (Y)) ? 1 : 0; \
\ \
if (__is_zero_x && __is_zero_y) \ if (_FP_CMP_is_zero_x && _FP_CMP_is_zero_y) \
ret = 0; \ (ret) = 0; \
else if (__is_zero_x) \ else if (_FP_CMP_is_zero_x) \
ret = Y##_s ? 1 : -1; \ (ret) = Y##_s ? 1 : -1; \
else if (__is_zero_y) \ else if (_FP_CMP_is_zero_y) \
ret = X##_s ? -1 : 1; \ (ret) = X##_s ? -1 : 1; \
else if (X##_s != Y##_s) \ else if (X##_s != Y##_s) \
ret = X##_s ? -1 : 1; \ (ret) = X##_s ? -1 : 1; \
else if (X##_e > Y##_e) \ else if (X##_e > Y##_e) \
ret = X##_s ? -1 : 1; \ (ret) = X##_s ? -1 : 1; \
else if (X##_e < Y##_e) \ else if (X##_e < Y##_e) \
ret = X##_s ? 1 : -1; \ (ret) = X##_s ? 1 : -1; \
else if (_FP_FRAC_GT_##wc (X, Y)) \ else if (_FP_FRAC_GT_##wc (X, Y)) \
ret = X##_s ? -1 : 1; \ (ret) = X##_s ? -1 : 1; \
else if (_FP_FRAC_GT_##wc (Y, X)) \ else if (_FP_FRAC_GT_##wc (Y, X)) \
ret = X##_s ? 1 : -1; \ (ret) = X##_s ? 1 : -1; \
else \ else \
ret = 0; \ (ret) = 0; \
} \ } \
} \ } \
while (0) while (0)
...@@ -1224,180 +1300,227 @@ ...@@ -1224,180 +1300,227 @@
/* Simplification for strict equality. */ /* Simplification for strict equality. */
#define _FP_CMP_EQ(fs, wc, ret, X, Y) \ #define _FP_CMP_EQ(fs, wc, ret, X, Y, ex) \
do \ do \
{ \ { \
/* NANs are unordered */ \ /* NANs are unordered. */ \
if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X)) \ if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X)) \
|| (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y))) \ || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y))) \
{ \ { \
ret = 1; \ (ret) = 1; \
_FP_CMP_CHECK_NAN (fs, wc, X, Y, (ex)); \
} \ } \
else \ else \
{ \ { \
ret = !(X##_e == Y##_e \ _FP_CHECK_FLUSH_ZERO (fs, wc, X); \
&& _FP_FRAC_EQ_##wc (X, Y) \ _FP_CHECK_FLUSH_ZERO (fs, wc, Y); \
&& (X##_s == Y##_s || (!X##_e && _FP_FRAC_ZEROP_##wc (X)))); \ \
(ret) = !(X##_e == Y##_e \
&& _FP_FRAC_EQ_##wc (X, Y) \
&& (X##_s == Y##_s \
|| (!X##_e && _FP_FRAC_ZEROP_##wc (X)))); \
} \ } \
} \ } \
while (0) while (0)
/* Version to test unordered. */ /* Version to test unordered. */
#define _FP_CMP_UNORD(fs, wc, ret, X, Y) \ #define _FP_CMP_UNORD(fs, wc, ret, X, Y, ex) \
do \ do \
{ \ { \
ret = ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X)) \ (ret) = ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (X)) \
|| (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y))); \ || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc (Y))); \
if (ret) \
_FP_CMP_CHECK_NAN (fs, wc, X, Y, (ex)); \
} \ } \
while (0) while (0)
/* /* Main square root routine. The input value should be cooked. */
* Main square root routine. The input value should be cooked.
*/
#define _FP_SQRT(fs, wc, R, X) \ #define _FP_SQRT(fs, wc, R, X) \
do \ do \
{ \ { \
_FP_FRAC_DECL_##wc (T); \ _FP_FRAC_DECL_##wc (_FP_SQRT_T); \
_FP_FRAC_DECL_##wc (S); \ _FP_FRAC_DECL_##wc (_FP_SQRT_S); \
_FP_W_TYPE q; \ _FP_W_TYPE _FP_SQRT_q; \
switch (X##_c) \ switch (X##_c) \
{ \ { \
case FP_CLS_NAN: \ case FP_CLS_NAN: \
_FP_FRAC_COPY_##wc (R, X); \ _FP_FRAC_COPY_##wc (R, X); \
R##_s = X##_s; \ R##_s = X##_s; \
R##_c = FP_CLS_NAN; \ R##_c = FP_CLS_NAN; \
break; \ break; \
case FP_CLS_INF: \ case FP_CLS_INF: \
if (X##_s) \ if (X##_s) \
{ \ { \
R##_s = _FP_NANSIGN_##fs; \ R##_s = _FP_NANSIGN_##fs; \
R##_c = FP_CLS_NAN; /* NAN */ \ R##_c = FP_CLS_NAN; /* NAN */ \
_FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \ _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SQRT); \
} \ } \
else \ else \
{ \ { \
R##_s = 0; \ R##_s = 0; \
R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */ \ R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */ \
} \ } \
break; \ break; \
case FP_CLS_ZERO: \ case FP_CLS_ZERO: \
R##_s = X##_s; \ R##_s = X##_s; \
R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */ \ R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */ \
break; \ break; \
case FP_CLS_NORMAL: \ case FP_CLS_NORMAL: \
R##_s = 0; \ R##_s = 0; \
if (X##_s) \ if (X##_s) \
{ \ { \
R##_c = FP_CLS_NAN; /* NAN */ \ R##_c = FP_CLS_NAN; /* NAN */ \
R##_s = _FP_NANSIGN_##fs; \ R##_s = _FP_NANSIGN_##fs; \
_FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \ _FP_FRAC_SET_##wc (R, _FP_NANFRAC_##fs); \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_SQRT); \
break; \ break; \
} \ } \
R##_c = FP_CLS_NORMAL; \ R##_c = FP_CLS_NORMAL; \
if (X##_e & 1) \ if (X##_e & 1) \
_FP_FRAC_SLL_##wc (X, 1); \ _FP_FRAC_SLL_##wc (X, 1); \
R##_e = X##_e >> 1; \ R##_e = X##_e >> 1; \
_FP_FRAC_SET_##wc (S, _FP_ZEROFRAC_##wc); \ _FP_FRAC_SET_##wc (_FP_SQRT_S, _FP_ZEROFRAC_##wc); \
_FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc); \ _FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc); \
q = _FP_OVERFLOW_##fs >> 1; \ _FP_SQRT_q = _FP_OVERFLOW_##fs >> 1; \
_FP_SQRT_MEAT_##wc (R, S, T, X, q); \ _FP_SQRT_MEAT_##wc (R, _FP_SQRT_S, _FP_SQRT_T, X, \
} \ _FP_SQRT_q); \
} \ } \
} \
while (0) while (0)
/* /* Convert from FP to integer. Input is raw. */
* Convert from FP to integer. Input is raw.
*/
/* RSIGNED can have following values: /* RSIGNED can have following values:
* 0: the number is required to be 0..(2^rsize)-1, if not, NV is set plus 0: the number is required to be 0..(2^rsize)-1, if not, NV is set plus
* the result is either 0 or (2^rsize)-1 depending on the sign in such the result is either 0 or (2^rsize)-1 depending on the sign in such
* case. case.
* 1: the number is required to be -(2^(rsize-1))..(2^(rsize-1))-1, if not, 1: the number is required to be -(2^(rsize-1))..(2^(rsize-1))-1, if not,
* NV is set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1 NV is set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1
* depending on the sign in such case. depending on the sign in such case.
* -1: the number is required to be -(2^(rsize-1))..(2^rsize)-1, if not, NV is 2: the number is required to be -(2^(rsize-1))..(2^(rsize-1))-1, if not,
* set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1 NV is set plus the result is reduced modulo 2^rsize.
* depending on the sign in such case. -1: the number is required to be -(2^(rsize-1))..(2^rsize)-1, if not, NV is
*/ set plus the result is either -(2^(rsize-1)) or (2^(rsize-1))-1
depending on the sign in such case. */
#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned) \ #define _FP_TO_INT(fs, wc, r, X, rsize, rsigned) \
do \ do \
{ \ { \
if (X##_e < _FP_EXPBIAS_##fs) \ if (X##_e < _FP_EXPBIAS_##fs) \
{ \ { \
r = 0; \ (r) = 0; \
if (X##_e == 0) \ if (X##_e == 0) \
{ \ { \
if (!_FP_FRAC_ZEROP_##wc (X)) \ if (!_FP_FRAC_ZEROP_##wc (X)) \
{ \ { \
FP_SET_EXCEPTION (FP_EX_INEXACT); \ if (!FP_DENORM_ZERO) \
FP_SET_EXCEPTION (FP_EX_INEXACT); \
FP_SET_EXCEPTION (FP_EX_DENORM); \ FP_SET_EXCEPTION (FP_EX_DENORM); \
} \ } \
} \ } \
else \ else \
FP_SET_EXCEPTION (FP_EX_INEXACT); \ FP_SET_EXCEPTION (FP_EX_INEXACT); \
} \ } \
else if (X##_e >= _FP_EXPBIAS_##fs + rsize - (rsigned > 0 || X##_s) \ else if ((rsigned) == 2 \
|| (!rsigned && X##_s)) \ && (X##_e \
>= ((_FP_EXPMAX_##fs \
< _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs + (rsize) - 1) \
? _FP_EXPMAX_##fs \
: _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs + (rsize) - 1))) \
{ \
/* Overflow resulting in 0. */ \
(r) = 0; \
FP_SET_EXCEPTION (FP_EX_INVALID \
| FP_EX_INVALID_CVI \
| ((FP_EX_INVALID_SNAN \
&& _FP_ISSIGNAN (fs, wc, X)) \
? FP_EX_INVALID_SNAN \
: 0)); \
} \
else if ((rsigned) != 2 \
&& (X##_e >= (_FP_EXPMAX_##fs < _FP_EXPBIAS_##fs + (rsize) \
? _FP_EXPMAX_##fs \
: (_FP_EXPBIAS_##fs + (rsize) \
- ((rsigned) > 0 || X##_s))) \
|| (!(rsigned) && X##_s))) \
{ \ { \
/* Overflow or converting to the most negative integer. */ \ /* Overflow or converting to the most negative integer. */ \
if (rsigned) \ if (rsigned) \
{ \ { \
r = 1; \ (r) = 1; \
r <<= rsize - 1; \ (r) <<= (rsize) - 1; \
r -= 1 - X##_s; \ (r) -= 1 - X##_s; \
} else { \ } \
r = 0; \ else \
if (!X##_s) \ { \
r = ~r; \ (r) = 0; \
} \ if (!X##_s) \
(r) = ~(r); \
} \
\ \
if (rsigned && X##_s && X##_e == _FP_EXPBIAS_##fs + rsize - 1) \ if (_FP_EXPBIAS_##fs + (rsize) - 1 < _FP_EXPMAX_##fs \
&& (rsigned) \
&& X##_s \
&& X##_e == _FP_EXPBIAS_##fs + (rsize) - 1) \
{ \ { \
/* Possibly converting to most negative integer; check the \ /* Possibly converting to most negative integer; check the \
mantissa. */ \ mantissa. */ \
int inexact = 0; \ int _FP_TO_INT_inexact = 0; \
(void) ((_FP_FRACBITS_##fs > rsize) \ (void) ((_FP_FRACBITS_##fs > (rsize)) \
? ({ \ ? ({ \
_FP_FRAC_SRST_##wc (X, inexact, \ _FP_FRAC_SRST_##wc (X, _FP_TO_INT_inexact, \
_FP_FRACBITS_##fs - rsize, \ _FP_FRACBITS_##fs - (rsize), \
_FP_FRACBITS_##fs); \ _FP_FRACBITS_##fs); \
0; \ 0; \
}) \ }) \
: 0); \ : 0); \
if (!_FP_FRAC_ZEROP_##wc (X)) \ if (!_FP_FRAC_ZEROP_##wc (X)) \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_CVI); \
else if (inexact) \ else if (_FP_TO_INT_inexact) \
FP_SET_EXCEPTION (FP_EX_INEXACT); \ FP_SET_EXCEPTION (FP_EX_INEXACT); \
} \ } \
else \ else \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID \
| FP_EX_INVALID_CVI \
| ((FP_EX_INVALID_SNAN \
&& _FP_ISSIGNAN (fs, wc, X)) \
? FP_EX_INVALID_SNAN \
: 0)); \
} \ } \
else \ else \
{ \ { \
int _FP_TO_INT_inexact = 0; \
_FP_FRAC_HIGH_RAW_##fs (X) |= _FP_IMPLBIT_##fs; \ _FP_FRAC_HIGH_RAW_##fs (X) |= _FP_IMPLBIT_##fs; \
if (X##_e >= _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1) \ if (X##_e >= _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1) \
{ \ { \
_FP_FRAC_ASSEMBLE_##wc (r, X, rsize); \ _FP_FRAC_ASSEMBLE_##wc ((r), X, (rsize)); \
r <<= X##_e - _FP_EXPBIAS_##fs - _FP_FRACBITS_##fs + 1; \ (r) <<= X##_e - _FP_EXPBIAS_##fs - _FP_FRACBITS_##fs + 1; \
} \ } \
else \ else \
{ \ { \
int inexact; \ _FP_FRAC_SRST_##wc (X, _FP_TO_INT_inexact, \
_FP_FRAC_SRST_##wc (X, inexact, \
(_FP_FRACBITS_##fs + _FP_EXPBIAS_##fs - 1 \ (_FP_FRACBITS_##fs + _FP_EXPBIAS_##fs - 1 \
- X##_e), \ - X##_e), \
_FP_FRACBITS_##fs); \ _FP_FRACBITS_##fs); \
if (inexact) \ _FP_FRAC_ASSEMBLE_##wc ((r), X, (rsize)); \
FP_SET_EXCEPTION (FP_EX_INEXACT); \
_FP_FRAC_ASSEMBLE_##wc (r, X, rsize); \
} \ } \
if (rsigned && X##_s) \ if ((rsigned) && X##_s) \
r = -r; \ (r) = -(r); \
if ((rsigned) == 2 && X##_e >= _FP_EXPBIAS_##fs + (rsize) - 1) \
{ \
/* Overflow or converting to the most negative integer. */ \
if (X##_e > _FP_EXPBIAS_##fs + (rsize) - 1 \
|| !X##_s \
|| (r) != (((typeof (r)) 1) << ((rsize) - 1))) \
{ \
_FP_TO_INT_inexact = 0; \
FP_SET_EXCEPTION (FP_EX_INVALID | FP_EX_INVALID_CVI); \
} \
} \
if (_FP_TO_INT_inexact) \
FP_SET_EXCEPTION (FP_EX_INEXACT); \
} \ } \
} \ } \
while (0) while (0)
...@@ -1409,30 +1532,33 @@ ...@@ -1409,30 +1532,33 @@
{ \ { \
if (r) \ if (r) \
{ \ { \
rtype ur_; \ rtype _FP_FROM_INT_ur; \
\ \
if ((X##_s = (r < 0))) \ if ((X##_s = ((r) < 0))) \
r = -(rtype) r; \ (r) = -(rtype) (r); \
\ \
ur_ = (rtype) r; \ _FP_FROM_INT_ur = (rtype) (r); \
(void) ((rsize <= _FP_W_TYPE_SIZE) \ (void) (((rsize) <= _FP_W_TYPE_SIZE) \
? ({ \ ? ({ \
int lz_; \ int _FP_FROM_INT_lz; \
__FP_CLZ (lz_, (_FP_W_TYPE) ur_); \ __FP_CLZ (_FP_FROM_INT_lz, \
X##_e = _FP_EXPBIAS_##fs + _FP_W_TYPE_SIZE - 1 - lz_; \ (_FP_W_TYPE) _FP_FROM_INT_ur); \
X##_e = (_FP_EXPBIAS_##fs + _FP_W_TYPE_SIZE - 1 \
- _FP_FROM_INT_lz); \
}) \ }) \
: ((rsize <= 2 * _FP_W_TYPE_SIZE) \ : (((rsize) <= 2 * _FP_W_TYPE_SIZE) \
? ({ \ ? ({ \
int lz_; \ int _FP_FROM_INT_lz; \
__FP_CLZ_2 (lz_, \ __FP_CLZ_2 (_FP_FROM_INT_lz, \
(_FP_W_TYPE) (ur_ >> _FP_W_TYPE_SIZE), \ (_FP_W_TYPE) (_FP_FROM_INT_ur \
(_FP_W_TYPE) ur_); \ >> _FP_W_TYPE_SIZE), \
(_FP_W_TYPE) _FP_FROM_INT_ur); \
X##_e = (_FP_EXPBIAS_##fs + 2 * _FP_W_TYPE_SIZE - 1 \ X##_e = (_FP_EXPBIAS_##fs + 2 * _FP_W_TYPE_SIZE - 1 \
- lz_); \ - _FP_FROM_INT_lz); \
}) \ }) \
: (abort (), 0))); \ : (abort (), 0))); \
\ \
if (rsize - 1 + _FP_EXPBIAS_##fs >= _FP_EXPMAX_##fs \ if ((rsize) - 1 + _FP_EXPBIAS_##fs >= _FP_EXPMAX_##fs \
&& X##_e >= _FP_EXPMAX_##fs) \ && X##_e >= _FP_EXPMAX_##fs) \
{ \ { \
/* Exponent too big; overflow to infinity. (May also \ /* Exponent too big; overflow to infinity. (May also \
...@@ -1441,11 +1567,11 @@ ...@@ -1441,11 +1567,11 @@
goto pack_semiraw; \ goto pack_semiraw; \
} \ } \
\ \
if (rsize <= _FP_FRACBITS_##fs \ if ((rsize) <= _FP_FRACBITS_##fs \
|| X##_e < _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs) \ || X##_e < _FP_EXPBIAS_##fs + _FP_FRACBITS_##fs) \
{ \ { \
/* Exactly representable; shift left. */ \ /* Exactly representable; shift left. */ \
_FP_FRAC_DISASSEMBLE_##wc (X, ur_, rsize); \ _FP_FRAC_DISASSEMBLE_##wc (X, _FP_FROM_INT_ur, (rsize)); \
if (_FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1 - X##_e > 0) \ if (_FP_EXPBIAS_##fs + _FP_FRACBITS_##fs - 1 - X##_e > 0) \
_FP_FRAC_SLL_##wc (X, (_FP_EXPBIAS_##fs \ _FP_FRAC_SLL_##wc (X, (_FP_EXPBIAS_##fs \
+ _FP_FRACBITS_##fs - 1 - X##_e)); \ + _FP_FRACBITS_##fs - 1 - X##_e)); \
...@@ -1455,12 +1581,14 @@ ...@@ -1455,12 +1581,14 @@
/* More bits in integer than in floating type; need to \ /* More bits in integer than in floating type; need to \
round. */ \ round. */ \
if (_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 < X##_e) \ if (_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 < X##_e) \
ur_ = ((ur_ >> (X##_e - _FP_EXPBIAS_##fs \ _FP_FROM_INT_ur \
- _FP_WFRACBITS_##fs + 1)) \ = ((_FP_FROM_INT_ur >> (X##_e - _FP_EXPBIAS_##fs \
| ((ur_ << (rsize - (X##_e - _FP_EXPBIAS_##fs \ - _FP_WFRACBITS_##fs + 1)) \
- _FP_WFRACBITS_##fs + 1))) \ | ((_FP_FROM_INT_ur \
!= 0)); \ << ((rsize) - (X##_e - _FP_EXPBIAS_##fs \
_FP_FRAC_DISASSEMBLE_##wc (X, ur_, rsize); \ - _FP_WFRACBITS_##fs + 1))) \
!= 0)); \
_FP_FRAC_DISASSEMBLE_##wc (X, _FP_FROM_INT_ur, (rsize)); \
if ((_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 - X##_e) > 0) \ if ((_FP_EXPBIAS_##fs + _FP_WFRACBITS_##fs - 1 - X##_e) > 0) \
_FP_FRAC_SLL_##wc (X, (_FP_EXPBIAS_##fs \ _FP_FRAC_SLL_##wc (X, (_FP_EXPBIAS_##fs \
+ _FP_WFRACBITS_##fs - 1 - X##_e)); \ + _FP_WFRACBITS_##fs - 1 - X##_e)); \
...@@ -1501,6 +1629,7 @@ ...@@ -1501,6 +1629,7 @@
{ \ { \
if (S##_e == 0) \ if (S##_e == 0) \
{ \ { \
_FP_CHECK_FLUSH_ZERO (sfs, swc, S); \
if (_FP_FRAC_ZEROP_##swc (S)) \ if (_FP_FRAC_ZEROP_##swc (S)) \
D##_e = 0; \ D##_e = 0; \
else if (_FP_EXPBIAS_##dfs \ else if (_FP_EXPBIAS_##dfs \
...@@ -1510,17 +1639,19 @@ ...@@ -1510,17 +1639,19 @@
_FP_FRAC_SLL_##dwc (D, (_FP_FRACBITS_##dfs \ _FP_FRAC_SLL_##dwc (D, (_FP_FRACBITS_##dfs \
- _FP_FRACBITS_##sfs)); \ - _FP_FRACBITS_##sfs)); \
D##_e = 0; \ D##_e = 0; \
if (FP_TRAPPING_EXCEPTIONS & FP_EX_UNDERFLOW) \
FP_SET_EXCEPTION (FP_EX_UNDERFLOW); \
} \ } \
else \ else \
{ \ { \
int _lz; \ int FP_EXTEND_lz; \
FP_SET_EXCEPTION (FP_EX_DENORM); \ FP_SET_EXCEPTION (FP_EX_DENORM); \
_FP_FRAC_CLZ_##swc (_lz, S); \ _FP_FRAC_CLZ_##swc (FP_EXTEND_lz, S); \
_FP_FRAC_SLL_##dwc (D, \ _FP_FRAC_SLL_##dwc (D, \
_lz + _FP_FRACBITS_##dfs \ FP_EXTEND_lz + _FP_FRACBITS_##dfs \
- _FP_FRACTBITS_##sfs); \ - _FP_FRACTBITS_##sfs); \
D##_e = (_FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs + 1 \ D##_e = (_FP_EXPBIAS_##dfs - _FP_EXPBIAS_##sfs + 1 \
+ _FP_FRACXBITS_##sfs - _lz); \ + _FP_FRACXBITS_##sfs - FP_EXTEND_lz); \
} \ } \
} \ } \
else \ else \
...@@ -1529,7 +1660,8 @@ ...@@ -1529,7 +1660,8 @@
if (!_FP_FRAC_ZEROP_##swc (S)) \ if (!_FP_FRAC_ZEROP_##swc (S)) \
{ \ { \
if (_FP_FRAC_SNANP (sfs, S)) \ if (_FP_FRAC_SNANP (sfs, S)) \
FP_SET_EXCEPTION (FP_EX_INVALID); \ FP_SET_EXCEPTION (FP_EX_INVALID \
| FP_EX_INVALID_SNAN); \
_FP_FRAC_SLL_##dwc (D, (_FP_FRACBITS_##dfs \ _FP_FRAC_SLL_##dwc (D, (_FP_FRACBITS_##dfs \
- _FP_FRACBITS_##sfs)); \ - _FP_FRACBITS_##sfs)); \
_FP_SETQNAN (dfs, dwc, D); \ _FP_SETQNAN (dfs, dwc, D); \
...@@ -1584,6 +1716,7 @@ ...@@ -1584,6 +1716,7 @@
{ \ { \
if (S##_e == 0) \ if (S##_e == 0) \
{ \ { \
_FP_CHECK_FLUSH_ZERO (sfs, swc, S); \
D##_e = 0; \ D##_e = 0; \
if (_FP_FRAC_ZEROP_##swc (S)) \ if (_FP_FRAC_ZEROP_##swc (S)) \
_FP_FRAC_SET_##dwc (D, _FP_ZEROFRAC_##dwc); \ _FP_FRAC_SET_##dwc (D, _FP_ZEROFRAC_##dwc); \
...@@ -1626,9 +1759,7 @@ ...@@ -1626,9 +1759,7 @@
} \ } \
while (0) while (0)
/* /* Helper primitives. */
* Helper primitives.
*/
/* Count leading zeros in a word. */ /* Count leading zeros in a word. */
...@@ -1638,11 +1769,11 @@ ...@@ -1638,11 +1769,11 @@
do \ do \
{ \ { \
if (sizeof (_FP_W_TYPE) == sizeof (unsigned int)) \ if (sizeof (_FP_W_TYPE) == sizeof (unsigned int)) \
r = __builtin_clz (x); \ (r) = __builtin_clz (x); \
else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long)) \ else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long)) \
r = __builtin_clzl (x); \ (r) = __builtin_clzl (x); \
else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long long)) \ else if (sizeof (_FP_W_TYPE) == sizeof (unsigned long long)) \
r = __builtin_clzll (x); \ (r) = __builtin_clzll (x); \
else \ else \
abort (); \ abort (); \
} \ } \
...@@ -1652,7 +1783,7 @@ ...@@ -1652,7 +1783,7 @@
#define _FP_DIV_HELP_imm(q, r, n, d) \ #define _FP_DIV_HELP_imm(q, r, n, d) \
do \ do \
{ \ { \
q = n / d, r = n % d; \ (q) = (n) / (d), (r) = (n) % (d); \
} \ } \
while (0) while (0)
...@@ -1662,42 +1793,51 @@ ...@@ -1662,42 +1793,51 @@
#define _FP_DIV_MEAT_N_loop(fs, wc, R, X, Y) \ #define _FP_DIV_MEAT_N_loop(fs, wc, R, X, Y) \
do \ do \
{ \ { \
int count = _FP_WFRACBITS_##fs; \ int _FP_DIV_MEAT_N_loop_count = _FP_WFRACBITS_##fs; \
_FP_FRAC_DECL_##wc (u); \ _FP_FRAC_DECL_##wc (_FP_DIV_MEAT_N_loop_u); \
_FP_FRAC_DECL_##wc (v); \ _FP_FRAC_DECL_##wc (_FP_DIV_MEAT_N_loop_v); \
_FP_FRAC_COPY_##wc (u, X); \ _FP_FRAC_COPY_##wc (_FP_DIV_MEAT_N_loop_u, X); \
_FP_FRAC_COPY_##wc (v, Y); \ _FP_FRAC_COPY_##wc (_FP_DIV_MEAT_N_loop_v, Y); \
_FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc); \ _FP_FRAC_SET_##wc (R, _FP_ZEROFRAC_##wc); \
/* Normalize U and V. */ \ /* Normalize _FP_DIV_MEAT_N_LOOP_U and _FP_DIV_MEAT_N_LOOP_V. */ \
_FP_FRAC_SLL_##wc (u, _FP_WFRACXBITS_##fs); \ _FP_FRAC_SLL_##wc (_FP_DIV_MEAT_N_loop_u, _FP_WFRACXBITS_##fs); \
_FP_FRAC_SLL_##wc (v, _FP_WFRACXBITS_##fs); \ _FP_FRAC_SLL_##wc (_FP_DIV_MEAT_N_loop_v, _FP_WFRACXBITS_##fs); \
/* First round. Since the operands are normalized, either the \ /* First round. Since the operands are normalized, either the \
first or second bit will be set in the fraction. Produce a \ first or second bit will be set in the fraction. Produce a \
normalized result by checking which and adjusting the loop \ normalized result by checking which and adjusting the loop \
count and exponent accordingly. */ \ count and exponent accordingly. */ \
if (_FP_FRAC_GE_1 (u, v)) \ if (_FP_FRAC_GE_1 (_FP_DIV_MEAT_N_loop_u, _FP_DIV_MEAT_N_loop_v)) \
{ \ { \
_FP_FRAC_SUB_##wc (u, u, v); \ _FP_FRAC_SUB_##wc (_FP_DIV_MEAT_N_loop_u, \
_FP_DIV_MEAT_N_loop_u, \
_FP_DIV_MEAT_N_loop_v); \
_FP_FRAC_LOW_##wc (R) |= 1; \ _FP_FRAC_LOW_##wc (R) |= 1; \
count--; \ _FP_DIV_MEAT_N_loop_count--; \
} \ } \
else \ else \
R##_e--; \ R##_e--; \
/* Subsequent rounds. */ \ /* Subsequent rounds. */ \
do \ do \
{ \ { \
int msb = (_FP_WS_TYPE) _FP_FRAC_HIGH_##wc (u) < 0; \ int _FP_DIV_MEAT_N_loop_msb \
_FP_FRAC_SLL_##wc (u, 1); \ = (_FP_WS_TYPE) _FP_FRAC_HIGH_##wc (_FP_DIV_MEAT_N_loop_u) < 0; \
_FP_FRAC_SLL_##wc (_FP_DIV_MEAT_N_loop_u, 1); \
_FP_FRAC_SLL_##wc (R, 1); \ _FP_FRAC_SLL_##wc (R, 1); \
if (msb || _FP_FRAC_GE_1 (u, v)) \ if (_FP_DIV_MEAT_N_loop_msb \
|| _FP_FRAC_GE_1 (_FP_DIV_MEAT_N_loop_u, \
_FP_DIV_MEAT_N_loop_v)) \
{ \ { \
_FP_FRAC_SUB_##wc (u, u, v); \ _FP_FRAC_SUB_##wc (_FP_DIV_MEAT_N_loop_u, \
_FP_DIV_MEAT_N_loop_u, \
_FP_DIV_MEAT_N_loop_v); \
_FP_FRAC_LOW_##wc (R) |= 1; \ _FP_FRAC_LOW_##wc (R) |= 1; \
} \ } \
} \ } \
while (--count > 0); \ while (--_FP_DIV_MEAT_N_loop_count > 0); \
/* If there's anything left in U, the result is inexact. */ \ /* If there's anything left in _FP_DIV_MEAT_N_LOOP_U, the result \
_FP_FRAC_LOW_##wc (R) |= !_FP_FRAC_ZEROP_##wc (u); \ is inexact. */ \
_FP_FRAC_LOW_##wc (R) \
|= !_FP_FRAC_ZEROP_##wc (_FP_DIV_MEAT_N_loop_u); \
} \ } \
while (0) while (0)
......
...@@ -95,21 +95,21 @@ union _FP_UNION_Q ...@@ -95,21 +95,21 @@ union _FP_UNION_Q
# define FP_DECL_Q(X) _FP_DECL (4, X) # define FP_DECL_Q(X) _FP_DECL (4, X)
# define FP_UNPACK_RAW_Q(X, val) _FP_UNPACK_RAW_4 (Q, X, val) # define FP_UNPACK_RAW_Q(X, val) _FP_UNPACK_RAW_4 (Q, X, (val))
# define FP_UNPACK_RAW_QP(X, val) _FP_UNPACK_RAW_4_P (Q, X, val) # define FP_UNPACK_RAW_QP(X, val) _FP_UNPACK_RAW_4_P (Q, X, (val))
# define FP_PACK_RAW_Q(val, X) _FP_PACK_RAW_4 (Q, val, X) # define FP_PACK_RAW_Q(val, X) _FP_PACK_RAW_4 (Q, (val), X)
# define FP_PACK_RAW_QP(val, X) \ # define FP_PACK_RAW_QP(val, X) \
do \ do \
{ \ { \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_4_P (Q, val, X); \ _FP_PACK_RAW_4_P (Q, (val), X); \
} \ } \
while (0) while (0)
# define FP_UNPACK_Q(X, val) \ # define FP_UNPACK_Q(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_4 (Q, X, val); \ _FP_UNPACK_RAW_4 (Q, X, (val)); \
_FP_UNPACK_CANONICAL (Q, 4, X); \ _FP_UNPACK_CANONICAL (Q, 4, X); \
} \ } \
while (0) while (0)
...@@ -117,7 +117,7 @@ union _FP_UNION_Q ...@@ -117,7 +117,7 @@ union _FP_UNION_Q
# define FP_UNPACK_QP(X, val) \ # define FP_UNPACK_QP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_4_P (Q, X, val); \ _FP_UNPACK_RAW_4_P (Q, X, (val)); \
_FP_UNPACK_CANONICAL (Q, 4, X); \ _FP_UNPACK_CANONICAL (Q, 4, X); \
} \ } \
while (0) while (0)
...@@ -125,7 +125,7 @@ union _FP_UNION_Q ...@@ -125,7 +125,7 @@ union _FP_UNION_Q
# define FP_UNPACK_SEMIRAW_Q(X, val) \ # define FP_UNPACK_SEMIRAW_Q(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_4 (Q, X, val); \ _FP_UNPACK_RAW_4 (Q, X, (val)); \
_FP_UNPACK_SEMIRAW (Q, 4, X); \ _FP_UNPACK_SEMIRAW (Q, 4, X); \
} \ } \
while (0) while (0)
...@@ -133,7 +133,7 @@ union _FP_UNION_Q ...@@ -133,7 +133,7 @@ union _FP_UNION_Q
# define FP_UNPACK_SEMIRAW_QP(X, val) \ # define FP_UNPACK_SEMIRAW_QP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_4_P (Q, X, val); \ _FP_UNPACK_RAW_4_P (Q, X, (val)); \
_FP_UNPACK_SEMIRAW (Q, 4, X); \ _FP_UNPACK_SEMIRAW (Q, 4, X); \
} \ } \
while (0) while (0)
...@@ -142,7 +142,7 @@ union _FP_UNION_Q ...@@ -142,7 +142,7 @@ union _FP_UNION_Q
do \ do \
{ \ { \
_FP_PACK_CANONICAL (Q, 4, X); \ _FP_PACK_CANONICAL (Q, 4, X); \
_FP_PACK_RAW_4 (Q, val, X); \ _FP_PACK_RAW_4 (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -151,7 +151,7 @@ union _FP_UNION_Q ...@@ -151,7 +151,7 @@ union _FP_UNION_Q
{ \ { \
_FP_PACK_CANONICAL (Q, 4, X); \ _FP_PACK_CANONICAL (Q, 4, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_4_P (Q, val, X); \ _FP_PACK_RAW_4_P (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -159,7 +159,7 @@ union _FP_UNION_Q ...@@ -159,7 +159,7 @@ union _FP_UNION_Q
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (Q, 4, X); \ _FP_PACK_SEMIRAW (Q, 4, X); \
_FP_PACK_RAW_4 (Q, val, X); \ _FP_PACK_RAW_4 (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -168,7 +168,7 @@ union _FP_UNION_Q ...@@ -168,7 +168,7 @@ union _FP_UNION_Q
{ \ { \
_FP_PACK_SEMIRAW (Q, 4, X); \ _FP_PACK_SEMIRAW (Q, 4, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_4_P (Q, val, X); \ _FP_PACK_RAW_4_P (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -179,15 +179,15 @@ union _FP_UNION_Q ...@@ -179,15 +179,15 @@ union _FP_UNION_Q
# define FP_MUL_Q(R, X, Y) _FP_MUL (Q, 4, R, X, Y) # define FP_MUL_Q(R, X, Y) _FP_MUL (Q, 4, R, X, Y)
# define FP_DIV_Q(R, X, Y) _FP_DIV (Q, 4, R, X, Y) # define FP_DIV_Q(R, X, Y) _FP_DIV (Q, 4, R, X, Y)
# define FP_SQRT_Q(R, X) _FP_SQRT (Q, 4, R, X) # define FP_SQRT_Q(R, X) _FP_SQRT (Q, 4, R, X)
# define _FP_SQRT_MEAT_Q(R, S, T, X, Q) _FP_SQRT_MEAT_4 (R, S, T, X, Q) # define _FP_SQRT_MEAT_Q(R, S, T, X, Q) _FP_SQRT_MEAT_4 (R, S, T, X, (Q))
# define FP_FMA_Q(R, X, Y, Z) _FP_FMA (Q, 4, 8, R, X, Y, Z) # define FP_FMA_Q(R, X, Y, Z) _FP_FMA (Q, 4, 8, R, X, Y, Z)
# define FP_CMP_Q(r, X, Y, un) _FP_CMP (Q, 4, r, X, Y, un) # define FP_CMP_Q(r, X, Y, un, ex) _FP_CMP (Q, 4, (r), X, Y, (un), (ex))
# define FP_CMP_EQ_Q(r, X, Y) _FP_CMP_EQ (Q, 4, r, X, Y) # define FP_CMP_EQ_Q(r, X, Y, ex) _FP_CMP_EQ (Q, 4, (r), X, Y, (ex))
# define FP_CMP_UNORD_Q(r, X, Y) _FP_CMP_UNORD (Q, 4, r, X, Y) # define FP_CMP_UNORD_Q(r, X, Y, ex) _FP_CMP_UNORD (Q, 4, (r), X, Y, (ex))
# define FP_TO_INT_Q(r, X, rsz, rsg) _FP_TO_INT (Q, 4, r, X, rsz, rsg) # define FP_TO_INT_Q(r, X, rsz, rsg) _FP_TO_INT (Q, 4, (r), X, (rsz), (rsg))
# define FP_FROM_INT_Q(X, r, rs, rt) _FP_FROM_INT (Q, 4, X, r, rs, rt) # define FP_FROM_INT_Q(X, r, rs, rt) _FP_FROM_INT (Q, 4, X, (r), (rs), rt)
# define _FP_FRAC_HIGH_Q(X) _FP_FRAC_HIGH_4 (X) # define _FP_FRAC_HIGH_Q(X) _FP_FRAC_HIGH_4 (X)
# define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_4 (X) # define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_4 (X)
...@@ -219,21 +219,21 @@ union _FP_UNION_Q ...@@ -219,21 +219,21 @@ union _FP_UNION_Q
}; };
# define FP_DECL_Q(X) _FP_DECL (2, X) # define FP_DECL_Q(X) _FP_DECL (2, X)
# define FP_UNPACK_RAW_Q(X, val) _FP_UNPACK_RAW_2 (Q, X, val) # define FP_UNPACK_RAW_Q(X, val) _FP_UNPACK_RAW_2 (Q, X, (val))
# define FP_UNPACK_RAW_QP(X, val) _FP_UNPACK_RAW_2_P (Q, X, val) # define FP_UNPACK_RAW_QP(X, val) _FP_UNPACK_RAW_2_P (Q, X, (val))
# define FP_PACK_RAW_Q(val, X) _FP_PACK_RAW_2 (Q, val, X) # define FP_PACK_RAW_Q(val, X) _FP_PACK_RAW_2 (Q, (val), X)
# define FP_PACK_RAW_QP(val, X) \ # define FP_PACK_RAW_QP(val, X) \
do \ do \
{ \ { \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_2_P (Q, val, X); \ _FP_PACK_RAW_2_P (Q, (val), X); \
} \ } \
while (0) while (0)
# define FP_UNPACK_Q(X, val) \ # define FP_UNPACK_Q(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2 (Q, X, val); \ _FP_UNPACK_RAW_2 (Q, X, (val)); \
_FP_UNPACK_CANONICAL (Q, 2, X); \ _FP_UNPACK_CANONICAL (Q, 2, X); \
} \ } \
while (0) while (0)
...@@ -241,7 +241,7 @@ union _FP_UNION_Q ...@@ -241,7 +241,7 @@ union _FP_UNION_Q
# define FP_UNPACK_QP(X, val) \ # define FP_UNPACK_QP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2_P (Q, X, val); \ _FP_UNPACK_RAW_2_P (Q, X, (val)); \
_FP_UNPACK_CANONICAL (Q, 2, X); \ _FP_UNPACK_CANONICAL (Q, 2, X); \
} \ } \
while (0) while (0)
...@@ -249,7 +249,7 @@ union _FP_UNION_Q ...@@ -249,7 +249,7 @@ union _FP_UNION_Q
# define FP_UNPACK_SEMIRAW_Q(X, val) \ # define FP_UNPACK_SEMIRAW_Q(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2 (Q, X, val); \ _FP_UNPACK_RAW_2 (Q, X, (val)); \
_FP_UNPACK_SEMIRAW (Q, 2, X); \ _FP_UNPACK_SEMIRAW (Q, 2, X); \
} \ } \
while (0) while (0)
...@@ -257,7 +257,7 @@ union _FP_UNION_Q ...@@ -257,7 +257,7 @@ union _FP_UNION_Q
# define FP_UNPACK_SEMIRAW_QP(X, val) \ # define FP_UNPACK_SEMIRAW_QP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_2_P (Q, X, val); \ _FP_UNPACK_RAW_2_P (Q, X, (val)); \
_FP_UNPACK_SEMIRAW (Q, 2, X); \ _FP_UNPACK_SEMIRAW (Q, 2, X); \
} \ } \
while (0) while (0)
...@@ -266,7 +266,7 @@ union _FP_UNION_Q ...@@ -266,7 +266,7 @@ union _FP_UNION_Q
do \ do \
{ \ { \
_FP_PACK_CANONICAL (Q, 2, X); \ _FP_PACK_CANONICAL (Q, 2, X); \
_FP_PACK_RAW_2 (Q, val, X); \ _FP_PACK_RAW_2 (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -275,7 +275,7 @@ union _FP_UNION_Q ...@@ -275,7 +275,7 @@ union _FP_UNION_Q
{ \ { \
_FP_PACK_CANONICAL (Q, 2, X); \ _FP_PACK_CANONICAL (Q, 2, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_2_P (Q, val, X); \ _FP_PACK_RAW_2_P (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -283,7 +283,7 @@ union _FP_UNION_Q ...@@ -283,7 +283,7 @@ union _FP_UNION_Q
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (Q, 2, X); \ _FP_PACK_SEMIRAW (Q, 2, X); \
_FP_PACK_RAW_2 (Q, val, X); \ _FP_PACK_RAW_2 (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -292,7 +292,7 @@ union _FP_UNION_Q ...@@ -292,7 +292,7 @@ union _FP_UNION_Q
{ \ { \
_FP_PACK_SEMIRAW (Q, 2, X); \ _FP_PACK_SEMIRAW (Q, 2, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_2_P (Q, val, X); \ _FP_PACK_RAW_2_P (Q, (val), X); \
} \ } \
while (0) while (0)
...@@ -303,15 +303,15 @@ union _FP_UNION_Q ...@@ -303,15 +303,15 @@ union _FP_UNION_Q
# define FP_MUL_Q(R, X, Y) _FP_MUL (Q, 2, R, X, Y) # define FP_MUL_Q(R, X, Y) _FP_MUL (Q, 2, R, X, Y)
# define FP_DIV_Q(R, X, Y) _FP_DIV (Q, 2, R, X, Y) # define FP_DIV_Q(R, X, Y) _FP_DIV (Q, 2, R, X, Y)
# define FP_SQRT_Q(R, X) _FP_SQRT (Q, 2, R, X) # define FP_SQRT_Q(R, X) _FP_SQRT (Q, 2, R, X)
# define _FP_SQRT_MEAT_Q(R, S, T, X, Q) _FP_SQRT_MEAT_2 (R, S, T, X, Q) # define _FP_SQRT_MEAT_Q(R, S, T, X, Q) _FP_SQRT_MEAT_2 (R, S, T, X, (Q))
# define FP_FMA_Q(R, X, Y, Z) _FP_FMA (Q, 2, 4, R, X, Y, Z) # define FP_FMA_Q(R, X, Y, Z) _FP_FMA (Q, 2, 4, R, X, Y, Z)
# define FP_CMP_Q(r, X, Y, un) _FP_CMP (Q, 2, r, X, Y, un) # define FP_CMP_Q(r, X, Y, un, ex) _FP_CMP (Q, 2, (r), X, Y, (un), (ex))
# define FP_CMP_EQ_Q(r, X, Y) _FP_CMP_EQ (Q, 2, r, X, Y) # define FP_CMP_EQ_Q(r, X, Y, ex) _FP_CMP_EQ (Q, 2, (r), X, Y, (ex))
# define FP_CMP_UNORD_Q(r, X, Y) _FP_CMP_UNORD (Q, 2, r, X, Y) # define FP_CMP_UNORD_Q(r, X, Y, ex) _FP_CMP_UNORD (Q, 2, (r), X, Y, (ex))
# define FP_TO_INT_Q(r, X, rsz, rsg) _FP_TO_INT (Q, 2, r, X, rsz, rsg) # define FP_TO_INT_Q(r, X, rsz, rsg) _FP_TO_INT (Q, 2, (r), X, (rsz), (rsg))
# define FP_FROM_INT_Q(X, r, rs, rt) _FP_FROM_INT (Q, 2, X, r, rs, rt) # define FP_FROM_INT_Q(X, r, rs, rt) _FP_FROM_INT (Q, 2, X, (r), (rs), rt)
# define _FP_FRAC_HIGH_Q(X) _FP_FRAC_HIGH_2 (X) # define _FP_FRAC_HIGH_Q(X) _FP_FRAC_HIGH_2 (X)
# define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_2 (X) # define _FP_FRAC_HIGH_RAW_Q(X) _FP_FRAC_HIGH_2 (X)
......
...@@ -83,21 +83,21 @@ union _FP_UNION_S ...@@ -83,21 +83,21 @@ union _FP_UNION_S
}; };
#define FP_DECL_S(X) _FP_DECL (1, X) #define FP_DECL_S(X) _FP_DECL (1, X)
#define FP_UNPACK_RAW_S(X, val) _FP_UNPACK_RAW_1 (S, X, val) #define FP_UNPACK_RAW_S(X, val) _FP_UNPACK_RAW_1 (S, X, (val))
#define FP_UNPACK_RAW_SP(X, val) _FP_UNPACK_RAW_1_P (S, X, val) #define FP_UNPACK_RAW_SP(X, val) _FP_UNPACK_RAW_1_P (S, X, (val))
#define FP_PACK_RAW_S(val, X) _FP_PACK_RAW_1 (S, val, X) #define FP_PACK_RAW_S(val, X) _FP_PACK_RAW_1 (S, (val), X)
#define FP_PACK_RAW_SP(val, X) \ #define FP_PACK_RAW_SP(val, X) \
do \ do \
{ \ { \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_1_P (S, val, X); \ _FP_PACK_RAW_1_P (S, (val), X); \
} \ } \
while (0) while (0)
#define FP_UNPACK_S(X, val) \ #define FP_UNPACK_S(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1 (S, X, val); \ _FP_UNPACK_RAW_1 (S, X, (val)); \
_FP_UNPACK_CANONICAL (S, 1, X); \ _FP_UNPACK_CANONICAL (S, 1, X); \
} \ } \
while (0) while (0)
...@@ -105,7 +105,7 @@ union _FP_UNION_S ...@@ -105,7 +105,7 @@ union _FP_UNION_S
#define FP_UNPACK_SP(X, val) \ #define FP_UNPACK_SP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1_P (S, X, val); \ _FP_UNPACK_RAW_1_P (S, X, (val)); \
_FP_UNPACK_CANONICAL (S, 1, X); \ _FP_UNPACK_CANONICAL (S, 1, X); \
} \ } \
while (0) while (0)
...@@ -113,7 +113,7 @@ union _FP_UNION_S ...@@ -113,7 +113,7 @@ union _FP_UNION_S
#define FP_UNPACK_SEMIRAW_S(X, val) \ #define FP_UNPACK_SEMIRAW_S(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1 (S, X, val); \ _FP_UNPACK_RAW_1 (S, X, (val)); \
_FP_UNPACK_SEMIRAW (S, 1, X); \ _FP_UNPACK_SEMIRAW (S, 1, X); \
} \ } \
while (0) while (0)
...@@ -121,7 +121,7 @@ union _FP_UNION_S ...@@ -121,7 +121,7 @@ union _FP_UNION_S
#define FP_UNPACK_SEMIRAW_SP(X, val) \ #define FP_UNPACK_SEMIRAW_SP(X, val) \
do \ do \
{ \ { \
_FP_UNPACK_RAW_1_P (S, X, val); \ _FP_UNPACK_RAW_1_P (S, X, (val)); \
_FP_UNPACK_SEMIRAW (S, 1, X); \ _FP_UNPACK_SEMIRAW (S, 1, X); \
} \ } \
while (0) while (0)
...@@ -130,7 +130,7 @@ union _FP_UNION_S ...@@ -130,7 +130,7 @@ union _FP_UNION_S
do \ do \
{ \ { \
_FP_PACK_CANONICAL (S, 1, X); \ _FP_PACK_CANONICAL (S, 1, X); \
_FP_PACK_RAW_1 (S, val, X); \ _FP_PACK_RAW_1 (S, (val), X); \
} \ } \
while (0) while (0)
...@@ -139,7 +139,7 @@ union _FP_UNION_S ...@@ -139,7 +139,7 @@ union _FP_UNION_S
{ \ { \
_FP_PACK_CANONICAL (S, 1, X); \ _FP_PACK_CANONICAL (S, 1, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_1_P (S, val, X); \ _FP_PACK_RAW_1_P (S, (val), X); \
} \ } \
while (0) while (0)
...@@ -147,7 +147,7 @@ union _FP_UNION_S ...@@ -147,7 +147,7 @@ union _FP_UNION_S
do \ do \
{ \ { \
_FP_PACK_SEMIRAW (S, 1, X); \ _FP_PACK_SEMIRAW (S, 1, X); \
_FP_PACK_RAW_1 (S, val, X); \ _FP_PACK_RAW_1 (S, (val), X); \
} \ } \
while (0) while (0)
...@@ -156,7 +156,7 @@ union _FP_UNION_S ...@@ -156,7 +156,7 @@ union _FP_UNION_S
{ \ { \
_FP_PACK_SEMIRAW (S, 1, X); \ _FP_PACK_SEMIRAW (S, 1, X); \
if (!FP_INHIBIT_RESULTS) \ if (!FP_INHIBIT_RESULTS) \
_FP_PACK_RAW_1_P (S, val, X); \ _FP_PACK_RAW_1_P (S, (val), X); \
} \ } \
while (0) while (0)
...@@ -167,7 +167,7 @@ union _FP_UNION_S ...@@ -167,7 +167,7 @@ union _FP_UNION_S
#define FP_MUL_S(R, X, Y) _FP_MUL (S, 1, R, X, Y) #define FP_MUL_S(R, X, Y) _FP_MUL (S, 1, R, X, Y)
#define FP_DIV_S(R, X, Y) _FP_DIV (S, 1, R, X, Y) #define FP_DIV_S(R, X, Y) _FP_DIV (S, 1, R, X, Y)
#define FP_SQRT_S(R, X) _FP_SQRT (S, 1, R, X) #define FP_SQRT_S(R, X) _FP_SQRT (S, 1, R, X)
#define _FP_SQRT_MEAT_S(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, Q) #define _FP_SQRT_MEAT_S(R, S, T, X, Q) _FP_SQRT_MEAT_1 (R, S, T, X, (Q))
#if _FP_W_TYPE_SIZE < 64 #if _FP_W_TYPE_SIZE < 64
# define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 2, R, X, Y, Z) # define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 2, R, X, Y, Z)
...@@ -175,12 +175,12 @@ union _FP_UNION_S ...@@ -175,12 +175,12 @@ union _FP_UNION_S
# define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 1, R, X, Y, Z) # define FP_FMA_S(R, X, Y, Z) _FP_FMA (S, 1, 1, R, X, Y, Z)
#endif #endif
#define FP_CMP_S(r, X, Y, un) _FP_CMP (S, 1, r, X, Y, un) #define FP_CMP_S(r, X, Y, un, ex) _FP_CMP (S, 1, (r), X, Y, (un), (ex))
#define FP_CMP_EQ_S(r, X, Y) _FP_CMP_EQ (S, 1, r, X, Y) #define FP_CMP_EQ_S(r, X, Y, ex) _FP_CMP_EQ (S, 1, (r), X, Y, (ex))
#define FP_CMP_UNORD_S(r, X, Y) _FP_CMP_UNORD (S, 1, r, X, Y) #define FP_CMP_UNORD_S(r, X, Y, ex) _FP_CMP_UNORD (S, 1, (r), X, Y, (ex))
#define FP_TO_INT_S(r, X, rsz, rsg) _FP_TO_INT (S, 1, r, X, rsz, rsg) #define FP_TO_INT_S(r, X, rsz, rsg) _FP_TO_INT (S, 1, (r), X, (rsz), (rsg))
#define FP_FROM_INT_S(X, r, rs, rt) _FP_FROM_INT (S, 1, X, r, rs, rt) #define FP_FROM_INT_S(X, r, rs, rt) _FP_FROM_INT (S, 1, X, (r), (rs), rt)
#define _FP_FRAC_HIGH_S(X) _FP_FRAC_HIGH_1 (X) #define _FP_FRAC_HIGH_S(X) _FP_FRAC_HIGH_1 (X)
#define _FP_FRAC_HIGH_RAW_S(X) _FP_FRAC_HIGH_1 (X) #define _FP_FRAC_HIGH_RAW_S(X) _FP_FRAC_HIGH_1 (X)
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
# include "sfp-machine.h" # include "sfp-machine.h"
#endif #endif
/* Allow sfp-machine to have its own byte order definitions. */ /* Allow sfp-machine to have its own byte order definitions. */
#ifndef __BYTE_ORDER #ifndef __BYTE_ORDER
# ifdef _LIBC # ifdef _LIBC
# include <endian.h> # include <endian.h>
...@@ -63,7 +63,7 @@ ...@@ -63,7 +63,7 @@
# define FP_ROUNDMODE FP_RND_NEAREST # define FP_ROUNDMODE FP_RND_NEAREST
#endif #endif
/* By default don't care about exceptions. */ /* By default don't care about exceptions. */
#ifndef FP_EX_INVALID #ifndef FP_EX_INVALID
# define FP_EX_INVALID 0 # define FP_EX_INVALID 0
#endif #endif
...@@ -83,6 +83,44 @@ ...@@ -83,6 +83,44 @@
# define FP_EX_DENORM 0 # define FP_EX_DENORM 0
#endif #endif
/* Sub-exceptions of "invalid". */
/* Signaling NaN operand. */
#ifndef FP_EX_INVALID_SNAN
# define FP_EX_INVALID_SNAN 0
#endif
/* Inf * 0. */
#ifndef FP_EX_INVALID_IMZ
# define FP_EX_INVALID_IMZ 0
#endif
/* fma (Inf, 0, c). */
#ifndef FP_EX_INVALID_IMZ_FMA
# define FP_EX_INVALID_IMZ_FMA 0
#endif
/* Inf - Inf. */
#ifndef FP_EX_INVALID_ISI
# define FP_EX_INVALID_ISI 0
#endif
/* 0 / 0. */
#ifndef FP_EX_INVALID_ZDZ
# define FP_EX_INVALID_ZDZ 0
#endif
/* Inf / Inf. */
#ifndef FP_EX_INVALID_IDI
# define FP_EX_INVALID_IDI 0
#endif
/* sqrt (negative). */
#ifndef FP_EX_INVALID_SQRT
# define FP_EX_INVALID_SQRT 0
#endif
/* Invalid conversion to integer. */
#ifndef FP_EX_INVALID_CVI
# define FP_EX_INVALID_CVI 0
#endif
/* Invalid comparison. */
#ifndef FP_EX_INVALID_VC
# define FP_EX_INVALID_VC 0
#endif
/* _FP_STRUCT_LAYOUT may be defined as an attribute to determine the /* _FP_STRUCT_LAYOUT may be defined as an attribute to determine the
struct layout variant used for structures where bit-fields are used struct layout variant used for structures where bit-fields are used
to access specific parts of binary floating-point numbers. This is to access specific parts of binary floating-point numbers. This is
...@@ -108,30 +146,37 @@ ...@@ -108,30 +146,37 @@
#endif #endif
/* Initialize any machine-specific state used in /* Initialize any machine-specific state used in
FP_TRAPPING_EXCEPTIONS or FP_HANDLE_EXCEPTIONS. */
#ifndef FP_INIT_TRAPPING_EXCEPTIONS
# define FP_INIT_TRAPPING_EXCEPTIONS FP_INIT_ROUNDMODE
#endif
/* Initialize any machine-specific state used in
FP_HANDLE_EXCEPTIONS. */ FP_HANDLE_EXCEPTIONS. */
#ifndef FP_INIT_EXCEPTIONS #ifndef FP_INIT_EXCEPTIONS
# define FP_INIT_EXCEPTIONS FP_INIT_ROUNDMODE # define FP_INIT_EXCEPTIONS FP_INIT_TRAPPING_EXCEPTIONS
#endif #endif
#ifndef FP_HANDLE_EXCEPTIONS #ifndef FP_HANDLE_EXCEPTIONS
# define FP_HANDLE_EXCEPTIONS do {} while (0) # define FP_HANDLE_EXCEPTIONS do {} while (0)
#endif #endif
/* Whether to flush subnormal inputs to zero with the same sign. */
#ifndef FP_DENORM_ZERO
# define FP_DENORM_ZERO 0
#endif
#ifndef FP_INHIBIT_RESULTS #ifndef FP_INHIBIT_RESULTS
/* By default we write the results always. /* By default we write the results always.
* sfp-machine may override this and e.g. sfp-machine may override this and e.g.
* check if some exceptions are unmasked check if some exceptions are unmasked
* and inhibit it in such a case. and inhibit it in such a case. */
*/
# define FP_INHIBIT_RESULTS 0 # define FP_INHIBIT_RESULTS 0
#endif #endif
#define FP_SET_EXCEPTION(ex) \ #define FP_SET_EXCEPTION(ex) \
_fex |= (ex) _fex |= (ex)
#define FP_CLEAR_EXCEPTIONS \
_fex = 0
#define FP_CUR_EXCEPTIONS \ #define FP_CUR_EXCEPTIONS \
(_fex) (_fex)
...@@ -166,6 +211,16 @@ ...@@ -166,6 +211,16 @@
#endif #endif
/* A file using soft-fp may define FP_NO_EXACT_UNDERFLOW before
including soft-fp.h to indicate that, although a macro used there
could allow for the case of exact underflow requiring the underflow
exception to be raised if traps are enabled, for the particular
arguments used in that file no exact underflow can occur. */
#ifdef FP_NO_EXACT_UNDERFLOW
# undef FP_TRAPPING_EXCEPTIONS
# define FP_TRAPPING_EXCEPTIONS 0
#endif
#define _FP_ROUND_NEAREST(wc, X) \ #define _FP_ROUND_NEAREST(wc, X) \
do \ do \
{ \ { \
......
...@@ -41,9 +41,7 @@ __unorddf2 (DFtype a, DFtype b) ...@@ -41,9 +41,7 @@ __unorddf2 (DFtype a, DFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_D (A, a); FP_UNPACK_RAW_D (A, a);
FP_UNPACK_RAW_D (B, b); FP_UNPACK_RAW_D (B, b);
FP_CMP_UNORD_D (r, A, B); FP_CMP_UNORD_D (r, A, B, 1);
if (r && (FP_ISSIGNAN_D (A) || FP_ISSIGNAN_D (B)))
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -41,9 +41,7 @@ __unordsf2 (SFtype a, SFtype b) ...@@ -41,9 +41,7 @@ __unordsf2 (SFtype a, SFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_S (A, a); FP_UNPACK_RAW_S (A, a);
FP_UNPACK_RAW_S (B, b); FP_UNPACK_RAW_S (B, b);
FP_CMP_UNORD_S (r, A, B); FP_CMP_UNORD_S (r, A, B, 1);
if (r && (FP_ISSIGNAN_S (A) || FP_ISSIGNAN_S (B)))
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
...@@ -41,9 +41,7 @@ __unordtf2 (TFtype a, TFtype b) ...@@ -41,9 +41,7 @@ __unordtf2 (TFtype a, TFtype b)
FP_INIT_EXCEPTIONS; FP_INIT_EXCEPTIONS;
FP_UNPACK_RAW_Q (A, a); FP_UNPACK_RAW_Q (A, a);
FP_UNPACK_RAW_Q (B, b); FP_UNPACK_RAW_Q (B, b);
FP_CMP_UNORD_Q (r, A, B); FP_CMP_UNORD_Q (r, A, B, 1);
if (r && (FP_ISSIGNAN_Q (A) || FP_ISSIGNAN_Q (B)))
FP_SET_EXCEPTION (FP_EX_INVALID);
FP_HANDLE_EXCEPTIONS; FP_HANDLE_EXCEPTIONS;
return r; return r;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment