Commit 6883a666, authored and committed by Nicolas Pitre

ieee754-sf.S: Large speed improvements.

* config/arm/ieee754-sf.S: Large speed improvements. Fix NAN handling.
* config/arm/ieee754-df.S: Ditto.

From-SVN: r89364
parent 0aab7a4b
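These files provide the libgcc soft-float support routines (__adddf3, __muldf3, __divdf3, __cmpdf2 and their RTABI aeabi_* aliases) that GCC calls on FPU-less ARM targets. The NAN handling this commit fixes is the IEEE 754 special-value behaviour; as a rough summary (mine, not part of the commit), the routines must produce results such as:

    #include <math.h>
    #include <stdio.h>

    /* On a soft-float ARM build, each double operation below compiles to a
       call into ieee754-df.S (__adddf3, __muldf3, __divdf3, __cmpdf2...). */
    int main(void)
    {
        double inf = HUGE_VAL;
        printf("%f\n", inf - inf);     /* INF - INF -> NaN            */
        printf("%f\n", 0.0 * inf);     /* 0 * INF   -> NaN            */
        printf("%f\n", 1.0 / inf);     /* x / INF   -> 0              */
        printf("%d\n", nan("") < 0.0); /* NaN compares unordered -> 0 */
        return 0;
    }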
2004-10-21 Nicolas Pitre <nico@cam.org>
* config/arm/ieee754-sf.S: Large speed improvements. Fix NAN handling.
* config/arm/ieee754-df.S: Ditto.
2004-10-20  Zack Weinberg  <zack@codesourcery.com>

	* dbxout.c (asmfile): Delete.  All uses changed to asm_out_file.
config/arm/ieee754-df.S:
@@ -60,6 +60,7 @@
ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2
@ flip sign bit
eor xh, xh, #0x80000000
RET
@@ -76,10 +77,10 @@ ARM_FUNC_START aeabi_drsub
eor xh, xh, #0x80000000 @ flip sign bit of first arg
b 1f
ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3
- @ flip sign bit of second arg
- eor yh, yh, #0x80000000
+ eor yh, yh, #0x80000000 @ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
b 1f @ Skip Thumb-code prologue
#endif
@@ -87,36 +88,23 @@ ARM_FUNC_ALIAS aeabi_dsub subdf3
ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3
- 1: @ Compare both args, return zero if equal but the sign.
- teq xl, yl
- eoreq ip, xh, yh
- teqeq ip, #0x80000000
- beq LSYM(Lad_z)
- @ If first arg is 0 or -0, return second arg.
- @ If second arg is 0 or -0, return first arg.
- orrs ip, xl, xh, lsl #1
- moveq xl, yl
- moveq xh, yh
- orrnes ip, yl, yh, lsl #1
- RETc(eq)
- stmfd sp!, {r4, r5, lr}
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
- @ If either of them is 0x7ff, result will be INF or NAN
- teq r4, ip
- teqne r5, ip
- beq LSYM(Lad_i)
+ 1: stmfd sp!, {r4, r5, lr}
+ @ Look for zeroes, equal values, INF, or NAN.
+ mov r4, xh, lsl #1
+ mov r5, yh, lsl #1
+ teq r4, r5
+ teqeq xl, yl
+ orrnes ip, r4, xl
+ orrnes ip, r5, yl
+ mvnnes ip, r4, asr #21
+ mvnnes ip, r5, asr #21
+ beq LSYM(Lad_s)
@ Compute exponent difference. Make largest exponent in r4,
@ corresponding arg in xh-xl, and positive exponent difference in r5.
- subs r5, r5, r4
+ mov r4, r4, lsr #21
+ rsbs r5, r4, r5, lsr #21
rsblt r5, r5, #0
ble 1f
add r4, r4, r5
@@ -127,24 +115,24 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
eor yl, xl, yl
eor yh, xh, yh
1:
@ If exponent difference is too large, return largest argument
@ already in xh-xl. We need up to 54 bit to handle proper rounding
@ of 0x1p54 - 1.1.
- cmp r5, #(54 << 20)
+ cmp r5, #54
RETLDM "r4, r5" hi
@ Convert mantissa to signed integer.
tst xh, #0x80000000
- bic xh, xh, ip, lsl #1
- orr xh, xh, #0x00100000
+ mov xh, xh, lsl #12
+ mov ip, #0x00100000
+ orr xh, ip, xh, lsr #12
beq 1f
rsbs xl, xl, #0
rsc xh, xh, #0
1:
tst yh, #0x80000000
- bic yh, yh, ip, lsl #1
- orr yh, yh, #0x00100000
+ mov yh, yh, lsl #12
+ orr yh, ip, yh, lsr #12
beq 1f
rsbs yl, yl, #0
rsc yh, yh, #0
@@ -154,42 +142,30 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
teq r4, r5
beq LSYM(Lad_d)
LSYM(Lad_x):
- @ Scale down second arg with exponent difference.
- @ Apply shift one bit left to first arg and the rest to second arg
- @ to simplify things later, but only if exponent does not become 0.
- mov ip, #0
- movs r5, r5, lsr #20
- beq 3f
- teq r4, #(1 << 20)
- beq 1f
- movs xl, xl, lsl #1
- adc xh, ip, xh, lsl #1
- sub r4, r4, #(1 << 20)
- subs r5, r5, #1
- beq 3f
- @ Shift yh-yl right per r5, keep leftover bits into ip.
- 1: rsbs lr, r5, #32
- blt 2f
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r4, r4, #1
+ @ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
+ rsbs lr, r5, #32
+ blt 1f
mov ip, yl, lsl lr
- mov yl, yl, lsr r5
- orr yl, yl, yh, lsl lr
- mov yh, yh, asr r5
- b 3f
- 2: sub r5, r5, #32
+ adds xl, xl, yl, lsr r5
+ adc xh, xh, #0
+ adds xl, xl, yh, lsl lr
+ adcs xh, xh, yh, asr r5
+ b 2f
+ 1: sub r5, r5, #32
add lr, lr, #32
cmp yl, #1
- adc ip, ip, yh, lsl lr
- mov yl, yh, asr r5
- mov yh, yh, asr #32
- 3:
- @ the actual addition
- adds xl, xl, yl
- adc xh, xh, yh
+ mov ip, yh, lsl lr
+ orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
+ adds xl, xl, yh, asr r5
+ adcs xh, xh, yh, asr #31
+ 2:
@ We now have a result in xh-xl-ip.
- @ Keep absolute value in xh-xl-ip, sign in r5.
- ands r5, xh, #0x80000000
+ @ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
+ and r5, xh, #0x80000000
bpl LSYM(Lad_p)
rsbs ip, ip, #0
rscs xl, xl, #0
@@ -198,75 +174,66 @@ LSYM(Lad_x):
@ Determine how to normalize the result.
LSYM(Lad_p):
cmp xh, #0x00100000
- bcc LSYM(Lad_l)
+ bcc LSYM(Lad_a)
cmp xh, #0x00200000
- bcc LSYM(Lad_r0)
- cmp xh, #0x00400000
- bcc LSYM(Lad_r1)
+ bcc LSYM(Lad_e)
@ Result needs to be shifted right.
movs xh, xh, lsr #1
movs xl, xl, rrx
- movs ip, ip, rrx
- orrcs ip, ip, #1
- add r4, r4, #(1 << 20)
- LSYM(Lad_r1):
- movs xh, xh, lsr #1
- movs xl, xl, rrx
- movs ip, ip, rrx
- orrcs ip, ip, #1
- add r4, r4, #(1 << 20)
+ mov ip, ip, rrx
+ add r4, r4, #1
+ @ Make sure we did not bust our exponent.
+ mov r2, r4, lsl #21
+ cmn r2, #(2 << 21)
+ bcs LSYM(Lad_o)
@ Our result is now properly aligned into xh-xl, remaining bits in ip.
@ Round with MSB of ip. If halfway between two numbers, round towards
@ LSB of xl = 0.
- LSYM(Lad_r0):
- adds xl, xl, ip, lsr #31
- adc xh, xh, #0
- teq ip, #0x80000000
- biceq xl, xl, #1
- @ One extreme rounding case may add a new MSB. Adjust exponent.
- @ That MSB will be cleared when exponent is merged below.
- tst xh, #0x00200000
- addne r4, r4, #(1 << 20)
- @ Make sure we did not bust our exponent.
- adds ip, r4, #(1 << 20)
- bmi LSYM(Lad_o)
@ Pack final result together.
LSYM(Lad_e):
- bic xh, xh, #0x00300000
- orr xh, xh, r4
+ cmp ip, #0x80000000
+ moveqs ip, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
orr xh, xh, r5
RETLDM "r4, r5"
- LSYM(Lad_l):
@ Result must be shifted left and exponent adjusted.
- @ No rounding necessary since ip will always be 0.
+ LSYM(Lad_a):
+ movs ip, ip, lsl #1
+ adcs xl, xl, xl
+ adc xh, xh, xh
+ tst xh, #0x00100000
+ sub r4, r4, #1
+ bne LSYM(Lad_e)
+ @ No rounding necessary since ip will always be 0 at this point.
+ LSYM(Lad_l):
#if __ARM_ARCH__ < 5
teq xh, #0
- movne r3, #-11
- moveq r3, #21
+ movne r3, #20
+ moveq r3, #52
moveq xh, xl
moveq xl, #0
mov r2, xh
- movs ip, xh, lsr #16
- moveq r2, r2, lsl #16
- addeq r3, r3, #16
- tst r2, #0xff000000
- moveq r2, r2, lsl #8
- addeq r3, r3, #8
- tst r2, #0xf0000000
- moveq r2, r2, lsl #4
- addeq r3, r3, #4
- tst r2, #0xc0000000
- moveq r2, r2, lsl #2
- addeq r3, r3, #2
- tst r2, #0x80000000
- addeq r3, r3, #1
+ cmp r2, #(1 << 16)
+ movhs r2, r2, lsr #16
+ subhs r3, r3, #16
+ cmp r2, #(1 << 8)
+ movhs r2, r2, lsr #8
+ subhs r3, r3, #8
+ cmp r2, #(1 << 4)
+ movhs r2, r2, lsr #4
+ subhs r3, r3, #4
+ cmp r2, #(1 << 2)
+ subhs r3, r3, #2
+ sublo r3, r3, r2, lsr #1
+ sub r3, r3, r2, lsr #3
#else
@@ -302,13 +269,15 @@ LSYM(Lad_l):
movle xl, xl, lsl r2
@ adjust exponent accordingly.
- 3: subs r4, r4, r3, lsl #20
- bgt LSYM(Lad_e)
+ 3: subs r4, r4, r3
+ addge xh, xh, r4, lsl #20
+ orrge xh, xh, r5
+ RETLDM "r4, r5" ge
@ Exponent too small, denormalize result.
@ Find out proper shift value.
- mvn r4, r4, asr #20
- subs r4, r4, #30
+ mvn r4, r4
+ subs r4, r4, #31
bge 2f
adds r4, r4, #12
bgt 1f
@@ -337,23 +306,49 @@ LSYM(Lad_l):
RETLDM "r4, r5"
@ Adjust exponents for denormalized arguments.
+ @ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
teq r4, #0
- eoreq xh, xh, #0x00100000
- addeq r4, r4, #(1 << 20)
eor yh, yh, #0x00100000
- subne r5, r5, #(1 << 20)
+ eoreq xh, xh, #0x00100000
+ addeq r4, r4, #1
+ subne r5, r5, #1
b LSYM(Lad_x)
- @ Result is x - x = 0, unless x = INF or NAN.
- LSYM(Lad_z):
- sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000
- and r2, xh, ip
- teq r2, ip
- orreq xh, ip, #0x00080000
+ LSYM(Lad_s):
+ mvns ip, r4, asr #21
+ mvnnes ip, r5, asr #21
+ beq LSYM(Lad_i)
+ teq r4, r5
+ teqeq xl, yl
+ beq 1f
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ teq r4, #0
+ moveq xh, yh
+ moveq xl, yl
+ RETLDM "r4, r5"
+ 1: teq xh, yh
+ @ Result is x - x = 0.
movne xh, #0
- mov xl, #0
- RET
+ movne xl, #0
+ RETLDM "r4, r5" ne
+ @ Result is x + x = 2x.
+ movs ip, r4, lsr #21
+ bne 2f
+ movs xl, xl, lsl #1
+ adcs xh, xh, xh
+ orrcs xh, xh, #0x80000000
+ RETLDM "r4, r5"
+ 2: adds r4, r4, #(2 << 21)
+ addcc xh, xh, #(1 << 20)
+ RETLDM "r4, r5" cc
+ and r5, xh, #0x80000000
@ Overflow: return INF.
LSYM(Lad_o):
@@ -367,19 +362,18 @@ LSYM(Lad_o):
@ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
@ if either is NAN: return NAN
@ if opposite sign: return NAN
- @ return xh-xl (which is INF or -INF)
+ @ otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
- teq r4, ip
+ mvns ip, r4, asr #21
movne xh, yh
movne xl, yl
- teqeq r5, ip
- RETLDM "r4, r5" ne
+ mvneqs ip, r5, asr #21
+ movne yh, xh
+ movne yl, xl
orrs r4, xl, xh, lsl #12
- orreqs r4, yl, yh, lsl #12
+ orreqs r5, yl, yh, lsl #12
teqeq xh, yh
- orrne xh, r5, #0x00080000
- movne xl, #0
+ orrne xh, xh, #0x00080000 @ quiet NAN
RETLDM "r4, r5"
FUNC_END aeabi_dsub
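Throughout the addition path the bits shifted out of the smaller operand are collected in ip, and the final rounding uses its MSB, with ties broken towards an even LSB. A small C model of that round-to-nearest-even step (a sketch with invented names, not code from the patch):

    #include <stdint.h>

    /* 'mant' is the aligned 53-bit result, 'rest' the bits shifted out:
       bit 31 of 'rest' is the guard bit, lower bits are sticky. */
    static uint64_t round_nearest_even(uint64_t mant, uint32_t rest)
    {
        mant += rest >> 31;            /* add the guard bit              */
        if (rest == 0x80000000u)       /* exactly halfway between values */
            mant &= ~(uint64_t)1;      /* round towards LSB = 0 (even)   */
        return mant;
    }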
@@ -389,14 +383,17 @@ LSYM(Lad_i):
ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52-1) << 20)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
mov r5, #0 @ sign bit is 0
+ .ifnc xl, r0
mov xl, r0
+ .endif
mov xh, #0
b LSYM(Lad_l)
@@ -405,15 +402,18 @@ ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52-1) << 20)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
ands r5, r0, #0x80000000 @ sign bit in r5
rsbmi r0, r0, #0 @ absolute value
+ .ifnc xl, r0
mov xl, r0
+ .endif
mov xh, #0
b LSYM(Lad_l)
@@ -422,26 +422,23 @@ ARM_FUNC_ALIAS aeabi_i2d floatsidf
ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
- movs r2, r0, lsl #1
- beq 1f @ value is 0.0 or -0.0
+ movs r2, r0, lsl #1 @ toss sign bit
mov xh, r2, asr #3 @ stretch exponent
mov xh, xh, rrx @ retrieve sign bit
mov xl, r2, lsl #28 @ retrieve remaining bits
- ands r2, r2, #0xff000000 @ isolate exponent
- beq 2f @ exponent was 0 but not mantissa
- teq r2, #0xff000000 @ check if INF or NAN
+ andnes r3, r2, #0xff000000 @ isolate exponent
+ teqne r3, #0xff000000 @ if not 0, check if INF or NAN
eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
- RET
- 1: mov xh, r0
- mov xl, #0
- RET
- 2: @ value was denormalized. We can normalize it now.
+ RETc(ne) @ and return it.
+ teq r2, #0 @ if actually 0
+ teqne r3, #0xff000000 @ or INF or NAN
+ RETc(eq) @ we are done already.
+ @ value was denormalized. We can normalize it now.
stmfd sp!, {r4, r5, lr}
- mov r4, #(0x380 << 20) @ setup corresponding exponent
- add r4, r4, #(1 << 20)
+ mov r4, #0x380 @ setup corresponding exponent
and r5, xh, #0x80000000 @ move sign bit in r5
bic xh, xh, #0x80000000
b LSYM(Lad_l)
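For reference, the fast path of extendsfdf2 amounts to re-biasing the 8-bit exponent to 11 bits (127 to 1023, the 0x38000000 fixup above) and widening the fraction. A hedged C equivalent for normal numbers only (bit layouts assumed, special cases omitted):

    #include <stdint.h>

    static uint64_t extend_sf_df_bits(uint32_t f)
    {
        uint64_t sign = (uint64_t)(f & 0x80000000u) << 32;
        uint64_t exp  = (f >> 23) & 0xff;          /* 8-bit biased exponent */
        uint64_t frac = f & 0x007fffffu;           /* 23-bit fraction */

        /* Re-bias 127 -> 1023 and move the fraction up to 52 bits. */
        return sign | ((exp - 127 + 1023) << 52) | (frac << 29);
    }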
@@ -451,76 +448,90 @@ ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
- adr ip, 1f
+ adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
mov r5, #0
b 2f
ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
- adr ip, 1f
+ adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
ands r5, ah, #0x80000000 @ sign bit in r5
bpl 2f
rsbs al, al, #0
rsc ah, ah, #0
2:
- mov r4, #(0x400 << 20) @ initial exponent
- add r4, r4, #((52 - 1) << 20)
- #if !defined (__VFP_FP__) && !defined(__ARMEB__)
+ mov r4, #0x400 @ initial exponent
+ add r4, r4, #(52-1 - 1)
@ FPA little-endian: must swap the word order.
+ .ifnc xh, ah
mov ip, al
mov xh, ah
mov xl, ip
- #endif
- movs ip, xh, lsr #23
+ .endif
+ movs ip, xh, lsr #22
beq LSYM(Lad_p)
- @ The value's too big. Scale it down a bit...
+ @ The value is too big. Scale it down a bit...
mov r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
+ add r2, r2, ip
rsb r3, r2, #32
mov ip, xl, lsl r3
mov xl, xl, lsr r2
orr xl, xl, xh, lsl r3
mov xh, xh, lsr r2
- add r4, r4, r2, lsl #20
+ add r4, r4, r2
b LSYM(Lad_p)
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
- 1:
@ Legacy code expects the result to be returned in f0. Copy it
@ there as well.
+ LSYM(f0_ret):
stmfd sp!, {r0, r1}
ldfd f0, [sp], #8
RETLDM
#endif
FUNC_END floatdidf
FUNC_END aeabi_l2d
FUNC_END floatundidf
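The "scale it down" chunk above pre-shifts a 64-bit magnitude that does not fit in 53 mantissa bits, keeping the shifted-out bits for the shared rounding code. The same idea in C (a sketch; the assembly keeps the full leftover word rather than a single sticky bit):

    #include <stdint.h>

    /* Narrow a 64-bit magnitude to at most 53 significant bits; every bit
       shifted out is remembered so round-to-nearest-even stays exact. */
    static uint64_t prescale(uint64_t v, unsigned *lost, int *shift)
    {
        int s = 0;
        while (v >> 53) {
            *lost |= (unsigned)(v & 1);   /* sticky: any 1 bit lost */
            v >>= 1;
            s++;
        }
        *shift = s;                       /* caller adds s to the exponent */
        return v;
    }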
@@ -534,46 +545,38 @@ ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
stmfd sp!, {r4, r5, r6, lr}
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
- @ Trap any INF/NAN.
- teq r4, ip
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ andnes r5, ip, yh, lsr #20
+ teqne r4, ip
teqne r5, ip
- beq LSYM(Lml_s)
- @ Trap any multiplication by 0.
- orrs r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- beq LSYM(Lml_z)
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
- @ Then add both exponents together.
- movs r4, r4, lsr #1
- teqne r5, #0
- beq LSYM(Lml_d)
- LSYM(Lml_x):
- add r4, r4, r5, asr #1
- @ Preserve final sign in r4 along with exponent for now.
- teq xh, yh
- orrmi r4, r4, #0x8000
+ bleq LSYM(Lml_s)
+ @ Add exponents together
+ add r4, r4, r5
+ @ Determine final sign.
+ eor r6, xh, yh
@ Convert mantissa to unsigned integer.
- bic xh, xh, ip, lsl #1
- bic yh, yh, ip, lsl #1
+ @ If power of two, branch to a separate path.
+ bic xh, xh, ip, lsl #21
+ bic yh, yh, ip, lsl #21
+ orrs r5, xl, xh, lsl #12
+ orrnes r5, yl, yh, lsl #12
orr xh, xh, #0x00100000
orr yh, yh, #0x00100000
+ beq LSYM(Lml_1)
#if __ARM_ARCH__ < 4
+ @ Put sign bit in r6, which will be restored in yl later.
+ and r6, r6, #0x80000000
@ Well, no way to make it shorter without the umull instruction.
- @ We must perform that 53 x 53 bit multiplication by hand.
- stmfd sp!, {r7, r8, r9, sl, fp}
+ stmfd sp!, {r6, r7, r8, r9, sl, fp}
mov r7, xl, lsr #16
mov r8, yl, lsr #16
mov r9, xh, lsr #16
@@ -625,92 +628,83 @@ LSYM(Lml_x):
mul fp, xh, yh
adcs r5, r5, fp
adc r6, r6, #0
- ldmfd sp!, {r7, r8, r9, sl, fp}
+ ldmfd sp!, {yl, r7, r8, r9, sl, fp}
#else
- @ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
+ @ Here is the actual multiplication.
umull ip, lr, xl, yl
mov r5, #0
- umlal lr, r5, xl, yh
umlal lr, r5, xh, yl
+ and yl, r6, #0x80000000
+ umlal lr, r5, xl, yh
mov r6, #0
umlal r5, r6, xh, yh
#endif
@ The LSBs in ip are only significant for the final rounding.
- @ Fold them into one bit of lr.
+ @ Fold them into lr.
teq ip, #0
orrne lr, lr, #1
- @ Put final sign in xh.
- mov xh, r4, lsl #16
- bic r4, r4, #0x8000
- @ Adjust result if one extra MSB appeared (one of four times).
- tst r6, #(1 << 9)
- beq 1f
- add r4, r4, #(1 << 19)
- movs r6, r6, lsr #1
- movs r5, r5, rrx
- movs lr, lr, rrx
- orrcs lr, lr, #1
+ @ Adjust result upon the MSB position.
+ sub r4, r4, #0xff
+ cmp r6, #(1 << (20-11))
+ sbc r4, r4, #0x300
+ bcs 1f
+ movs lr, lr, lsl #1
+ adcs r5, r5, r5
+ adc r6, r6, r6
1:
- @ Scale back to 53 bits.
- @ xh contains sign bit already.
- orr xh, xh, r6, lsl #12
- orr xh, xh, r5, lsr #20
- mov xl, r5, lsl #12
- orr xl, xl, lr, lsr #20
- @ Apply exponent bias, check range for underflow.
- sub r4, r4, #0x00f80000
- subs r4, r4, #0x1f000000
- ble LSYM(Lml_u)
- @ Round the result.
- movs lr, lr, lsl #12
- bpl 1f
- adds xl, xl, #1
- adc xh, xh, #0
- teq lr, #0x80000000
- biceq xl, xl, #1
- @ Rounding may have produced an extra MSB here.
- @ The extra bit is cleared before merging the exponent below.
- tst xh, #0x00200000
- addne r4, r4, #(1 << 19)
- 1:
- @ Check exponent for overflow.
- adds ip, r4, #(1 << 19)
- tst ip, #(1 << 30)
- bne LSYM(Lml_o)
- @ Add final exponent.
- bic xh, xh, #0x00300000
- orr xh, xh, r4, lsl #1
+ @ Shift to final position, add sign to result.
+ orr xh, yl, r6, lsl #11
+ orr xh, xh, r5, lsr #21
+ mov xl, r5, lsl #11
+ orr xl, xl, lr, lsr #21
+ mov lr, lr, lsl #11
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
+ @ Round the result, merge final exponent.
+ cmp lr, #0x80000000
+ moveqs lr, xl, lsr #1
+ adcs xl, xl, #0
+ adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
- @ Result is 0, but determine sign anyway.
- LSYM(Lml_z):
- eor xh, xh, yh
- LSYM(Ldv_z):
- bic xh, xh, #0x7fffffff
- mov xl, #0
- RETLDM "r4, r5, r6"
+ @ Multiplication by 0x1p*: let's shortcut a lot of code.
+ LSYM(Lml_1):
+ and r6, r6, #0x80000000
+ orr xh, r6, xh
+ orr xl, xl, yl
+ eor xh, xh, yh
+ subs r4, r4, ip, lsr #1
+ rsbgts r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
+ RETLDM "r4, r5, r6" gt
+ @ Under/overflow: fix things up for the code below.
+ orr xh, xh, #0x00100000
+ mov lr, #0
+ subs r4, r4, #1
- @ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
- cmn r4, #(53 << 19)
+ @ Overflow?
+ bgt LSYM(Lml_o)
+ @ Check if denormalized result is possible, otherwise return signed 0.
+ cmn r4, #(53 + 1)
movle xl, #0
bicle xh, xh, #0x7fffffff
RETLDM "r4, r5, r6" le
@ Find out proper shift value.
- LSYM(Lml_r):
- mvn r4, r4, asr #19
- subs r4, r4, #30
+ rsb r4, r4, #0
+ subs r4, r4, #32
bge 2f
adds r4, r4, #12
bgt 1f
@@ -721,14 +715,12 @@ LSYM(Lml_r):
mov r3, xl, lsl r5
mov xl, xl, lsr r4
orr xl, xl, xh, lsl r5
- movs xh, xh, lsl #1
- mov xh, xh, lsr r4
- mov xh, xh, rrx
+ and r2, xh, #0x80000000
+ bic xh, xh, #0x80000000
adds xl, xl, r3, lsr #31
- adc xh, xh, #0
- teq lr, #0
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ adc xh, r2, xh, lsr r4
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
@@ -741,53 +733,70 @@ LSYM(Lml_r):
bic xh, xh, #0x7fffffff
adds xl, xl, r3, lsr #31
adc xh, xh, #0
- teq lr, #0
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
@ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
2: rsb r5, r4, #32
- mov r6, xl, lsl r5
+ orr lr, lr, xl, lsl r5
mov r3, xl, lsr r4
orr r3, r3, xh, lsl r5
mov xl, xh, lsr r4
bic xh, xh, #0x7fffffff
bic xl, xl, xh, lsr r4
add xl, xl, r3, lsr #31
- orrs r6, r6, lr
- teqeq r3, #0x80000000
- biceq xl, xl, #1
+ orrs lr, lr, r3, lsl #1
+ biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
- mov lr, #0
teq r4, #0
bne 2f
and r6, xh, #0x80000000
1: movs xl, xl, lsl #1
- adc xh, lr, xh, lsl #1
+ adc xh, xh, xh
tst xh, #0x00100000
- subeq r4, r4, #(1 << 19)
+ subeq r4, r4, #1
beq 1b
orr xh, xh, r6
teq r5, #0
- bne LSYM(Lml_x)
+ movne pc, lr
2: and r6, yh, #0x80000000
3: movs yl, yl, lsl #1
- adc yh, lr, yh, lsl #1
+ adc yh, yh, yh
tst yh, #0x00100000
- subeq r5, r5, #(1 << 20)
+ subeq r5, r5, #1
beq 3b
orr yh, yh, r6
- b LSYM(Lml_x)
- @ One or both args are INF or NAN.
+ mov pc, lr
LSYM(Lml_s):
+ @ Isolate the INF and NAN cases away
+ teq r4, ip
+ and r5, ip, yh, lsr #20
+ teqne r5, ip
+ beq 1f
+ @ Here, one or more arguments are either denormalized or zero.
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+ @ Result is 0, but determine sign anyway.
+ LSYM(Lml_z):
+ eor xh, xh, yh
+ bic xh, xh, #0x7fffffff
+ mov xl, #0
+ RETLDM "r4, r5, r6"
+ 1: @ One or both args are INF or NAN.
orrs r6, xl, xh, lsl #1
+ moveq xl, yl
+ moveq xh, yh
orrnes r6, yl, yh, lsl #1
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
teq r4, ip
@@ -797,6 +806,8 @@ LSYM(Lml_s):
1: teq r5, ip
bne LSYM(Lml_i)
orrs r6, yl, yh, lsl #12
+ movne xl, yl
+ movne xh, yh
bne LSYM(Lml_n) @ <anything> * NAN -> NAN
@ Result is INF, but we need to determine its sign.
@@ -811,9 +822,9 @@ LSYM(Lml_o):
mov xl, #0
RETLDM "r4, r5, r6"
- @ Return NAN.
+ @ Return a quiet NAN.
LSYM(Lml_n):
- mov xh, #0x7f000000
+ orr xh, xh, #0x7f000000
orr xh, xh, #0x00f80000
RETLDM "r4, r5, r6"
@@ -825,41 +836,31 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
stmfd sp!, {r4, r5, r6, lr}
- @ Mask out exponents.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r4, xh, ip
- and r5, yh, ip
- @ Trap any INF/NAN or zeroes.
- teq r4, ip
+ @ Mask out exponents, trap any zero/denormal/INF/NAN.
+ mov ip, #0xff
+ orr ip, ip, #0x700
+ ands r4, ip, xh, lsr #20
+ andnes r5, ip, yh, lsr #20
+ teqne r4, ip
teqne r5, ip
- orrnes r6, xl, xh, lsl #1
- orrnes r6, yl, yh, lsl #1
- beq LSYM(Ldv_s)
- @ Shift exponents right one bit to make room for overflow bit.
- @ If either of them is 0, scale denormalized arguments off line.
- @ Then substract divisor exponent from dividend's.
- movs r4, r4, lsr #1
- teqne r5, #0
- beq LSYM(Ldv_d)
- LSYM(Ldv_x):
- sub r4, r4, r5, asr #1
+ bleq LSYM(Ldv_s)
+ @ Substract divisor exponent from dividend's.
+ sub r4, r4, r5
@ Preserve final sign into lr.
eor lr, xh, yh
@ Convert mantissa to unsigned integer.
@ Dividend -> r5-r6, divisor -> yh-yl.
- mov r5, #0x10000000
+ orrs r5, yl, yh, lsl #12
+ mov xh, xh, lsl #12
+ beq LSYM(Ldv_1)
mov yh, yh, lsl #12
+ mov r5, #0x10000000
orr yh, r5, yh, lsr #4
orr yh, yh, yl, lsr #24
- movs yl, yl, lsl #8
- mov xh, xh, lsl #12
- teqeq yh, r5
- beq LSYM(Ldv_1)
+ mov yl, yl, lsl #8
orr r5, r5, xh, lsr #4
orr r5, r5, xl, lsr #24
mov r6, xl, lsl #8
@@ -868,21 +869,15 @@ LSYM(Ldv_x):
and xh, lr, #0x80000000
@ Ensure result will land to known bit position.
+ @ Apply exponent bias accordingly.
cmp r5, yh
cmpeq r6, yl
+ adc r4, r4, #(255 - 2)
+ add r4, r4, #0x300
bcs 1f
- sub r4, r4, #(1 << 19)
movs yh, yh, lsr #1
mov yl, yl, rrx
1:
- @ Apply exponent bias, check range for over/underflow.
- add r4, r4, #0x1f000000
- add r4, r4, #0x00f80000
- cmn r4, #(53 << 19)
- ble LSYM(Ldv_z)
- cmp r4, ip, lsr #1
- bge LSYM(Lml_o)
@ Perform first substraction to align result to a nibble.
subs r6, r6, yl
sbc r5, r5, yh
@@ -944,73 +939,42 @@ LSYM(Ldv_x):
orreq xh, xh, xl
moveq xl, #0
3:
- @ Check if denormalized result is needed.
- cmp r4, #0
- ble LSYM(Ldv_u)
+ @ Check exponent range for under/overflow.
+ subs ip, r4, #(254 - 1)
+ cmphi ip, #0x700
+ bhi LSYM(Lml_u)
- @ Apply proper rounding.
+ @ Round the result, merge final exponent.
subs ip, r5, yh
subeqs ip, r6, yl
+ moveqs ip, xl, lsr #1
adcs xl, xl, #0
- adc xh, xh, #0
- teq ip, #0
- biceq xl, xl, #1
- @ Add exponent to result.
- bic xh, xh, #0x00100000
- orr xh, xh, r4, lsl #1
+ adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
and lr, lr, #0x80000000
orr xh, lr, xh, lsr #12
- add r4, r4, #0x1f000000
- add r4, r4, #0x00f80000
- cmp r4, ip, lsr #1
- bge LSYM(Lml_o)
- cmp r4, #0
- orrgt xh, xh, r4, lsl #1
+ adds r4, r4, ip, lsr #1
+ rsbgts r5, r4, ip
+ orrgt xh, xh, r4, lsl #20
RETLDM "r4, r5, r6" gt
- cmn r4, #(53 << 19)
- ble LSYM(Ldv_z)
orr xh, xh, #0x00100000
mov lr, #0
- b LSYM(Lml_r)
+ subs r4, r4, #1
+ b LSYM(Lml_u)
- @ Result must be denormalized: put remainder in lr for
- @ rounding considerations.
+ @ Result mightt need to be denormalized: put remainder bits
+ @ in lr for rounding considerations.
LSYM(Ldv_u):
orr lr, r5, r6
- b LSYM(Lml_r)
- @ One or both arguments are denormalized.
- @ Scale them leftwards and preserve sign bit.
- LSYM(Ldv_d):
- mov lr, #0
- teq r4, #0
- bne 2f
- and r6, xh, #0x80000000
- 1: movs xl, xl, lsl #1
- adc xh, lr, xh, lsl #1
- tst xh, #0x00100000
- subeq r4, r4, #(1 << 19)
- beq 1b
- orr xh, xh, r6
- teq r5, #0
- bne LSYM(Ldv_x)
- 2: and r6, yh, #0x80000000
- 3: movs yl, yl, lsl #1
- adc yh, lr, yh, lsl #1
- tst yh, #0x00100000
- subeq r5, r5, #(1 << 20)
- beq 3b
- orr yh, yh, r6
- b LSYM(Ldv_x)
+ b LSYM(Lml_u)
@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
+ and r5, ip, yh, lsr #20
teq r4, ip
teqeq r5, ip
beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
@@ -1018,13 +982,23 @@ LSYM(Ldv_s):
bne 1f
orrs r4, xl, xh, lsl #12
bne LSYM(Lml_n) @ NAN / <anything> -> NAN
- b LSYM(Lml_i) @ INF / <anything> -> INF
+ teq r5, ip
+ bne LSYM(Lml_i) @ INF / <anything> -> INF
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
1: teq r5, ip
bne 2f
orrs r5, yl, yh, lsl #12
- bne LSYM(Lml_n) @ <anything> / NAN -> NAN
- b LSYM(Lml_z) @ <anything> / INF -> 0
- 2: @ One or both arguments are 0.
+ beq LSYM(Lml_z) @ <anything> / INF -> 0
+ mov xl, yl
+ mov xh, yh
+ b LSYM(Lml_n) @ <anything> / NAN -> NAN
+ 2: @ If both are non-zero, we need to normalize and resume above.
+ orrs r6, xl, xh, lsl #1
+ orrnes r6, yl, yh, lsl #1
+ bne LSYM(Lml_d)
+ @ One or both arguments are 0.
orrs r4, xl, xh, lsl #1
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
orrs r5, yl, yh, lsl #1
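Summarizing the Ldv_s dispatch above in C (a behavioural sketch only; signs handled with copysign, not the actual register flow):

    #include <math.h>

    static double div_special(double x, double y)
    {
        double s = copysign(1.0, x) * copysign(1.0, y);
        if (isnan(x) || isnan(y))   return NAN;          /* Lml_n */
        if (isinf(x) && isinf(y))   return NAN;          /* INF / INF */
        if (x == 0.0 && y == 0.0)   return NAN;          /* 0 / 0 */
        if (isinf(x) || y == 0.0)   return s * INFINITY; /* Lml_i */
        if (isinf(y) || x == 0.0)   return s * 0.0;      /* Lml_z */
        return x / y;               /* normal path (Lml_d if denormal) */
    }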
@@ -1038,6 +1012,8 @@ LSYM(Ldv_s):
#ifdef L_cmpdf2
+ @ Note: only r0 (return value) and ip are clobbered here.
ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
mov ip, #-1
@@ -1053,15 +1029,13 @@ ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
mov ip, #1 @ how should we specify unordered here?
- 1: stmfd sp!, {r4, r5, lr}
+ 1: str ip, [sp, #-4]
@ Trap any INF/NAN first.
- mov lr, #0x7f000000
- orr lr, lr, #0x00f00000
- and r4, xh, lr
- and r5, yh, lr
- teq r4, lr
- teqne r5, lr
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
+ mov ip, yh, lsl #1
+ mvnnes ip, ip, asr #21
beq 3f
@ Test for equality.
@@ -1071,37 +1045,37 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
teqne xh, yh @ or xh == yh
teqeq xl, yl @ and xl == yl
moveq r0, #0 @ then equal.
- RETLDM "r4, r5" eq
+ RETc(eq)
- @ Check for sign difference.
- teq xh, yh
- movmi r0, xh, asr #31
- orrmi r0, r0, #1
- RETLDM "r4, r5" mi
- @ Compare exponents.
- cmp r4, r5
- @ Compare mantissa if exponents are equal.
- moveq xh, xh, lsl #12
- cmpeq xh, yh, lsl #12
+ @ Clear C flag
+ cmn r0, #0
+ @ Compare sign,
+ teq xh, yh
+ @ Compare values if same sign
+ cmppl xh, yh
cmpeq xl, yl
+ @ Result:
movcs r0, yh, asr #31
mvncc r0, yh, asr #31
orr r0, r0, #1
- RETLDM "r4, r5"
+ RET
@ Look for a NAN.
- 3: teq r4, lr
+ 3: mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
bne 4f
- orrs xl, xl, xh, lsl #12
+ orrs ip, xl, xh, lsl #12
bne 5f @ x is NAN
- 4: teq r5, lr
+ 4: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
bne 2b
- orrs yl, yl, yh, lsl #12
+ orrs ip, yl, yh, lsl #12
beq 2b @ y is not NAN
- 5: mov r0, ip @ return unordered code from ip
- RETLDM "r4, r5"
+ 5: ldr r0, [sp, #-4] @ unordered return code
+ RET
FUNC_END gedf2
FUNC_END gtdf2
@@ -1112,6 +1086,7 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
FUNC_END cmpdf2
ARM_FUNC_START aeabi_cdrcmple
mov ip, r0
mov r0, r2
mov r2, ip
@@ -1122,85 +1097,95 @@ ARM_FUNC_START aeabi_cdrcmple
ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
- 6: stmfd sp!, {r0, r1, r2, r3, lr}
+ 6: stmfd sp!, {r0, lr}
ARM_CALL cmpdf2
@ Set the Z flag correctly, and the C flag unconditionally.
cmp r0, #0
@ Clear the C flag if the return value was -1, indicating
@ that the first operand was smaller than the second.
cmnmi r0, #0
- RETLDM "r0, r1, r2, r3"
+ RETLDM "r0"
FUNC_END aeabi_cdcmple
FUNC_END aeabi_cdcmpeq
+ FUNC_END aeabi_cdrcmple
ARM_FUNC_START aeabi_dcmpeq
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
moveq r0, #1 @ Equal to.
movne r0, #0 @ Less than, greater than, or unordered.
RETLDM
FUNC_END aeabi_dcmpeq
ARM_FUNC_START aeabi_dcmplt
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movcc r0, #1 @ Less than.
movcs r0, #0 @ Equal to, greater than, or unordered.
RETLDM
FUNC_END aeabi_dcmplt
ARM_FUNC_START aeabi_dcmple
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movls r0, #1 @ Less than or equal to.
movhi r0, #0 @ Greater than or unordered.
RETLDM
FUNC_END aeabi_dcmple
ARM_FUNC_START aeabi_dcmpge
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
RETLDM
FUNC_END aeabi_dcmpge
ARM_FUNC_START aeabi_dcmpgt
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movcc r0, #1 @ Operand 2 is less than operand 1.
movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
@ or they are unordered.
RETLDM
FUNC_END aeabi_dcmpgt
#endif /* L_cmpdf2 */
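All five aeabi_dcmp* predicates funnel through the flag-setting helpers and then test one condition code. Ignoring the flag mechanics, their results match this C model (assuming __cmpdf2's three-way contract: negative for less, zero for equal, positive for greater or unordered):

    int __cmpdf2(double a, double b);   /* libgcc three-way compare */

    int dcmpeq(double a, double b) { return __cmpdf2(a, b) == 0; }
    int dcmplt(double a, double b) { return __cmpdf2(a, b) <  0; }
    int dcmple(double a, double b) { return __cmpdf2(a, b) <= 0; }
    /* dcmpge/dcmpgt go through aeabi_cdrcmple, which swaps operands. */
    int dcmpge(double a, double b) { return __cmpdf2(b, a) <= 0; }
    int dcmpgt(double a, double b) { return __cmpdf2(b, a) <  0; }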
#ifdef L_unorddf2
ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
- str lr, [sp, #-4]!
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and lr, xh, ip
- teq lr, ip
+ mov ip, xh, lsl #1
+ mvns ip, ip, asr #21
bne 1f
- orrs xl, xl, xh, lsl #12
+ orrs ip, xl, xh, lsl #12
bne 3f @ x is NAN
- 1: and lr, yh, ip
- teq lr, ip
+ 1: mov ip, yh, lsl #1
+ mvns ip, ip, asr #21
bne 2f
- orrs yl, yl, yh, lsl #12
+ orrs ip, yl, yh, lsl #12
bne 3f @ y is NAN
2: mov r0, #0 @ arguments are ordered.
- RETLDM
+ RET
3: mov r0, #1 @ arguments are unordered.
- RETLDM
+ RET
FUNC_END aeabi_dcmpun
FUNC_END unorddf2
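The new NAN test avoids building the 0x7ff00000 mask: shifting the high word left by one drops the sign, and mvns ... asr #21 yields EQ exactly when all eleven exponent bits are set. The equivalent C predicate (IEEE bit layout assumed):

    #include <stdint.h>

    /* True iff the double made of words hi:lo is a NaN. */
    static int is_nan_bits(uint32_t hi, uint32_t lo)
    {
        return ((hi >> 20) & 0x7ff) == 0x7ff     /* exponent all ones */
            && (((hi & 0x000fffffu) | lo) != 0); /* nonzero mantissa  */
    }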
@@ -1211,31 +1196,22 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
- orrs ip, xl, xh, lsl #1
- beq 1f @ value is 0.
- mov r3, r3, rrx @ preserve C flag (the actual sign)
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, xh, ip
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic ip, ip, #0x40000000
- cmp r2, ip
- bcc 1f @ value is too small
- add ip, ip, #(31 << 20)
- cmp r2, ip
- bcs 3f @ value is too large
- rsb r2, r2, ip
- mov ip, xh, lsl #11
- orr ip, ip, #0x80000000
- orr ip, ip, xl, lsr #21
- mov r2, r2, lsr #20
- tst r3, #0x80000000 @ the sign bit
- mov r0, ip, lsr r2
+ mov r2, xh, lsl #1
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bls 3f @ value is too large
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ tst xh, #0x80000000 @ the sign bit
+ mov r0, r3, lsr r2
rsbne r0, r0, #0
RET
@@ -1243,8 +1219,8 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
RET
2: orrs xl, xl, xh, lsl #12
- bne 4f @ r0 is NAN.
- 3: ands r0, r3, #0x80000000 @ the sign bit
+ bne 4f @ x is NAN.
+ 3: ands r0, xh, #0x80000000 @ the sign bit
moveq r0, #0x7fffffff @ maximum signed positive si
RET
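The rewritten fixdfsi derives everything from the exponent in two arithmetic steps: the add of 1<<21 makes the INF/NAN exponent carry out, and 31 minus the unbiased exponent becomes the right-shift applied to the rebuilt mantissa. A C model of the same flow (a sketch; saturation and NAN-to-zero behaviour per my reading of the code above):

    #include <stdint.h>
    #include <limits.h>

    static int32_t fix_dfsi_bits(uint32_t hi, uint32_t lo)
    {
        int exp = (int)((hi >> 20) & 0x7ff) - 1023;   /* unbiased exponent */
        if (exp == 1024 && ((hi & 0xfffffu) | lo))
            return 0;                                 /* NaN -> 0 */
        if (exp < 0)
            return 0;                                 /* |value| < 1 */
        if (exp > 30)                                 /* too large, or INF */
            return (hi & 0x80000000u) ? INT32_MIN : INT32_MAX;

        uint32_t mant = 0x80000000u | (hi << 11) | (lo >> 21);
        int32_t  r = (int32_t)(mant >> (31 - exp));
        return (hi & 0x80000000u) ? -r : r;
    }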
@@ -1260,29 +1236,22 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
- orrs ip, xl, xh, lsl #1
- movcss r0, #0 @ value is negative
- RETc(eq) @ or 0 (xl, xh overlap r0)
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, xh, ip
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic ip, ip, #0x40000000
- cmp r2, ip
- bcc 1f @ value is too small
- add ip, ip, #(31 << 20)
- cmp r2, ip
- bhi 3f @ value is too large
- rsb r2, r2, ip
- mov ip, xh, lsl #11
- orr ip, ip, #0x80000000
- orr ip, ip, xl, lsr #21
- mov r2, r2, lsr #20
- mov r0, ip, lsr r2
+ movs r2, xh, lsl #1
+ bcs 1f @ value is negative
+ adds r2, r2, #(1 << 21)
+ bcs 2f @ value is INF or NAN
+ bpl 1f @ value is too small
+ mov r3, #(0xfffffc00 + 31)
+ subs r2, r3, r2, asr #21
+ bmi 3f @ value is too large
+ @ scale value
+ mov r3, xh, lsl #11
+ orr r3, r3, #0x80000000
+ orr r3, r3, xl, lsr #21
+ mov r0, r3, lsr r2
RET
1: mov r0, #0
@@ -1305,90 +1274,60 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
- orrs r2, xl, xh, lsl #1
- moveq r0, r2, rrx
- RETc(eq) @ value is 0.0 or -0.0
@ check exponent range.
- mov ip, #0x7f000000
- orr ip, ip, #0x00f00000
- and r2, ip, xh
- teq r2, ip
- beq 2f @ value is INF or NAN
- bic xh, xh, ip
- cmp r2, #(0x380 << 20)
- bls 4f @ value is too small
- @ shift and round mantissa
- 1: movs r3, xl, lsr #29
- adc r3, r3, xh, lsl #3
- @ if halfway between two numbers, round towards LSB = 0.
- mov xl, xl, lsl #3
- teq xl, #0x80000000
- biceq r3, r3, #1
- @ rounding might have created an extra MSB. If so adjust exponent.
- tst r3, #0x00800000
- addne r2, r2, #(1 << 20)
- bicne r3, r3, #0x00800000
- @ check exponent for overflow
- mov ip, #(0x400 << 20)
- orr ip, ip, #(0x07f << 20)
- cmp r2, ip
- bcs 3f @ overflow
- @ adjust exponent, merge with sign bit and mantissa.
- movs xh, xh, lsl #1
- mov r2, r2, lsl #4
- orr r0, r3, r2, rrx
- eor r0, r0, #0x40000000
+ mov r2, xh, lsl #1
+ subs r3, r2, #((1023 - 127) << 21)
+ subcss ip, r3, #(1 << 21)
+ rsbcss ip, ip, #(254 << 21)
+ bls 2f @ value is out of range
+ 1: @ shift and round mantissa
+ and ip, xh, #0x80000000
+ mov r2, xl, lsl #3
+ orr xl, ip, xl, lsr #29
+ cmp r2, #0x80000000
+ adc r0, xl, r3, lsl #2
+ biceq r0, r0, #1
RET
- 2: @ chech for NAN
- orrs xl, xl, xh, lsl #12
- movne r0, #0x7f000000
- orrne r0, r0, #0x00c00000
- RETc(ne) @ return NAN
- 3: @ return INF with sign
- and r0, xh, #0x80000000
- orr r0, r0, #0x7f000000
- orr r0, r0, #0x00800000
- RET
- 4: @ check if denormalized value is possible
- subs r2, r2, #((0x380 - 24) << 20)
- andle r0, xh, #0x80000000 @ too small, return signed 0.
- RETc(le)
+ 2: @ either overflow or underflow
+ tst xh, #0x40000000
+ bne 3f @ overflow
+ @ check if denormalized value is possible
+ adds r2, r3, #(23 << 21)
+ andlt r0, xh, #0x80000000 @ too small, return signed 0.
+ RETc(lt)
@ denormalize value so we can resume with the code above afterwards.
orr xh, xh, #0x00100000
- mov r2, r2, lsr #20
- rsb r2, r2, #25
- cmp r2, #20
- bgt 6f
+ mov r2, r2, lsr #21
+ rsb r2, r2, #24
rsb ip, r2, #32
- mov r3, xl, lsl ip
+ movs r3, xl, lsl ip
mov xl, xl, lsr r2
- orr xl, xl, xh, lsl ip
- movs xh, xh, lsl #1
- mov xh, xh, lsr r2
- mov xh, xh, rrx
- 5: teq r3, #0 @ fold r3 bits into the LSB
- orrne xl, xl, #1 @ for rounding considerations.
- mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent
+ orrne xl, xl, #1 @ fold r3 for rounding considerations.
+ mov r3, xh, lsl #11
+ mov r3, r3, lsr #11
+ orr xl, xl, r3, lsl ip
+ mov r3, r3, lsr r2
+ mov r3, r3, lsl #1
b 1b
- 6: rsb r2, r2, #(12 + 20)
- rsb ip, r2, #32
- mov r3, xl, lsl r2
- mov xl, xl, lsr ip
- orr xl, xl, xh, lsl r2
- and xh, xh, #0x80000000
- b 5b
+ 3: @ chech for NAN
+ mvns r3, r2, asr #21
+ bne 5f @ simple overflow
+ orrs r3, xl, xh, lsl #12
+ movne r0, #0x7f000000
+ orrne r0, r0, #0x00c00000
+ RETc(ne) @ return NAN
+ 5: @ return INF with sign
+ and r0, xh, #0x80000000
+ orr r0, r0, #0x7f000000
+ orr r0, r0, #0x00800000
+ RET
FUNC_END aeabi_d2f
FUNC_END truncdfsf2
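truncdfsf2's fast path re-biases the exponent by 1023-127=896 (the ((1023 - 127) << 21) subtraction above) and rounds the 52-bit fraction down to 23 bits. A C model of the in-range rounding (a sketch; overflow, NAN and denormal handling omitted):

    #include <stdint.h>

    static uint32_t trunc_df_sf_bits(uint64_t d)
    {
        uint32_t sign = (uint32_t)(d >> 63) << 31;
        uint32_t exp  = (uint32_t)((d >> 52) & 0x7ff) - 1023 + 127;
        uint64_t frac = d & 0x000fffffffffffffULL;

        uint32_t r    = sign | (exp << 23) | (uint32_t)(frac >> 29);
        uint64_t rest = frac & 0x1fffffffULL;    /* 29 discarded bits */

        r += (uint32_t)(rest >> 28);             /* add the guard bit */
        if (rest == (1ULL << 28))                /* halfway: round to even */
            r &= ~1u;
        return r;  /* a mantissa carry bumps the exponent, as intended */
    }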
config/arm/ieee754-sf.S:
@@ -42,7 +42,7 @@
ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2
eor r0, r0, #0x80000000 @ flip sign bit
RET
@@ -56,11 +56,11 @@ ARM_FUNC_ALIAS aeabi_fneg negsf2
ARM_FUNC_START aeabi_frsub
eor r0, r0, #0x80000000 @ flip sign bit of first arg
b 1f
ARM_FUNC_START subsf3
ARM_FUNC_ALIAS aeabi_fsub subsf3
eor r1, r1, #0x80000000 @ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
b 1f @ Skip Thumb-code prologue
@@ -68,32 +68,19 @@ ARM_FUNC_ALIAS aeabi_fsub subsf3
ARM_FUNC_START addsf3
ARM_FUNC_ALIAS aeabi_fadd addsf3
- 1: @ Compare both args, return zero if equal but the sign.
- eor r2, r0, r1
- teq r2, #0x80000000
- beq LSYM(Lad_z)
- @ If first arg is 0 or -0, return second arg.
- @ If second arg is 0 or -0, return first arg.
- bics r2, r0, #0x80000000
- moveq r0, r1
- bicnes r2, r1, #0x80000000
- RETc(eq)
- @ Mask out exponents.
- mov ip, #0xff000000
- and r2, r0, ip, lsr #1
- and r3, r1, ip, lsr #1
- @ If either of them is 255, result will be INF or NAN
- teq r2, ip, lsr #1
- teqne r3, ip, lsr #1
- beq LSYM(Lad_i)
+ 1: @ Look for zeroes, equal values, INF, or NAN.
+ movs r2, r0, lsl #1
+ movnes r3, r1, lsl #1
+ teqne r2, r3
+ mvnnes ip, r2, asr #24
+ mvnnes ip, r3, asr #24
+ beq LSYM(Lad_s)
@ Compute exponent difference. Make largest exponent in r2,
@ corresponding arg in r0, and positive exponent difference in r3.
- subs r3, r3, r2
+ mov r2, r2, lsr #24
+ rsbs r3, r2, r3, lsr #24
addgt r2, r2, r3
eorgt r1, r0, r1
eorgt r0, r1, r0
@@ -103,7 +90,7 @@ ARM_FUNC_ALIAS aeabi_fadd addsf3
@ If exponent difference is too large, return largest argument
@ already in r0. We need up to 25 bit to handle proper rounding
@ of 0x1p25 - 1.1.
- cmp r3, #(25 << 23)
+ cmp r3, #25
RETc(hi)
@ Convert mantissa to signed integer.
@@ -122,25 +109,17 @@ ARM_FUNC_ALIAS aeabi_fadd addsf3
beq LSYM(Lad_d)
LSYM(Lad_x):
- @ Scale down second arg with exponent difference.
- @ Apply shift one bit left to first arg and the rest to second arg
- @ to simplify things later, but only if exponent does not become 0.
- movs r3, r3, lsr #23
- teqne r2, #(1 << 23)
- movne r0, r0, lsl #1
- subne r2, r2, #(1 << 23)
- subne r3, r3, #1
- @ Shift second arg into ip, keep leftover bits into r1.
- mov ip, r1, asr r3
+ @ Compensate for the exponent overlapping the mantissa MSB added later
+ sub r2, r2, #1
+ @ Shift and add second arg to first arg in r0.
+ @ Keep leftover bits into r1.
+ adds r0, r0, r1, asr r3
rsb r3, r3, #32
mov r1, r1, lsl r3
- add r0, r0, ip @ the actual addition
- @ We now have a 64 bit result in r0-r1.
- @ Keep absolute value in r0-r1, sign in r3.
- ands r3, r0, #0x80000000
+ @ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
+ and r3, r0, #0x80000000
bpl LSYM(Lad_p)
rsbs r1, r1, #0
rsc r0, r0, #0
@@ -148,103 +127,117 @@ LSYM(Lad_x):
@ Determine how to normalize the result.
LSYM(Lad_p):
cmp r0, #0x00800000
- bcc LSYM(Lad_l)
+ bcc LSYM(Lad_a)
cmp r0, #0x01000000
- bcc LSYM(Lad_r0)
- cmp r0, #0x02000000
- bcc LSYM(Lad_r1)
+ bcc LSYM(Lad_e)
@ Result needs to be shifted right.
movs r0, r0, lsr #1
mov r1, r1, rrx
- add r2, r2, #(1 << 23)
- LSYM(Lad_r1):
- movs r0, r0, lsr #1
- mov r1, r1, rrx
- add r2, r2, #(1 << 23)
- @ Our result is now properly aligned into r0, remaining bits in r1.
- @ Round with MSB of r1. If halfway between two numbers, round towards
- @ LSB of r0 = 0.
- LSYM(Lad_r0):
- add r0, r0, r1, lsr #31
- teq r1, #0x80000000
- biceq r0, r0, #1
- @ Rounding may have added a new MSB. Adjust exponent.
- @ That MSB will be cleared when exponent is merged below.
- tst r0, #0x01000000
- addne r2, r2, #(1 << 23)
+ add r2, r2, #1
@ Make sure we did not bust our exponent.
- cmp r2, #(254 << 23)
- bhi LSYM(Lad_o)
+ cmp r2, #254
+ bhs LSYM(Lad_o)
+ @ Our result is now properly aligned into r0, remaining bits in r1.
@ Pack final result together.
+ @ Round with MSB of r1. If halfway between two numbers, round towards
+ @ LSB of r0 = 0.
LSYM(Lad_e):
- bic r0, r0, #0x01800000
- orr r0, r0, r2
+ cmp r1, #0x80000000
+ adc r0, r0, r2, lsl #23
+ biceq r0, r0, #1
orr r0, r0, r3
RET
- @ Result must be shifted left.
- @ No rounding necessary since r1 will always be 0.
+ @ Result must be shifted left and exponent adjusted.
+ LSYM(Lad_a):
+ movs r1, r1, lsl #1
+ adc r0, r0, r0
+ tst r0, #0x00800000
+ sub r2, r2, #1
+ bne LSYM(Lad_e)
+ @ No rounding necessary since r1 will always be 0 at this point.
LSYM(Lad_l):
#if __ARM_ARCH__ < 5
movs ip, r0, lsr #12
moveq r0, r0, lsl #12
- subeq r2, r2, #(12 << 23)
+ subeq r2, r2, #12
tst r0, #0x00ff0000
moveq r0, r0, lsl #8
- subeq r2, r2, #(8 << 23)
+ subeq r2, r2, #8
tst r0, #0x00f00000
moveq r0, r0, lsl #4
- subeq r2, r2, #(4 << 23)
+ subeq r2, r2, #4
tst r0, #0x00c00000
moveq r0, r0, lsl #2
- subeq r2, r2, #(2 << 23)
- tst r0, #0x00800000
- moveq r0, r0, lsl #1
- subeq r2, r2, #(1 << 23)
- cmp r2, #0
- bgt LSYM(Lad_e)
+ subeq r2, r2, #2
+ cmp r0, #0x00800000
+ movcc r0, r0, lsl #1
+ sbcs r2, r2, #0
#else
clz ip, r0
sub ip, ip, #8
+ subs r2, r2, ip
mov r0, r0, lsl ip
- subs r2, r2, ip, lsl #23
- bgt LSYM(Lad_e)
#endif
- @ Exponent too small, denormalize result.
- mvn r2, r2, asr #23
- add r2, r2, #2
- orr r0, r3, r0, lsr r2
+ @ Final result with sign
+ @ If exponent negative, denormalize result.
+ addge r0, r0, r2, lsl #23
+ rsblt r2, r2, #0
+ orrge r0, r0, r3
+ orrlt r0, r3, r0, lsr r2
RET
@ Fixup and adjust bit position for denormalized arguments.
@ Note that r2 must not remain equal to 0.
LSYM(Lad_d):
teq r2, #0
- eoreq r0, r0, #0x00800000
- addeq r2, r2, #(1 << 23)
eor r1, r1, #0x00800000
- subne r3, r3, #(1 << 23)
+ eoreq r0, r0, #0x00800000
+ addeq r2, r2, #1
+ subne r3, r3, #1
b LSYM(Lad_x)
- @ Result is x - x = 0, unless x is INF or NAN.
- LSYM(Lad_z):
- mov ip, #0xff000000
- and r2, r0, ip, lsr #1
- teq r2, ip, lsr #1
- moveq r0, ip, asr #2
+ LSYM(Lad_s):
+ mov r3, r1, lsl #1
+ mvns ip, r2, asr #24
+ mvnnes ip, r3, asr #24
+ beq LSYM(Lad_i)
+ teq r2, r3
+ beq 1f
+ @ Result is x + 0.0 = x or 0.0 + y = y.
+ teq r2, #0
+ moveq r0, r1
+ RET
+ 1: teq r0, r1
+ @ Result is x - x = 0.
movne r0, #0
+ RETc(ne)
+ @ Result is x + x = 2x.
+ tst r2, #0xff000000
+ bne 2f
+ movs r0, r0, lsl #1
+ orrcs r0, r0, #0x80000000
RET
+ 2: adds r2, r2, #(2 << 24)
+ addcc r0, r0, #(1 << 23)
+ RETc(cc)
+ and r3, r0, #0x80000000
@ Overflow: return INF.
LSYM(Lad_o):
@@ -257,16 +250,16 @@ LSYM(Lad_o):
@ if r1 != INF/NAN: return r0 (which is INF/NAN)
@ if r0 or r1 is NAN: return NAN
@ if opposite sign: return NAN
- @ return r0 (which is INF or -INF)
+ @ otherwise return r0 (which is INF or -INF)
LSYM(Lad_i):
- teq r2, ip, lsr #1
+ mvns r2, r2, asr #24
movne r0, r1
- teqeq r3, ip, lsr #1
- RETc(ne)
+ mvneqs r3, r3, asr #24
+ movne r1, r0
movs r2, r0, lsl #9
- moveqs r2, r1, lsl #9
+ moveqs r3, r1, lsl #9
teqeq r0, r1
- orrne r0, r3, #0x00400000 @ NAN
+ orrne r0, r0, #0x00400000 @ quiet NAN
RET
FUNC_END aeabi_frsub
@@ -287,28 +280,17 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
ands r3, r0, #0x80000000
rsbmi r0, r0, #0
- 1: teq r0, #0
+ 1: movs ip, r0
RETc(eq)
- 3:
- mov r1, #0
- mov r2, #((127 + 23) << 23)
- tst r0, #0xfc000000
- beq LSYM(Lad_p)
- @ We need to scale the value a little before branching to code above.
- tst r0, #0xf0000000
- 4:
- orrne r1, r1, r0, lsl #28
- movne r0, r0, lsr #4
- addne r2, r2, #(4 << 23)
- tst r0, #0x0c000000
- beq LSYM(Lad_p)
- mov r1, r1, lsr #2
- orr r1, r1, r0, lsl #30
- mov r0, r0, lsr #2
- add r2, r2, #(2 << 23)
- b LSYM(Lad_p)
+ @ Add initial exponent to sign
+ orr r3, r3, #((127 + 23) << 23)
+ .ifnc ah, r0
+ mov ah, r0
+ .endif
+ mov al, #0
+ b 2f
FUNC_END aeabi_i2f
FUNC_END floatsisf
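floatsisf now only sets up the sign and initial exponent and reuses the shared 64-bit normalization in floatdisf (the b 2f). The net conversion, including round-to-nearest-even for inputs wider than 24 bits, behaves like this C sketch:

    #include <stdint.h>

    static uint32_t float_si_sf_bits(int32_t i)
    {
        if (i == 0)
            return 0;
        uint32_t sign = (i < 0) ? 0x80000000u : 0;
        uint32_t m    = (i < 0) ? 0u - (uint32_t)i : (uint32_t)i;

        int exp = 127 + 31;            /* exponent if the MSB sits at bit 31 */
        while (!(m & 0x80000000u)) { m <<= 1; exp--; }

        uint32_t rest = m << 24;       /* 8 bits below the 24-bit mantissa */
        uint32_t r = sign + ((uint32_t)(exp - 1) << 23) + (m >> 8);
        r += rest >> 31;               /* guard bit */
        if (rest == 0x80000000u)       /* ties to even */
            r &= ~1u;
        return r;
    }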
@@ -317,22 +299,15 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
ARM_FUNC_START floatundisf
ARM_FUNC_ALIAS aeabi_ul2f floatundisf
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqs f0, #0.0
#endif
RETc(eq)
- #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
- @ For hard FPA code we want to return via the tail below so that
- @ we can return the result in f0 as well as in r0 for backwards
- @ compatibility.
- str lr, [sp, #-4]!
- adr lr, 4f
- #endif
mov r3, #0
- b 2f
+ b 1f
ARM_FUNC_START floatdisf
ARM_FUNC_ALIAS aeabi_l2f floatdisf
...@@ -342,78 +317,80 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf ...@@ -342,78 +317,80 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
 	mvfeqs	f0, #0.0
 #endif
 	RETc(eq)

+	ands	r3, ah, #0x80000000	@ sign bit in r3
+	bpl	1f
+	rsbs	al, al, #0
+	rsc	ah, ah, #0
+1:
 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
 	@ For hard FPA code we want to return via the tail below so that
 	@ we can return the result in f0 as well as in r0 for backwards
 	@ compatibility.
 	str	lr, [sp, #-4]!
-	adr	lr, 4f
+	adr	lr, LSYM(f0_ret)
 #endif

-	ands	r3, ah, #0x80000000	@ sign bit in r3
-	bpl	2f
-	rsbs	al, al, #0
-	rsc	ah, ah, #0
-2:
 	movs	ip, ah
-#ifdef __ARMEB__
-	moveq	r0, al
-#endif
-	beq	3b
-	mov	r2, #((127 + 23 + 32) << 23)	@ initial exponent
-#ifndef __ARMEB__
-	mov	r1, al
-	mov	r0, ip
-#endif
-	tst	r0, #0xfc000000
-	bne	3f
+	moveq	ip, al
+
+	@ Add initial exponent to sign
+	orr	r3, r3, #((127 + 23 + 32) << 23)
+	subeq	r3, r3, #(32 << 23)
+2:	sub	r3, r3, #(1 << 23)

 #if __ARM_ARCH__ < 5
-	cmp	r0, #(1 << 13)
-	movlo	ip, #13
-	movlo	r0, r0, lsl #13
-	movhs	ip, #0
-	tst	r0, #0x03fc0000
-	addeq	ip, ip, #8
-	moveq	r0, r0, lsl #8
-	tst	r0, #0x03c00000
-	addeq	ip, ip, #4
-	moveq	r0, r0, lsl #4
-	tst	r0, #0x03000000
-	addeq	ip, ip, #2
-	moveq	r0, r0, lsl #2
+	mov	r2, #23
+	cmp	ip, #(1 << 16)
+	movhs	ip, ip, lsr #16
+	subhs	r2, r2, #16
+	cmp	ip, #(1 << 8)
+	movhs	ip, ip, lsr #8
+	subhs	r2, r2, #8
+	cmp	ip, #(1 << 4)
+	movhs	ip, ip, lsr #4
+	subhs	r2, r2, #4
+	cmp	ip, #(1 << 2)
+	subhs	r2, r2, #2
+	sublo	r2, r2, ip, lsr #1
+	subs	r2, r2, ip, lsr #3
 #else
-	clz	ip, r0
-	sub	ip, ip, #6
-	mov	r0, r0, lsl ip
+	clz	r2, ip
+	subs	r2, r2, #8
 #endif

-	sub	r2, r2, ip, lsl #23
-	rsb	ip, ip, #32
-	orr	r0, r0, r1, lsr ip
-	rsb	ip, ip, #32
-	mov	r1, r1, asl ip
-	@ At this point we no-longer care about the precise value in r1, only
-	@ whether only the top bit is set, or if the top bit and some others
-	@ are set.
-	and	ip, r1, #0xff
-	orr	r1, r1, ip, lsl #8
-	b	LSYM(Lad_p)
-3:
-	@ We need to scale the value a little before branching to code above.
-	@ At this point we no-longer care about the precise value in r1, only
-	@ whether only the top bit is set, or if the top bit and some others
-	@ are set.
-	and	ip, r1, #0xff
-	orr	r1, r1, ip, lsl #8
-	tst	r0, #0xf0000000
-	movne	r1, r1, lsr #4
-	b	4b
+	sub	r3, r3, r2, lsl #23
+	blt	3f
+
+	add	r3, r3, ah, lsl r2
+	mov	ip, al, lsl r2
+	rsb	r2, r2, #32
+	cmp	ip, #0x80000000
+	adc	r0, r3, al, lsr r2
+	biceq	r0, r0, #1
+	RET
+
+3:	add	r2, r2, #32
+	mov	ip, ah, lsl r2
+	rsb	r2, r2, #32
+	orrs	al, al, ip, lsl #1
+	adc	r0, r3, ah, lsr r2
+	biceq	r0, r0, ip, lsr #31
+	RET

 #if !defined (__VFP_FP__) && !defined(__SOFTFP__)
-4:
+LSYM(f0_ret)
 	str	r0, [sp, #-4]!
 	ldfs	f0, [sp], #4
 	RETLDM
 #endif

 	FUNC_END floatdisf
 	FUNC_END aeabi_l2f
 	FUNC_END floatundisf
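
On targets without clz (__ARM_ARCH__ < 5), the new code computes the normalization shift with a branch-free binary search: compare against 1 << 16, 1 << 8, 1 << 4, then fold the last two bits in with plain arithmetic. The same search written in C (a sketch; the asm counts down from 23 toward a shift amount rather than returning a leading-zero count):

    #include <stdint.h>

    /* Binary-search count-leading-zeros for x != 0, mirroring the
       cmp/movhs/subhs ladder above.  Each step halves the range left
       to search; the asm folds the final 2-bit case into two
       subtractions instead of the last two ifs. */
    static int clz32(uint32_t x)
    {
        int n = 0;
        if (x < 1u << 16) { n += 16; x <<= 16; }
        if (x < 1u << 24) { n += 8;  x <<= 8;  }
        if (x < 1u << 28) { n += 4;  x <<= 4;  }
        if (x < 1u << 30) { n += 2;  x <<= 2;  }
        if (x < 1u << 31) { n += 1; }
        return n;
    }
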
@@ -425,139 +402,117 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
 ARM_FUNC_START mulsf3
 ARM_FUNC_ALIAS aeabi_fmul mulsf3

-	@ Mask out exponents.
-	mov	ip, #0xff000000
-	and	r2, r0, ip, lsr #1
-	and	r3, r1, ip, lsr #1
-	@ Trap any INF/NAN.
-	teq	r2, ip, lsr #1
-	teqne	r3, ip, lsr #1
-	beq	LSYM(Lml_s)
-	@ Trap any multiplication by 0.
-	bics	ip, r0, #0x80000000
-	bicnes	ip, r1, #0x80000000
-	beq	LSYM(Lml_z)
-	@ Shift exponents right one bit to make room for overflow bit.
-	@ If either of them is 0, scale denormalized arguments off line.
-	@ Then add both exponents together.
-	movs	r2, r2, lsr #1
-	teqne	r3, #0
-	beq	LSYM(Lml_d)
+	@ Mask out exponents, trap any zero/denormal/INF/NAN.
+	mov	ip, #0xff
+	ands	r2, ip, r0, lsr #23
+	andnes	r3, ip, r1, lsr #23
+	teqne	r2, ip
+	teqne	r3, ip
+	beq	LSYM(Lml_s)

 LSYM(Lml_x):
-	add	r2, r2, r3, asr #1
-	@ Preserve final sign in r2 along with exponent for now.
-	teq	r0, r1
-	orrmi	r2, r2, #0x8000
+	@ Add exponents together
+	add	r2, r2, r3
+
+	@ Determine final sign.
+	eor	ip, r0, r1

 	@ Convert mantissa to unsigned integer.
-	bic	r0, r0, #0xff000000
-	bic	r1, r1, #0xff000000
-	orr	r0, r0, #0x00800000
-	orr	r1, r1, #0x00800000
+	@ If power of two, branch to a separate path.
+	@ Make up for final alignment.
+	movs	r0, r0, lsl #9
+	movnes	r1, r1, lsl #9
+	beq	LSYM(Lml_1)
+	mov	r3, #0x08000000
+	orr	r0, r3, r0, lsr #5
+	orr	r1, r3, r1, lsr #5

 #if __ARM_ARCH__ < 4
+
+	@ Put sign bit in r3, which will be restored into r0 later.
+	and	r3, ip, #0x80000000
+
 	@ Well, no way to make it shorter without the umull instruction.
-	@ We must perform that 24 x 24 -> 48 bit multiplication by hand.
-	stmfd	sp!, {r4, r5}
+	stmfd	sp!, {r3, r4, r5}
 	mov	r4, r0, lsr #16
 	mov	r5, r1, lsr #16
-	bic	r0, r0, #0x00ff0000
-	bic	r1, r1, #0x00ff0000
+	bic	r0, r0, r4, lsl #16
+	bic	r1, r1, r5, lsl #16
 	mul	ip, r4, r5
 	mul	r3, r0, r1
 	mul	r0, r5, r0
 	mla	r0, r4, r1, r0
 	adds	r3, r3, r0, lsl #16
-	adc	ip, ip, r0, lsr #16
-	ldmfd	sp!, {r4, r5}
+	adc	r1, ip, r0, lsr #16
+	ldmfd	sp!, {r0, r4, r5}

 #else

-	umull	r3, ip, r0, r1		@ The actual multiplication.
+	@ The actual multiplication.
+	umull	r3, r1, r0, r1
+
+	@ Put final sign in r0.
+	and	r0, ip, #0x80000000

 #endif
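
On pre-ARMv4 cores the block above has no umull, so the 32x32->64 product is assembled from four 16x16 multiplies plus a carry fix-up. A C sketch of the same decomposition (function name ours); like the asm, it relies on the two cross products summing without 32-bit overflow, which holds for the 28-bit-aligned mantissas used here:

    #include <stdint.h>

    /* 32x32 -> 64 bit multiply from four 16x16 products, as in the
       __ARM_ARCH__ < 4 path above. */
    static void umull32(uint32_t a, uint32_t b, uint32_t *hi, uint32_t *lo)
    {
        uint32_t ah = a >> 16, bh = b >> 16;
        uint32_t al = a & 0xffffu, bl = b & 0xffffu;
        uint32_t h = ah * bh;               /* mul ip, r4, r5        */
        uint32_t l = al * bl;               /* mul r3, r0, r1        */
        uint32_t m = bh * al + ah * bl;     /* mul r0, r5, r0; mla   */
        uint32_t t = l + (m << 16);         /* adds r3, r3, r0, lsl #16 */
        *hi = h + (m >> 16) + (t < l);      /* adc: carry from low word */
        *lo = t;
    }
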
-	@ Put final sign in r0.
-	mov	r0, r2, lsl #16
-	bic	r2, r2, #0x8000
-
-	@ Adjust result if one extra MSB appeared.
-	@ The LSB may be lost but this never changes the result in this case.
-	tst	ip, #(1 << 15)
-	addne	r2, r2, #(1 << 22)
-	movnes	ip, ip, lsr #1
-	movne	r3, r3, rrx
-
-	@ Apply exponent bias, check range for underflow.
-	subs	r2, r2, #(127 << 22)
-	ble	LSYM(Lml_u)
-
-	@ Scale back to 24 bits with rounding.
-	@ r0 contains sign bit already.
-	orrs	r0, r0, r3, lsr #23
-	adc	r0, r0, ip, lsl #9
-
-	@ If halfway between two numbers, rounding should be towards LSB = 0.
-	mov	r3, r3, lsl #9
-	teq	r3, #0x80000000
-	biceq	r0, r0, #1
-
-	@ Note: rounding may have produced an extra MSB here.
-	@ The extra bit is cleared before merging the exponent below.
-	tst	r0, #0x01000000
-	addne	r2, r2, #(1 << 22)
-
-	@ Check for exponent overflow
-	cmp	r2, #(255 << 22)
-	bge	LSYM(Lml_o)
-
-	@ Add final exponent.
-	bic	r0, r0, #0x01800000
-	orr	r0, r0, r2, lsl #1
+	@ Adjust result upon the MSB position.
+	cmp	r1, #(1 << 23)
+	movcc	r1, r1, lsl #1
+	orrcc	r1, r1, r3, lsr #31
+	movcc	r3, r3, lsl #1
+
+	@ Add sign to result.
+	orr	r0, r0, r1
+
+	@ Apply exponent bias, check for under/overflow.
+	sbc	r2, r2, #127
+	cmp	r2, #(254 - 1)
+	bhi	LSYM(Lml_u)
+
+	@ Round the result, merge final exponent.
+	cmp	r3, #0x80000000
+	adc	r0, r0, r2, lsl #23
+	biceq	r0, r0, #1
 	RET
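
The three-instruction tail above carries much of the speedup: `cmp r3, #0x80000000` sets the carry exactly when the 32 discarded bits are at least half an ulp, `adc` folds that carry in together with the exponent field, and `biceq` clears the LSB on an exact tie, giving round-to-nearest-even; a mantissa that overflows from rounding carries straight into the exponent. The same trick in C (a sketch of the idea, not the commit's interface):

    #include <stdint.h>

    /* Carry-based round-to-nearest-even: 'base' holds sign plus
       mantissa, 'frac' the 32 discarded bits, 'exp' the biased
       exponent.  Mirrors the cmp/adc/biceq sequence above. */
    static uint32_t round_pack(uint32_t base, uint32_t frac, uint32_t exp)
    {
        uint32_t r = base + (exp << 23) + (frac >= 0x80000000u);
        if (frac == 0x80000000u)      /* exact halfway case      */
            r &= ~1u;                 /* force the LSB even      */
        return r;
    }
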
-	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
-	eor	r0, r0, r1
-	bic	r0, r0, #0x7fffffff
-	RET
-
-	@ Check if denormalized result is possible, otherwise return signed 0.
+	@ Multiplication by 0x1p*: let's shortcut a lot of code.
+LSYM(Lml_1):
+	teq	r0, #0
+	and	ip, ip, #0x80000000
+	moveq	r1, r1, lsl #9
+	orr	r0, ip, r0, lsr #9
+	orr	r0, r0, r1, lsr #9
+	subs	r2, r2, #127
+	rsbgts	r3, r2, #255
+	orrgt	r0, r0, r2, lsl #23
+	RETc(gt)
+
+	@ Under/overflow: fix things up for the code below.
+	orr	r0, r0, #0x00800000
+	mov	r3, #0
+	subs	r2, r2, #1
+
 LSYM(Lml_u):
-	cmn	r2, #(24 << 22)
-	RETc(le)
-
-	@ Find out proper shift value.
-	mvn	r1, r2, asr #22
-	subs	r1, r1, #7
-	bgt	LSYM(Lml_ur)
-
-	@ Shift value left, round, etc.
-	add	r1, r1, #32
-	orrs	r0, r0, r3, lsr r1
-	rsb	r1, r1, #32
-	adc	r0, r0, ip, lsl r1
-	mov	ip, r3, lsl r1
-	teq	ip, #0x80000000
-	biceq	r0, r0, #1
-	RET
+	@ Overflow?
+	bgt	LSYM(Lml_o)
+
+	@ Check if denormalized result is possible, otherwise return signed 0.
+	cmn	r2, #(24 + 1)
+	bicle	r0, r0, #0x7fffffff
+	RETc(le)

 	@ Shift value right, round, etc.
-	@ Note: r1 must not be 0 otherwise carry does not get set.
-LSYM(Lml_ur):
-	orrs	r0, r0, ip, lsr r1
+	rsb	r2, r2, #0
+	movs	r1, r0, lsl #1
+	mov	r1, r1, lsr r2
+	rsb	r2, r2, #32
+	mov	ip, r0, lsl r2
+	movs	r0, r1, rrx
 	adc	r0, r0, #0
-	rsb	r1, r1, #32
-	mov	ip, ip, lsl r1
-	teq	r3, #0
-	teqeq	ip, #0x80000000
-	biceq	r0, r0, #1
+	orrs	r3, r3, ip, lsl #1
+	biceq	r0, r0, ip, lsr #31
 	RET

 	@ One or both arguments are denormalized.
@@ -567,32 +522,51 @@ LSYM(Lml_d):
 	and	ip, r0, #0x80000000
 1:	moveq	r0, r0, lsl #1
 	tsteq	r0, #0x00800000
-	subeq	r2, r2, #(1 << 22)
+	subeq	r2, r2, #1
 	beq	1b
 	orr	r0, r0, ip
 	teq	r3, #0
 	and	ip, r1, #0x80000000
 2:	moveq	r1, r1, lsl #1
 	tsteq	r1, #0x00800000
-	subeq	r3, r3, #(1 << 23)
+	subeq	r3, r3, #1
 	beq	2b
 	orr	r1, r1, ip
 	b	LSYM(Lml_x)
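
Denormalized inputs are fixed up by the loop above rather than by a separate algorithm: the mantissa is shifted left until the implicit-one position fills, while the exponent (already 0) goes negative to compensate, after which the regular Lml_x path runs unmodified. Sketched in C (our helper; the asm additionally strips and restores the sign bit around the loop):

    #include <stdint.h>

    /* Normalize a denormal mantissa as in Lml_d above: shift left
       until bit 23 (the implicit one) is set, decrementing the
       exponent once per shift.  'mant' must be nonzero with the
       sign bit already cleared. */
    static uint32_t normalize_denormal(uint32_t mant, int *exp)
    {
        while (!(mant & 0x00800000u)) {
            mant <<= 1;
            --*exp;
        }
        return mant;
    }
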
-	@ One or both args are INF or NAN.
 LSYM(Lml_s):
+	@ Isolate the INF and NAN cases away
+	and	r3, ip, r1, lsr #23
+	teq	r2, ip
+	teqne	r3, ip
+	beq	1f
+
+	@ Here, one or more arguments are either denormalized or zero.
+	bics	ip, r0, #0x80000000
+	bicnes	ip, r1, #0x80000000
+	bne	LSYM(Lml_d)
+
+	@ Result is 0, but determine sign anyway.
+LSYM(Lml_z):
+	eor	r0, r0, r1
+	bic	r0, r0, #0x7fffffff
+	RET
+
+1:	@ One or both args are INF or NAN.
 	teq	r0, #0x0
-	teqne	r1, #0x0
 	teqne	r0, #0x80000000
+	moveq	r0, r1
+	teqne	r1, #0x0
 	teqne	r1, #0x80000000
 	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
-	teq	r2, ip, lsr #1
+	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
 	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
-1:	teq	r3, ip, lsr #1
+1:	teq	r3, ip
 	bne	LSYM(Lml_i)
 	movs	r3, r1, lsl #9
+	movne	r0, r1
 	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN

 	@ Result is INF, but we need to determine its sign.
@@ -606,9 +580,9 @@ LSYM(Lml_o):
 	orr	r0, r0, #0x00800000
 	RET

-	@ Return NAN.
+	@ Return a quiet NAN.
 LSYM(Lml_n):
-	mov	r0, #0x7f000000
+	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00c00000
 	RET
@@ -617,37 +591,28 @@ LSYM(Lml_n):
 ARM_FUNC_START divsf3
 ARM_FUNC_ALIAS aeabi_fdiv divsf3

-	@ Mask out exponents.
-	mov	ip, #0xff000000
-	and	r2, r0, ip, lsr #1
-	and	r3, r1, ip, lsr #1
-	@ Trap any INF/NAN or zeroes.
-	teq	r2, ip, lsr #1
-	teqne	r3, ip, lsr #1
-	bicnes	ip, r0, #0x80000000
-	bicnes	ip, r1, #0x80000000
-	beq	LSYM(Ldv_s)
-
-	@ Shift exponents right one bit to make room for overflow bit.
-	@ If either of them is 0, scale denormalized arguments off line.
-	@ Then substract divisor exponent from dividend's.
-	movs	r2, r2, lsr #1
-	teqne	r3, #0
-	beq	LSYM(Ldv_d)
+	@ Mask out exponents, trap any zero/denormal/INF/NAN.
+	mov	ip, #0xff
+	ands	r2, ip, r0, lsr #23
+	andnes	r3, ip, r1, lsr #23
+	teqne	r2, ip
+	teqne	r3, ip
+	beq	LSYM(Ldv_s)

 LSYM(Ldv_x):
-	sub	r2, r2, r3, asr #1
+	@ Substract divisor exponent from dividend's
+	sub	r2, r2, r3

 	@ Preserve final sign into ip.
 	eor	ip, r0, r1

 	@ Convert mantissa to unsigned integer.
 	@ Dividend -> r3, divisor -> r1.
-	mov	r3, #0x10000000
 	movs	r1, r1, lsl #9
 	mov	r0, r0, lsl #9
 	beq	LSYM(Ldv_1)
+	mov	r3, #0x10000000
 	orr	r1, r3, r1, lsr #4
 	orr	r3, r3, r0, lsr #4
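
Both rewritten prologues (mulsf3 earlier and divsf3 here) use one combined test to route every special operand to a single slow path: extract each 8-bit biased exponent, and bail out if either is 0 (zero or denormal) or 255 (INF or NAN). As a C predicate (a sketch, not the library's API):

    #include <stdint.h>

    /* The fast-path filter above: biased exponent 0 means zero or
       denormal, 255 means INF or NAN; anything else is a normal
       number the straight-line code handles directly. */
    static int needs_slow_path(uint32_t x, uint32_t y)
    {
        uint32_t ex = (x >> 23) & 0xffu;
        uint32_t ey = (y >> 23) & 0xffu;
        return ex == 0 || ex == 0xffu || ey == 0 || ey == 0xffu;
    }
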
@@ -655,16 +620,10 @@ LSYM(Ldv_x):
 	and	r0, ip, #0x80000000

 	@ Ensure result will land to known bit position.
+	@ Apply exponent bias accordingly.
 	cmp	r3, r1
-	subcc	r2, r2, #(1 << 22)
 	movcc	r3, r3, lsl #1
+	adc	r2, r2, #(127 - 2)

-	@ Apply exponent bias, check range for over/underflow.
-	add	r2, r2, #(127 << 22)
-	cmn	r2, #(24 << 22)
-	RETc(le)
-	cmp	r2, #(255 << 22)
-	bge	LSYM(Lml_o)

 	@ The actual division loop.
 	mov	ip, #0x00800000
@@ -684,44 +643,29 @@ LSYM(Ldv_x):
 	movnes	ip, ip, lsr #4
 	bne	1b

-	@ Check if denormalized result is needed.
-	cmp	r2, #0
-	ble	LSYM(Ldv_u)
+	@ Check exponent for under/overflow.
+	cmp	r2, #(254 - 1)
+	bhi	LSYM(Lml_u)

-	@ Apply proper rounding.
+	@ Round the result, merge final exponent.
 	cmp	r3, r1
-	addcs	r0, r0, #1
+	adc	r0, r0, r2, lsl #23
 	biceq	r0, r0, #1
-
-	@ Add exponent to result.
-	bic	r0, r0, #0x00800000
-	orr	r0, r0, r2, lsl #1
 	RET

 	@ Division by 0x1p*: let's shortcut a lot of code.
 LSYM(Ldv_1):
 	and	ip, ip, #0x80000000
 	orr	r0, ip, r0, lsr #9
-	add	r2, r2, #(127 << 22)
-	cmp	r2, #(255 << 22)
-	bge	LSYM(Lml_o)
-	cmp	r2, #0
-	orrgt	r0, r0, r2, lsl #1
+	adds	r2, r2, #127
+	rsbgts	r3, r2, #255
+	orrgt	r0, r0, r2, lsl #23
 	RETc(gt)

-	cmn	r2, #(24 << 22)
-	movle	r0, ip
-	RETc(le)
-
 	orr	r0, r0, #0x00800000
 	mov	r3, #0
-
-	@ Result must be denormalized: prepare parameters to use code above.
-	@ r3 already contains remainder for rounding considerations.
-LSYM(Ldv_u):
-	bic	ip, r0, #0x80000000
-	and	r0, r0, #0x80000000
-	mvn	r1, r2, asr #22
-	add	r1, r1, #2
-	b	LSYM(Lml_ur)
+	subs	r2, r2, #1
+	b	LSYM(Lml_u)

 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
@@ -730,35 +674,40 @@ LSYM(Ldv_d):
 	and	ip, r0, #0x80000000
 1:	moveq	r0, r0, lsl #1
 	tsteq	r0, #0x00800000
-	subeq	r2, r2, #(1 << 22)
+	subeq	r2, r2, #1
 	beq	1b
 	orr	r0, r0, ip
 	teq	r3, #0
 	and	ip, r1, #0x80000000
 2:	moveq	r1, r1, lsl #1
 	tsteq	r1, #0x00800000
-	subeq	r3, r3, #(1 << 23)
+	subeq	r3, r3, #1
 	beq	2b
 	orr	r1, r1, ip
 	b	LSYM(Ldv_x)

-	@ One or both arguments is either INF, NAN or zero.
+	@ One or both arguments are either INF, NAN, zero or denormalized.
 LSYM(Ldv_s):
-	mov	ip, #0xff000000
-	teq	r2, ip, lsr #1
-	teqeq	r3, ip, lsr #1
-	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
-	teq	r2, ip, lsr #1
+	and	r3, ip, r1, lsr #23
+	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
 	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
-	b	LSYM(Lml_i)		@ INF / <anything> -> INF
-1:	teq	r3, ip, lsr #1
+	teq	r3, ip
+	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	mov	r0, r1
+	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+1:	teq	r3, ip
 	bne	2f
 	movs	r3, r1, lsl #9
-	bne	LSYM(Lml_n)		@ <anything> / NAN -> NAN
-	b	LSYM(Lml_z)		@ <anything> / INF -> 0
-2:	@ One or both arguments are 0.
+	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	mov	r0, r1
+	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+
+2:	@ If both are non-zero, we need to normalize and resume above.
+	bics	ip, r0, #0x80000000
+	bicnes	ip, r1, #0x80000000
+	bne	LSYM(Ldv_d)
+
+	@ One or both arguments are zero.
 	bics	r2, r0, #0x80000000
 	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
 	bics	r3, r1, #0x80000000
@@ -789,85 +738,50 @@ LSYM(Ldv_s):
 ARM_FUNC_START gtsf2
 ARM_FUNC_ALIAS gesf2 gtsf2
-	mov	r3, #-1
+	mov	ip, #-1
 	b	1f

 ARM_FUNC_START ltsf2
 ARM_FUNC_ALIAS lesf2 ltsf2
-	mov	r3, #1
+	mov	ip, #1
 	b	1f

 ARM_FUNC_START cmpsf2
 ARM_FUNC_ALIAS nesf2 cmpsf2
 ARM_FUNC_ALIAS eqsf2 cmpsf2
-	mov	r3, #1			@ how should we specify unordered here?
-
-	@ Both Inf and NaN have an exponent of 255.  Therefore, we
-	@ compute (r1 & 0x8f80000) || (r2 & 0x8f8000).
-1:	mov	ip, #0xff000000
-	and	r2, r1, ip, lsr #1
-	teq	r2, ip, lsr #1
-	and	r2, r0, ip, lsr #1
-	teqne	r2, ip, lsr #1
+	mov	ip, #1			@ how should we specify unordered here?
+
+1:	str	ip, [sp, #-4]
+
+	@ Trap any INF/NAN first.
+	mov	r2, r0, lsl #1
+	mov	r3, r1, lsl #1
+	mvns	ip, r2, asr #24
+	mvnnes	ip, r3, asr #24
 	beq	3f

-	@ Test for equality.  The representations of +0.0 and -0.0
-	@ have all bits set to zero, except for the sign bit.  Since
-	@ 0.0 is equal to -0.0, we begin by testing
-	@ ((r0 | r1) & ~0x8000000).
-2:	orr	r3, r0, r1
-	@ If the result of the bitwise and is zero, then the Z flag
-	@ will be set.  In any case, the C flag will be set.
-	bics	r3, r3, #0x80000000	@ either 0.0 or -0.0
-	teqne	r0, r1			@ or both the same
-	@ If the Z flag is set, the two operands were equal.  Return zero.
-	moveq	r0, #0
-	RETc(eq)
-
-	@ Check for sign difference.  The N flag is set (due to the
-	@ use of teq above) if the sign bit is set on exactly one
-	@ of the operands.  Return the sign of the first operand.
-	movmi	r0, r0, asr #31
-	orrmi	r0, r0, #1
-	RETc(mi)
-
-	@ Compare exponents.
-	and	r3, r1, ip, lsr #1
-	cmp	r2, r3
-
-	@ Compare mantissa if exponents are equal
-	moveq	r0, r0, lsl #9
-	cmpeq	r0, r1, lsl #9
-	@ We know the operands cannot be equal at this point, so the
-	@ Z flag is clear.  The C flag is set if the first operand has
-	@ the greater exponent, or the exponents are equal and the
-	@ first operand has the greater mantissa.  Therefore, if the C
-	@ flag is set, the first operand is greater iff the sign is
-	@ positive.  These next two instructions will put zero in
-	@ r0 if the first operand is greater, and -1 if the second
-	@ operand is greater.
-	movcs	r0, r1, asr #31
-	mvncc	r0, r1, asr #31
-	@ If r0 is 0, the first operand is greater, so return 1.  Leave
-	@ -1 unchanged.
-	orr	r0, r0, #1
+	@ Compare values.
+	@ Note that 0.0 is equal to -0.0.
+2:	orrs	ip, r2, r3, lsr #1	@ test if both are 0, clear C flag
+	teqne	r0, r1			@ if not 0 compare sign
+	subpls	r0, r2, r3		@ if same sign compare values, set r0
+
+	@ Result:
+	movhi	r0, r1, asr #31
+	mvnlo	r0, r1, asr #31
+	orrne	r0, r0, #1
 	RET

-	@ We know that at least one argument is either Inf or NaN.
-	@ Look for a NaN.
-3:	and	r2, r1, ip, lsr #1
-	teq	r2, ip, lsr #1
+	@ Look for a NAN.
+3:	mvns	ip, r2, asr #24
 	bne	4f
-	movs	r2, r1, lsl #9
-	bne	5f			@ r1 is NAN
-4:	and	r2, r0, ip, lsr #1
-	teq	r2, ip, lsr #1
-	bne	2b
 	movs	ip, r0, lsl #9
-	beq	2b			@ r0 is not NAN
-5:	@ The Z flag is clear at this point.
-	mov	r0, r3			@ return unordered code from r3.
+	bne	5f			@ r0 is NAN
+4:	mvns	ip, r3, asr #24
+	bne	2b
+	movs	ip, r1, lsl #9
+	beq	2b			@ r1 is not NAN
+5:	ldr	r0, [sp, #-4]		@ return unordered code.
 	RET

 FUNC_END gesf2
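
The new comparison core exploits the fact that, NANs excluded, IEEE single-precision values order like sign-magnitude integers: shift out the sign, dispose of the +0.0/-0.0 pair, compare signs, then compare magnitudes directly. A C rendering of that logic (ours; the asm produces -1/0/+1 through conditional moves on the same flags):

    #include <stdint.h>

    /* Ordered comparison on raw bit patterns, as in cmpsf2 above.
       Returns <0, 0, >0.  NANs must already have been filtered out. */
    static int cmp_float_bits(uint32_t x, uint32_t y)
    {
        uint32_t xm = x << 1, ym = y << 1;   /* magnitudes, sign dropped */
        if ((xm | ym) == 0)
            return 0;                        /* +0.0 == -0.0 */
        if ((x ^ y) & 0x80000000u)           /* signs differ */
            return (x & 0x80000000u) ? -1 : 1;
        if (xm == ym)
            return 0;
        /* same sign: bigger magnitude wins for +, loses for - */
        int less = (xm < ym) ? -1 : 1;
        return (x & 0x80000000u) ? -less : less;
    }
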
@@ -879,13 +793,15 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
 FUNC_END cmpsf2

 ARM_FUNC_START aeabi_cfrcmple
 	mov	ip, r0
 	mov	r0, r1
 	mov	r1, ip
 	b	6f

 ARM_FUNC_START aeabi_cfcmpeq
 ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
 	@ The status-returning routines are required to preserve all
 	@ registers except ip, lr, and cpsr.
 6:	stmfd	sp!, {r0, r1, r2, r3, lr}
@@ -896,68 +812,79 @@ ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
 	@ that the first operand was smaller than the second.
 	cmnmi	r0, #0
 	RETLDM	"r0, r1, r2, r3"

 	FUNC_END aeabi_cfcmple
 	FUNC_END aeabi_cfcmpeq
+	FUNC_END aeabi_cfrcmple

 ARM_FUNC_START aeabi_fcmpeq
 	str	lr, [sp, #-4]!
 	ARM_CALL aeabi_cfcmple
 	moveq	r0, #1			@ Equal to.
 	movne	r0, #0			@ Less than, greater than, or unordered.
 	RETLDM

 	FUNC_END aeabi_fcmpeq

 ARM_FUNC_START aeabi_fcmplt
 	str	lr, [sp, #-4]!
 	ARM_CALL aeabi_cfcmple
 	movcc	r0, #1			@ Less than.
 	movcs	r0, #0			@ Equal to, greater than, or unordered.
 	RETLDM

 	FUNC_END aeabi_fcmplt

 ARM_FUNC_START aeabi_fcmple
 	str	lr, [sp, #-4]!
 	ARM_CALL aeabi_cfcmple
 	movls	r0, #1			@ Less than or equal to.
 	movhi	r0, #0			@ Greater than or unordered.
 	RETLDM

 	FUNC_END aeabi_fcmple

 ARM_FUNC_START aeabi_fcmpge
 	str	lr, [sp, #-4]!
 	ARM_CALL aeabi_cfrcmple
 	movls	r0, #1			@ Operand 2 is less than or equal to operand 1.
 	movhi	r0, #0			@ Operand 2 greater than operand 1, or unordered.
 	RETLDM

 	FUNC_END aeabi_fcmpge

 ARM_FUNC_START aeabi_fcmpgt
 	str	lr, [sp, #-4]!
 	ARM_CALL aeabi_cfrcmple
 	movcc	r0, #1			@ Operand 2 is less than operand 1.
 	movcs	r0, #0			@ Operand 2 is greater than or equal to operand 1,
 					@ or they are unordered.
 	RETLDM

 	FUNC_END aeabi_fcmpgt

 #endif /* L_cmpsf2 */

 #ifdef L_unordsf2
 ARM_FUNC_START unordsf2
 ARM_FUNC_ALIAS aeabi_fcmpun unordsf2

-	mov	ip, #0xff000000
-	and	r2, r1, ip, lsr #1
-	teq	r2, ip, lsr #1
+	mov	r2, r0, lsl #1
+	mov	r3, r1, lsl #1
+	mvns	ip, r2, asr #24
 	bne	1f
-	movs	r2, r1, lsl #9
-	bne	3f			@ r1 is NAN
-1:	and	r2, r0, ip, lsr #1
-	teq	r2, ip, lsr #1
-	bne	2f
-	movs	r2, r0, lsl #9
+	movs	ip, r0, lsl #9
 	bne	3f			@ r0 is NAN
+1:	mvns	ip, r3, asr #24
+	bne	2f
+	movs	ip, r1, lsl #9
+	bne	3f			@ r1 is NAN
 2:	mov	r0, #0			@ arguments are ordered.
 	RET

 3:	mov	r0, #1			@ arguments are unordered.
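
The unordered test now needs only two checks per operand, both on shifted copies: `mvns ip, rN, asr #24` sets Z exactly when all eight exponent bits are ones, and `movs ip, rX, lsl #9` then tests for a nonzero mantissa. In C the whole NAN test collapses to a single unsigned compare (a sketch):

    #include <stdint.h>

    /* x is a NAN iff, with the sign shifted out, the remaining bits
       exceed the INF pattern: exponent all ones, mantissa nonzero. */
    static int is_nan_bits(uint32_t x)
    {
        return (x << 1) > 0xff000000u;
    }
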
@@ -972,37 +899,35 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
 ARM_FUNC_START fixsfsi
 ARM_FUNC_ALIAS aeabi_f2iz fixsfsi

-	movs	r0, r0, lsl #1
-	RETc(eq)			@ value is 0.
-
-	mov	r1, r1, rrx		@ preserve C flag (the actual sign)
-
 	@ check exponent range.
-	and	r2, r0, #0xff000000
+	mov	r2, r0, lsl #1
 	cmp	r2, #(127 << 24)
-	movcc	r0, #0			@ value is too small
-	RETc(cc)
-	cmp	r2, #((127 + 31) << 24)
-	bcs	1f			@ value is too large
+	bcc	1f			@ value is too small
+	mov	r3, #(127 + 31)
+	subs	r2, r3, r2, lsr #24
+	bls	2f			@ value is too large

-	mov	r0, r0, lsl #7
-	orr	r0, r0, #0x80000000
-	mov	r2, r2, lsr #24
-	rsb	r2, r2, #(127 + 31)
-	tst	r1, #0x80000000		@ the sign bit
-	mov	r0, r0, lsr r2
+	@ scale value
+	mov	r3, r0, lsl #8
+	orr	r3, r3, #0x80000000
+	tst	r0, #0x80000000		@ the sign bit
+	mov	r0, r3, lsr r2
 	rsbne	r0, r0, #0
 	RET

-1:	teq	r2, #0xff000000
-	bne	2f
-	movs	r0, r0, lsl #8
-	bne	3f			@ r0 is NAN.
-2:	ands	r0, r1, #0x80000000	@ the sign bit
+1:	mov	r0, #0
+	RET
+
+2:	cmp	r2, #(127 + 31 - 0xff)
+	bne	3f
+	movs	r2, r0, lsl #9
+	bne	4f			@ r0 is NAN.
+3:	ands	r0, r0, #0x80000000	@ the sign bit
 	moveq	r0, #0x7fffffff		@ the maximum signed positive si
 	RET

-3:	mov	r0, #0			@ What should we convert NAN to?
+4:	mov	r0, #0			@ What should we convert NAN to?
 	RET

 	FUNC_END aeabi_f2iz
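
The rewritten fixsfsi derives everything from the exponent: values below 1.0 flush to zero, a shift count of 158 minus the exponent scales "1.mantissa" (rebuilt at the top of a register) down to the integer, and out-of-range inputs fall through to the saturation/NAN tail. A C sketch of the same flow (ours; like the asm it converts NAN to 0):

    #include <stdint.h>

    /* Truncating float -> int32 as in the rewritten fixsfsi above. */
    static int32_t f2iz(uint32_t x)
    {
        uint32_t mag = x << 1;                /* sign shifted out */
        if (mag < (uint32_t)127 << 24)
            return 0;                         /* |value| < 1.0 */
        int shift = (127 + 31) - (int)(mag >> 24);
        if (shift <= 0) {                     /* too large, INF or NAN */
            if (mag > 0xff000000u)
                return 0;                     /* NAN: same choice as the asm */
            return (x & 0x80000000u) ? INT32_MIN : INT32_MAX;
        }
        uint32_t m = (x << 8) | 0x80000000u;  /* 1.mantissa at bit 31 */
        uint32_t r = m >> shift;
        return (x & 0x80000000u) ? -(int32_t)r : (int32_t)r;
    }
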
@@ -1014,34 +939,33 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
 ARM_FUNC_START fixunssfsi
 ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi

-	movs	r0, r0, lsl #1
-	movcss	r0, #0			@ value is negative...
-	RETc(eq)			@ ... or 0.
-
 	@ check exponent range.
-	and	r2, r0, #0xff000000
+	movs	r2, r0, lsl #1
+	bcs	1f			@ value is negative
 	cmp	r2, #(127 << 24)
-	movcc	r0, #0			@ value is too small
-	RETc(cc)
-	cmp	r2, #((127 + 32) << 24)
-	bcs	1f			@ value is too large
+	bcc	1f			@ value is too small
+	mov	r3, #(127 + 31)
+	subs	r2, r3, r2, lsr #24
+	bmi	2f			@ value is too large

-	mov	r0, r0, lsl #7
-	orr	r0, r0, #0x80000000
-	mov	r2, r2, lsr #24
-	rsb	r2, r2, #(127 + 31)
-	mov	r0, r0, lsr r2
+	@ scale the value
+	mov	r3, r0, lsl #8
+	orr	r3, r3, #0x80000000
+	mov	r0, r3, lsr r2
+	RET
+
+1:	mov	r0, #0
 	RET

-1:	teq	r2, #0xff000000
-	bne	2f
-	movs	r0, r0, lsl #8
-	bne	3f			@ r0 is NAN.
-2:	mov	r0, #0xffffffff		@ maximum unsigned si
+2:	cmp	r2, #(127 + 31 - 0xff)
+	bne	3f
+	movs	r2, r0, lsl #9
+	bne	4f			@ r0 is NAN.
+3:	mov	r0, #0xffffffff		@ maximum unsigned si
 	RET

-3:	mov	r0, #0			@ What should we convert NAN to?
+4:	mov	r0, #0			@ What should we convert NAN to?
 	RET

 	FUNC_END aeabi_f2uiz