Commit 6883a666 by Nicolas Pitre, committed by Nicolas Pitre

ieee754-sf.S: Large speed improvements.

* config/arm/ieee754-sf.S: Large speed improvements. Fix NAN handling.
* config/arm/ieee754-df.S: Ditto.

From-SVN: r89364
parent 0aab7a4b
2004-10-21 Nicolas Pitre <nico@cam.org>
* config/arm/ieee754-sf.S: Large speed improvements. Fix NAN handling.
* config/arm/ieee754-df.S: Ditto.
2004-10-20 Zack Weinberg <zack@codesourcery.com>
* dbxout.c (asmfile): Delete. All uses changed to asm_out_file.
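Throughout both files a double lives in a register pair: xh is the high word, xl the low word (mapped onto r0/r1 or r1/r0 depending on endianness). A minimal C sketch of the encoding these routines manipulate; the names and helper below are illustrative, not part of the commit:

    /* IEEE-754 double as two 32-bit words, as seen by these routines. */
    #include <stdint.h>

    #define SIGN_BIT    0x80000000u  /* sign, high word */
    #define EXP_MASK    0x7ff00000u  /* 11-bit exponent, high word */
    #define MANT_H_MASK 0x000fffffu  /* top 20 of the 52 mantissa bits */
    #define QNAN_BIT    0x00080000u  /* quiet bit used when returning NAN */

    static inline unsigned biased_exp(uint32_t xh)
    {
        /* matches the new "ands r4, ip, xh, lsr #20" with ip = 0x7ff */
        return (xh >> 20) & 0x7ff;
    }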
gcc/config/arm/ieee754-df.S
@@ -60,6 +60,7 @@
ARM_FUNC_START negdf2
ARM_FUNC_ALIAS aeabi_dneg negdf2
@ flip sign bit
eor xh, xh, #0x80000000
RET
@@ -76,10 +77,10 @@ ARM_FUNC_START aeabi_drsub
eor xh, xh, #0x80000000 @ flip sign bit of first arg
b 1f
ARM_FUNC_START subdf3
ARM_FUNC_START subdf3
ARM_FUNC_ALIAS aeabi_dsub subdf3
@ flip sign bit of second arg
eor yh, yh, #0x80000000
eor yh, yh, #0x80000000 @ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
b 1f @ Skip Thumb-code prologue
#endif
@@ -87,36 +88,23 @@ ARM_FUNC_ALIAS aeabi_dsub subdf3
ARM_FUNC_START adddf3
ARM_FUNC_ALIAS aeabi_dadd adddf3
1: @ Compare both args, return zero if equal but the sign.
teq xl, yl
eoreq ip, xh, yh
teqeq ip, #0x80000000
beq LSYM(Lad_z)
@ If first arg is 0 or -0, return second arg.
@ If second arg is 0 or -0, return first arg.
orrs ip, xl, xh, lsl #1
moveq xl, yl
moveq xh, yh
orrnes ip, yl, yh, lsl #1
RETc(eq)
stmfd sp!, {r4, r5, lr}
@ Mask out exponents.
mov ip, #0x7f000000
orr ip, ip, #0x00f00000
and r4, xh, ip
and r5, yh, ip
1: stmfd sp!, {r4, r5, lr}
@ If either of them is 0x7ff, result will be INF or NAN
teq r4, ip
teqne r5, ip
beq LSYM(Lad_i)
@ Look for zeroes, equal values, INF, or NAN.
mov r4, xh, lsl #1
mov r5, yh, lsl #1
teq r4, r5
teqeq xl, yl
orrnes ip, r4, xl
orrnes ip, r5, yl
mvnnes ip, r4, asr #21
mvnnes ip, r5, asr #21
beq LSYM(Lad_s)
@ Compute exponent difference. Make largest exponent in r4,
@ corresponding arg in xh-xl, and positive exponent difference in r5.
subs r5, r5, r4
mov r4, r4, lsr #21
rsbs r5, r4, r5, lsr #21
rsblt r5, r5, #0
ble 1f
add r4, r4, r5
@@ -127,24 +115,24 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
eor yl, xl, yl
eor yh, xh, yh
1:
@ If exponent difference is too large, return largest argument
@ already in xh-xl. We need up to 54 bit to handle proper rounding
@ of 0x1p54 - 1.1.
cmp r5, #(54 << 20)
cmp r5, #54
RETLDM "r4, r5" hi
@ Convert mantissa to signed integer.
tst xh, #0x80000000
bic xh, xh, ip, lsl #1
orr xh, xh, #0x00100000
mov xh, xh, lsl #12
mov ip, #0x00100000
orr xh, ip, xh, lsr #12
beq 1f
rsbs xl, xl, #0
rsc xh, xh, #0
1:
tst yh, #0x80000000
bic yh, yh, ip, lsl #1
orr yh, yh, #0x00100000
mov yh, yh, lsl #12
orr yh, ip, yh, lsr #12
beq 1f
rsbs yl, yl, #0
rsc yh, yh, #0
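The rsbs/rsc pairs above negate the 64-bit mantissa held in two registers. A C equivalent of that idiom (illustrative):

    #include <stdint.h>

    /* Negate a 64-bit value split across two 32-bit halves.
       rsbs lo,lo,#0 sets carry when lo == 0 (no borrow);
       rsc hi,hi,#0 then consumes that borrow. */
    static void neg64(uint32_t *hi, uint32_t *lo)
    {
        uint32_t borrow = (*lo != 0);
        *lo = 0u - *lo;
        *hi = 0u - *hi - borrow;
    }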
@@ -154,42 +142,30 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
teq r4, r5
beq LSYM(Lad_d)
LSYM(Lad_x):
@ Scale down second arg with exponent difference.
@ Apply shift one bit left to first arg and the rest to second arg
@ to simplify things later, but only if exponent does not become 0.
mov ip, #0
movs r5, r5, lsr #20
beq 3f
teq r4, #(1 << 20)
beq 1f
movs xl, xl, lsl #1
adc xh, ip, xh, lsl #1
sub r4, r4, #(1 << 20)
subs r5, r5, #1
beq 3f
@ Shift yh-yl right per r5, keep leftover bits into ip.
1: rsbs lr, r5, #32
blt 2f
@ Compensate for the exponent overlapping the mantissa MSB added later
sub r4, r4, #1
@ Shift yh-yl right per r5, add to xh-xl, keep leftover bits into ip.
rsbs lr, r5, #32
blt 1f
mov ip, yl, lsl lr
mov yl, yl, lsr r5
orr yl, yl, yh, lsl lr
mov yh, yh, asr r5
b 3f
2: sub r5, r5, #32
adds xl, xl, yl, lsr r5
adc xh, xh, #0
adds xl, xl, yh, lsl lr
adcs xh, xh, yh, asr r5
b 2f
1: sub r5, r5, #32
add lr, lr, #32
cmp yl, #1
adc ip, ip, yh, lsl lr
mov yl, yh, asr r5
mov yh, yh, asr #32
3:
@ the actual addition
adds xl, xl, yl
adc xh, xh, yh
mov ip, yh, lsl lr
orrcs ip, ip, #2 @ 2 not 1, to allow lsr #1 later
adds xl, xl, yh, asr r5
adcs xh, xh, yh, asr #31
2:
@ We now have a result in xh-xl-ip.
@ Keep absolute value in xh-xl-ip, sign in r5.
ands r5, xh, #0x80000000
@ Keep absolute value in xh-xl-ip, sign in r5 (the n bit was set above)
and r5, xh, #0x80000000
bpl LSYM(Lad_p)
rsbs ip, ip, #0
rscs xl, xl, #0
@@ -198,75 +174,66 @@ LSYM(Lad_x):
@ Determine how to normalize the result.
LSYM(Lad_p):
cmp xh, #0x00100000
bcc LSYM(Lad_l)
bcc LSYM(Lad_a)
cmp xh, #0x00200000
bcc LSYM(Lad_r0)
cmp xh, #0x00400000
bcc LSYM(Lad_r1)
bcc LSYM(Lad_e)
@ Result needs to be shifted right.
movs xh, xh, lsr #1
movs xl, xl, rrx
movs ip, ip, rrx
orrcs ip, ip, #1
add r4, r4, #(1 << 20)
LSYM(Lad_r1):
movs xh, xh, lsr #1
movs xl, xl, rrx
movs ip, ip, rrx
orrcs ip, ip, #1
add r4, r4, #(1 << 20)
mov ip, ip, rrx
add r4, r4, #1
@ Make sure we did not bust our exponent.
mov r2, r4, lsl #21
cmn r2, #(2 << 21)
bcs LSYM(Lad_o)
@ Our result is now properly aligned into xh-xl, remaining bits in ip.
@ Round with MSB of ip. If halfway between two numbers, round towards
@ LSB of xl = 0.
LSYM(Lad_r0):
adds xl, xl, ip, lsr #31
adc xh, xh, #0
teq ip, #0x80000000
biceq xl, xl, #1
@ One extreme rounding case may add a new MSB. Adjust exponent.
@ That MSB will be cleared when exponent is merged below.
tst xh, #0x00200000
addne r4, r4, #(1 << 20)
@ Make sure we did not bust our exponent.
adds ip, r4, #(1 << 20)
bmi LSYM(Lad_o)
@ Pack final result together.
LSYM(Lad_e):
bic xh, xh, #0x00300000
orr xh, xh, r4
cmp ip, #0x80000000
moveqs ip, xl, lsr #1
adcs xl, xl, #0
adc xh, xh, r4, lsl #20
orr xh, xh, r5
RETLDM "r4, r5"
LSYM(Lad_l):
@ Result must be shifted left and exponent adjusted.
@ No rounding necessary since ip will always be 0.
LSYM(Lad_a):
movs ip, ip, lsl #1
adcs xl, xl, xl
adc xh, xh, xh
tst xh, #0x00100000
sub r4, r4, #1
bne LSYM(Lad_e)
@ No rounding necessary since ip will always be 0 at this point.
LSYM(Lad_l):
#if __ARM_ARCH__ < 5
teq xh, #0
movne r3, #-11
moveq r3, #21
movne r3, #20
moveq r3, #52
moveq xh, xl
moveq xl, #0
mov r2, xh
movs ip, xh, lsr #16
moveq r2, r2, lsl #16
addeq r3, r3, #16
tst r2, #0xff000000
moveq r2, r2, lsl #8
addeq r3, r3, #8
tst r2, #0xf0000000
moveq r2, r2, lsl #4
addeq r3, r3, #4
tst r2, #0xc0000000
moveq r2, r2, lsl #2
addeq r3, r3, #2
tst r2, #0x80000000
addeq r3, r3, #1
cmp r2, #(1 << 16)
movhs r2, r2, lsr #16
subhs r3, r3, #16
cmp r2, #(1 << 8)
movhs r2, r2, lsr #8
subhs r3, r3, #8
cmp r2, #(1 << 4)
movhs r2, r2, lsr #4
subhs r3, r3, #4
cmp r2, #(1 << 2)
subhs r3, r3, #2
sublo r3, r3, r2, lsr #1
sub r3, r3, r2, lsr #3
#else
@@ -302,13 +269,15 @@ LSYM(Lad_l):
movle xl, xl, lsl r2
@ adjust exponent accordingly.
3: subs r4, r4, r3, lsl #20
bgt LSYM(Lad_e)
3: subs r4, r4, r3
addge xh, xh, r4, lsl #20
orrge xh, xh, r5
RETLDM "r4, r5" ge
@ Exponent too small, denormalize result.
@ Find out proper shift value.
mvn r4, r4, asr #20
subs r4, r4, #30
mvn r4, r4
subs r4, r4, #31
bge 2f
adds r4, r4, #12
bgt 1f
@@ -337,23 +306,49 @@ LSYM(Lad_l):
RETLDM "r4, r5"
@ Adjust exponents for denormalized arguments.
@ Note that r4 must not remain equal to 0.
LSYM(Lad_d):
teq r4, #0
eoreq xh, xh, #0x00100000
addeq r4, r4, #(1 << 20)
eor yh, yh, #0x00100000
subne r5, r5, #(1 << 20)
eoreq xh, xh, #0x00100000
addeq r4, r4, #1
subne r5, r5, #1
b LSYM(Lad_x)
@ Result is x - x = 0, unless x = INF or NAN.
LSYM(Lad_z):
sub ip, ip, #0x00100000 @ ip becomes 0x7ff00000
and r2, xh, ip
teq r2, ip
orreq xh, ip, #0x00080000
LSYM(Lad_s):
mvns ip, r4, asr #21
mvnnes ip, r5, asr #21
beq LSYM(Lad_i)
teq r4, r5
teqeq xl, yl
beq 1f
@ Result is x + 0.0 = x or 0.0 + y = y.
teq r4, #0
moveq xh, yh
moveq xl, yl
RETLDM "r4, r5"
1: teq xh, yh
@ Result is x - x = 0.
movne xh, #0
mov xl, #0
RET
movne xl, #0
RETLDM "r4, r5" ne
@ Result is x + x = 2x.
movs ip, r4, lsr #21
bne 2f
movs xl, xl, lsl #1
adcs xh, xh, xh
orrcs xh, xh, #0x80000000
RETLDM "r4, r5"
2: adds r4, r4, #(2 << 21)
addcc xh, xh, #(1 << 20)
RETLDM "r4, r5" cc
and r5, xh, #0x80000000
@ Overflow: return INF.
LSYM(Lad_o):
@@ -367,19 +362,18 @@ LSYM(Lad_o):
@ if yh-yl != INF/NAN: return xh-xl (which is INF/NAN)
@ if either is NAN: return NAN
@ if opposite sign: return NAN
@ return xh-xl (which is INF or -INF)
@ otherwise return xh-xl (which is INF or -INF)
LSYM(Lad_i):
teq r4, ip
mvns ip, r4, asr #21
movne xh, yh
movne xl, yl
teqeq r5, ip
RETLDM "r4, r5" ne
mvneqs ip, r5, asr #21
movne yh, xh
movne yl, xl
orrs r4, xl, xh, lsl #12
orreqs r4, yl, yh, lsl #12
orreqs r5, yl, yh, lsl #12
teqeq xh, yh
orrne xh, r5, #0x00080000
movne xl, #0
orrne xh, xh, #0x00080000 @ quiet NAN
RETLDM "r4, r5"
FUNC_END aeabi_dsub
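The new Lad_e sequence (cmp ip, #0x80000000 / moveqs ip, xl, lsr #1 / adcs ...) implements round-to-nearest-even: the carry fed into the final add comes from the leftover bits, and on an exact tie it is taken from the mantissa LSB. A C sketch of the rule, with illustrative names:

    #include <stdint.h>

    /* mant: the 53-bit result; leftover: bits shifted out, MSB-aligned. */
    static uint64_t round_rne(uint64_t mant, uint32_t leftover)
    {
        if (leftover > 0x80000000u)
            return mant + 1;          /* more than half an ulp: round up */
        if (leftover == 0x80000000u)
            return mant + (mant & 1); /* tie: make the LSB even */
        return mant;                  /* less than half an ulp: truncate */
    }

In the assembly the rounding carry is allowed to propagate straight into the exponent field, which transparently handles the mantissa overflowing to 0x1p53.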
@@ -389,14 +383,17 @@ LSYM(Lad_i):
ARM_FUNC_START floatunsidf
ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
mov r4, #(0x400 << 20) @ initial exponent
add r4, r4, #((52-1) << 20)
mov r4, #0x400 @ initial exponent
add r4, r4, #(52-1 - 1)
mov r5, #0 @ sign bit is 0
.ifnc xl, r0
mov xl, r0
.endif
mov xh, #0
b LSYM(Lad_l)
@@ -405,15 +402,18 @@ ARM_FUNC_ALIAS aeabi_ui2d floatunsidf
ARM_FUNC_START floatsidf
ARM_FUNC_ALIAS aeabi_i2d floatsidf
teq r0, #0
moveq r1, #0
RETc(eq)
stmfd sp!, {r4, r5, lr}
mov r4, #(0x400 << 20) @ initial exponent
add r4, r4, #((52-1) << 20)
mov r4, #0x400 @ initial exponent
add r4, r4, #(52-1 - 1)
ands r5, r0, #0x80000000 @ sign bit in r5
rsbmi r0, r0, #0 @ absolute value
.ifnc xl, r0
mov xl, r0
.endif
mov xh, #0
b LSYM(Lad_l)
@@ -422,26 +422,23 @@ ARM_FUNC_ALIAS aeabi_i2d floatsidf
ARM_FUNC_START extendsfdf2
ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
movs r2, r0, lsl #1
beq 1f @ value is 0.0 or -0.0
movs r2, r0, lsl #1 @ toss sign bit
mov xh, r2, asr #3 @ stretch exponent
mov xh, xh, rrx @ retrieve sign bit
mov xl, r2, lsl #28 @ retrieve remaining bits
ands r2, r2, #0xff000000 @ isolate exponent
beq 2f @ exponent was 0 but not mantissa
teq r2, #0xff000000 @ check if INF or NAN
andnes r3, r2, #0xff000000 @ isolate exponent
teqne r3, #0xff000000 @ if not 0, check if INF or NAN
eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
RET
RETc(ne) @ and return it.
1: mov xh, r0
mov xl, #0
RET
teq r2, #0 @ if actually 0
teqne r3, #0xff000000 @ or INF or NAN
RETc(eq) @ we are done already.
2: @ value was denormalized. We can normalize it now.
@ value was denormalized. We can normalize it now.
stmfd sp!, {r4, r5, lr}
mov r4, #(0x380 << 20) @ setup corresponding exponent
add r4, r4, #(1 << 20)
mov r4, #0x380 @ setup corresponding exponent
and r5, xh, #0x80000000 @ move sign bit in r5
bic xh, xh, #0x80000000
b LSYM(Lad_l)
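The eor with 0x38000000 in extendsfdf2 is the exponent rebias: a single-precision exponent (bias 127) becomes a double one (bias 1023) by adding 0x380. For a normal input the transformation is, in illustrative C:

    #include <stdint.h>

    static void extend_sf_df(uint32_t f, uint32_t *xh, uint32_t *xl)
    {
        uint32_t sign = f & 0x80000000u;
        uint32_t exp  = (f >> 23) & 0xff;   /* assumes a normal number */
        uint32_t mant = f & 0x007fffffu;
        exp += 1023 - 127;                  /* the +0x380 rebias */
        *xh = sign | (exp << 20) | (mant >> 3);
        *xl = mant << 29;
    }

Zero, denormal, INF and NAN take the special paths shown above instead.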
@@ -451,76 +448,90 @@ ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
ARM_FUNC_START floatundidf
ARM_FUNC_ALIAS aeabi_ul2d floatundidf
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
adr ip, 1f
adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
mov r5, #0
b 2f
ARM_FUNC_START floatdidf
ARM_FUNC_ALIAS aeabi_l2d floatdidf
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqd f0, #0.0
#endif
RETc(eq)
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0/r1 for backwards
@ compatibility.
adr ip, 1f
adr ip, LSYM(f0_ret)
stmfd sp!, {r4, r5, ip, lr}
#else
stmfd sp!, {r4, r5, lr}
#endif
ands r5, ah, #0x80000000 @ sign bit in r5
bpl 2f
rsbs al, al, #0
rsc ah, ah, #0
2:
mov r4, #(0x400 << 20) @ initial exponent
add r4, r4, #((52 - 1) << 20)
#if !defined (__VFP_FP__) && !defined(__ARMEB__)
mov r4, #0x400 @ initial exponent
add r4, r4, #(52-1 - 1)
@ FPA little-endian: must swap the word order.
.ifnc xh, ah
mov ip, al
mov xh, ah
mov xl, ip
#endif
movs ip, xh, lsr #23
.endif
movs ip, xh, lsr #22
beq LSYM(Lad_p)
@ The value's too big. Scale it down a bit...
@ The value is too big. Scale it down a bit...
mov r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
movs ip, ip, lsr #3
addne r2, r2, #3
add r2, r2, ip
rsb r3, r2, #32
mov ip, xl, lsl r3
mov xl, xl, lsr r2
orr xl, xl, xh, lsl r3
mov xh, xh, lsr r2
add r4, r4, r2, lsl #20
add r4, r4, r2
b LSYM(Lad_p)
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
1:
@ Legacy code expects the result to be returned in f0. Copy it
@ there as well.
LSYM(f0_ret):
stmfd sp!, {r0, r1}
ldfd f0, [sp], #8
RETLDM
#endif
FUNC_END floatdidf
FUNC_END aeabi_l2d
FUNC_END floatundidf
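floatdidf must fit a 64-bit integer into a 53-bit mantissa; when the value is too wide it is scaled down with the shifted-out bits kept for rounding, and the initial exponent (0x400 + 50 above) adjusted by the shift count. A simplified C sketch, rounding omitted and names illustrative:

    #include <stdint.h>

    static double float_u64(uint64_t v)
    {
        if (v == 0)
            return 0.0;
        int e = 52;
        uint64_t sticky = 0;
        while (v >> 53) {             /* too wide: scale down */
            sticky |= v & 1;
            v >>= 1;
            e++;
        }
        while (!(v >> 52)) {          /* too narrow: normalize up */
            v <<= 1;
            e--;
        }
        (void)sticky;                 /* would feed round-to-nearest-even */
        union { uint64_t u; double d; } u;
        u.u = ((uint64_t)(e + 1023) << 52) | (v & 0x000fffffffffffffull);
        return u.d;
    }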
@@ -534,46 +545,38 @@ ARM_FUNC_START muldf3
ARM_FUNC_ALIAS aeabi_dmul muldf3
stmfd sp!, {r4, r5, r6, lr}
@ Mask out exponents.
mov ip, #0x7f000000
orr ip, ip, #0x00f00000
and r4, xh, ip
and r5, yh, ip
@ Trap any INF/NAN.
teq r4, ip
@ Mask out exponents, trap any zero/denormal/INF/NAN.
mov ip, #0xff
orr ip, ip, #0x700
ands r4, ip, xh, lsr #20
andnes r5, ip, yh, lsr #20
teqne r4, ip
teqne r5, ip
beq LSYM(Lml_s)
bleq LSYM(Lml_s)
@ Trap any multiplication by 0.
orrs r6, xl, xh, lsl #1
orrnes r6, yl, yh, lsl #1
beq LSYM(Lml_z)
@ Shift exponents right one bit to make room for overflow bit.
@ If either of them is 0, scale denormalized arguments off line.
@ Then add both exponents together.
movs r4, r4, lsr #1
teqne r5, #0
beq LSYM(Lml_d)
LSYM(Lml_x):
add r4, r4, r5, asr #1
@ Preserve final sign in r4 along with exponent for now.
teq xh, yh
orrmi r4, r4, #0x8000
@ Add exponents together
add r4, r4, r5
@ Determine final sign.
eor r6, xh, yh
@ Convert mantissa to unsigned integer.
bic xh, xh, ip, lsl #1
bic yh, yh, ip, lsl #1
@ If power of two, branch to a separate path.
bic xh, xh, ip, lsl #21
bic yh, yh, ip, lsl #21
orrs r5, xl, xh, lsl #12
orrnes r5, yl, yh, lsl #12
orr xh, xh, #0x00100000
orr yh, yh, #0x00100000
beq LSYM(Lml_1)
#if __ARM_ARCH__ < 4
@ Put sign bit in r6, which will be restored in yl later.
and r6, r6, #0x80000000
@ Well, no way to make it shorter without the umull instruction.
@ We must perform that 53 x 53 bit multiplication by hand.
stmfd sp!, {r7, r8, r9, sl, fp}
stmfd sp!, {r6, r7, r8, r9, sl, fp}
mov r7, xl, lsr #16
mov r8, yl, lsr #16
mov r9, xh, lsr #16
@@ -625,92 +628,83 @@ LSYM(Lml_x):
mul fp, xh, yh
adcs r5, r5, fp
adc r6, r6, #0
ldmfd sp!, {r7, r8, r9, sl, fp}
ldmfd sp!, {yl, r7, r8, r9, sl, fp}
#else
@ Here is the actual multiplication: 53 bits * 53 bits -> 106 bits.
@ Here is the actual multiplication.
umull ip, lr, xl, yl
mov r5, #0
umlal lr, r5, xl, yh
umlal lr, r5, xh, yl
and yl, r6, #0x80000000
umlal lr, r5, xl, yh
mov r6, #0
umlal r5, r6, xh, yh
#endif
@ The LSBs in ip are only significant for the final rounding.
@ Fold them into one bit of lr.
@ Fold them into lr.
teq ip, #0
orrne lr, lr, #1
@ Put final sign in xh.
mov xh, r4, lsl #16
bic r4, r4, #0x8000
@ Adjust result if one extra MSB appeared (one of four times).
tst r6, #(1 << 9)
beq 1f
add r4, r4, #(1 << 19)
movs r6, r6, lsr #1
movs r5, r5, rrx
movs lr, lr, rrx
orrcs lr, lr, #1
1:
@ Scale back to 53 bits.
@ xh contains sign bit already.
orr xh, xh, r6, lsl #12
orr xh, xh, r5, lsr #20
mov xl, r5, lsl #12
orr xl, xl, lr, lsr #20
@ Apply exponent bias, check range for underflow.
sub r4, r4, #0x00f80000
subs r4, r4, #0x1f000000
ble LSYM(Lml_u)
@ Round the result.
movs lr, lr, lsl #12
bpl 1f
adds xl, xl, #1
adc xh, xh, #0
teq lr, #0x80000000
biceq xl, xl, #1
@ Rounding may have produced an extra MSB here.
@ The extra bit is cleared before merging the exponent below.
tst xh, #0x00200000
addne r4, r4, #(1 << 19)
@ Adjust result upon the MSB position.
sub r4, r4, #0xff
cmp r6, #(1 << (20-11))
sbc r4, r4, #0x300
bcs 1f
movs lr, lr, lsl #1
adcs r5, r5, r5
adc r6, r6, r6
1:
@ Check exponent for overflow.
adds ip, r4, #(1 << 19)
tst ip, #(1 << 30)
bne LSYM(Lml_o)
@ Add final exponent.
bic xh, xh, #0x00300000
orr xh, xh, r4, lsl #1
@ Shift to final position, add sign to result.
orr xh, yl, r6, lsl #11
orr xh, xh, r5, lsr #21
mov xl, r5, lsl #11
orr xl, xl, lr, lsr #21
mov lr, lr, lsl #11
@ Check exponent range for under/overflow.
subs ip, r4, #(254 - 1)
cmphi ip, #0x700
bhi LSYM(Lml_u)
@ Round the result, merge final exponent.
cmp lr, #0x80000000
moveqs lr, xl, lsr #1
adcs xl, xl, #0
adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
and r6, r6, #0x80000000
orr xh, r6, xh
orr xl, xl, yl
eor xh, xh, yh
LSYM(Ldv_z):
bic xh, xh, #0x7fffffff
mov xl, #0
RETLDM "r4, r5, r6"
subs r4, r4, ip, lsr #1
rsbgts r5, r4, ip
orrgt xh, xh, r4, lsl #20
RETLDM "r4, r5, r6" gt
@ Under/overflow: fix things up for the code below.
orr xh, xh, #0x00100000
mov lr, #0
subs r4, r4, #1
@ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
cmn r4, #(53 << 19)
@ Overflow?
bgt LSYM(Lml_o)
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r4, #(53 + 1)
movle xl, #0
bicle xh, xh, #0x7fffffff
RETLDM "r4, r5, r6" le
@ Find out proper shift value.
LSYM(Lml_r):
mvn r4, r4, asr #19
subs r4, r4, #30
rsb r4, r4, #0
subs r4, r4, #32
bge 2f
adds r4, r4, #12
bgt 1f
@@ -721,14 +715,12 @@ LSYM(Lml_r):
mov r3, xl, lsl r5
mov xl, xl, lsr r4
orr xl, xl, xh, lsl r5
movs xh, xh, lsl #1
mov xh, xh, lsr r4
mov xh, xh, rrx
and r2, xh, #0x80000000
bic xh, xh, #0x80000000
adds xl, xl, r3, lsr #31
adc xh, xh, #0
teq lr, #0
teqeq r3, #0x80000000
biceq xl, xl, #1
adc xh, r2, xh, lsr r4
orrs lr, lr, r3, lsl #1
biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ shift result right of 21 to 31 bits, or left 11 to 1 bits after
@@ -741,53 +733,70 @@ LSYM(Lml_r):
bic xh, xh, #0x7fffffff
adds xl, xl, r3, lsr #31
adc xh, xh, #0
teq lr, #0
teqeq r3, #0x80000000
biceq xl, xl, #1
orrs lr, lr, r3, lsl #1
biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ Shift value right of 32 to 64 bits, or 0 to 32 bits after a switch
@ from xh to xl. Leftover bits are in r3-r6-lr for rounding.
2: rsb r5, r4, #32
mov r6, xl, lsl r5
orr lr, lr, xl, lsl r5
mov r3, xl, lsr r4
orr r3, r3, xh, lsl r5
mov xl, xh, lsr r4
bic xh, xh, #0x7fffffff
bic xl, xl, xh, lsr r4
add xl, xl, r3, lsr #31
orrs r6, r6, lr
teqeq r3, #0x80000000
biceq xl, xl, #1
orrs lr, lr, r3, lsl #1
biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
LSYM(Lml_d):
mov lr, #0
teq r4, #0
bne 2f
and r6, xh, #0x80000000
1: movs xl, xl, lsl #1
adc xh, lr, xh, lsl #1
adc xh, xh, xh
tst xh, #0x00100000
subeq r4, r4, #(1 << 19)
subeq r4, r4, #1
beq 1b
orr xh, xh, r6
teq r5, #0
bne LSYM(Lml_x)
movne pc, lr
2: and r6, yh, #0x80000000
3: movs yl, yl, lsl #1
adc yh, lr, yh, lsl #1
adc yh, yh, yh
tst yh, #0x00100000
subeq r5, r5, #(1 << 20)
subeq r5, r5, #1
beq 3b
orr yh, yh, r6
b LSYM(Lml_x)
mov pc, lr
@ One or both args are INF or NAN.
LSYM(Lml_s):
@ Isolate the INF and NAN cases away
teq r4, ip
and r5, ip, yh, lsr #20
teqne r5, ip
beq 1f
@ Here, one or more arguments are either denormalized or zero.
orrs r6, xl, xh, lsl #1
orrnes r6, yl, yh, lsl #1
bne LSYM(Lml_d)
@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
eor xh, xh, yh
bic xh, xh, #0x7fffffff
mov xl, #0
RETLDM "r4, r5, r6"
1: @ One or both args are INF or NAN.
orrs r6, xl, xh, lsl #1
moveq xl, yl
moveq xh, yh
orrnes r6, yl, yh, lsl #1
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
teq r4, ip
......@@ -797,6 +806,8 @@ LSYM(Lml_s):
1: teq r5, ip
bne LSYM(Lml_i)
orrs r6, yl, yh, lsl #12
movne xl, yl
movne xh, yh
bne LSYM(Lml_n) @ <anything> * NAN -> NAN
@ Result is INF, but we need to determine its sign.
......@@ -811,9 +822,9 @@ LSYM(Lml_o):
mov xl, #0
RETLDM "r4, r5, r6"
@ Return NAN.
@ Return a quiet NAN.
LSYM(Lml_n):
mov xh, #0x7f000000
orr xh, xh, #0x7f000000
orr xh, xh, #0x00f80000
RETLDM "r4, r5, r6"
@@ -825,41 +836,31 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
stmfd sp!, {r4, r5, r6, lr}
@ Mask out exponents.
mov ip, #0x7f000000
orr ip, ip, #0x00f00000
and r4, xh, ip
and r5, yh, ip
@ Trap any INF/NAN or zeroes.
teq r4, ip
@ Mask out exponents, trap any zero/denormal/INF/NAN.
mov ip, #0xff
orr ip, ip, #0x700
ands r4, ip, xh, lsr #20
andnes r5, ip, yh, lsr #20
teqne r4, ip
teqne r5, ip
orrnes r6, xl, xh, lsl #1
orrnes r6, yl, yh, lsl #1
beq LSYM(Ldv_s)
bleq LSYM(Ldv_s)
@ Shift exponents right one bit to make room for overflow bit.
@ If either of them is 0, scale denormalized arguments off line.
@ Then subtract divisor exponent from dividend's.
movs r4, r4, lsr #1
teqne r5, #0
beq LSYM(Ldv_d)
LSYM(Ldv_x):
sub r4, r4, r5, asr #1
@ Subtract divisor exponent from dividend's.
sub r4, r4, r5
@ Preserve final sign into lr.
eor lr, xh, yh
@ Convert mantissa to unsigned integer.
@ Dividend -> r5-r6, divisor -> yh-yl.
mov r5, #0x10000000
orrs r5, yl, yh, lsl #12
mov xh, xh, lsl #12
beq LSYM(Ldv_1)
mov yh, yh, lsl #12
mov r5, #0x10000000
orr yh, r5, yh, lsr #4
orr yh, yh, yl, lsr #24
movs yl, yl, lsl #8
mov xh, xh, lsl #12
teqeq yh, r5
beq LSYM(Ldv_1)
mov yl, yl, lsl #8
orr r5, r5, xh, lsr #4
orr r5, r5, xl, lsr #24
mov r6, xl, lsl #8
@@ -868,21 +869,15 @@ LSYM(Ldv_x):
and xh, lr, #0x80000000
@ Ensure result will land to known bit position.
@ Apply exponent bias accordingly.
cmp r5, yh
cmpeq r6, yl
adc r4, r4, #(255 - 2)
add r4, r4, #0x300
bcs 1f
sub r4, r4, #(1 << 19)
movs yh, yh, lsr #1
mov yl, yl, rrx
1:
@ Apply exponent bias, check range for over/underflow.
add r4, r4, #0x1f000000
add r4, r4, #0x00f80000
cmn r4, #(53 << 19)
ble LSYM(Ldv_z)
cmp r4, ip, lsr #1
bge LSYM(Lml_o)
@ Perform first subtraction to align result to a nibble.
subs r6, r6, yl
sbc r5, r5, yh
@@ -944,73 +939,42 @@ LSYM(Ldv_x):
orreq xh, xh, xl
moveq xl, #0
3:
@ Check if denormalized result is needed.
cmp r4, #0
ble LSYM(Ldv_u)
@ Check exponent range for under/overflow.
subs ip, r4, #(254 - 1)
cmphi ip, #0x700
bhi LSYM(Lml_u)
@ Apply proper rounding.
@ Round the result, merge final exponent.
subs ip, r5, yh
subeqs ip, r6, yl
moveqs ip, xl, lsr #1
adcs xl, xl, #0
adc xh, xh, #0
teq ip, #0
biceq xl, xl, #1
@ Add exponent to result.
bic xh, xh, #0x00100000
orr xh, xh, r4, lsl #1
adc xh, xh, r4, lsl #20
RETLDM "r4, r5, r6"
@ Division by 0x1p*: shortcut a lot of code.
LSYM(Ldv_1):
and lr, lr, #0x80000000
orr xh, lr, xh, lsr #12
add r4, r4, #0x1f000000
add r4, r4, #0x00f80000
cmp r4, ip, lsr #1
bge LSYM(Lml_o)
cmp r4, #0
orrgt xh, xh, r4, lsl #1
adds r4, r4, ip, lsr #1
rsbgts r5, r4, ip
orrgt xh, xh, r4, lsl #20
RETLDM "r4, r5, r6" gt
cmn r4, #(53 << 19)
ble LSYM(Ldv_z)
orr xh, xh, #0x00100000
mov lr, #0
b LSYM(Lml_r)
subs r4, r4, #1
b LSYM(Lml_u)
@ Result must be denormalized: put remainder in lr for
@ rounding considerations.
@ Result might need to be denormalized: put remainder bits
@ in lr for rounding considerations.
LSYM(Ldv_u):
orr lr, r5, r6
b LSYM(Lml_r)
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
LSYM(Ldv_d):
mov lr, #0
teq r4, #0
bne 2f
and r6, xh, #0x80000000
1: movs xl, xl, lsl #1
adc xh, lr, xh, lsl #1
tst xh, #0x00100000
subeq r4, r4, #(1 << 19)
beq 1b
orr xh, xh, r6
teq r5, #0
bne LSYM(Ldv_x)
2: and r6, yh, #0x80000000
3: movs yl, yl, lsl #1
adc yh, lr, yh, lsl #1
tst yh, #0x00100000
subeq r5, r5, #(1 << 20)
beq 3b
orr yh, yh, r6
b LSYM(Ldv_x)
b LSYM(Lml_u)
@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
and r5, ip, yh, lsr #20
teq r4, ip
teqeq r5, ip
beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
@@ -1018,13 +982,23 @@ LSYM(Ldv_s):
bne 1f
orrs r4, xl, xh, lsl #12
bne LSYM(Lml_n) @ NAN / <anything> -> NAN
b LSYM(Lml_i) @ INF / <anything> -> INF
teq r5, ip
bne LSYM(Lml_i) @ INF / <anything> -> INF
mov xl, yl
mov xh, yh
b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
1: teq r5, ip
bne 2f
orrs r5, yl, yh, lsl #12
bne LSYM(Lml_n) @ <anything> / NAN -> NAN
b LSYM(Lml_z) @ <anything> / INF -> 0
2: @ One or both arguments are 0.
beq LSYM(Lml_z) @ <anything> / INF -> 0
mov xl, yl
mov xh, yh
b LSYM(Lml_n) @ <anything> / NAN -> NAN
2: @ If both are non-zero, we need to normalize and resume above.
orrs r6, xl, xh, lsl #1
orrnes r6, yl, yh, lsl #1
bne LSYM(Lml_d)
@ One or both arguments are 0.
orrs r4, xl, xh, lsl #1
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
orrs r5, yl, yh, lsl #1
@@ -1038,6 +1012,8 @@ LSYM(Ldv_s):
#ifdef L_cmpdf2
@ Note: only r0 (return value) and ip are clobbered here.
ARM_FUNC_START gtdf2
ARM_FUNC_ALIAS gedf2 gtdf2
mov ip, #-1
@@ -1053,15 +1029,13 @@ ARM_FUNC_ALIAS nedf2 cmpdf2
ARM_FUNC_ALIAS eqdf2 cmpdf2
mov ip, #1 @ how should we specify unordered here?
1: stmfd sp!, {r4, r5, lr}
1: str ip, [sp, #-4]
@ Trap any INF/NAN first.
mov lr, #0x7f000000
orr lr, lr, #0x00f00000
and r4, xh, lr
and r5, yh, lr
teq r4, lr
teqne r5, lr
mov ip, xh, lsl #1
mvns ip, ip, asr #21
mov ip, yh, lsl #1
mvnnes ip, ip, asr #21
beq 3f
@ Test for equality.
@@ -1071,37 +1045,37 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
teqne xh, yh @ or xh == yh
teqeq xl, yl @ and xl == yl
moveq r0, #0 @ then equal.
RETLDM "r4, r5" eq
RETc(eq)
@ Check for sign difference.
teq xh, yh
movmi r0, xh, asr #31
orrmi r0, r0, #1
RETLDM "r4, r5" mi
@ Clear C flag
cmn r0, #0
@ Compare exponents.
cmp r4, r5
@ Compare sign,
teq xh, yh
@ Compare mantissa if exponents are equal.
moveq xh, xh, lsl #12
cmpeq xh, yh, lsl #12
@ Compare values if same sign
cmppl xh, yh
cmpeq xl, yl
@ Result:
movcs r0, yh, asr #31
mvncc r0, yh, asr #31
orr r0, r0, #1
RETLDM "r4, r5"
RET
@ Look for a NAN.
3: teq r4, lr
3: mov ip, xh, lsl #1
mvns ip, ip, asr #21
bne 4f
orrs xl, xl, xh, lsl #12
orrs ip, xl, xh, lsl #12
bne 5f @ x is NAN
4: teq r5, lr
4: mov ip, yh, lsl #1
mvns ip, ip, asr #21
bne 2b
orrs yl, yl, yh, lsl #12
orrs ip, yl, yh, lsl #12
beq 2b @ y is not NAN
5: mov r0, ip @ return unordered code from ip
RETLDM "r4, r5"
5: ldr r0, [sp, #-4] @ unordered return code
RET
FUNC_END gedf2
FUNC_END gtdf2
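The rewritten compare path relies on a classic property: for finite numbers of the same sign, IEEE bit patterns order like integers (with the order reversed when negative). A C sketch of the three-way compare, NANs excluded since they take the unordered exit in the asm:

    #include <stdint.h>

    static int cmp_df(uint32_t xh, uint32_t xl, uint32_t yh, uint32_t yl)
    {
        /* +0.0 == -0.0: ignore both sign bits when testing for zero */
        if ((((xh | yh) << 1) | xl | yl) == 0)
            return 0;
        if (xh == yh && xl == yl)
            return 0;
        if ((xh ^ yh) & 0x80000000u)           /* signs differ */
            return (xh & 0x80000000u) ? -1 : 1;
        int lt  = (xh < yh) || (xh == yh && xl < yl);
        int neg = (xh & 0x80000000u) != 0;
        return (lt != neg) ? -1 : 1;           /* reversed when negative */
    }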
@@ -1112,6 +1086,7 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
FUNC_END cmpdf2
ARM_FUNC_START aeabi_cdrcmple
mov ip, r0
mov r0, r2
mov r2, ip
@@ -1122,85 +1097,95 @@ ARM_FUNC_START aeabi_cdrcmple
ARM_FUNC_START aeabi_cdcmpeq
ARM_FUNC_ALIAS aeabi_cdcmple aeabi_cdcmpeq
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
6: stmfd sp!, {r0, r1, r2, r3, lr}
6: stmfd sp!, {r0, lr}
ARM_CALL cmpdf2
@ Set the Z flag correctly, and the C flag unconditionally.
cmp r0, #0
@ Clear the C flag if the return value was -1, indicating
@ that the first operand was smaller than the second.
cmnmi r0, #0
RETLDM "r0, r1, r2, r3"
RETLDM "r0"
FUNC_END aeabi_cdcmple
FUNC_END aeabi_cdcmpeq
FUNC_END aeabi_cdrcmple
ARM_FUNC_START aeabi_dcmpeq
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
moveq r0, #1 @ Equal to.
movne r0, #0 @ Less than, greater than, or unordered.
RETLDM
FUNC_END aeabi_dcmpeq
ARM_FUNC_START aeabi_dcmplt
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movcc r0, #1 @ Less than.
movcs r0, #0 @ Equal to, greater than, or unordered.
RETLDM
FUNC_END aeabi_dcmplt
ARM_FUNC_START aeabi_dcmple
str lr, [sp, #-4]!
ARM_CALL aeabi_cdcmple
movls r0, #1 @ Less than or equal to.
movhi r0, #0 @ Greater than or unordered.
RETLDM
FUNC_END aeabi_dcmple
ARM_FUNC_START aeabi_dcmpge
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
RETLDM
FUNC_END aeabi_dcmpge
ARM_FUNC_START aeabi_dcmpgt
str lr, [sp, #-4]!
ARM_CALL aeabi_cdrcmple
movcc r0, #1 @ Operand 2 is less than operand 1.
movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
@ or they are unordered.
RETLDM
FUNC_END aeabi_dcmpgt
#endif /* L_cmpdf2 */
#ifdef L_unorddf2
ARM_FUNC_START unorddf2
ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
str lr, [sp, #-4]!
mov ip, #0x7f000000
orr ip, ip, #0x00f00000
and lr, xh, ip
teq lr, ip
mov ip, xh, lsl #1
mvns ip, ip, asr #21
bne 1f
orrs xl, xl, xh, lsl #12
orrs ip, xl, xh, lsl #12
bne 3f @ x is NAN
1: and lr, yh, ip
teq lr, ip
1: mov ip, yh, lsl #1
mvns ip, ip, asr #21
bne 2f
orrs yl, yl, yh, lsl #12
orrs ip, yl, yh, lsl #12
bne 3f @ y is NAN
2: mov r0, #0 @ arguments are ordered.
RETLDM
RET
3: mov r0, #1 @ arguments are unordered.
RETLDM
RET
FUNC_END aeabi_dcmpun
FUNC_END unorddf2
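The new NAN test avoids loading a 0x7ff00000 constant: shifting the sign bit out and arithmetic-shifting the exponent down lets mvns set Z exactly when the exponent field is all ones. The predicate being computed, in illustrative C:

    #include <stdint.h>

    static int is_nan_df(uint32_t xh, uint32_t xl)
    {
        return ((xh >> 20) & 0x7ff) == 0x7ff        /* exponent all ones */
            && (((xh & 0x000fffffu) | xl) != 0);    /* nonzero mantissa  */
    }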
@@ -1211,31 +1196,22 @@ ARM_FUNC_ALIAS aeabi_dcmpun unorddf2
ARM_FUNC_START fixdfsi
ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
orrs ip, xl, xh, lsl #1
beq 1f @ value is 0.
mov r3, r3, rrx @ preserve C flag (the actual sign)
@ check exponent range.
mov ip, #0x7f000000
orr ip, ip, #0x00f00000
and r2, xh, ip
teq r2, ip
beq 2f @ value is INF or NAN
bic ip, ip, #0x40000000
cmp r2, ip
bcc 1f @ value is too small
add ip, ip, #(31 << 20)
cmp r2, ip
bcs 3f @ value is too large
rsb r2, r2, ip
mov ip, xh, lsl #11
orr ip, ip, #0x80000000
orr ip, ip, xl, lsr #21
mov r2, r2, lsr #20
tst r3, #0x80000000 @ the sign bit
mov r0, ip, lsr r2
mov r2, xh, lsl #1
adds r2, r2, #(1 << 21)
bcs 2f @ value is INF or NAN
bpl 1f @ value is too small
mov r3, #(0xfffffc00 + 31)
subs r2, r3, r2, asr #21
bls 3f @ value is too large
@ scale value
mov r3, xh, lsl #11
orr r3, r3, #0x80000000
orr r3, r3, xl, lsr #21
tst xh, #0x80000000 @ the sign bit
mov r0, r3, lsr r2
rsbne r0, r0, #0
RET
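fixdfsi now derives everything from xh shifted left by one (sign dropped): the carry from adding 1 << 21 flags INF/NAN, a positive sum flags values below 1.0, and 31 minus the unbiased exponent is the final shift. A C sketch of the main path, with the INF/NAN branch omitted and names illustrative:

    #include <stdint.h>

    static int32_t fix_dfsi(uint32_t xh, uint32_t xl)
    {
        int e = (int)((xh >> 20) & 0x7ff) - 1023;   /* unbiased exponent */
        if (e < 0)
            return 0;                               /* |value| < 1.0 */
        if (e > 30)                                 /* out of int32 range */
            return (xh & 0x80000000u) ? INT32_MIN : INT32_MAX;
        uint32_t m = 0x80000000u | (xh << 11) | (xl >> 21);
        int32_t r = (int32_t)(m >> (31 - e));
        return (xh & 0x80000000u) ? -r : r;
    }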
@@ -1243,8 +1219,8 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
RET
2: orrs xl, xl, xh, lsl #12
bne 4f @ r0 is NAN.
3: ands r0, r3, #0x80000000 @ the sign bit
bne 4f @ x is NAN.
3: ands r0, xh, #0x80000000 @ the sign bit
moveq r0, #0x7fffffff @ maximum signed positive si
RET
@@ -1260,29 +1236,22 @@ ARM_FUNC_ALIAS aeabi_d2iz fixdfsi
ARM_FUNC_START fixunsdfsi
ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
orrs ip, xl, xh, lsl #1
movcss r0, #0 @ value is negative
RETc(eq) @ or 0 (xl, xh overlap r0)
@ check exponent range.
mov ip, #0x7f000000
orr ip, ip, #0x00f00000
and r2, xh, ip
teq r2, ip
beq 2f @ value is INF or NAN
bic ip, ip, #0x40000000
cmp r2, ip
bcc 1f @ value is too small
add ip, ip, #(31 << 20)
cmp r2, ip
bhi 3f @ value is too large
rsb r2, r2, ip
mov ip, xh, lsl #11
orr ip, ip, #0x80000000
orr ip, ip, xl, lsr #21
mov r2, r2, lsr #20
mov r0, ip, lsr r2
movs r2, xh, lsl #1
bcs 1f @ value is negative
adds r2, r2, #(1 << 21)
bcs 2f @ value is INF or NAN
bpl 1f @ value is too small
mov r3, #(0xfffffc00 + 31)
subs r2, r3, r2, asr #21
bmi 3f @ value is too large
@ scale value
mov r3, xh, lsl #11
orr r3, r3, #0x80000000
orr r3, r3, xl, lsr #21
mov r0, r3, lsr r2
RET
1: mov r0, #0
@@ -1305,90 +1274,60 @@ ARM_FUNC_ALIAS aeabi_d2uiz fixunsdfsi
ARM_FUNC_START truncdfsf2
ARM_FUNC_ALIAS aeabi_d2f truncdfsf2
orrs r2, xl, xh, lsl #1
moveq r0, r2, rrx
RETc(eq) @ value is 0.0 or -0.0
@ check exponent range.
mov ip, #0x7f000000
orr ip, ip, #0x00f00000
and r2, ip, xh
teq r2, ip
beq 2f @ value is INF or NAN
bic xh, xh, ip
cmp r2, #(0x380 << 20)
bls 4f @ value is too small
@ shift and round mantissa
1: movs r3, xl, lsr #29
adc r3, r3, xh, lsl #3
@ if halfway between two numbers, round towards LSB = 0.
mov xl, xl, lsl #3
teq xl, #0x80000000
biceq r3, r3, #1
@ rounding might have created an extra MSB. If so adjust exponent.
tst r3, #0x00800000
addne r2, r2, #(1 << 20)
bicne r3, r3, #0x00800000
@ check exponent for overflow
mov ip, #(0x400 << 20)
orr ip, ip, #(0x07f << 20)
cmp r2, ip
bcs 3f @ overflow
@ adjust exponent, merge with sign bit and mantissa.
movs xh, xh, lsl #1
mov r2, r2, lsl #4
orr r0, r3, r2, rrx
eor r0, r0, #0x40000000
mov r2, xh, lsl #1
subs r3, r2, #((1023 - 127) << 21)
subcss ip, r3, #(1 << 21)
rsbcss ip, ip, #(254 << 21)
bls 2f @ value is out of range
1: @ shift and round mantissa
and ip, xh, #0x80000000
mov r2, xl, lsl #3
orr xl, ip, xl, lsr #29
cmp r2, #0x80000000
adc r0, xl, r3, lsl #2
biceq r0, r0, #1
RET
2: @ check for NAN
orrs xl, xl, xh, lsl #12
movne r0, #0x7f000000
orrne r0, r0, #0x00c00000
RETc(ne) @ return NAN
2: @ either overflow or underflow
tst xh, #0x40000000
bne 3f @ overflow
3: @ return INF with sign
and r0, xh, #0x80000000
orr r0, r0, #0x7f000000
orr r0, r0, #0x00800000
RET
@ check if denormalized value is possible
adds r2, r3, #(23 << 21)
andlt r0, xh, #0x80000000 @ too small, return signed 0.
RETc(lt)
4: @ check if denormalized value is possible
subs r2, r2, #((0x380 - 24) << 20)
andle r0, xh, #0x80000000 @ too small, return signed 0.
RETc(le)
@ denormalize value so we can resume with the code above afterwards.
orr xh, xh, #0x00100000
mov r2, r2, lsr #20
rsb r2, r2, #25
cmp r2, #20
bgt 6f
mov r2, r2, lsr #21
rsb r2, r2, #24
rsb ip, r2, #32
mov r3, xl, lsl ip
movs r3, xl, lsl ip
mov xl, xl, lsr r2
orr xl, xl, xh, lsl ip
movs xh, xh, lsl #1
mov xh, xh, lsr r2
mov xh, xh, rrx
5: teq r3, #0 @ fold r3 bits into the LSB
orrne xl, xl, #1 @ for rounding considerations.
mov r2, #(0x380 << 20) @ equivalent to the 0 float exponent
orrne xl, xl, #1 @ fold r3 for rounding considerations.
mov r3, xh, lsl #11
mov r3, r3, lsr #11
orr xl, xl, r3, lsl ip
mov r3, r3, lsr r2
mov r3, r3, lsl #1
b 1b
6: rsb r2, r2, #(12 + 20)
rsb ip, r2, #32
mov r3, xl, lsl r2
mov xl, xl, lsr ip
orr xl, xl, xh, lsl r2
and xh, xh, #0x80000000
b 5b
3: @ check for NAN
mvns r3, r2, asr #21
bne 5f @ simple overflow
orrs r3, xl, xh, lsl #12
movne r0, #0x7f000000
orrne r0, r0, #0x00c00000
RETc(ne) @ return NAN
5: @ return INF with sign
and r0, xh, #0x80000000
orr r0, r0, #0x7f000000
orr r0, r0, #0x00800000
RET
FUNC_END aeabi_d2f
FUNC_END truncdfsf2
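truncdfsf2 is the inverse rebias: subtract (1023 - 127) << 21 from the shifted exponent, check the result against the single-precision range, and keep the top 23 mantissa bits while rounding with the 29 discarded ones. The normal-range core in illustrative C, rounding left out:

    #include <stdint.h>

    static uint32_t trunc_df_sf(uint32_t xh, uint32_t xl)
    {
        uint32_t sign = xh & 0x80000000u;
        int e = (int)((xh >> 20) & 0x7ff) - 1023 + 127;  /* rebias */
        /* assumes 1 <= e <= 254; over/underflow handled separately */
        uint32_t mant = ((xh & 0x000fffffu) << 3) | (xl >> 29);
        /* the 29 bits below xl >> 29 feed round-to-nearest-even */
        return sign | ((uint32_t)e << 23) | mant;
    }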
gcc/config/arm/ieee754-sf.S
@@ -42,7 +42,7 @@
ARM_FUNC_START negsf2
ARM_FUNC_ALIAS aeabi_fneg negsf2
eor r0, r0, #0x80000000 @ flip sign bit
RET
@@ -56,11 +56,11 @@ ARM_FUNC_ALIAS aeabi_fneg negsf2
ARM_FUNC_START aeabi_frsub
eor r0, r0, #0x80000000 @ flip sign bit of first arg
b 1f
b 1f
ARM_FUNC_START subsf3
ARM_FUNC_ALIAS aeabi_fsub subsf3
eor r1, r1, #0x80000000 @ flip sign bit of second arg
#if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
b 1f @ Skip Thumb-code prologue
@@ -68,32 +68,19 @@ ARM_FUNC_ALIAS aeabi_fsub subsf3
ARM_FUNC_START addsf3
ARM_FUNC_ALIAS aeabi_fadd addsf3
1: @ Compare both args, return zero if equal but the sign.
eor r2, r0, r1
teq r2, #0x80000000
beq LSYM(Lad_z)
@ If first arg is 0 or -0, return second arg.
@ If second arg is 0 or -0, return first arg.
bics r2, r0, #0x80000000
moveq r0, r1
bicnes r2, r1, #0x80000000
RETc(eq)
@ Mask out exponents.
mov ip, #0xff000000
and r2, r0, ip, lsr #1
and r3, r1, ip, lsr #1
@ If either of them is 255, result will be INF or NAN
teq r2, ip, lsr #1
teqne r3, ip, lsr #1
beq LSYM(Lad_i)
1: @ Look for zeroes, equal values, INF, or NAN.
movs r2, r0, lsl #1
movnes r3, r1, lsl #1
teqne r2, r3
mvnnes ip, r2, asr #24
mvnnes ip, r3, asr #24
beq LSYM(Lad_s)
@ Compute exponent difference. Make largest exponent in r2,
@ corresponding arg in r0, and positive exponent difference in r3.
subs r3, r3, r2
mov r2, r2, lsr #24
rsbs r3, r2, r3, lsr #24
addgt r2, r2, r3
eorgt r1, r0, r1
eorgt r0, r1, r0
@@ -103,7 +90,7 @@ ARM_FUNC_ALIAS aeabi_fadd addsf3
@ If exponent difference is too large, return largest argument
@ already in r0. We need up to 25 bit to handle proper rounding
@ of 0x1p25 - 1.1.
cmp r3, #(25 << 23)
cmp r3, #25
RETc(hi)
@ Convert mantissa to signed integer.
@@ -122,25 +109,17 @@ ARM_FUNC_ALIAS aeabi_fadd addsf3
beq LSYM(Lad_d)
LSYM(Lad_x):
@ Scale down second arg with exponent difference.
@ Apply shift one bit left to first arg and the rest to second arg
@ to simplify things later, but only if exponent does not become 0.
movs r3, r3, lsr #23
teqne r2, #(1 << 23)
movne r0, r0, lsl #1
subne r2, r2, #(1 << 23)
subne r3, r3, #1
@ Compensate for the exponent overlapping the mantissa MSB added later
sub r2, r2, #1
@ Shift second arg into ip, keep leftover bits into r1.
mov ip, r1, asr r3
@ Shift and add second arg to first arg in r0.
@ Keep leftover bits into r1.
adds r0, r0, r1, asr r3
rsb r3, r3, #32
mov r1, r1, lsl r3
add r0, r0, ip @ the actual addition
@ We now have a 64 bit result in r0-r1.
@ Keep absolute value in r0-r1, sign in r3.
ands r3, r0, #0x80000000
@ Keep absolute value in r0-r1, sign in r3 (the n bit was set above)
and r3, r0, #0x80000000
bpl LSYM(Lad_p)
rsbs r1, r1, #0
rsc r0, r0, #0
@@ -148,103 +127,117 @@ LSYM(Lad_x):
@ Determine how to normalize the result.
LSYM(Lad_p):
cmp r0, #0x00800000
bcc LSYM(Lad_l)
bcc LSYM(Lad_a)
cmp r0, #0x01000000
bcc LSYM(Lad_r0)
cmp r0, #0x02000000
bcc LSYM(Lad_r1)
bcc LSYM(Lad_e)
@ Result needs to be shifted right.
movs r0, r0, lsr #1
mov r1, r1, rrx
add r2, r2, #(1 << 23)
LSYM(Lad_r1):
movs r0, r0, lsr #1
mov r1, r1, rrx
add r2, r2, #(1 << 23)
@ Our result is now properly aligned into r0, remaining bits in r1.
@ Round with MSB of r1. If halfway between two numbers, round towards
@ LSB of r0 = 0.
LSYM(Lad_r0):
add r0, r0, r1, lsr #31
teq r1, #0x80000000
biceq r0, r0, #1
@ Rounding may have added a new MSB. Adjust exponent.
@ That MSB will be cleared when exponent is merged below.
tst r0, #0x01000000
addne r2, r2, #(1 << 23)
add r2, r2, #1
@ Make sure we did not bust our exponent.
cmp r2, #(254 << 23)
bhi LSYM(Lad_o)
cmp r2, #254
bhs LSYM(Lad_o)
@ Our result is now properly aligned into r0, remaining bits in r1.
@ Pack final result together.
@ Round with MSB of r1. If halfway between two numbers, round towards
@ LSB of r0 = 0.
LSYM(Lad_e):
bic r0, r0, #0x01800000
orr r0, r0, r2
cmp r1, #0x80000000
adc r0, r0, r2, lsl #23
biceq r0, r0, #1
orr r0, r0, r3
RET
@ Result must be shifted left.
@ No rounding necessary since r1 will always be 0.
@ Result must be shifted left and exponent adjusted.
LSYM(Lad_a):
movs r1, r1, lsl #1
adc r0, r0, r0
tst r0, #0x00800000
sub r2, r2, #1
bne LSYM(Lad_e)
@ No rounding necessary since r1 will always be 0 at this point.
LSYM(Lad_l):
#if __ARM_ARCH__ < 5
movs ip, r0, lsr #12
moveq r0, r0, lsl #12
subeq r2, r2, #(12 << 23)
subeq r2, r2, #12
tst r0, #0x00ff0000
moveq r0, r0, lsl #8
subeq r2, r2, #(8 << 23)
subeq r2, r2, #8
tst r0, #0x00f00000
moveq r0, r0, lsl #4
subeq r2, r2, #(4 << 23)
subeq r2, r2, #4
tst r0, #0x00c00000
moveq r0, r0, lsl #2
subeq r2, r2, #(2 << 23)
tst r0, #0x00800000
moveq r0, r0, lsl #1
subeq r2, r2, #(1 << 23)
cmp r2, #0
bgt LSYM(Lad_e)
subeq r2, r2, #2
cmp r0, #0x00800000
movcc r0, r0, lsl #1
sbcs r2, r2, #0
#else
clz ip, r0
sub ip, ip, #8
subs r2, r2, ip
mov r0, r0, lsl ip
subs r2, r2, ip, lsl #23
bgt LSYM(Lad_e)
#endif
@ Exponent too small, denormalize result.
mvn r2, r2, asr #23
add r2, r2, #2
orr r0, r3, r0, lsr r2
@ Final result with sign
@ If exponent negative, denormalize result.
addge r0, r0, r2, lsl #23
rsblt r2, r2, #0
orrge r0, r0, r3
orrlt r0, r3, r0, lsr r2
RET
@ Fixup and adjust bit position for denormalized arguments.
@ Note that r2 must not remain equal to 0.
LSYM(Lad_d):
teq r2, #0
eoreq r0, r0, #0x00800000
addeq r2, r2, #(1 << 23)
eor r1, r1, #0x00800000
subne r3, r3, #(1 << 23)
eoreq r0, r0, #0x00800000
addeq r2, r2, #1
subne r3, r3, #1
b LSYM(Lad_x)
@ Result is x - x = 0, unless x is INF or NAN.
LSYM(Lad_z):
mov ip, #0xff000000
and r2, r0, ip, lsr #1
teq r2, ip, lsr #1
moveq r0, ip, asr #2
LSYM(Lad_s):
mov r3, r1, lsl #1
mvns ip, r2, asr #24
mvnnes ip, r3, asr #24
beq LSYM(Lad_i)
teq r2, r3
beq 1f
@ Result is x + 0.0 = x or 0.0 + y = y.
teq r2, #0
moveq r0, r1
RET
1: teq r0, r1
@ Result is x - x = 0.
movne r0, #0
RETc(ne)
@ Result is x + x = 2x.
tst r2, #0xff000000
bne 2f
movs r0, r0, lsl #1
orrcs r0, r0, #0x80000000
RET
2: adds r2, r2, #(2 << 24)
addcc r0, r0, #(1 << 23)
RETc(cc)
and r3, r0, #0x80000000
@ Overflow: return INF.
LSYM(Lad_o):
@@ -257,16 +250,16 @@ LSYM(Lad_o):
@ if r1 != INF/NAN: return r0 (which is INF/NAN)
@ if r0 or r1 is NAN: return NAN
@ if opposite sign: return NAN
@ return r0 (which is INF or -INF)
@ otherwise return r0 (which is INF or -INF)
LSYM(Lad_i):
teq r2, ip, lsr #1
mvns r2, r2, asr #24
movne r0, r1
teqeq r3, ip, lsr #1
RETc(ne)
mvneqs r3, r3, asr #24
movne r1, r0
movs r2, r0, lsl #9
moveqs r2, r1, lsl #9
moveqs r3, r1, lsl #9
teqeq r0, r1
orrne r0, r3, #0x00400000 @ NAN
orrne r0, r0, #0x00400000 @ quiet NAN
RET
FUNC_END aeabi_frsub
@@ -287,28 +280,17 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
ands r3, r0, #0x80000000
rsbmi r0, r0, #0
1: teq r0, #0
1: movs ip, r0
RETc(eq)
3:
mov r1, #0
mov r2, #((127 + 23) << 23)
tst r0, #0xfc000000
beq LSYM(Lad_p)
@ We need to scale the value a little before branching to code above.
tst r0, #0xf0000000
4:
orrne r1, r1, r0, lsl #28
movne r0, r0, lsr #4
addne r2, r2, #(4 << 23)
tst r0, #0x0c000000
beq LSYM(Lad_p)
mov r1, r1, lsr #2
orr r1, r1, r0, lsl #30
mov r0, r0, lsr #2
add r2, r2, #(2 << 23)
b LSYM(Lad_p)
@ Add initial exponent to sign
orr r3, r3, #((127 + 23) << 23)
.ifnc ah, r0
mov ah, r0
.endif
mov al, #0
b 2f
FUNC_END aeabi_i2f
FUNC_END floatsisf
@@ -317,22 +299,15 @@ ARM_FUNC_ALIAS aeabi_i2f floatsisf
ARM_FUNC_START floatundisf
ARM_FUNC_ALIAS aeabi_ul2f floatundisf
orrs r2, r0, r1
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
mvfeqs f0, #0.0
#endif
RETc(eq)
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0 for backwards
@ compatibility.
str lr, [sp, #-4]!
adr lr, 4f
#endif
mov r3, #0
b 2f
b 1f
ARM_FUNC_START floatdisf
ARM_FUNC_ALIAS aeabi_l2f floatdisf
@@ -342,78 +317,80 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
mvfeqs f0, #0.0
#endif
RETc(eq)
ands r3, ah, #0x80000000 @ sign bit in r3
bpl 1f
rsbs al, al, #0
rsc ah, ah, #0
1:
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
@ For hard FPA code we want to return via the tail below so that
@ we can return the result in f0 as well as in r0 for backwards
@ compatibility.
str lr, [sp, #-4]!
adr lr, 4f
adr lr, LSYM(f0_ret)
#endif
ands r3, ah, #0x80000000 @ sign bit in r3
bpl 2f
rsbs al, al, #0
rsc ah, ah, #0
2:
movs ip, ah
#ifdef __ARMEB__
moveq r0, al
#endif
beq 3b
mov r2, #((127 + 23 + 32) << 23) @ initial exponent
#ifndef __ARMEB__
mov r1, al
mov r0, ip
#endif
tst r0, #0xfc000000
bne 3f
moveq ip, al
@ Add initial exponent to sign
orr r3, r3, #((127 + 23 + 32) << 23)
subeq r3, r3, #(32 << 23)
2: sub r3, r3, #(1 << 23)
#if __ARM_ARCH__ < 5
cmp r0, #(1 << 13)
movlo ip, #13
movlo r0, r0, lsl #13
movhs ip, #0
tst r0, #0x03fc0000
addeq ip, ip, #8
moveq r0, r0, lsl #8
tst r0, #0x03c00000
addeq ip, ip, #4
moveq r0, r0, lsl #4
tst r0, #0x03000000
addeq ip, ip, #2
moveq r0, r0, lsl #2
mov r2, #23
cmp ip, #(1 << 16)
movhs ip, ip, lsr #16
subhs r2, r2, #16
cmp ip, #(1 << 8)
movhs ip, ip, lsr #8
subhs r2, r2, #8
cmp ip, #(1 << 4)
movhs ip, ip, lsr #4
subhs r2, r2, #4
cmp ip, #(1 << 2)
subhs r2, r2, #2
sublo r2, r2, ip, lsr #1
subs r2, r2, ip, lsr #3
#else
clz ip, r0
sub ip, ip, #6
mov r0, r0, lsl ip
clz r2, ip
subs r2, r2, #8
#endif
sub r2, r2, ip, lsl #23
rsb ip, ip, #32
orr r0, r0, r1, lsr ip
rsb ip, ip, #32
mov r1, r1, asl ip
@ At this point we no longer care about the precise value in r1, only
@ whether only the top bit is set, or if the top bit and some others
@ are set.
and ip, r1, #0xff
orr r1, r1, ip, lsl #8
b LSYM(Lad_p)
3:
@ We need to scale the value a little before branching to code above.
@ At this point we no longer care about the precise value in r1, only
@ whether only the top bit is set, or if the top bit and some others
@ are set.
and ip, r1, #0xff
orr r1, r1, ip, lsl #8
tst r0, #0xf0000000
movne r1, r1, lsr #4
b 4b
sub r3, r3, r2, lsl #23
blt 3f
add r3, r3, ah, lsl r2
mov ip, al, lsl r2
rsb r2, r2, #32
cmp ip, #0x80000000
adc r0, r3, al, lsr r2
biceq r0, r0, #1
RET
3: add r2, r2, #32
mov ip, ah, lsl r2
rsb r2, r2, #32
orrs al, al, ip, lsl #1
adc r0, r3, ah, lsr r2
biceq r0, r0, ip, lsr #31
RET
#if !defined (__VFP_FP__) && !defined(__SOFTFP__)
4:
LSYM(f0_ret):
str r0, [sp, #-4]!
ldfs f0, [sp], #4
RETLDM
#endif
FUNC_END floatdisf
FUNC_END aeabi_l2f
FUNC_END floatundisf
@@ -425,139 +402,117 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
ARM_FUNC_START mulsf3
ARM_FUNC_ALIAS aeabi_fmul mulsf3
@ Mask out exponents.
mov ip, #0xff000000
and r2, r0, ip, lsr #1
and r3, r1, ip, lsr #1
@ Trap any INF/NAN.
teq r2, ip, lsr #1
teqne r3, ip, lsr #1
beq LSYM(Lml_s)
@ Trap any multiplication by 0.
bics ip, r0, #0x80000000
bicnes ip, r1, #0x80000000
beq LSYM(Lml_z)
@ Shift exponents right one bit to make room for overflow bit.
@ If either of them is 0, scale denormalized arguments off line.
@ Then add both exponents together.
movs r2, r2, lsr #1
teqne r3, #0
beq LSYM(Lml_d)
@ Mask out exponents, trap any zero/denormal/INF/NAN.
mov ip, #0xff
ands r2, ip, r0, lsr #23
andnes r3, ip, r1, lsr #23
teqne r2, ip
teqne r3, ip
beq LSYM(Lml_s)
LSYM(Lml_x):
add r2, r2, r3, asr #1
@ Preserve final sign in r2 along with exponent for now.
teq r0, r1
orrmi r2, r2, #0x8000
@ Add exponents together
add r2, r2, r3
@ Determine final sign.
eor ip, r0, r1
@ Convert mantissa to unsigned integer.
bic r0, r0, #0xff000000
bic r1, r1, #0xff000000
orr r0, r0, #0x00800000
orr r1, r1, #0x00800000
@ If power of two, branch to a separate path.
@ Make up for final alignment.
movs r0, r0, lsl #9
movnes r1, r1, lsl #9
beq LSYM(Lml_1)
mov r3, #0x08000000
orr r0, r3, r0, lsr #5
orr r1, r3, r1, lsr #5
#if __ARM_ARCH__ < 4
@ Put sign bit in r3, which will be restored into r0 later.
and r3, ip, #0x80000000
@ Well, no way to make it shorter without the umull instruction.
@ We must perform that 24 x 24 -> 48 bit multiplication by hand.
stmfd sp!, {r4, r5}
stmfd sp!, {r3, r4, r5}
mov r4, r0, lsr #16
mov r5, r1, lsr #16
bic r0, r0, #0x00ff0000
bic r1, r1, #0x00ff0000
bic r0, r0, r4, lsl #16
bic r1, r1, r5, lsl #16
mul ip, r4, r5
mul r3, r0, r1
mul r0, r5, r0
mla r0, r4, r1, r0
adds r3, r3, r0, lsl #16
adc ip, ip, r0, lsr #16
ldmfd sp!, {r4, r5}
adc r1, ip, r0, lsr #16
ldmfd sp!, {r0, r4, r5}
#else
umull r3, ip, r0, r1 @ The actual multiplication.
@ The actual multiplication.
umull r3, r1, r0, r1
@ Put final sign in r0.
and r0, ip, #0x80000000
#endif
@ Put final sign in r0.
mov r0, r2, lsl #16
bic r2, r2, #0x8000
@ Adjust result if one extra MSB appeared.
@ The LSB may be lost but this never changes the result in this case.
tst ip, #(1 << 15)
addne r2, r2, #(1 << 22)
movnes ip, ip, lsr #1
movne r3, r3, rrx
@ Apply exponent bias, check range for underflow.
subs r2, r2, #(127 << 22)
ble LSYM(Lml_u)
@ Scale back to 24 bits with rounding.
@ r0 contains sign bit already.
orrs r0, r0, r3, lsr #23
adc r0, r0, ip, lsl #9
@ If halfway between two numbers, rounding should be towards LSB = 0.
mov r3, r3, lsl #9
teq r3, #0x80000000
biceq r0, r0, #1
@ Adjust result upon the MSB position.
cmp r1, #(1 << 23)
movcc r1, r1, lsl #1
orrcc r1, r1, r3, lsr #31
movcc r3, r3, lsl #1
@ Note: rounding may have produced an extra MSB here.
@ The extra bit is cleared before merging the exponent below.
tst r0, #0x01000000
addne r2, r2, #(1 << 22)
@ Add sign to result.
orr r0, r0, r1
@ Check for exponent overflow
cmp r2, #(255 << 22)
bge LSYM(Lml_o)
@ Apply exponent bias, check for under/overflow.
sbc r2, r2, #127
cmp r2, #(254 - 1)
bhi LSYM(Lml_u)
@ Add final exponent.
bic r0, r0, #0x01800000
orr r0, r0, r2, lsl #1
@ Round the result, merge final exponent.
cmp r3, #0x80000000
adc r0, r0, r2, lsl #23
biceq r0, r0, #1
RET
@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
eor r0, r0, r1
bic r0, r0, #0x7fffffff
RET
@ Multiplication by 0x1p*: let's shortcut a lot of code.
LSYM(Lml_1):
teq r0, #0
and ip, ip, #0x80000000
moveq r1, r1, lsl #9
orr r0, ip, r0, lsr #9
orr r0, r0, r1, lsr #9
subs r2, r2, #127
rsbgts r3, r2, #255
orrgt r0, r0, r2, lsl #23
RETc(gt)
@ Under/overflow: fix things up for the code below.
orr r0, r0, #0x00800000
mov r3, #0
subs r2, r2, #1
@ Check if denormalized result is possible, otherwise return signed 0.
LSYM(Lml_u):
cmn r2, #(24 << 22)
RETc(le)
@ Overflow?
bgt LSYM(Lml_o)
@ Find out proper shift value.
mvn r1, r2, asr #22
subs r1, r1, #7
bgt LSYM(Lml_ur)
@ Shift value left, round, etc.
add r1, r1, #32
orrs r0, r0, r3, lsr r1
rsb r1, r1, #32
adc r0, r0, ip, lsl r1
mov ip, r3, lsl r1
teq ip, #0x80000000
biceq r0, r0, #1
RET
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r2, #(24 + 1)
bicle r0, r0, #0x7fffffff
RETc(le)
@ Shift value right, round, etc.
@ Note: r1 must not be 0 otherwise carry does not get set.
LSYM(Lml_ur):
orrs r0, r0, ip, lsr r1
rsb r2, r2, #0
movs r1, r0, lsl #1
mov r1, r1, lsr r2
rsb r2, r2, #32
mov ip, r0, lsl r2
movs r0, r1, rrx
adc r0, r0, #0
rsb r1, r1, #32
mov ip, ip, lsl r1
teq r3, #0
teqeq ip, #0x80000000
biceq r0, r0, #1
orrs r3, r3, ip, lsl #1
biceq r0, r0, ip, lsr #31
RET
@ One or both arguments are denormalized.
@@ -567,32 +522,51 @@ LSYM(Lml_d):
and ip, r0, #0x80000000
1: moveq r0, r0, lsl #1
tsteq r0, #0x00800000
subeq r2, r2, #(1 << 22)
subeq r2, r2, #1
beq 1b
orr r0, r0, ip
teq r3, #0
and ip, r1, #0x80000000
2: moveq r1, r1, lsl #1
tsteq r1, #0x00800000
subeq r3, r3, #(1 << 23)
subeq r3, r3, #1
beq 2b
orr r1, r1, ip
b LSYM(Lml_x)
@ One or both args are INF or NAN.
LSYM(Lml_s):
@ Isolate the INF and NAN cases away
and r3, ip, r1, lsr #23
teq r2, ip
teqne r3, ip
beq 1f
@ Here, one or more arguments are either denormalized or zero.
bics ip, r0, #0x80000000
bicnes ip, r1, #0x80000000
bne LSYM(Lml_d)
@ Result is 0, but determine sign anyway.
LSYM(Lml_z):
eor r0, r0, r1
bic r0, r0, #0x7fffffff
RET
1: @ One or both args are INF or NAN.
teq r0, #0x0
teqne r1, #0x0
teqne r0, #0x80000000
moveq r0, r1
teqne r1, #0x0
teqne r1, #0x80000000
beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
teq r2, ip, lsr #1
teq r2, ip
bne 1f
movs r2, r0, lsl #9
bne LSYM(Lml_n) @ NAN * <anything> -> NAN
1: teq r3, ip, lsr #1
1: teq r3, ip
bne LSYM(Lml_i)
movs r3, r1, lsl #9
movne r0, r1
bne LSYM(Lml_n) @ <anything> * NAN -> NAN
@ Result is INF, but we need to determine its sign.
@@ -606,9 +580,9 @@ LSYM(Lml_o):
orr r0, r0, #0x00800000
RET
@ Return NAN.
@ Return a quiet NAN.
LSYM(Lml_n):
mov r0, #0x7f000000
orr r0, r0, #0x7f000000
orr r0, r0, #0x00c00000
RET
@@ -617,37 +591,28 @@ LSYM(Lml_n):
ARM_FUNC_START divsf3
ARM_FUNC_ALIAS aeabi_fdiv divsf3
@ Mask out exponents.
mov ip, #0xff000000
and r2, r0, ip, lsr #1
and r3, r1, ip, lsr #1
@ Trap any INF/NAN or zeroes.
teq r2, ip, lsr #1
teqne r3, ip, lsr #1
bicnes ip, r0, #0x80000000
bicnes ip, r1, #0x80000000
beq LSYM(Ldv_s)
@ Shift exponents right one bit to make room for overflow bit.
@ If either of them is 0, scale denormalized arguments off line.
@ Then subtract divisor exponent from dividend's.
movs r2, r2, lsr #1
teqne r3, #0
beq LSYM(Ldv_d)
@ Mask out exponents, trap any zero/denormal/INF/NAN.
mov ip, #0xff
ands r2, ip, r0, lsr #23
andnes r3, ip, r1, lsr #23
teqne r2, ip
teqne r3, ip
beq LSYM(Ldv_s)
LSYM(Ldv_x):
sub r2, r2, r3, asr #1
@ Subtract divisor exponent from dividend's
sub r2, r2, r3
@ Preserve final sign into ip.
eor ip, r0, r1
@ Convert mantissa to unsigned integer.
@ Dividend -> r3, divisor -> r1.
mov r3, #0x10000000
movs r1, r1, lsl #9
mov r0, r0, lsl #9
beq LSYM(Ldv_1)
mov r3, #0x10000000
orr r1, r3, r1, lsr #4
orr r3, r3, r0, lsr #4
@@ -655,16 +620,10 @@ LSYM(Ldv_x):
and r0, ip, #0x80000000
@ Ensure result will land to known bit position.
@ Apply exponent bias accordingly.
cmp r3, r1
subcc r2, r2, #(1 << 22)
movcc r3, r3, lsl #1
@ Apply exponent bias, check range for over/underflow.
add r2, r2, #(127 << 22)
cmn r2, #(24 << 22)
RETc(le)
cmp r2, #(255 << 22)
bge LSYM(Lml_o)
adc r2, r2, #(127 - 2)
@ The actual division loop.
mov ip, #0x00800000
@@ -684,44 +643,29 @@ LSYM(Ldv_x):
movnes ip, ip, lsr #4
bne 1b
@ Check if denormalized result is needed.
cmp r2, #0
ble LSYM(Ldv_u)
@ Check exponent for under/overflow.
cmp r2, #(254 - 1)
bhi LSYM(Lml_u)
@ Apply proper rounding.
@ Round the result, merge final exponent.
cmp r3, r1
addcs r0, r0, #1
adc r0, r0, r2, lsl #23
biceq r0, r0, #1
@ Add exponent to result.
bic r0, r0, #0x00800000
orr r0, r0, r2, lsl #1
RET
@ Division by 0x1p*: let's shortcut a lot of code.
LSYM(Ldv_1):
and ip, ip, #0x80000000
orr r0, ip, r0, lsr #9
add r2, r2, #(127 << 22)
cmp r2, #(255 << 22)
bge LSYM(Lml_o)
cmp r2, #0
orrgt r0, r0, r2, lsl #1
adds r2, r2, #127
rsbgts r3, r2, #255
orrgt r0, r0, r2, lsl #23
RETc(gt)
cmn r2, #(24 << 22)
movle r0, ip
RETc(le)
orr r0, r0, #0x00800000
mov r3, #0
@ Result must be denormalized: prepare parameters to use code above.
@ r3 already contains remainder for rounding considerations.
LSYM(Ldv_u):
bic ip, r0, #0x80000000
and r0, r0, #0x80000000
mvn r1, r2, asr #22
add r1, r1, #2
b LSYM(Lml_ur)
subs r2, r2, #1
b LSYM(Lml_u)
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
@@ -730,35 +674,40 @@ LSYM(Ldv_d):
and ip, r0, #0x80000000
1: moveq r0, r0, lsl #1
tsteq r0, #0x00800000
subeq r2, r2, #(1 << 22)
subeq r2, r2, #1
beq 1b
orr r0, r0, ip
teq r3, #0
and ip, r1, #0x80000000
2: moveq r1, r1, lsl #1
tsteq r1, #0x00800000
subeq r3, r3, #(1 << 23)
subeq r3, r3, #1
beq 2b
orr r1, r1, ip
b LSYM(Ldv_x)
@ One or both arguments is either INF, NAN or zero.
@ One or both arguments are either INF, NAN, zero or denormalized.
LSYM(Ldv_s):
mov ip, #0xff000000
teq r2, ip, lsr #1
teqeq r3, ip, lsr #1
beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
teq r2, ip, lsr #1
and r3, ip, r1, lsr #23
teq r2, ip
bne 1f
movs r2, r0, lsl #9
bne LSYM(Lml_n) @ NAN / <anything> -> NAN
b LSYM(Lml_i) @ INF / <anything> -> INF
1: teq r3, ip, lsr #1
teq r3, ip
bne LSYM(Lml_i) @ INF / <anything> -> INF
mov r0, r1
b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
1: teq r3, ip
bne 2f
movs r3, r1, lsl #9
bne LSYM(Lml_n) @ <anything> / NAN -> NAN
b LSYM(Lml_z) @ <anything> / INF -> 0
2: @ One or both arguments are 0.
beq LSYM(Lml_z) @ <anything> / INF -> 0
mov r0, r1
b LSYM(Lml_n) @ <anything> / NAN -> NAN
2: @ If both are non-zero, we need to normalize and resume above.
bics ip, r0, #0x80000000
bicnes ip, r1, #0x80000000
bne LSYM(Ldv_d)
@ One or both arguments are zero.
bics r2, r0, #0x80000000
bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
bics r3, r1, #0x80000000
@@ -789,85 +738,50 @@ LSYM(Ldv_s):
ARM_FUNC_START gtsf2
ARM_FUNC_ALIAS gesf2 gtsf2
mov r3, #-1
mov ip, #-1
b 1f
ARM_FUNC_START ltsf2
ARM_FUNC_ALIAS lesf2 ltsf2
mov r3, #1
mov ip, #1
b 1f
ARM_FUNC_START cmpsf2
ARM_FUNC_ALIAS nesf2 cmpsf2
ARM_FUNC_ALIAS eqsf2 cmpsf2
mov r3, #1 @ how should we specify unordered here?
@ Both Inf and NaN have an exponent of 255. Therefore, we check
@ (r1 & 0x7f800000) == 0x7f800000 || (r0 & 0x7f800000) == 0x7f800000.
1: mov ip, #0xff000000
and r2, r1, ip, lsr #1
teq r2, ip, lsr #1
and r2, r0, ip, lsr #1
teqne r2, ip, lsr #1
mov ip, #1 @ how should we specify unordered here?
1: str ip, [sp, #-4]
@ Trap any INF/NAN first.
mov r2, r0, lsl #1
mov r3, r1, lsl #1
mvns ip, r2, asr #24
mvnnes ip, r3, asr #24
beq 3f
@ Test for equality. The representations of +0.0 and -0.0
@ have all bits set to zero, except for the sign bit. Since
@ 0.0 is equal to -0.0, we begin by testing
@ ((r0 | r1) & ~0x8000000).
2: orr r3, r0, r1
@ If the result of the bitwise and is zero, then the Z flag
@ will be set. In any case, the C flag will be set.
bics r3, r3, #0x80000000 @ either 0.0 or -0.0
teqne r0, r1 @ or both the same
@ If the Z flag is set, the two operands were equal. Return zero.
moveq r0, #0
RETc(eq)
@ Compare values.
@ Note that 0.0 is equal to -0.0.
2: orrs ip, r2, r3, lsr #1 @ test if both are 0, clear C flag
teqne r0, r1 @ if not 0 compare sign
subpls r0, r2, r3 @ if same sign compare values, set r0
@ Check for sign difference. The N flag is set (due to the
@ use of teq above) if the sign bit is set on exactly one
@ of the operands. Return the sign of the first operand.
movmi r0, r0, asr #31
orrmi r0, r0, #1
RETc(mi)
@ Compare exponents.
and r3, r1, ip, lsr #1
cmp r2, r3
@ Compare mantissa if exponents are equal
moveq r0, r0, lsl #9
cmpeq r0, r1, lsl #9
@ We know the operands cannot be equal at this point, so the
@ Z flag is clear. The C flag is set if the first operand has
@ the greater exponent, or the exponents are equal and the
@ first operand has the greater mantissa. Therefore, if the C
@ flag is set, the first operand is greater iff the sign is
@ positive. These next two instructions will put zero in
@ r0 if the first operand is greater, and -1 if the second
@ operand is greater.
movcs r0, r1, asr #31
mvncc r0, r1, asr #31
@ If r0 is 0, the first operand is greater, so return 1. Leave
@ -1 unchanged.
orr r0, r0, #1
@ Result:
movhi r0, r1, asr #31
mvnlo r0, r1, asr #31
orrne r0, r0, #1
RET
@ We know that at least one argument is either Inf or NaN.
@ Look for a NaN.
3: and r2, r1, ip, lsr #1
teq r2, ip, lsr #1
@ Look for a NAN.
3: mvns ip, r2, asr #24
bne 4f
movs r2, r1, lsl #9
bne 5f @ r1 is NAN
4: and r2, r0, ip, lsr #1
teq r2, ip, lsr #1
bne 2b
movs ip, r0, lsl #9
beq 2b @ r0 is not NAN
5: @ The Z flag is clear at this point.
mov r0, r3 @ return unordered code from r3.
bne 5f @ r0 is NAN
4: mvns ip, r3, asr #24
bne 2b
movs ip, r1, lsl #9
beq 2b @ r1 is not NAN
5: ldr r0, [sp, #-4] @ return unordered code.
RET
FUNC_END gesf2
@@ -879,13 +793,15 @@ ARM_FUNC_ALIAS eqsf2 cmpsf2
FUNC_END cmpsf2
ARM_FUNC_START aeabi_cfrcmple
mov ip, r0
mov r0, r1
mov r1, ip
b 6f
ARM_FUNC_START aeabi_cfcmpeq
ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
@ The status-returning routines are required to preserve all
@ registers except ip, lr, and cpsr.
6: stmfd sp!, {r0, r1, r2, r3, lr}
@@ -896,68 +812,79 @@ ARM_FUNC_ALIAS aeabi_cfcmple aeabi_cfcmpeq
@ that the first operand was smaller than the second.
cmnmi r0, #0
RETLDM "r0, r1, r2, r3"
FUNC_END aeabi_cfcmple
FUNC_END aeabi_cfcmpeq
FUNC_END aeabi_cfrcmple
ARM_FUNC_START aeabi_fcmpeq
str lr, [sp, #-4]!
ARM_CALL aeabi_cfcmple
moveq r0, #1 @ Equal to.
movne r0, #0 @ Less than, greater than, or unordered.
RETLDM
FUNC_END aeabi_fcmpeq
ARM_FUNC_START aeabi_fcmplt
str lr, [sp, #-4]!
ARM_CALL aeabi_cfcmple
movcc r0, #1 @ Less than.
movcs r0, #0 @ Equal to, greater than, or unordered.
RETLDM
FUNC_END aeabi_fcmplt
ARM_FUNC_START aeabi_fcmple
str lr, [sp, #-4]!
ARM_CALL aeabi_cfcmple
movls r0, #1 @ Less than or equal to.
movhi r0, #0 @ Greater than or unordered.
RETLDM
FUNC_END aeabi_fcmple
ARM_FUNC_START aeabi_fcmpge
str lr, [sp, #-4]!
ARM_CALL aeabi_cfrcmple
movls r0, #1 @ Operand 2 is less than or equal to operand 1.
movhi r0, #0 @ Operand 2 greater than operand 1, or unordered.
RETLDM
FUNC_END aeabi_fcmpge
ARM_FUNC_START aeabi_fcmpgt
str lr, [sp, #-4]!
ARM_CALL aeabi_cfrcmple
movcc r0, #1 @ Operand 2 is less than operand 1.
movcs r0, #0 @ Operand 2 is greater than or equal to operand 1,
@ or they are unordered.
RETLDM
FUNC_END aeabi_fcmpgt
#endif /* L_cmpsf2 */
#ifdef L_unordsf2
ARM_FUNC_START unordsf2
ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
mov ip, #0xff000000
and r2, r1, ip, lsr #1
teq r2, ip, lsr #1
mov r2, r0, lsl #1
mov r3, r1, lsl #1
mvns ip, r2, asr #24
bne 1f
movs r2, r1, lsl #9
bne 3f @ r1 is NAN
1: and r2, r0, ip, lsr #1
teq r2, ip, lsr #1
bne 2f
movs r2, r0, lsl #9
movs ip, r0, lsl #9
bne 3f @ r0 is NAN
1: mvns ip, r3, asr #24
bne 2f
movs ip, r1, lsl #9
bne 3f @ r1 is NAN
2: mov r0, #0 @ arguments are ordered.
RET
3: mov r0, #1 @ arguments are unordered.
@@ -972,37 +899,35 @@ ARM_FUNC_ALIAS aeabi_fcmpun unordsf2
ARM_FUNC_START fixsfsi
ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
movs r0, r0, lsl #1
RETc(eq) @ value is 0.
mov r1, r1, rrx @ preserve C flag (the actual sign)
@ check exponent range.
and r2, r0, #0xff000000
mov r2, r0, lsl #1
cmp r2, #(127 << 24)
movcc r0, #0 @ value is too small
RETc(cc)
cmp r2, #((127 + 31) << 24)
bcs 1f @ value is too large
mov r0, r0, lsl #7
orr r0, r0, #0x80000000
mov r2, r2, lsr #24
rsb r2, r2, #(127 + 31)
tst r1, #0x80000000 @ the sign bit
mov r0, r0, lsr r2
bcc 1f @ value is too small
mov r3, #(127 + 31)
subs r2, r3, r2, lsr #24
bls 2f @ value is too large
@ scale value
mov r3, r0, lsl #8
orr r3, r3, #0x80000000
tst r0, #0x80000000 @ the sign bit
mov r0, r3, lsr r2
rsbne r0, r0, #0
RET
1: teq r2, #0xff000000
bne 2f
movs r0, r0, lsl #8
bne 3f @ r0 is NAN.
2: ands r0, r1, #0x80000000 @ the sign bit
1: mov r0, #0
RET
2: cmp r2, #(127 + 31 - 0xff)
bne 3f
movs r2, r0, lsl #9
bne 4f @ r0 is NAN.
3: ands r0, r0, #0x80000000 @ the sign bit
moveq r0, #0x7fffffff @ the maximum signed positive si
RET
3: mov r0, #0 @ What should we convert NAN to?
4: mov r0, #0 @ What should we convert NAN to?
RET
FUNC_END aeabi_f2iz
@@ -1014,34 +939,33 @@ ARM_FUNC_ALIAS aeabi_f2iz fixsfsi
ARM_FUNC_START fixunssfsi
ARM_FUNC_ALIAS aeabi_f2uiz fixunssfsi
movs r0, r0, lsl #1
movcss r0, #0 @ value is negative...
RETc(eq) @ ... or 0.
@ check exponent range.
and r2, r0, #0xff000000
movs r2, r0, lsl #1
bcs 1f @ value is negative
cmp r2, #(127 << 24)
movcc r0, #0 @ value is too small
RETc(cc)
cmp r2, #((127 + 32) << 24)
bcs 1f @ value is too large
bcc 1f @ value is too small
mov r3, #(127 + 31)
subs r2, r3, r2, lsr #24
bmi 2f @ value is too large
@ scale the value
mov r3, r0, lsl #8
orr r3, r3, #0x80000000
mov r0, r3, lsr r2
RET
mov r0, r0, lsl #7
orr r0, r0, #0x80000000
mov r2, r2, lsr #24
rsb r2, r2, #(127 + 31)
mov r0, r0, lsr r2
1: mov r0, #0
RET
1: teq r2, #0xff000000
bne 2f
movs r0, r0, lsl #8
bne 3f @ r0 is NAN.
2: mov r0, #0xffffffff @ maximum unsigned si
2: cmp r2, #(127 + 31 - 0xff)
bne 3f
movs r2, r0, lsl #9
bne 4f @ r0 is NAN.
3: mov r0, #0xffffffff @ maximum unsigned si
RET
3: mov r0, #0 @ What should we convert NAN to?
4: mov r0, #0 @ What should we convert NAN to?
RET
FUNC_END aeabi_f2uiz