Commit 4de1b7a9 by Jörn Rennecke; committed by Joern Rennecke

lib1funcs.asm (udivdi3): Make first divide step produce a 32 bit result before normalization...

	* config/sh/lib1funcs.asm (udivdi3): Make first divide step
	produce a 32 bit result before normalization, then normalize with a
	left shift.  Compute approximate error of 2nd reciprocal
	approximation in 2's complement.  Fix mask generation from upper
	longword of second divide stage result.
	For large divisor, fix shift count used to truncate first stage
	divide result; decide whether to adjust upwards based on comparison
	of higher parts of normalized values.
	(umoddi3): Likewise.  Undo normalization of result for large divisor
	case.

From-SVN: r54993
parent 0e20c0b5
Tue Jun 25 20:59:56 2002 Jörn Rennecke <joern.rennecke@superh.com>
* config/sh/lib1funcs.asm (udivdi3): Make first divide step
produce a 32 bit result before normalization, then normalize with a
left shift. Compute approximate error of 2nd reciprocal
approximation in 2's complement. Fix mask generation from upper
longword of second divide stage result.
For large divisor, fix shift count used to truncate first stage
divide result; decide whether to adjust upwards based on comparison
of higher parts of normalized values.
(umoddi3): Likewise. Undo normalization of result for large divisor
case.
2002-06-25 David S. Miller <davem@redhat.com>
* config/sparc/sparc.md: Change \\{t,n} to \{t,n}.
......
...@@ -1501,33 +1501,33 @@ GLOBAL(udivdi3): ...@@ -1501,33 +1501,33 @@ GLOBAL(udivdi3):
shlri r6,32,r7 shlri r6,32,r7
bgt/u r9,r63,tr0 // large_divisor bgt/u r9,r63,tr0 // large_divisor
mmulfx.w r5,r4,r4 mmulfx.w r5,r4,r4
shlri r2,32,r19 shlri r2,32+14,r19
addi r20,14-1,r0 addi r22,-31,r0
msub.w r1,r4,r1 msub.w r1,r4,r1
mulu.l r1,r7,r4 mulu.l r1,r7,r4
addi r1,-3,r5 addi r1,-3,r5
mulu.l r5,r19,r5 mulu.l r5,r19,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */ the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */ muls.l r1,r4,r4 /* leaving at least one sign bit. */
shlrd r5,r0,r8 mulu.l r5,r3,r8
mulu.l r8,r3,r5
mshalds.l r1,r21,r1 mshalds.l r1,r21,r1
shari r4,26,r4 shari r4,26,r4
shlli r5,32,r5 shlld r8,r0,r8
sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r2,r5,r2 sub r2,r8,r2
/* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
shlri r2,22,r21 shlri r2,22,r21
mulu.l r21,r1,r21 mulu.l r21,r1,r21
shlld r5,r0,r8
addi r20,30-22,r0 addi r20,30-22,r0
shlli r8,32,r8
shlrd r21,r0,r21 shlrd r21,r0,r21
mulu.l r21,r3,r5 mulu.l r21,r3,r5
add r8,r21,r8 add r8,r21,r8
mcmpeq.l r21,r63,r21 // See Note 1 mcmpgt.l r21,r63,r21 // See Note 1
addi r20,30,r0 addi r20,30,r0
mshfhi.l r63,r21,r21 mshfhi.l r63,r21,r21
sub r2,r5,r2 sub r2,r5,r2
...@@ -1555,14 +1555,15 @@ LOCAL(large_divisor): ...@@ -1555,14 +1555,15 @@ LOCAL(large_divisor):
mulu.l r1,r7,r4 mulu.l r1,r7,r4
addi r1,-3,r5 addi r1,-3,r5
mulu.l r5,r8,r5 mulu.l r5,r8,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */ the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */ muls.l r1,r4,r4 /* leaving at least one sign bit. */
shlri r5,14-1+32,r8 shlri r5,14-1,r8
mulu.l r8,r7,r5 mulu.l r8,r7,r5
mshalds.l r1,r21,r1 mshalds.l r1,r21,r1
shari r4,26,r4 shari r4,26,r4
sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r25,r5,r25 sub r25,r5,r25
/* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
...@@ -1575,11 +1576,11 @@ LOCAL(large_divisor): ...@@ -1575,11 +1576,11 @@ LOCAL(large_divisor):
add r8,r21,r8 add r8,r21,r8
shlld r2,r0,r2 shlld r2,r0,r2
sub r25,r5,r25 sub r25,r5,r25
mextr4 r2,r25,r2 bgtu/u r7,r25,tr0 // no_lo_adj
bgtu/u r6,r2,tr0 // no_lo_adj
addi r8,1,r8 addi r8,1,r8
sub r2,r6,r2 sub r25,r7,r25
LOCAL(no_lo_adj): LOCAL(no_lo_adj):
mextr4 r2,r25,r2
/* large_divisor: only needs a few adjustments. */ /* large_divisor: only needs a few adjustments. */
mulu.l r8,r6,r5 mulu.l r8,r6,r5
...@@ -1647,22 +1648,22 @@ GLOBAL(umoddi3): ...@@ -1647,22 +1648,22 @@ GLOBAL(umoddi3):
shlri r6,32,r7 shlri r6,32,r7
bgt/u r9,r63,tr0 // large_divisor bgt/u r9,r63,tr0 // large_divisor
mmulfx.w r5,r4,r4 mmulfx.w r5,r4,r4
shlri r2,32,r19 shlri r2,32+14,r19
addi r20,14-1,r0 addi r22,-31,r0
msub.w r1,r4,r1 msub.w r1,r4,r1
mulu.l r1,r7,r4 mulu.l r1,r7,r4
addi r1,-3,r5 addi r1,-3,r5
mulu.l r5,r19,r5 mulu.l r5,r19,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */ the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */ muls.l r1,r4,r4 /* leaving at least one sign bit. */
shlrd r5,r0,r8 mulu.l r5,r3,r5
mulu.l r8,r3,r5
mshalds.l r1,r21,r1 mshalds.l r1,r21,r1
shari r4,26,r4 shari r4,26,r4
shlli r5,32,r5 shlld r5,r0,r5
sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r2,r5,r2 sub r2,r5,r2
/* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */ /* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
...@@ -1672,7 +1673,7 @@ GLOBAL(umoddi3): ...@@ -1672,7 +1673,7 @@ GLOBAL(umoddi3):
/* bubble */ /* could test r3 here to check for divide by zero. */ /* bubble */ /* could test r3 here to check for divide by zero. */
shlrd r21,r0,r21 shlrd r21,r0,r21
mulu.l r21,r3,r5 mulu.l r21,r3,r5
mcmpeq.l r21,r63,r21 // See Note 1 mcmpgt.l r21,r63,r21 // See Note 1
addi r20,30,r0 addi r20,30,r0
mshfhi.l r63,r21,r21 mshfhi.l r63,r21,r21
sub r2,r5,r2 sub r2,r5,r2
...@@ -1700,14 +1701,15 @@ LOCAL(large_divisor): ...@@ -1700,14 +1701,15 @@ LOCAL(large_divisor):
mulu.l r1,r7,r4 mulu.l r1,r7,r4
addi r1,-3,r5 addi r1,-3,r5
mulu.l r5,r8,r5 mulu.l r5,r8,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */ the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */ muls.l r1,r4,r4 /* leaving at least one sign bit. */
shlri r5,14-1+32,r8 shlri r5,14-1,r8
mulu.l r8,r7,r5 mulu.l r8,r7,r5
mshalds.l r1,r21,r1 mshalds.l r1,r21,r1
shari r4,26,r4 shari r4,26,r4
sub r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5) add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r25,r5,r25 sub r25,r5,r25
/* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */ /* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
...@@ -1720,19 +1722,20 @@ LOCAL(large_divisor): ...@@ -1720,19 +1722,20 @@ LOCAL(large_divisor):
add r8,r21,r8 add r8,r21,r8
shlld r2,r0,r2 shlld r2,r0,r2
sub r25,r5,r25 sub r25,r5,r25
mextr4 r2,r25,r2 bgtu/u r7,r25,tr0 // no_lo_adj
bgtu/u r6,r2,tr0 // no_lo_adj
addi r8,1,r8 addi r8,1,r8
sub r2,r6,r2 sub r25,r7,r25
LOCAL(no_lo_adj): LOCAL(no_lo_adj):
mextr4 r2,r25,r2
/* large_divisor: only needs a few adjustments. */ /* large_divisor: only needs a few adjustments. */
mulu.l r8,r6,r5 mulu.l r8,r6,r5
ptabs r18,tr0 ptabs r18,tr0
add r2,r3,r7 add r2,r6,r7
cmpgtu r5,r2,r8 cmpgtu r5,r2,r8
cmvne r8,r7,r2 cmvne r8,r7,r2
sub r2,r5,r2 sub r2,r5,r2
shlrd r2,r22,r2
blink tr0,r63 blink tr0,r63
/* Note 1: To shift the result of the second divide stage so that the result /* Note 1: To shift the result of the second divide stage so that the result
always fits into 32 bits, yet we still reduce the rest sufficiently always fits into 32 bits, yet we still reduce the rest sufficiently
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment