Commit 0426e049 by Nick Clifton Committed by Nick Clifton

Faster __mulsi routine

From-SVN: r38736
parent f7114e17
Fri Jan 5 16:34:18 2001 Nick Clifton <nickc@redhat.com>
* config/v850/lib1funcs.asm: Replace __mulsi3 routine with faster
version supplied by Matteo Frigo.
2001-01-05  Neil Booth  <neil@daikokuya.demon.co.uk>
	* cpp.texi: Update for -MQ.
......
@@ -33,57 +33,60 @@ Boston, MA 02111-1307, USA. */
.type ___mulsi3,@function .type ___mulsi3,@function
/* /*
* In order to not deal with negative numbers (mulh is a signed multiply * #define SHIFT 12
* and we want an unsigned multiply, code the multiplication as a series * #define MASK ((1 << SHIFT) - 1)
* of 7 bit multiplies). *
* #define STEP(i, j) \
* ({ \
* short a_part = (a >> (i)) & MASK; \
* short b_part = (b >> (j)) & MASK; \
* int res = (((int)a_part) * ((int)b_part)); \
* res; \
* })
* *
* int __mulsi3 (unsigned a, unsigned b) * int
* __mulsi3 (unsigned a, unsigned b)
* { * {
* int i, j; * return STEP (0, 0) +
* int ret = 0; * ((STEP (SHIFT, 0) + STEP (0, SHIFT)) << SHIFT) +
* * ((STEP (0, 2 * SHIFT) + STEP (SHIFT, SHIFT) + STEP (2 * SHIFT, 0))
* for (i = 0; i < 32; i += 7) * << (2 * SHIFT));
* {
* short a_part = a & 0x7f;
* unsigned b_tmp = b;
* a >>= 7;
*
* for (j = 0; (i+j) < 32; j += 7)
* {
* short b_part = b_tmp & 0x7f;
* ret += (((int)a_part) * ((int)b_part)) << (i+j);
* b_tmp >>= 7;
* }
* }
*
* return ret;
* } * }
*/ */
___mulsi3: ___mulsi3:
mov 0,r10 /* total */ mov r6,r13
mov 0,r14 /* i = 0, index for multiply a's part */ movea lo(4095),r0,r16
movea lo(31),r0,r16 /* upper bounds for loop */ and r16,r13
.L5: mov r7,r15
mov r7,r13 /* b_tmp = b */ and r16,r15
andi 0x7f,r6,r15 /* a_part = (a & 127) */ mov r13,r10
shr 7,r6 /* a >>= 7 */ mulh r15,r10
mov r14,r12 /* i+j = i */ shr 12,r6
.L9: mov r6,r14
andi 0x7f,r13,r11 /* b_part = (b_tmp & 127) */ and r16,r14
mulh r15,r11 /* ((int)a_part) * ((int)b_part) */ mov r14,r11
shr 7,r13 /* b_tmp >>= 7 */ mulh r15,r11
shl r12,r11 /* (((int)a_part) * ((int)b_part)) << (i+j) */ shr 12,r7
add r11,r10 /* ret += (((int)a_part) * ((int)b_part)) << (i+j) */ mov r7,r12
add 7,r12 /* i+j += 7 */ and r16,r12
cmp r16,r12 /* i+j < 32 */ shr 12,r7
ble .L9 and r16,r7
mulh r13,r7
add 7,r14 /* i += 7 */ shr 12,r6
cmp r16,r14 /* i < 32 */ mulh r12,r13
ble .L5 and r16,r6
add r13,r11
jmp [r31] /* return */ shl 12,r11
add r11,r10
mov r14,r11
mulh r12,r11
mulh r15,r6
add r11,r7
add r6,r7
shl 24,r7
add r7,r10
jmp [r31]
.size ___mulsi3,.-___mulsi3 .size ___mulsi3,.-___mulsi3
#endif #endif
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment