From 888e552fda3782604fff70acc32285b09e5c4246 Mon Sep 17 00:00:00 2001 From: Nick Clifton Date: Tue, 22 Aug 2000 19:50:12 +0000 Subject: [PATCH] Use macros to replace duplicated bodies of assembler code. From-SVN: r35890 --- gcc/ChangeLog | 12 + gcc/config/arm/lib1funcs.asm | 935 ++++++++++++----------------------- 2 files changed, 316 insertions(+), 631 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 236b38f1454..d82e97d16e4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2000-08-22 Nick Clifton + + * config/arm/lib1funcs.asm (ARM_DIV_MOD_BODY): New macro. + Common code for ARM divide and modulus functions. + (THUMB_DIV_MOD_BODY): New macro. Thumb equivalent of + ARM_DIV_MOD_BODY. + (FUNC_END): New macro: Common code at the end of the division and + modulo functions. + (THUMB_FUNCTION_START): New macro: Common code at the start of + Thumb functions. + (__divsi3, __udivsi3, __modsi3, __umodsi3): Use new macros. + Tue Aug 22 20:34:52 2000 Kaz Kojima * config/sh/sh.md (cmpeqdi_t splitter): Fix a reverse testing. diff --git a/gcc/config/arm/lib1funcs.asm b/gcc/config/arm/lib1funcs.asm index 619a4802e49..08fa589cca1 100644 --- a/gcc/config/arm/lib1funcs.asm +++ b/gcc/config/arm/lib1funcs.asm @@ -27,6 +27,9 @@ along with this program; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /* ------------------------------------------------------------------------ */ + +/* We need to know what prefix to add to function names. */ + #ifndef __USER_LABEL_PREFIX__ #error __USER_LABEL_PREFIX__ not defined #endif @@ -55,6 +58,7 @@ Boston, MA 02111-1307, USA. */ #endif /* Function end macros. Variants for 26 bit APCS and interworking. 
*/ + #ifdef __APCS_26__ # define RET movs pc, lr # define RETc(x) mov##x##s pc, lr @@ -71,6 +75,7 @@ Ldiv0: # define RET bx lr # define RETc(x) bx##x lr .macro THUMB_LDIV0 +Ldiv0: push { lr } bl SYM (__div0) mov r0, #0 @ About as wrong as it could be. @@ -78,6 +83,7 @@ Ldiv0: bx r1 .endm .macro ARM_LDIV0 +Ldiv0: str lr, [sp, #-4]! bl SYM (__div0) __PLT__ mov r0, #0 @ About as wrong as it could be. @@ -88,12 +94,14 @@ Ldiv0: # define RET mov pc, lr # define RETc(x) mov##x pc, lr .macro THUMB_LDIV0 +Ldiv0: push { lr } bl SYM (__div0) mov r0, #0 @ About as wrong as it could be. pop { pc } .endm .macro ARM_LDIV0 +Ldiv0: str lr, [sp, #-4]! bl SYM (__div0) __PLT__ mov r0, #0 @ About as wrong as it could be. @@ -103,25 +111,6 @@ Ldiv0: # define RETCOND #endif -#ifdef __thumb__ -#define THUMB_FUNC .thumb_func -#define THUMB_CODE .force_thumb -#else -#define THUMB_FUNC -#define THUMB_CODE -#endif - - -.macro FUNC_START name - .text - .globl SYM (__\name) - TYPE (__\name) - .align 0 - THUMB_CODE - THUMB_FUNC -SYM (__\name): -.endm - .macro FUNC_END name Ldiv0: #ifdef __thumb__ @@ -138,36 +127,134 @@ Ldiv0: .thumb_func SYM (\name): .endm + +/* Function start macros. Variants for ARM and Thumb. */ + +#ifdef __thumb__ +#define THUMB_FUNC .thumb_func +#define THUMB_CODE .force_thumb +#else +#define THUMB_FUNC +#define THUMB_CODE +#endif + +.macro FUNC_START name + .text + .globl SYM (__\name) + TYPE (__\name) + .align 0 + THUMB_CODE + THUMB_FUNC +SYM (__\name): +.endm -/* Used for Thumb code. */ +/* Register aliases. */ + work .req r4 @ XXXX is this safe ? - -/* ------------------------------------------------------------------------ */ -#ifdef L_udivsi3 - dividend .req r0 divisor .req r1 +overdone .req r2 result .req r2 curbit .req r3 ip .req r12 sp .req r13 lr .req r14 pc .req r15 - - FUNC_START udivsi3 -#ifdef __thumb__ +/* ------------------------------------------------------------------------ */ +/* Bodies of the division and modulo routines. 
*/ +/* ------------------------------------------------------------------------ */ +.macro ARM_DIV_MOD_BODY modulo +Loop1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, #0x10000000 + cmpLO divisor, dividend + movLO divisor, divisor, lsl #4 + movLO curbit, curbit, lsl #4 + bLO Loop1 - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - mov result, #0 +Lbignum: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, #0x80000000 + cmpLO divisor, dividend + movLO divisor, divisor, lsl #1 + movLO curbit, curbit, lsl #1 + bLO Lbignum + +Loop3: + @ Test for possible subtractions. On the final pass, this may + @ subtract too much from the dividend ... - push { work } + .if \modulo + @ ... so keep track of which subtractions are done in OVERDONE. + @ We can fix them up afterwards. + mov overdone, #0 cmp dividend, divisor - bcc Lgot_result + subHS dividend, dividend, divisor + cmp dividend, divisor, lsr #1 + subHS dividend, dividend, divisor, lsr #1 + orrHS overdone, overdone, curbit, ror #1 + cmp dividend, divisor, lsr #2 + subHS dividend, dividend, divisor, lsr #2 + orrHS overdone, overdone, curbit, ror #2 + cmp dividend, divisor, lsr #3 + subHS dividend, dividend, divisor, lsr #3 + orrHS overdone, overdone, curbit, ror #3 + mov ip, curbit + .else + @ ... so keep track of which subtractions are done in RESULT. + @ The result will be ok, since the "bit" will have been + @ shifted out at the bottom. 
+ cmp dividend, divisor + subHS dividend, dividend, divisor + orrHS result, result, curbit + cmp dividend, divisor, lsr #1 + subHS dividend, dividend, divisor, lsr #1 + orrHS result, result, curbit, lsr #1 + cmp dividend, divisor, lsr #2 + subHS dividend, dividend, divisor, lsr #2 + orrHS result, result, curbit, lsr #2 + cmp dividend, divisor, lsr #3 + subHS dividend, dividend, divisor, lsr #3 + orrHS result, result, curbit, lsr #3 + .endif - @ Load the constant 0x10000000 into our work register + cmp dividend, #0 @ Early termination? + movNEs curbit, curbit, lsr #4 @ No, any more bits to do? + movNE divisor, divisor, lsr #4 + bNE Loop3 + + .if \modulo +Lfixup_dividend: + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of OVERDONE. Exactly which were not needed + @ are governed by the position of the bit, stored in IP. + ands overdone, overdone, #0xe0000000 + @ If we terminated early, because dividend became zero, then the + @ bit in ip will not be in the bottom nibble, and we should not + @ perform the additions below. We must test for this though + @ (rather than relying upon the TSTs to prevent the additions) since + @ the bit in ip could be in the top two bits which might then match + @ with one of the smaller RORs. + tstNE ip, #0x7 + bEQ Lgot_result + tst overdone, ip, ror #3 + addNE dividend, dividend, divisor, lsr #3 + tst overdone, ip, ror #2 + addNE dividend, dividend, divisor, lsr #2 + tst overdone, ip, ror #1 + addNE dividend, dividend, divisor, lsr #1 + .endif + +Lgot_result: +.endm +/* ------------------------------------------------------------------------ */ +.macro THUMB_DIV_MOD_BODY modulo + @ Load the constant 0x10000000 into our work register. mov work, #1 lsl work, #28 Loop1: @@ -176,65 +263,172 @@ Loop1: @ division loop. Continue shifting until the divisor is @ larger than the dividend. 
cmp divisor, work - bcs Lbignum + bHS Lbignum cmp divisor, dividend - bcs Lbignum + bHS Lbignum lsl divisor, #4 lsl curbit, #4 b Loop1 - Lbignum: @ Set work to 0x80000000 lsl work, #3 -Loop2: +Loop2: @ For very big divisors, we must shift it a bit at a time, or @ we will be in danger of overflowing. cmp divisor, work - bcs Loop3 + bHS Loop3 cmp divisor, dividend - bcs Loop3 + bHS Loop3 lsl divisor, #1 lsl curbit, #1 b Loop2 - Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. + @ Test for possible subtractions ... + .if \modulo + @ ... On the final pass, this may subtract too much from the dividend, + @ so keep track of which subtractions are done, we can fix them up + @ afterwards. + mov overdone, #0 cmp dividend, divisor - bcc Over1 + bLO Lover1 sub dividend, dividend, divisor - orr result, result, curbit -Over1: +Lover1: lsr work, divisor, #1 cmp dividend, work - bcc Over2 + bLO Lover2 + sub dividend, dividend, work + mov ip, curbit + mov work, #1 + ror curbit, work + orr overdone, curbit + mov curbit, ip +Lover2: + lsr work, divisor, #2 + cmp dividend, work + bLO Lover3 + sub dividend, dividend, work + mov ip, curbit + mov work, #2 + ror curbit, work + orr overdone, curbit + mov curbit, ip +Lover3: + lsr work, divisor, #3 + cmp dividend, work + bLO Lover4 + sub dividend, dividend, work + mov ip, curbit + mov work, #3 + ror curbit, work + orr overdone, curbit + mov curbit, ip +Lover4: + mov ip, curbit + .else + @ ... and note which bits are done in the result. On the final pass, + @ this may subtract too much from the dividend, but the result will be ok, + @ since the "bit" will have been shifted out at the bottom. 
+ cmp dividend, divisor + bLO Lover1 + sub dividend, dividend, divisor + orr result, result, curbit +Lover1: + lsr work, divisor, #1 + cmp dividend, work + bLO Lover2 sub dividend, dividend, work lsr work, curbit, #1 orr result, work -Over2: +Lover2: lsr work, divisor, #2 cmp dividend, work - bcc Over3 + bLO Lover3 sub dividend, dividend, work lsr work, curbit, #2 orr result, work -Over3: +Lover3: lsr work, divisor, #3 cmp dividend, work - bcc Over4 + bLO Lover4 sub dividend, dividend, work lsr work, curbit, #3 orr result, work -Over4: +Lover4: + .endif + cmp dividend, #0 @ Early termination? - beq Lgot_result + bEQ Lover5 lsr curbit, #4 @ No, any more bits to do? - beq Lgot_result + bEQ Lover5 lsr divisor, #4 b Loop3 +Lover5: + .if \modulo + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + mov work, #0xe + lsl work, #28 + and overdone, work + bEQ Lgot_result + + @ If we terminated early, because dividend became zero, then the + @ bit in ip will not be in the bottom nibble, and we should not + @ perform the additions below. We must test for this though + @ (rather than relying upon the TSTs to prevent the additions) since + @ the bit in ip could be in the top two bits which might then match + @ with one of the smaller RORs. 
+ mov curbit, ip + mov work, #0x7 + tst curbit, work + bEQ Lgot_result + + mov curbit, ip + mov work, #3 + ror curbit, work + tst overdone, curbit + bEQ Lover6 + lsr work, divisor, #3 + add dividend, work +Lover6: + mov curbit, ip + mov work, #2 + ror curbit, work + tst overdone, curbit + bEQ Lover7 + lsr work, divisor, #2 + add dividend, work +Lover7: + mov curbit, ip + mov work, #1 + ror curbit, work + tst overdone, curbit + bEQ Lgot_result + lsr work, divisor, #1 + add dividend, work + .endif Lgot_result: +.endm +/* ------------------------------------------------------------------------ */ +/* Start of the Real Functions */ +/* ------------------------------------------------------------------------ */ +#ifdef L_udivsi3 + + FUNC_START udivsi3 + +#ifdef __thumb__ + + cmp divisor, #0 + bEQ Ldiv0 + mov curbit, #1 + mov result, #0 + + push { work } + cmp dividend, divisor + bLO Lgot_result + + THUMB_DIV_MOD_BODY 0 + mov r0, result pop { work } RET @@ -242,53 +436,14 @@ Lgot_result: #else /* ARM version. */ cmp divisor, #0 - beq Ldiv0 + bEQ Ldiv0 mov curbit, #1 mov result, #0 cmp dividend, divisor - bcc Lgot_result -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 - -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum - -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. 
- cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 -Lgot_result: + bLO Lgot_result + + ARM_DIV_MOD_BODY 0 + mov r0, result RET @@ -300,219 +455,37 @@ Lgot_result: /* ------------------------------------------------------------------------ */ #ifdef L_umodsi3 -dividend .req r0 -divisor .req r1 -overdone .req r2 -curbit .req r3 -ip .req r12 -sp .req r13 -lr .req r14 -pc .req r15 - FUNC_START umodsi3 #ifdef __thumb__ cmp divisor, #0 - beq Ldiv0 + bEQ Ldiv0 mov curbit, #1 cmp dividend, divisor - bcs Over1 + bHS Lover10 RET -Over1: - @ Load the constant 0x10000000 into our work register +Lover10: push { work } - mov work, #1 - lsl work, #28 -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, work - bcs Lbignum - cmp divisor, dividend - bcs Lbignum - lsl divisor, #4 - lsl curbit, #4 - b Loop1 -Lbignum: - @ Set work to 0x80000000 - lsl work, #3 -Loop2: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, work - bcs Loop3 - cmp divisor, dividend - bcs Loop3 - lsl divisor, #1 - lsl curbit, #1 - b Loop2 -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... 
- mov overdone, #0 - cmp dividend, divisor - bcc Over2 - sub dividend, dividend, divisor -Over2: - lsr work, divisor, #1 - cmp dividend, work - bcc Over3 - sub dividend, dividend, work - mov ip, curbit - mov work, #1 - ror curbit, work - orr overdone, curbit - mov curbit, ip -Over3: - lsr work, divisor, #2 - cmp dividend, work - bcc Over4 - sub dividend, dividend, work - mov ip, curbit - mov work, #2 - ror curbit, work - orr overdone, curbit - mov curbit, ip -Over4: - lsr work, divisor, #3 - cmp dividend, work - bcc Over5 - sub dividend, dividend, work - mov ip, curbit - mov work, #3 - ror curbit, work - orr overdone, curbit - mov curbit, ip -Over5: - mov ip, curbit - cmp dividend, #0 @ Early termination? - beq Over6 - lsr curbit, #4 @ No, any more bits to do? - beq Over6 - lsr divisor, #4 - b Loop3 -Over6: - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - mov work, #0xe - lsl work, #28 - and overdone, work - bne Over7 - pop { work } - RET @ No fixups needed + + THUMB_DIV_MOD_BODY 1 - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. 
- mov curbit, ip - mov work, #0x7 - tst curbit, work - beq Over10 - -Over7: - mov curbit, ip - mov work, #3 - ror curbit, work - tst overdone, curbit - beq Over8 - lsr work, divisor, #3 - add dividend, dividend, work -Over8: - mov curbit, ip - mov work, #2 - ror curbit, work - tst overdone, curbit - beq Over9 - lsr work, divisor, #2 - add dividend, dividend, work -Over9: - mov curbit, ip - mov work, #1 - ror curbit, work - tst overdone, curbit - beq Over10 - lsr work, divisor, #1 - add dividend, dividend, work -Over10: pop { work } RET #else /* ARM version. */ cmp divisor, #0 - beq Ldiv0 + bEQ Ldiv0 + cmp divisor, #1 + cmpNE dividend, divisor + movEQ dividend, #0 + RETc(LO) mov curbit, #1 - cmp dividend, divisor - RETc(cc) -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum - -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... 
- mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 - - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - ands overdone, overdone, #0xe0000000 - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. - tstNE ip, #0x7 - RETc(eq) @ No fixups needed - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 + ARM_DIV_MOD_BODY 1 + RET #endif /* ARM version. 
*/ @@ -523,20 +496,11 @@ Loop3: /* ------------------------------------------------------------------------ */ #ifdef L_divsi3 -dividend .req r0 -divisor .req r1 -result .req r2 -curbit .req r3 -ip .req r12 -sp .req r13 -lr .req r14 -pc .req r15 - FUNC_START divsi3 #ifdef __thumb__ cmp divisor, #0 - beq Ldiv0 + bEQ Ldiv0 push { work } mov work, dividend @@ -545,91 +509,26 @@ pc .req r15 mov curbit, #1 mov result, #0 cmp divisor, #0 - bpl Over1 + bPL Lover10 neg divisor, divisor @ Loops below use unsigned. -Over1: +Lover10: cmp dividend, #0 - bpl Over2 + bPL Lover11 neg dividend, dividend -Over2: +Lover11: cmp dividend, divisor - bcc Lgot_result + bLO Lgot_result - mov work, #1 - lsl work, #28 -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, work - Bcs Lbignum - cmp divisor, dividend - Bcs Lbignum - lsl divisor, #4 - lsl curbit, #4 - b Loop1 - -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - lsl work, #3 -Loop2: - cmp divisor, work - Bcs Loop3 - cmp divisor, dividend - Bcs Loop3 - lsl divisor, #1 - lsl curbit, #1 - b Loop2 - -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. 
- cmp dividend, divisor - Bcc Over3 - sub dividend, dividend, divisor - orr result, result, curbit -Over3: - lsr work, divisor, #1 - cmp dividend, work - Bcc Over4 - sub dividend, dividend, work - lsr work, curbit, #1 - orr result, work -Over4: - lsr work, divisor, #2 - cmp dividend, work - Bcc Over5 - sub dividend, dividend, work - lsr work, curbit, #2 - orr result, result, work -Over5: - lsr work, divisor, #3 - cmp dividend, work - Bcc Over6 - sub dividend, dividend, work - lsr work, curbit, #3 - orr result, result, work -Over6: - cmp dividend, #0 @ Early termination? - Beq Lgot_result - lsr curbit, #4 @ No, any more bits to do? - Beq Lgot_result - lsr divisor, #4 - b Loop3 + THUMB_DIV_MOD_BODY 0 -Lgot_result: mov r0, result mov work, ip cmp work, #0 - Bpl Over7 + bPL Lover12 neg r0, r0 -Over7: +Lover12: pop { work } - RET + RET #else /* ARM version. */ @@ -637,58 +536,18 @@ Over7: mov curbit, #1 mov result, #0 cmp divisor, #0 - rsbmi divisor, divisor, #0 @ Loops below use unsigned. - beq Ldiv0 + rsbMI divisor, divisor, #0 @ Loops below use unsigned. + bEQ Ldiv0 cmp dividend, #0 - rsbmi dividend, dividend, #0 + rsbMI dividend, dividend, #0 cmp dividend, divisor - bcc Lgot_result + bLO Lgot_result -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 - -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum - -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. 
On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 -Lgot_result: + ARM_DIV_MOD_BODY 0 + mov r0, result cmp ip, #0 - rsbmi r0, r0, #0 + rsbMI r0, r0, #0 RET #endif /* ARM version */ @@ -699,242 +558,57 @@ Lgot_result: /* ------------------------------------------------------------------------ */ #ifdef L_modsi3 -dividend .req r0 -divisor .req r1 -overdone .req r2 -curbit .req r3 -ip .req r12 -sp .req r13 -lr .req r14 -pc .req r15 - FUNC_START modsi3 #ifdef __thumb__ mov curbit, #1 cmp divisor, #0 - beq Ldiv0 - Bpl Over1 + bEQ Ldiv0 + bPL Lover10 neg divisor, divisor @ Loops below use unsigned. -Over1: +Lover10: push { work } @ Need to save the sign of the dividend, unfortunately, we need - @ ip later on. Must do this after saving the original value of + @ work later on. Must do this after saving the original value of @ the work register, because we will pop this value off first. push { dividend } cmp dividend, #0 - Bpl Over2 + bPL Lover11 neg dividend, dividend -Over2: +Lover11: cmp dividend, divisor - bcc Lgot_result - mov work, #1 - lsl work, #28 -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
- cmp divisor, work - bcs Lbignum - cmp divisor, dividend - bcs Lbignum - lsl divisor, #4 - lsl curbit, #4 - b Loop1 + bLO Lgot_result -Lbignum: - @ Set work to 0x80000000 - lsl work, #3 -Loop2: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, work - bcs Loop3 - cmp divisor, dividend - bcs Loop3 - lsl divisor, #1 - lsl curbit, #1 - b Loop2 - -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... - mov overdone, #0 - cmp dividend, divisor - bcc Over3 - sub dividend, dividend, divisor -Over3: - lsr work, divisor, #1 - cmp dividend, work - bcc Over4 - sub dividend, dividend, work - mov ip, curbit - mov work, #1 - ror curbit, work - orr overdone, curbit - mov curbit, ip -Over4: - lsr work, divisor, #2 - cmp dividend, work - bcc Over5 - sub dividend, dividend, work - mov ip, curbit - mov work, #2 - ror curbit, work - orr overdone, curbit - mov curbit, ip -Over5: - lsr work, divisor, #3 - cmp dividend, work - bcc Over6 - sub dividend, dividend, work - mov ip, curbit - mov work, #3 - ror curbit, work - orr overdone, curbit - mov curbit, ip -Over6: - mov ip, curbit - cmp dividend, #0 @ Early termination? - beq Over7 - lsr curbit, #4 @ No, any more bits to do? - beq Over7 - lsr divisor, #4 - b Loop3 - -Over7: - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - mov work, #0xe - lsl work, #28 - and overdone, work - beq Lgot_result - - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. 
We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. - mov curbit, ip - mov work, #0x7 - tst curbit, work - beq Lgot_result - - mov curbit, ip - mov work, #3 - ror curbit, work - tst overdone, curbit - beq Over8 - lsr work, divisor, #3 - add dividend, dividend, work -Over8: - mov curbit, ip - mov work, #2 - ror curbit, work - tst overdone, curbit - beq Over9 - lsr work, divisor, #2 - add dividend, dividend, work -Over9: - mov curbit, ip - mov work, #1 - ror curbit, work - tst overdone, curbit - beq Lgot_result - lsr work, divisor, #1 - add dividend, dividend, work -Lgot_result: + THUMB_DIV_MOD_BODY 1 + pop { work } cmp work, #0 - bpl Over10 + bPL Lover12 neg dividend, dividend -Over10: +Lover12: pop { work } RET #else /* ARM version. */ - mov curbit, #1 cmp divisor, #0 - rsbmi divisor, divisor, #0 @ Loops below use unsigned. - beq Ldiv0 + rsbMI divisor, divisor, #0 @ Loops below use unsigned. + bEQ Ldiv0 @ Need to save the sign of the dividend, unfortunately, we need @ ip later on; this is faster than pushing lr and using that. str dividend, [sp, #-4]! - cmp dividend, #0 - rsbmi dividend, dividend, #0 - cmp dividend, divisor - bcc Lgot_result + cmp dividend, #0 @ Test dividend against zero + rsbMI dividend, dividend, #0 @ If negative make positive + cmp dividend, divisor @ else if zero return zero + bLO Lgot_result @ if smaller return dividend + mov curbit, #1 -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
- cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 + ARM_DIV_MOD_BODY 1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum - -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... - mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 - - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - ands overdone, overdone, #0xe0000000 - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. 
- tstNE ip, #0x7 - beq Lgot_result - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 -Lgot_result: ldr ip, [sp], #4 cmp ip, #0 - rsbmi dividend, dividend, #0 + rsbMI dividend, dividend, #0 RET #endif /* ARM version */ @@ -1105,4 +779,3 @@ _arm_return: SIZE (_interwork_call_via_lr) #endif /* L_interwork_call_via_rX */ -