openwrt-xburst/target/linux/omap24xx/patches-2.6.35/100-optimized-arm-div.patch

---
 arch/arm/boot/compressed/lib1funcs.S |  348 +++++++++++++++++++++++++++++++++++
 1 file changed, 348 insertions(+)

--- /dev/null
+++ linux-2.6.35/arch/arm/boot/compressed/lib1funcs.S
@@ -0,0 +1,348 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico@fluxnic.net>
+ *   - contributed to gcc-3.4 on Sep 30, 2003
+ *   - adapted for the Linux kernel on Oct 2, 2003
+ */
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+	@ Unsigned shift-and-subtract divide core: builds the quotient in result. Overwrites dividend, divisor and curbit. Callers in this file expand it only after checking dividend > divisor.
+#if __LINUX_ARM_ARCH__ >= 5
+	@ clz path: line the divisor up under the top set bit of the dividend in one step.
+	clz	\curbit, \divisor		@ leading zeros of divisor
+	clz	\result, \dividend		@ leading zeros of dividend
+	sub	\result, \curbit, \result	@ shift count that aligns the two top bits
+	mov	\curbit, #1
+	mov	\divisor, \divisor, lsl \result	@ divisor aligned with dividend
+	mov	\curbit, \curbit, lsl \result	@ quotient bit that matches this alignment
+	mov	\result, #0			@ quotient accumulator starts at zero
+	
+#else
+
+	@ Initially shift the divisor left 3 bits if possible,
+	@ set curbit accordingly.  This allows for curbit to be located
+	@ at the left end of each 4-bit nibble in the division loop
+	@ to save one loop in most cases.
+	tst	\divisor, #0xe0000000		@ top three bits of divisor clear?
+	moveq	\divisor, \divisor, lsl #3
+	moveq	\curbit, #8
+	movne	\curbit, #1
+
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	movlo	\curbit, \curbit, lsl #4	@ curbit tracks every shift applied to divisor
+	blo	1b
+
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	movlo	\curbit, \curbit, lsl #1
+	blo	1b
+
+	mov	\result, #0			@ quotient accumulator starts at zero
+
+#endif
+
+	@ Division loop: four compare/subtract steps per pass, each one setting one quotient bit.
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor	@ divisor fits: take it out of dividend
+	orrhs	\result,   \result,   \curbit	@ and record the matching quotient bit
+	cmp	\dividend, \divisor,  lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	orrhs	\result,   \result,   \curbit,  lsr #1
+	cmp	\dividend, \divisor,  lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	orrhs	\result,   \result,   \curbit,  lsr #2
+	cmp	\dividend, \divisor,  lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	orrhs	\result,   \result,   \curbit,  lsr #3
+	cmp	\dividend, #0			@ Early termination?
+	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
+	movne	\divisor,  \divisor, lsr #4	@ curbit still non-zero: move divisor down a nibble too
+	bne	1b				@ loop while dividend and curbit are both non-zero
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+	@ Sets order to log2 of divisor. Callers in this file expand it only for a power-of-two divisor. The path without clz also overwrites divisor.
+#if __LINUX_ARM_ARCH__ >= 5
+
+	clz	\order, \divisor
+	rsb	\order, \order, #31		@ order = 31 - clz = index of the top set bit
+
+#else
+	@ No clz on this path: binary search, shifting divisor down while order accumulates.
+	cmp	\divisor, #(1 << 16)
+	movhs	\divisor, \divisor, lsr #16
+	movhs	\order, #16
+	movlo	\order, #0
+
+	cmp	\divisor, #(1 << 8)
+	movhs	\divisor, \divisor, lsr #8
+	addhs	\order, \order, #8
+
+	cmp	\divisor, #(1 << 4)
+	movhs	\divisor, \divisor, lsr #4
+	addhs	\order, \order, #4
+
+	cmp	\divisor, #(1 << 2)		@ what is left of divisor is below 16 here
+	addhi	\order, \order, #3		@ above 4: for a power of two that means 8
+	addls	\order, \order, \divisor, lsr #1	@ 1, 2 or 4 contribute 0, 1 or 2
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+	@ Unsigned remainder core: reduces dividend in place to dividend mod divisor. Overwrites divisor and order. spare is written only on the clz path. Callers in this file expand it only when dividend > divisor.
+#if __LINUX_ARM_ARCH__ >= 5
+	@ clz path: order = how far divisor must move left to sit under the top bit of dividend.
+	clz	\order, \divisor
+	clz	\spare, \dividend
+	sub	\order, \order, \spare
+	mov	\divisor, \divisor, lsl \order	@ divisor aligned with dividend
+
+#else
+
+	mov	\order, #0			@ order counts how far divisor gets shifted left
+
+	@ Unless the divisor is very big, shift it up in multiples of
+	@ four bits, since this is the amount of unwinding in the main
+	@ division loop.  Continue shifting until the divisor is 
+	@ larger than the dividend.
+1:	cmp	\divisor, #0x10000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #4
+	addlo	\order, \order, #4
+	blo	1b
+
+	@ For very big divisors, we must shift it a bit at a time, or
+	@ we will be in danger of overflowing.
+1:	cmp	\divisor, #0x80000000
+	cmplo	\divisor, \dividend
+	movlo	\divisor, \divisor, lsl #1
+	addlo	\order, \order, #1
+	blo	1b
+
+#endif
+
+	@ Perform all needed subtractions to keep only the remainder.
+	@ Do comparisons in batches of 4 first.
+	subs	\order, \order, #3		@ yes, 3 is intended here
+	blt	2f				@ order + 1 steps are needed, fewer than 4: go to the tail
+
+1:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	cmp	\dividend, \divisor,  lsr #1
+	subhs	\dividend, \dividend, \divisor, lsr #1
+	cmp	\dividend, \divisor,  lsr #2
+	subhs	\dividend, \dividend, \divisor, lsr #2
+	cmp	\dividend, \divisor,  lsr #3
+	subhs	\dividend, \dividend, \divisor, lsr #3
+	cmp	\dividend, #1			@ GE while dividend is still at least 1 (signed compare)
+	mov	\divisor, \divisor, lsr #4	@ mov without s leaves those flags alone
+	subges	\order, \order, #4		@ if GE, count four steps as done and set flags from order
+	bge	1b				@ another batch only if both tests came out GE
+
+	tst	\order, #3			@ low two bits of order clear: no steps left over
+	teqne	\dividend, #0			@ otherwise check whether dividend is already zero
+	beq	5f				@ finished in either case
+
+	@ Either 1, 2 or 3 comparison/subtractions are left.
+2:	cmn	\order, #2			@ compare order with -2
+	blt	4f				@ order below -2: one step left
+	beq	3f				@ order equal to -2: two steps left
+	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor,  \divisor,  lsr #1
+3:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+	mov	\divisor,  \divisor,  lsr #1
+4:	cmp	\dividend, \divisor
+	subhs	\dividend, \dividend, \divisor
+5:
+.endm
+
+
+ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+	@ Unsigned 32-bit divide. In: r0 = dividend, r1 = divisor. Out: r0 = quotient. May overwrite r1, r2, r3 and the flags. A zero divisor branches to Ldiv0.
+	subs	r2, r1, #1		@ r2 = divisor - 1, flags tell divisor 0 and 1 apart
+	moveq	pc, lr			@ divisor == 1: r0 already holds the quotient
+	bcc	Ldiv0			@ borrow from the subs: divisor == 0
+	cmp	r0, r1
+	bls	11f			@ dividend <= divisor: quotient is 0 or 1
+	tst	r1, r2			@ divisor & (divisor - 1) is zero for a power of two
+	beq	12f
+	@ General case: the quotient is built in r2, r3 is scratch.
+	ARM_DIV_BODY r0, r1, r2, r3
+
+	mov	r0, r2
+	mov	pc, lr
+
+11:	moveq	r0, #1			@ dividend == divisor
+	movne	r0, #0			@ dividend < divisor
+	mov	pc, lr
+
+12:	ARM_DIV2_ORDER r1, r2
+	@ r2 now holds log2(divisor), so the quotient is a plain right shift.
+	mov	r0, r0, lsr r2
+	mov	pc, lr
+
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
+
+ENTRY(__umodsi3)
+	@ Unsigned 32-bit remainder. In: r0 = dividend, r1 = divisor. Out: r0 = remainder. May overwrite r1, r2, r3 and the flags. A zero divisor branches to Ldiv0.
+	subs	r2, r1, #1			@ compare divisor with 1
+	bcc	Ldiv0				@ borrow from the subs: divisor == 0
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq   r0, #0				@ divisor == 1 or dividend == divisor: remainder is 0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2			@ power of two: remainder = dividend & (divisor - 1)
+	movls	pc, lr				@ done unless dividend > divisor and divisor is not a power of 2
+
+	ARM_MOD_BODY r0, r1, r2, r3
+
+	mov	pc, lr
+
+ENDPROC(__umodsi3)
+
+ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+	@ Signed 32-bit divide. In: r0 = dividend, r1 = divisor. Out: r0 = quotient. May overwrite r1, r2, r3, ip and the flags. A zero divisor branches to Ldiv0.
+	cmp	r1, #0
+	eor	ip, r0, r1			@ save the sign of the result.
+	beq	Ldiv0				@ divisor == 0 (eor without s kept the cmp flags)
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	subs	r2, r1, #1			@ division by 1 or -1 ?
+	beq	10f
+	movs	r3, r0				@ r3 = dividend, flags from its sign
+	rsbmi	r3, r0, #0			@ positive dividend value
+	cmp	r3, r1				@ compare the two magnitudes
+	bls	11f				@ |dividend| <= |divisor|: result is 0, +1 or -1
+	tst	r1, r2				@ divisor is power of 2 ?
+	beq	12f
+	@ General case: r0 receives the unsigned quotient of the magnitudes, r2 is scratch.
+	ARM_DIV_BODY r3, r1, r0, r2
+
+	cmp	ip, #0				@ bit 31 of ip set: operand signs differ
+	rsbmi	r0, r0, #0			@ then negate the quotient
+	mov	pc, lr
+
+10:	teq	ip, r0				@ ip eor r0 is the original divisor: was it -1 ?
+	rsbmi	r0, r0, #0			@ dividing by -1: negate the dividend
+	mov	pc, lr
+
+11:	movlo	r0, #0				@ |dividend| < |divisor|: quotient 0
+	moveq	r0, ip, asr #31			@ equal magnitudes: 0 or -1 from the result sign
+	orreq	r0, r0, #1			@ which turns into +1 or -1
+	mov	pc, lr
+
+12:	ARM_DIV2_ORDER r1, r2
+	@ r2 now holds log2 of |divisor|: shift the magnitude, then apply the sign.
+	cmp	ip, #0
+	mov	r0, r3, lsr r2			@ mov without s keeps the flags from the cmp
+	rsbmi	r0, r0, #0
+	mov	pc, lr
+
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
+
+ENTRY(__modsi3)
+	@ Signed 32-bit remainder. In: r0 = dividend, r1 = divisor. Out: r0 = remainder, with the sign of the dividend. May overwrite r1, r2, r3, ip and the flags.
+	cmp	r1, #0
+	beq	Ldiv0				@ divisor == 0
+	rsbmi	r1, r1, #0			@ loops below use unsigned.
+	movs	ip, r0				@ preserve sign of dividend
+	rsbmi	r0, r0, #0			@ if negative make positive
+	subs	r2, r1, #1			@ compare divisor with 1
+	cmpne	r0, r1				@ compare dividend with divisor
+	moveq	r0, #0				@ |divisor| == 1 or equal magnitudes: remainder is 0
+	tsthi	r1, r2				@ see if divisor is power of 2
+	andeq	r0, r0, r2			@ power of two: remainder = |dividend| & (|divisor| - 1)
+	bls	10f				@ remainder already known: skip the subtract loop
+
+	ARM_MOD_BODY r0, r1, r2, r3
+
+10:	cmp	ip, #0				@ was the dividend negative?
+	rsbmi	r0, r0, #0			@ then negate the remainder
+	mov	pc, lr
+
+ENDPROC(__modsi3)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_uidivmod)
+	@ Unsigned divide with remainder. In: r0 = dividend, r1 = divisor. Out: r0 = quotient, r1 = remainder. Overwrites r2 and r3.
+	stmfd	sp!, {r0, r1, ip, lr}	@ save both operands, ip and lr across the call
+	bl	__aeabi_uidiv		@ r0 = quotient
+	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = saved dividend, r2 = saved divisor
+	mul	r3, r0, r2		@ r3 = quotient * divisor
+	sub	r1, r1, r3		@ r1 = dividend - quotient * divisor
+	mov	pc, lr
+
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+	@ Signed divide with remainder. In: r0 = dividend, r1 = divisor. Out: r0 = quotient, r1 = remainder. Overwrites r2 and r3.
+	stmfd	sp!, {r0, r1, ip, lr}	@ save both operands, ip and lr across the call
+	bl	__aeabi_idiv		@ r0 = quotient
+	ldmfd	sp!, {r1, r2, ip, lr}	@ r1 = saved dividend, r2 = saved divisor
+	mul	r3, r0, r2		@ r3 = quotient * divisor
+	sub	r1, r1, r3		@ r1 = dividend - quotient * divisor
+	mov	pc, lr
+
+ENDPROC(__aeabi_idivmod)
+
+#endif
+
+Ldiv0:
+	@ Shared divide-by-zero exit: calls __div0 (defined outside this file), then returns 0 in r0 to whoever called the divide routine.
+	str	lr, [sp, #-8]!		@ push lr, moving sp down by 8 (presumably 8 rather than 4 to keep sp 8-byte aligned - confirm)
+	bl	__div0
+	mov	r0, #0			@ About as wrong as it could be.
+	ldr	pc, [sp], #8		@ pop the saved lr straight into pc and release the 8 bytes
+
+
Add omap24xx. Boots the kernel with working video and serial console. Userland is untested. git-svn-id: svn://svn.openwrt.org/openwrt/trunk@22530 3c298f89-4303-0410-b956-a3cf2f4a3e73 2010-08-08 14:16:48 +00:00			`---`
			`arch/arm/boot/compressed/lib1funcs.S \| 348 +++++++++++++++++++++++++++++++++++`
			`1 file changed, 348 insertions(+)`

			`--- /dev/null`
			`+++ linux-2.6.35/arch/arm/boot/compressed/lib1funcs.S`
			`@@ -0,0 +1,348 @@`
			`+/*`
			`+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines`
			`+ *`
			`+ * Author: Nicolas Pitre <nico@fluxnic.net>`
			`+ * - contributed to gcc-3.4 on Sep 30, 2003`
			`+ * - adapted for the Linux kernel on Oct 2, 2003`
			`+ */`
			`+`
			`+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.`
			`+`
			`+This file is free software; you can redistribute it and/or modify it`
			`+under the terms of the GNU General Public License as published by the`
			`+Free Software Foundation; either version 2, or (at your option) any`
			`+later version.`
			`+`
			`+In addition to the permissions in the GNU General Public License, the`
			`+Free Software Foundation gives you unlimited permission to link the`
			`+compiled version of this file into combinations with other programs,`
			`+and to distribute those combinations without any restriction coming`
			`+from the use of this file. (The General Public License restrictions`
			`+do apply in other respects; for example, they cover modification of`
			`+the file, and distribution when not linked into a combine`
			`+executable.)`
			`+`
			`+This file is distributed in the hope that it will be useful, but`
			`+WITHOUT ANY WARRANTY; without even the implied warranty of`
			`+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`+General Public License for more details.`
			`+`
			`+You should have received a copy of the GNU General Public License`
			`+along with this program; see the file COPYING. If not, write to`
			`+the Free Software Foundation, 59 Temple Place - Suite 330,`
			`+Boston, MA 02111-1307, USA. */`
			`+`
			`+`
			`+#include <linux/linkage.h>`
			`+#include <asm/assembler.h>`
			`+`
			`+`
			`+.macro ARM_DIV_BODY dividend, divisor, result, curbit`
			`+`
			`+#if __LINUX_ARM_ARCH__ >= 5`
			`+`
			`+ clz \curbit, \divisor`
			`+ clz \result, \dividend`
			`+ sub \result, \curbit, \result`
			`+ mov \curbit, #1`
			`+ mov \divisor, \divisor, lsl \result`
			`+ mov \curbit, \curbit, lsl \result`
			`+ mov \result, #0`
			`+`
			`+#else`
			`+`
			`+ @ Initially shift the divisor left 3 bits if possible,`
			`+ @ set curbit accordingly. This allows for curbit to be located`
			`+ @ at the left end of each 4 bit nibbles in the division loop`
			`+ @ to save one loop in most cases.`
			`+ tst \divisor, #0xe0000000`
			`+ moveq \divisor, \divisor, lsl #3`
			`+ moveq \curbit, #8`
			`+ movne \curbit, #1`
			`+`
			`+ @ Unless the divisor is very big, shift it up in multiples of`
			`+ @ four bits, since this is the amount of unwinding in the main`
			`+ @ division loop. Continue shifting until the divisor is`
			`+ @ larger than the dividend.`
			`+1: cmp \divisor, #0x10000000`
			`+ cmplo \divisor, \dividend`
			`+ movlo \divisor, \divisor, lsl #4`
			`+ movlo \curbit, \curbit, lsl #4`
			`+ blo 1b`
			`+`
			`+ @ For very big divisors, we must shift it a bit at a time, or`
			`+ @ we will be in danger of overflowing.`
			`+1: cmp \divisor, #0x80000000`
			`+ cmplo \divisor, \dividend`
			`+ movlo \divisor, \divisor, lsl #1`
			`+ movlo \curbit, \curbit, lsl #1`
			`+ blo 1b`
			`+`
			`+ mov \result, #0`
			`+`
			`+#endif`
			`+`
			`+ @ Division loop`
			`+1: cmp \dividend, \divisor`
			`+ subhs \dividend, \dividend, \divisor`
			`+ orrhs \result, \result, \curbit`
			`+ cmp \dividend, \divisor, lsr #1`
			`+ subhs \dividend, \dividend, \divisor, lsr #1`
			`+ orrhs \result, \result, \curbit, lsr #1`
			`+ cmp \dividend, \divisor, lsr #2`
			`+ subhs \dividend, \dividend, \divisor, lsr #2`
			`+ orrhs \result, \result, \curbit, lsr #2`
			`+ cmp \dividend, \divisor, lsr #3`
			`+ subhs \dividend, \dividend, \divisor, lsr #3`
			`+ orrhs \result, \result, \curbit, lsr #3`
			`+ cmp \dividend, #0 @ Early termination?`
			`+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?`
			`+ movne \divisor, \divisor, lsr #4`
			`+ bne 1b`
			`+`
			`+.endm`
			`+`
			`+`
			`+.macro ARM_DIV2_ORDER divisor, order`
			`+`
			`+#if __LINUX_ARM_ARCH__ >= 5`
			`+`
			`+ clz \order, \divisor`
			`+ rsb \order, \order, #31`
			`+`
			`+#else`
			`+`
			`+ cmp \divisor, #(1 << 16)`
			`+ movhs \divisor, \divisor, lsr #16`
			`+ movhs \order, #16`
			`+ movlo \order, #0`
			`+`
			`+ cmp \divisor, #(1 << 8)`
			`+ movhs \divisor, \divisor, lsr #8`
			`+ addhs \order, \order, #8`
			`+`
			`+ cmp \divisor, #(1 << 4)`
			`+ movhs \divisor, \divisor, lsr #4`
			`+ addhs \order, \order, #4`
			`+`
			`+ cmp \divisor, #(1 << 2)`
			`+ addhi \order, \order, #3`
			`+ addls \order, \order, \divisor, lsr #1`
			`+`
			`+#endif`
			`+`
			`+.endm`
			`+`
			`+`
			`+.macro ARM_MOD_BODY dividend, divisor, order, spare`
			`+`
			`+#if __LINUX_ARM_ARCH__ >= 5`
			`+`
			`+ clz \order, \divisor`
			`+ clz \spare, \dividend`
			`+ sub \order, \order, \spare`
			`+ mov \divisor, \divisor, lsl \order`
			`+`
			`+#else`
			`+`
			`+ mov \order, #0`
			`+`
			`+ @ Unless the divisor is very big, shift it up in multiples of`
			`+ @ four bits, since this is the amount of unwinding in the main`
			`+ @ division loop. Continue shifting until the divisor is`
			`+ @ larger than the dividend.`
			`+1: cmp \divisor, #0x10000000`
			`+ cmplo \divisor, \dividend`
			`+ movlo \divisor, \divisor, lsl #4`
			`+ addlo \order, \order, #4`
			`+ blo 1b`
			`+`
			`+ @ For very big divisors, we must shift it a bit at a time, or`
			`+ @ we will be in danger of overflowing.`
			`+1: cmp \divisor, #0x80000000`
			`+ cmplo \divisor, \dividend`
			`+ movlo \divisor, \divisor, lsl #1`
			`+ addlo \order, \order, #1`
			`+ blo 1b`
			`+`
			`+#endif`
			`+`
			`+ @ Perform all needed substractions to keep only the reminder.`
			`+ @ Do comparisons in batch of 4 first.`
			`+ subs \order, \order, #3 @ yes, 3 is intended here`
			`+ blt 2f`
			`+`
			`+1: cmp \dividend, \divisor`
			`+ subhs \dividend, \dividend, \divisor`
			`+ cmp \dividend, \divisor, lsr #1`
			`+ subhs \dividend, \dividend, \divisor, lsr #1`
			`+ cmp \dividend, \divisor, lsr #2`
			`+ subhs \dividend, \dividend, \divisor, lsr #2`
			`+ cmp \dividend, \divisor, lsr #3`
			`+ subhs \dividend, \dividend, \divisor, lsr #3`
			`+ cmp \dividend, #1`
			`+ mov \divisor, \divisor, lsr #4`
			`+ subges \order, \order, #4`
			`+ bge 1b`
			`+`
			`+ tst \order, #3`
			`+ teqne \dividend, #0`
			`+ beq 5f`
			`+`
			`+ @ Either 1, 2 or 3 comparison/substractions are left.`
			`+2: cmn \order, #2`
			`+ blt 4f`
			`+ beq 3f`
			`+ cmp \dividend, \divisor`
			`+ subhs \dividend, \dividend, \divisor`
			`+ mov \divisor, \divisor, lsr #1`
			`+3: cmp \dividend, \divisor`
			`+ subhs \dividend, \dividend, \divisor`
			`+ mov \divisor, \divisor, lsr #1`
			`+4: cmp \dividend, \divisor`
			`+ subhs \dividend, \dividend, \divisor`
			`+5:`
			`+.endm`
			`+`
			`+`
			`+ENTRY(__udivsi3)`
			`+ENTRY(__aeabi_uidiv)`
			`+`
			`+ subs r2, r1, #1`
			`+ moveq pc, lr`
			`+ bcc Ldiv0`
			`+ cmp r0, r1`
			`+ bls 11f`
			`+ tst r1, r2`
			`+ beq 12f`
			`+`
			`+ ARM_DIV_BODY r0, r1, r2, r3`
			`+`
			`+ mov r0, r2`
			`+ mov pc, lr`
			`+`
			`+11: moveq r0, #1`
			`+ movne r0, #0`
			`+ mov pc, lr`
			`+`
			`+12: ARM_DIV2_ORDER r1, r2`
			`+`
			`+ mov r0, r0, lsr r2`
			`+ mov pc, lr`
			`+`
			`+ENDPROC(__udivsi3)`
			`+ENDPROC(__aeabi_uidiv)`
			`+`
			`+ENTRY(__umodsi3)`
			`+`
			`+ subs r2, r1, #1 @ compare divisor with 1`
			`+ bcc Ldiv0`
			`+ cmpne r0, r1 @ compare dividend with divisor`
			`+ moveq r0, #0`
			`+ tsthi r1, r2 @ see if divisor is power of 2`
			`+ andeq r0, r0, r2`
			`+ movls pc, lr`
			`+`
			`+ ARM_MOD_BODY r0, r1, r2, r3`
			`+`
			`+ mov pc, lr`
			`+`
			`+ENDPROC(__umodsi3)`
			`+`
			`+ENTRY(__divsi3)`
			`+ENTRY(__aeabi_idiv)`
			`+`
			`+ cmp r1, #0`
			`+ eor ip, r0, r1 @ save the sign of the result.`
			`+ beq Ldiv0`
			`+ rsbmi r1, r1, #0 @ loops below use unsigned.`
			`+ subs r2, r1, #1 @ division by 1 or -1 ?`
			`+ beq 10f`
			`+ movs r3, r0`
			`+ rsbmi r3, r0, #0 @ positive dividend value`
			`+ cmp r3, r1`
			`+ bls 11f`
			`+ tst r1, r2 @ divisor is power of 2 ?`
			`+ beq 12f`
			`+`
			`+ ARM_DIV_BODY r3, r1, r0, r2`
			`+`
			`+ cmp ip, #0`
			`+ rsbmi r0, r0, #0`
			`+ mov pc, lr`
			`+`
			`+10: teq ip, r0 @ same sign ?`
			`+ rsbmi r0, r0, #0`
			`+ mov pc, lr`
			`+`
			`+11: movlo r0, #0`
			`+ moveq r0, ip, asr #31`
			`+ orreq r0, r0, #1`
			`+ mov pc, lr`
			`+`
			`+12: ARM_DIV2_ORDER r1, r2`
			`+`
			`+ cmp ip, #0`
			`+ mov r0, r3, lsr r2`
			`+ rsbmi r0, r0, #0`
			`+ mov pc, lr`
			`+`
			`+ENDPROC(__divsi3)`
			`+ENDPROC(__aeabi_idiv)`
			`+`
			`+ENTRY(__modsi3)`
			`+`
			`+ cmp r1, #0`
			`+ beq Ldiv0`
			`+ rsbmi r1, r1, #0 @ loops below use unsigned.`
			`+ movs ip, r0 @ preserve sign of dividend`
			`+ rsbmi r0, r0, #0 @ if negative make positive`
			`+ subs r2, r1, #1 @ compare divisor with 1`
			`+ cmpne r0, r1 @ compare dividend with divisor`
			`+ moveq r0, #0`
			`+ tsthi r1, r2 @ see if divisor is power of 2`
			`+ andeq r0, r0, r2`
			`+ bls 10f`
			`+`
			`+ ARM_MOD_BODY r0, r1, r2, r3`
			`+`
			`+10: cmp ip, #0`
			`+ rsbmi r0, r0, #0`
			`+ mov pc, lr`
			`+`
			`+ENDPROC(__modsi3)`
			`+`
			`+#ifdef CONFIG_AEABI`
			`+`
			`+ENTRY(__aeabi_uidivmod)`
			`+`
			`+ stmfd sp!, {r0, r1, ip, lr}`
			`+ bl __aeabi_uidiv`
			`+ ldmfd sp!, {r1, r2, ip, lr}`
			`+ mul r3, r0, r2`
			`+ sub r1, r1, r3`
			`+ mov pc, lr`
			`+`
			`+ENDPROC(__aeabi_uidivmod)`
			`+`
			`+ENTRY(__aeabi_idivmod)`
			`+`
			`+ stmfd sp!, {r0, r1, ip, lr}`
			`+ bl __aeabi_idiv`
			`+ ldmfd sp!, {r1, r2, ip, lr}`
			`+ mul r3, r0, r2`
			`+ sub r1, r1, r3`
			`+ mov pc, lr`
			`+`
			`+ENDPROC(__aeabi_idivmod)`
			`+`
			`+#endif`
			`+`
			`+Ldiv0:`
			`+`
			`+ str lr, [sp, #-8]!`
			`+ bl __div0`
			`+ mov r0, #0 @ About as wrong as it could be.`
			`+ ldr pc, [sp], #8`
			`+`
			`+`