1 files changed, 200 insertions, 0 deletions
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
new file mode 100644
index 000000000000..ec9a1cd6176f
--- /dev/null
+++ b/arch/arm/lib/div64.S
@@ -0,0 +1,200 @@
+/*
+ *  linux/arch/arm/lib/div64.S
+ *
+ *  Optimized computation of 64-bit dividend / 32-bit divisor
+ *
+ *  Author:     Nicolas Pitre
+ *  Created:    Oct 5, 2003
+ *  Copyright:  Monta Vista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+/*
+ * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
+ *
+ * Note: Calling convention is totally non standard for optimal code.
+ *       This is meant to be used by do_div() from include/asm/div64.h only.
+ *
+ * Input parameters:
+ *      xh-xl   = dividend (clobbered)
+ *      r4      = divisor (preserved)
+ *
+ * Output values:
+ *      yh-yl   = result
+ *      xh      = remainder
+ *
+ * Clobbered regs: xl, ip
+ */
+ENTRY(__do_div64)
+        @ Test for easy paths first.
+        subs    ip, r4, #1              @ ip = divisor - 1
+        bls     9f                      @ divisor is 0 or 1
+        tst     ip, r4                  @ (divisor - 1) & divisor
+        beq     8f                      @ divisor is power of 2
+        @ See if we need to handle upper 32-bit result.
+        cmp     xh, r4
+        mov     yh, #0
+        blo     3f                      @ xh < divisor: upper result stays 0
+        @ Align divisor with upper part of dividend.
+        @ The aligned divisor is stored in yl preserving the original.
+        @ The bit position is stored in ip.
+#if __LINUX_ARM_ARCH__ >= 5
+        clz     yl, r4
+        clz     ip, xh
+        sub     yl, yl, ip              @ shift count (xh >= divisor here)
+        mov     ip, #1
+        mov     ip, ip, lsl yl
+        mov     yl, r4, lsl yl
+#else
+        mov     yl, r4
+        mov     ip, #1
+1:      cmp     yl, #0x80000000
+        cmpcc   yl, xh
+        movcc   yl, yl, lsl #1
+        movcc   ip, ip, lsl #1
+        bcc     1b
+#endif
+        @ The division loop for needed upper bit positions.
+        @ Break out early if dividend reaches 0.
+2:      cmp     xh, yl
+        orrcs   yh, yh, ip
+        subcss  xh, xh, yl
+        movnes  ip, ip, lsr #1
+        mov     yl, yl, lsr #1
+        bne     2b
+        @ See if we need to handle lower 32-bit result.
+3:      cmp     xh, #0
+        mov     yl, #0
+        cmpeq   xl, r4
+        movlo   xh, xl                  @ xh == 0 and xl < divisor: xl is the remainder
+        movlo   pc, lr
+        @ The division loop for lower bit positions.
+        @ Here we shift remainder bits leftwards rather than moving the
+        @ divisor for comparisons, considering the carry-out bit as well.
+        mov     ip, #0x80000000
+4:      movs    xl, xl, lsl #1
+        adcs    xh, xh, xh
+        beq     6f
+        cmpcc   xh, r4
+5:      orrcs   yl, yl, ip
+        subcs   xh, xh, r4
+        movs    ip, ip, lsr #1
+        bne     4b
+        mov     pc, lr
+        @ The top part of remainder became zero.  If carry is set
+        @ (the 33rd bit) this is a false positive so resume the loop.
+        @ Otherwise, if lower part is also null then we are done.
+6:      bcs     5b
+        cmp     xl, #0
+        moveq   pc, lr
+        @ We still have remainder bits in the low part.  Bring them up.
+#if __LINUX_ARM_ARCH__ >= 5
+        clz     xh, xl                  @ we know xh is zero here so...
+        add     xh, xh, #1
+        mov     xl, xl, lsl xh
+        mov     ip, ip, lsr xh
+#else
+7:      movs    xl, xl, lsl #1
+        mov     ip, ip, lsr #1
+        bcc     7b
+#endif
+        @ Current remainder is now 1.  It is worthless to compare with
+        @ divisor at this point since divisor can not be smaller than 3 here.
+        @ If possible, branch for another shift in the division loop.
+        @ If no bit position left then we are done.
+        movs    ip, ip, lsr #1
+        mov     xh, #1
+        bne     4b
+        mov     pc, lr
+8:      @ Division by a power of 2: determine what that divisor order is
+        @ then simply shift values around
+#if __LINUX_ARM_ARCH__ >= 5
+        clz     ip, r4
+        rsb     ip, ip, #31             @ ip = bit index of the single set bit
+#else
+        mov     yl, r4
+        cmp     r4, #(1 << 16)
+        mov     ip, #0
+        movhs   yl, yl, lsr #16
+        movhs   ip, #16
+        cmp     yl, #(1 << 8)
+        movhs   yl, yl, lsr #8
+        addhs   ip, ip, #8
+        cmp     yl, #(1 << 4)
+        movhs   yl, yl, lsr #4
+        addhs   ip, ip, #4
+        cmp     yl, #(1 << 2)
+        addhi   ip, ip, #3
+        addls   ip, ip, yl, lsr #1
+#endif
+        mov     yh, xh, lsr ip          @ result high = xh >> order
+        mov     yl, xl, lsr ip          @ result low  = xl >> order ...
+        rsb     ip, ip, #32
+        orr     yl, yl, xh, lsl ip      @ ... | bits shifted out of xh
+        mov     xh, xl, lsl ip          @ remainder = low 'order' bits of xl
+        mov     xh, xh, lsr ip
+        mov     pc, lr
+        @ eq -> division by 1: obvious enough...
+9:      moveq   yl, xl
+        moveq   yh, xh
+        moveq   xh, #0
+        moveq   pc, lr
+        @ Division by 0: call __div0 with lr saved on the stack.
+        str     lr, [sp, #-8]!          @ 8 bytes (4 used): an 8-byte aligned sp stays aligned for the call
+        bl      __div0
+        @ as wrong as it could be...
+        mov     yl, #0
+        mov     yh, #0
+        mov     xh, #0
+        ldr     pc, [sp], #8            @ return via saved lr, dropping the pad word too

diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 000000000000..ec9a1cd6176f --- /dev/null +++ b/arch/arm/lib/div64.S
@@ -0,0 +1,200 @@
	1	/*
	2	* linux/arch/arm/lib/div64.S
	3	*
	4	* Optimized computation of 64-bit dividend / 32-bit divisor
	5	*
	6	* Author: Nicolas Pitre
	7	* Created: Oct 5, 2003
	8	* Copyright: Monta Vista Software, Inc.
	9	*
	10	* This program is free software; you can redistribute it and/or modify
	11	* it under the terms of the GNU General Public License version 2 as
	12	* published by the Free Software Foundation.
	13	*/
	14
	15	#include <linux/linkage.h>
	16
	17	#ifdef __ARMEB__
	18	#define xh r0
	19	#define xl r1
	20	#define yh r2
	21	#define yl r3
	22	#else
	23	#define xl r0
	24	#define xh r1
	25	#define yl r2
	26	#define yh r3
	27	#endif
	28
	29	/*
	30	* __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
	31	*
	32	* Note: Calling convention is totally non standard for optimal code.
	33	* This is meant to be used by do_div() from include/asm/div64.h only.
	34	*
	35	* Input parameters:
	36	* xh-xl = dividend (clobbered)
	37	* r4 = divisor (preserved)
	38	*
	39	* Output values:
	40	* yh-yl = result
	41	* xh = remainder
	42	*
	43	* Clobbered regs: xl, ip
	44	*/
	45
	46	ENTRY(__do_div64)
	47
	48	@ Test for easy paths first.
	49	subs ip, r4, #1 @ ip = divisor - 1
	50	bls 9f @ divisor is 0 or 1
	51	tst ip, r4 @ (divisor - 1) & divisor
	52	beq 8f @ divisor is power of 2
	53
	54	@ See if we need to handle upper 32-bit result.
	55	cmp xh, r4
	56	mov yh, #0
	57	blo 3f @ xh < divisor: upper result stays 0
	58
	59	@ Align divisor with upper part of dividend.
	60	@ The aligned divisor is stored in yl preserving the original.
	61	@ The bit position is stored in ip.
	62
	63	#if __LINUX_ARM_ARCH__ >= 5
	64
	65	clz yl, r4
	66	clz ip, xh
	67	sub yl, yl, ip @ shift count (xh >= divisor here)
	68	mov ip, #1
	69	mov ip, ip, lsl yl
	70	mov yl, r4, lsl yl
	71
	72	#else
	73
	74	mov yl, r4
	75	mov ip, #1
	76	1: cmp yl, #0x80000000
	77	cmpcc yl, xh
	78	movcc yl, yl, lsl #1
	79	movcc ip, ip, lsl #1
	80	bcc 1b
	81
	82	#endif
	83
	84	@ The division loop for needed upper bit positions.
	85	@ Break out early if dividend reaches 0.
	86	2: cmp xh, yl
	87	orrcs yh, yh, ip
	88	subcss xh, xh, yl
	89	movnes ip, ip, lsr #1
	90	mov yl, yl, lsr #1
	91	bne 2b
	92
	93	@ See if we need to handle lower 32-bit result.
	94	3: cmp xh, #0
	95	mov yl, #0
	96	cmpeq xl, r4
	97	movlo xh, xl @ xh == 0 and xl < divisor: xl is the remainder
	98	movlo pc, lr
	99
	100	@ The division loop for lower bit positions.
	101	@ Here we shift remainder bits leftwards rather than moving the
	102	@ divisor for comparisons, considering the carry-out bit as well.
	103	mov ip, #0x80000000
	104	4: movs xl, xl, lsl #1
	105	adcs xh, xh, xh
	106	beq 6f
	107	cmpcc xh, r4
	108	5: orrcs yl, yl, ip
	109	subcs xh, xh, r4
	110	movs ip, ip, lsr #1
	111	bne 4b
	112	mov pc, lr
	113
	114	@ The top part of remainder became zero. If carry is set
	115	@ (the 33rd bit) this is a false positive so resume the loop.
	116	@ Otherwise, if lower part is also null then we are done.
	117	6: bcs 5b
	118	cmp xl, #0
	119	moveq pc, lr
	120
	121	@ We still have remainder bits in the low part. Bring them up.
	122
	123	#if __LINUX_ARM_ARCH__ >= 5
	124
	125	clz xh, xl @ we know xh is zero here so...
	126	add xh, xh, #1
	127	mov xl, xl, lsl xh
	128	mov ip, ip, lsr xh
	129
	130	#else
	131
	132	7: movs xl, xl, lsl #1
	133	mov ip, ip, lsr #1
	134	bcc 7b
	135
	136	#endif
	137
	138	@ Current remainder is now 1. It is worthless to compare with
	139	@ divisor at this point since divisor can not be smaller than 3 here.
	140	@ If possible, branch for another shift in the division loop.
	141	@ If no bit position left then we are done.
	142	movs ip, ip, lsr #1
	143	mov xh, #1
	144	bne 4b
	145	mov pc, lr
	146
	147	8: @ Division by a power of 2: determine what that divisor order is
	148	@ then simply shift values around
	149
	150	#if __LINUX_ARM_ARCH__ >= 5
	151
	152	clz ip, r4
	153	rsb ip, ip, #31 @ ip = bit index of the single set bit
	154
	155	#else
	156
	157	mov yl, r4
	158	cmp r4, #(1 << 16)
	159	mov ip, #0
	160	movhs yl, yl, lsr #16
	161	movhs ip, #16
	162
	163	cmp yl, #(1 << 8)
	164	movhs yl, yl, lsr #8
	165	addhs ip, ip, #8
	166
	167	cmp yl, #(1 << 4)
	168	movhs yl, yl, lsr #4
	169	addhs ip, ip, #4
	170
	171	cmp yl, #(1 << 2)
	172	addhi ip, ip, #3
	173	addls ip, ip, yl, lsr #1
	174
	175	#endif
	176
	177	mov yh, xh, lsr ip @ result high = xh >> order
	178	mov yl, xl, lsr ip @ result low = xl >> order ...
	179	rsb ip, ip, #32
	180	orr yl, yl, xh, lsl ip @ ... | bits shifted out of xh
	181	mov xh, xl, lsl ip @ remainder = low 'order' bits of xl
	182	mov xh, xh, lsr ip
	183	mov pc, lr
	184
	185	@ eq -> division by 1: obvious enough...
	186	9: moveq yl, xl
	187	moveq yh, xh
	188	moveq xh, #0
	189	moveq pc, lr
	190
	191	@ Division by 0: call __div0 with lr saved on the stack.
	192	str lr, [sp, #-8]! @ 8 bytes (4 used): an 8-byte aligned sp stays aligned for the call
	193	bl __div0
	194
	195	@ as wrong as it could be...
	196	mov yl, #0
	197	mov yh, #0
	198	mov xh, #0
	199	ldr pc, [sp], #8 @ return via saved lr, dropping the pad word too
	200