Linux-2.6.12-rc2 (tag: v2.6.12-rc2)

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/sparc/lib/urem.S
1 files changed, 355 insertions, 0 deletions
diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S
new file mode 100644
index 000000000000..ec7f0c502c56
--- /dev/null
+++ b/arch/sparc/lib/urem.S
@@ -0,0 +1,355 @@
+/* $Id: urem.S,v 1.4 1996/09/30 02:22:42 davem Exp $
+ * urem.S:      This routine was taken from glibc-1.09 and is covered
+ *              by the GNU Library General Public License Version 2.
+ */
+/* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  .urem       name of function to generate
+ *  rem         rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ *  false               false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N           how many bits per iteration we try to get (4)
+ *  WORDSIZE    total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS     number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q           the partial quotient under development (initially 0)
+ *  R           the remainder so far, initially the dividend
+ *  ITER        number of main division loop iterations required;
+ *              equal to ceil(log2(quotient) / N).  Note that this
+ *              is the log base (2^N) of the quotient.
+ *  V           the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+        .globl .urem
+.urem:                                  /* unsigned remainder: returns %o0 % %o1 in %o0 */
+        ! Ready to divide.  Compute size of quotient; scale comparand.
+        orcc    %o1, %g0, %o5           /* %o5 = divisor (V); Z is set when the divisor is 0 */
+        bne     1f
+         mov    %o0, %o3                /* delay slot: %o3 = dividend (R) */
+                ! Divide by zero trap.  If it returns, return 0 (about as
+                ! wrong as possible, but that is what SunOS does...).
+                ta      ST_DIV0
+                retl
+                 clr    %o0
+1:
+        cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
+        blu     Lgot_result             ! (and algorithm fails otherwise)
+         clr    %o2                     /* delay slot: %o2 = 0 (Q, the partial quotient) */
+        sethi   %hi(1 << (32 - 4 - 1)), %g1     /* %g1 = 1 << 27, the large-dividend threshold */
+        cmp     %o3, %g1
+        blu     Lnot_really_big
+         clr    %o4                     /* delay slot: %o4 = 0 (ITER, 4-bit loop count) */
+        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+        ! Compute ITER in an unorthodox manner: know we need to shift V into
+        ! the top decade: so do not even bother to compare to R.
+        1:
+                cmp     %o5, %g1
+                bgeu    3f              /* V >= 1 << 27: scaled far enough */
+                 mov    1, %g7          /* delay slot: %g7 = 1 (single-bit step count) */
+                sll     %o5, 4, %o5     /* V <<= 4 */
+                b       1b
+                 add    %o4, 1, %o4     /* delay slot: ITER++ */
+        ! Now compute %g7.
+        2:
+                addcc   %o5, %o5, %o5   /* V <<= 1; carry is set if a bit was shifted out */
+                bcc     Lnot_too_big
+                 add    %g7, 1, %g7     /* delay slot: one more single-bit step */
+                ! We get here if the %o1 overflowed while shifting.
+                ! This means that %o3 has the high-order bit set.
+                ! Restore %o5 and subtract from %o3.
+                sll     %g1, 4, %g1     ! high order bit
+                srl     %o5, 1, %o5             ! rest of %o5
+                add     %o5, %g1, %o5   /* V = (V >> 1) with bit 31 set again */
+                b       Ldo_single_div
+                 sub    %g7, 1, %g7     /* delay slot: undo the count for the overflowed shift */
+        Lnot_too_big:
+        3:
+                cmp     %o5, %o3
+                blu     2b              /* V < R: double V again */
+                 nop
+                be      Ldo_single_div  /* V == R */
+                 nop
+        /* NB: these are commented out in the V8-Sparc manual as well */
+        /* (I do not understand this) */
+        ! %o5 > %o3: went too far: back up 1 step
+        !       srl     %o5, 1, %o5
+        !       dec     %g7
+        ! do single-bit divide steps
+        !
+        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+        ! first divide step without thinking.  BUT, the others are conditional,
+        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+        ! order bit set in the first step, just falling into the regular
+        ! division loop will mess up the first time around.
+        ! So we unroll slightly...
+        Ldo_single_div:
+                subcc   %g7, 1, %g7
+                bl      Lend_regular_divide     /* no single-bit steps to do */
+                 nop
+                sub     %o3, %o5, %o3   /* first step, unconditional: R -= V */
+                mov     1, %o2          /* Q = 1 */
+                b       Lend_single_divloop
+                 nop
+        Lsingle_divloop:
+                sll     %o2, 1, %o2     /* Q <<= 1 (sll leaves the condition codes alone) */
+                bl      1f              /* sign of R, as set by the tst in the loop branch delay slot */
+                 srl    %o5, 1, %o5     /* delay slot: V >>= 1 */
+                ! %o3 >= 0
+                sub     %o3, %o5, %o3
+                b       2f
+                 add    %o2, 1, %o2     /* delay slot: Q += 1 */
+        1:      ! %o3 < 0
+                add     %o3, %o5, %o3
+                sub     %o2, 1, %o2
+        2:
+        Lend_single_divloop:
+                subcc   %g7, 1, %g7
+                bge     Lsingle_divloop
+                 tst    %o3             /* delay slot: set condition codes from R */
+                b,a     Lend_regular_divide     /* annulled branch: no delay-slot instruction runs */
+Lnot_really_big:
+1:
+        sll     %o5, 4, %o5             /* V <<= 4 */
+        cmp     %o5, %o3
+        bleu    1b                      /* V <= R: scale by another factor of 2^4 */
+         addcc  %o4, 1, %o4             /* delay slot: ITER++ on every pass, taken or not */
+        be      Lgot_result             /* taken only if the addcc above produced zero */
+         sub    %o4, 1, %o4             /* delay slot: ITER-- */
+        tst     %o3     ! set up for initial iteration
+Ldivloop:
+        sll     %o2, 4, %o2             /* make room in Q for the next 4 steps */
+                ! depth 1, accumulated bits 0
+        bl      L.1.16                  /* R < 0 (codes from the preceding tst): add V back instead */
+         srl    %o5,1,%o5               /* delay slot: V >>= 1 */
+        ! remainder is positive
+        subcc   %o3,%o5,%o3             /* R -= V; codes select the branch at the next depth */
+                        ! depth 2, accumulated bits 1
+        bl      L.2.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 3
+        bl      L.3.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 7
+        bl      L.4.23
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2+1), %o2       /* delay slot: Q += signed value of these 4 steps (-15..15) */
+L.4.23:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2-1), %o2
+L.3.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 5
+        bl      L.4.21
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2+1), %o2
+L.4.21:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2-1), %o2
+L.2.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 1
+        bl      L.3.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 3
+        bl      L.4.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2+1), %o2
+L.4.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2-1), %o2
+L.3.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 1
+        bl      L.4.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2+1), %o2
+        
+L.4.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2-1), %o2
+L.1.16:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 2, accumulated bits -1
+        bl      L.2.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -1
+        bl      L.3.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -1
+        bl      L.4.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2+1), %o2
+L.4.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2-1), %o2
+L.3.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -3
+        bl      L.4.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2+1), %o2
+L.4.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2-1), %o2
+L.2.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -3
+        bl      L.3.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -5
+        bl      L.4.11
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2+1), %o2
+        
+L.4.11:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2-1), %o2
+L.3.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -7
+        bl      L.4.9
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2+1), %o2
+L.4.9:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2-1), %o2
+        9:
+Lend_regular_divide:
+        subcc   %o4, 1, %o4             /* ITER--; loop again while the result is >= 0 */
+        bge     Ldivloop
+         tst    %o3                     /* delay slot: condition codes from R for the next test */
+        bl,a    Lgot_result             /* R < 0: annulled form, delay slot runs only when taken */
+        ! non-restoring fixup here (one instruction only!)
+        add     %o3, %o1, %o3           /* delay slot: R += divisor */
+Lgot_result:
+        retl
+         mov %o3, %o0                   /* delay slot: return R in %o0 */
+        .globl  .urem_patch
+.urem_patch:                            /* remainder via hardware divide: %o0 = %o0 - (%o0 / %o1) * %o1 */
+        wr      %g0, 0x0, %y            /* %y = 0; udiv takes %y as the upper 32 bits of the dividend */
+        nop                             /* three nops: presumably cover the delayed write to %y - confirm against the V8 manual */
+        nop
+        nop
+        udiv    %o0, %o1, %o2           /* %o2 = %o0 / %o1 (unsigned) */
+        umul    %o2, %o1, %o2           /* %o2 = quotient * divisor (low 32 bits) */
+        retl
+         sub    %o0, %o2, %o0           /* delay slot: remainder = dividend - quotient * divisor */
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/sparc/lib/urem.S

diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S
new file mode 100644
index 000000000000..ec7f0c502c56
--- /dev/null
+++ b/arch/sparc/lib/urem.S
@@ -0,0 +1,355 @@
	1	/* $Id: urem.S,v 1.4 1996/09/30 02:22:42 davem Exp $
	2	* urem.S: This routine was taken from glibc-1.09 and is covered
	3	* by the GNU Library General Public License Version 2.
	4	*/
	5
	6	/* This file is generated from divrem.m4; DO NOT EDIT! */
	7	/*
	8	* Division and remainder, from Appendix E of the Sparc Version 8
	9	* Architecture Manual, with fixes from Gordon Irlam.
	10	*/
	11
	12	/*
	13	* Input: dividend and divisor in %o0 and %o1 respectively.
	14	*
	15	* m4 parameters:
	16	* .urem name of function to generate
	17	* rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
	18	* false false=true => signed; false=false => unsigned
	19	*
	20	* Algorithm parameters:
	21	* N how many bits per iteration we try to get (4)
	22	* WORDSIZE total number of bits (32)
	23	*
	24	* Derived constants:
	25	* TOPBITS number of bits in the top decade of a number
	26	*
	27	* Important variables:
	28	* Q the partial quotient under development (initially 0)
	29	* R the remainder so far, initially the dividend
	30	* ITER number of main division loop iterations required;
	31	* equal to ceil(log2(quotient) / N). Note that this
	32	* is the log base (2^N) of the quotient.
	33	* V the current comparand, initially divisor*2^(ITER*N-1)
	34	*
	35	* Cost:
	36	* Current estimate for non-large dividend is
	37	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
	38	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
	39	* different path, as the upper bits of the quotient must be developed
	40	* one bit at a time.
	41	*/
	42
	43	.globl .urem
	44	.urem:
	45
	46	! Ready to divide. Compute size of quotient; scale comparand.
	47	orcc %o1, %g0, %o5
	48	bne 1f
	49	mov %o0, %o3
	50
	51	! Divide by zero trap. If it returns, return 0 (about as
	52	! wrong as possible, but that is what SunOS does...).
	53	ta ST_DIV0
	54	retl
	55	clr %o0
	56
	57	1:
	58	cmp %o3, %o5 ! if %o1 exceeds %o0, done
	59	blu Lgot_result ! (and algorithm fails otherwise)
	60	clr %o2
	61
	62	sethi %hi(1 << (32 - 4 - 1)), %g1
	63
	64	cmp %o3, %g1
	65	blu Lnot_really_big
	66	clr %o4
	67
	68	! Here the dividend is >= 2**(31-N) or so. We must be careful here,
	69	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	70	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	71	! Compute ITER in an unorthodox manner: know we need to shift V into
	72	! the top decade: so do not even bother to compare to R.
	73	1:
	74	cmp %o5, %g1
	75	bgeu 3f
	76	mov 1, %g7
	77
	78	sll %o5, 4, %o5
	79
	80	b 1b
	81	add %o4, 1, %o4
	82
	83	! Now compute %g7.
	84	2:
	85	addcc %o5, %o5, %o5
	86	bcc Lnot_too_big
	87	add %g7, 1, %g7
	88
	89	! We get here if the %o1 overflowed while shifting.
	90	! This means that %o3 has the high-order bit set.
	91	! Restore %o5 and subtract from %o3.
	92	sll %g1, 4, %g1 ! high order bit
	93	srl %o5, 1, %o5 ! rest of %o5
	94	add %o5, %g1, %o5
	95
	96	b Ldo_single_div
	97	sub %g7, 1, %g7
	98
	99	Lnot_too_big:
	100	3:
	101	cmp %o5, %o3
	102	blu 2b
	103	nop
	104
	105	be Ldo_single_div
	106	nop
	107	/* NB: these are commented out in the V8-Sparc manual as well */
	108	/* (I do not understand this) */
	109	! %o5 > %o3: went too far: back up 1 step
	110	! srl %o5, 1, %o5
	111	! dec %g7
	112	! do single-bit divide steps
	113	!
	114	! We have to be careful here. We know that %o3 >= %o5, so we can do the
	115	! first divide step without thinking. BUT, the others are conditional,
	116	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
	117	! order bit set in the first step, just falling into the regular
	118	! division loop will mess up the first time around.
	119	! So we unroll slightly...
	120	Ldo_single_div:
	121	subcc %g7, 1, %g7
	122	bl Lend_regular_divide
	123	nop
	124
	125	sub %o3, %o5, %o3
	126	mov 1, %o2
	127
	128	b Lend_single_divloop
	129	nop
	130	Lsingle_divloop:
	131	sll %o2, 1, %o2
	132	bl 1f
	133	srl %o5, 1, %o5
	134	! %o3 >= 0
	135	sub %o3, %o5, %o3
	136	b 2f
	137	add %o2, 1, %o2
	138	1: ! %o3 < 0
	139	add %o3, %o5, %o3
	140	sub %o2, 1, %o2
	141	2:
	142	Lend_single_divloop:
	143	subcc %g7, 1, %g7
	144	bge Lsingle_divloop
	145	tst %o3
	146
	147	b,a Lend_regular_divide
	148
	149	Lnot_really_big:
	150	1:
	151	sll %o5, 4, %o5
	152
	153	cmp %o5, %o3
	154	bleu 1b
	155	addcc %o4, 1, %o4
	156
	157	be Lgot_result
	158	sub %o4, 1, %o4
	159
	160	tst %o3 ! set up for initial iteration
	161	Ldivloop:
	162	sll %o2, 4, %o2
	163	! depth 1, accumulated bits 0
	164	bl L.1.16
	165	srl %o5,1,%o5
	166	! remainder is positive
	167	subcc %o3,%o5,%o3
	168	! depth 2, accumulated bits 1
	169	bl L.2.17
	170	srl %o5,1,%o5
	171	! remainder is positive
	172	subcc %o3,%o5,%o3
	173	! depth 3, accumulated bits 3
	174	bl L.3.19
	175	srl %o5,1,%o5
	176	! remainder is positive
	177	subcc %o3,%o5,%o3
	178	! depth 4, accumulated bits 7
	179	bl L.4.23
	180	srl %o5,1,%o5
	181	! remainder is positive
	182	subcc %o3,%o5,%o3
	183	b 9f
	184	add %o2, (7*2+1), %o2
	185
	186	L.4.23:
	187	! remainder is negative
	188	addcc %o3,%o5,%o3
	189	b 9f
	190	add %o2, (7*2-1), %o2
	191
	192	L.3.19:
	193	! remainder is negative
	194	addcc %o3,%o5,%o3
	195	! depth 4, accumulated bits 5
	196	bl L.4.21
	197	srl %o5,1,%o5
	198	! remainder is positive
	199	subcc %o3,%o5,%o3
	200	b 9f
	201	add %o2, (5*2+1), %o2
	202
	203	L.4.21:
	204	! remainder is negative
	205	addcc %o3,%o5,%o3
	206	b 9f
	207	add %o2, (5*2-1), %o2
	208
	209	L.2.17:
	210	! remainder is negative
	211	addcc %o3,%o5,%o3
	212	! depth 3, accumulated bits 1
	213	bl L.3.17
	214	srl %o5,1,%o5
	215	! remainder is positive
	216	subcc %o3,%o5,%o3
	217	! depth 4, accumulated bits 3
	218	bl L.4.19
	219	srl %o5,1,%o5
	220	! remainder is positive
	221	subcc %o3,%o5,%o3
	222	b 9f
	223	add %o2, (3*2+1), %o2
	224
	225	L.4.19:
	226	! remainder is negative
	227	addcc %o3,%o5,%o3
	228	b 9f
	229	add %o2, (3*2-1), %o2
	230
	231	L.3.17:
	232	! remainder is negative
	233	addcc %o3,%o5,%o3
	234	! depth 4, accumulated bits 1
	235	bl L.4.17
	236	srl %o5,1,%o5
	237	! remainder is positive
	238	subcc %o3,%o5,%o3
	239	b 9f
	240	add %o2, (1*2+1), %o2
	241
	242	L.4.17:
	243	! remainder is negative
	244	addcc %o3,%o5,%o3
	245	b 9f
	246	add %o2, (1*2-1), %o2
	247
	248	L.1.16:
	249	! remainder is negative
	250	addcc %o3,%o5,%o3
	251	! depth 2, accumulated bits -1
	252	bl L.2.15
	253	srl %o5,1,%o5
	254	! remainder is positive
	255	subcc %o3,%o5,%o3
	256	! depth 3, accumulated bits -1
	257	bl L.3.15
	258	srl %o5,1,%o5
	259	! remainder is positive
	260	subcc %o3,%o5,%o3
	261	! depth 4, accumulated bits -1
	262	bl L.4.15
	263	srl %o5,1,%o5
	264	! remainder is positive
	265	subcc %o3,%o5,%o3
	266	b 9f
	267	add %o2, (-1*2+1), %o2
	268
	269	L.4.15:
	270	! remainder is negative
	271	addcc %o3,%o5,%o3
	272	b 9f
	273	add %o2, (-1*2-1), %o2
	274
	275	L.3.15:
	276	! remainder is negative
	277	addcc %o3,%o5,%o3
	278	! depth 4, accumulated bits -3
	279	bl L.4.13
	280	srl %o5,1,%o5
	281	! remainder is positive
	282	subcc %o3,%o5,%o3
	283	b 9f
	284	add %o2, (-3*2+1), %o2
	285
	286	L.4.13:
	287	! remainder is negative
	288	addcc %o3,%o5,%o3
	289	b 9f
	290	add %o2, (-3*2-1), %o2
	291
	292	L.2.15:
	293	! remainder is negative
	294	addcc %o3,%o5,%o3
	295	! depth 3, accumulated bits -3
	296	bl L.3.13
	297	srl %o5,1,%o5
	298	! remainder is positive
	299	subcc %o3,%o5,%o3
	300	! depth 4, accumulated bits -5
	301	bl L.4.11
	302	srl %o5,1,%o5
	303	! remainder is positive
	304	subcc %o3,%o5,%o3
	305	b 9f
	306	add %o2, (-5*2+1), %o2
	307
	308	L.4.11:
	309	! remainder is negative
	310	addcc %o3,%o5,%o3
	311	b 9f
	312	add %o2, (-5*2-1), %o2
	313
	314	L.3.13:
	315	! remainder is negative
	316	addcc %o3,%o5,%o3
	317	! depth 4, accumulated bits -7
	318	bl L.4.9
	319	srl %o5,1,%o5
	320	! remainder is positive
	321	subcc %o3,%o5,%o3
	322	b 9f
	323	add %o2, (-7*2+1), %o2
	324
	325	L.4.9:
	326	! remainder is negative
	327	addcc %o3,%o5,%o3
	328	b 9f
	329	add %o2, (-7*2-1), %o2
	330
	331	9:
	332	Lend_regular_divide:
	333	subcc %o4, 1, %o4
	334	bge Ldivloop
	335	tst %o3
	336
	337	bl,a Lgot_result
	338	! non-restoring fixup here (one instruction only!)
	339	add %o3, %o1, %o3
	340
	341	Lgot_result:
	342
	343	retl
	344	mov %o3, %o0
	345
	346	.globl .urem_patch
	347	.urem_patch:
	348	wr %g0, 0x0, %y
	349	nop
	350	nop
	351	nop
	352	udiv %o0, %o1, %o2
	353	umul %o2, %o1, %o2
	354	retl
	355	sub %o0, %o2, %o0