1 files changed, 357 insertions, 0 deletions
diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S
new file mode 100644
index 00000000000..77123eb83c4
--- /dev/null
+++ b/arch/sparc/lib/urem.S
@@ -0,0 +1,357 @@
+/*
+ * urem.S:      This routine was taken from glibc-1.09 and is covered
+ *              by the GNU Library General Public License Version 2.
+ */
+/* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  NAME        name of function to generate (here: .urem)
+ *  OP          OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (here: rem)
+ *  S           S=true => signed; S=false => unsigned (here: false)
+ *
+ * Algorithm parameters:
+ *  N           how many bits per iteration we try to get (4)
+ *  WORDSIZE    total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS     number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q           the partial quotient under development (initially 0)
+ *  R           the remainder so far, initially the dividend
+ *  ITER        number of main division loop iterations required;
+ *              equal to ceil(log2(quotient) / N).  Note that this
+ *              is the log base (2^N) of the quotient.
+ *  V           the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+/*
+ * .urem / _Urem: unsigned 32-bit remainder, %o0 % %o1.
+ *
+ * In:   %o0 = dividend, %o1 = divisor
+ * Out:  %o0 = remainder (0 if the divisor is zero and the ST_DIV0 trap
+ *       returns)
+ * Uses: %o2 = Q, %o3 = R, %o4 = ITER, %o5 = V, %g1 = size threshold,
+ *       %g7 = count of single-bit steps, and the integer condition codes.
+ *       %o1 is only read.  Q is developed but never returned.
+ *
+ * No register window is allocated; both exits are retl.
+ * Most delay-slot instructions are indented one extra column below the
+ * branch they belong to.
+ * NOTE(review): ST_DIV0 is not defined in this file; presumably it is
+ * supplied by a header or by the build -- confirm.
+ */
+        .globl .urem
+        .globl _Urem
+.urem:
+_Urem:  /* needed for export */
+        ! Ready to divide.  Compute size of quotient; scale comparand.
+        ! V (%o5) = divisor, with the condition codes set from it.
+        ! R (%o3) = dividend; the copy sits in the delay slot of a
+        ! non-annulled branch, so it happens on both paths.
+        orcc    %o1, %g0, %o5
+        bne     1f
+         mov    %o0, %o3
+                ! Divide by zero trap.  If it returns, return 0 (about as
+                ! wrong as possible, but that is what SunOS does...).
+                ta      ST_DIV0
+                retl
+                 clr    %o0
+1:
+        ! Unsigned compare: a dividend smaller than the divisor is already
+        ! the remainder.  The delay slot clears Q (%o2) either way.
+        cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
+        blu     Lgot_result             ! (and algorithm fails otherwise)
+         clr    %o2
+        ! %g1 = 1 << 27 (= 1 << (WORDSIZE - N - 1)).  Dividends below it take
+        ! the plain N-bits-per-pass path.  The delay slot clears ITER (%o4).
+        sethi   %hi(1 << (32 - 4 - 1)), %g1
+        cmp     %o3, %g1
+        blu     Lnot_really_big
+         clr    %o4
+        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+        ! Compute ITER in an unorthodox manner: know we need to shift V into
+        ! the top decade: so do not even bother to compare to R.
+        !
+        ! Loop 1: shift V left 4 bits per pass until V >= %g1, counting the
+        ! passes in ITER (%o4).  %g7 is set to 1 in the delay slot each time.
+        1:
+                cmp     %o5, %g1
+                bgeu    3f
+                 mov    1, %g7
+                sll     %o5, 4, %o5
+                b       1b
+                 add    %o4, 1, %o4
+        ! Now compute %g7.
+        ! Loop 2/3: double V and bump %g7 while V < R (test at 3: below),
+        ! stopping early if the doubling carries out of bit 31.
+        2:
+                addcc   %o5, %o5, %o5
+                bcc     Lnot_too_big
+                 add    %g7, 1, %g7
+                ! We get here if the %o1 overflowed while shifting.
+                ! This means that %o3 has the high-order bit set.
+                ! Restore %o5 and subtract from %o3.
+                ! %g1 becomes 1 << 31; V is shifted back down and gets that
+                ! bit put back; the delay slot undoes the %g7 increment.
+                sll     %g1, 4, %g1     ! high order bit
+                srl     %o5, 1, %o5             ! rest of %o5
+                add     %o5, %g1, %o5
+                b       Ldo_single_div
+                 sub    %g7, 1, %g7
+        Lnot_too_big:
+        3:
+                cmp     %o5, %o3
+                blu     2b
+                 nop
+                be      Ldo_single_div
+                 nop
+        ! V > R falls through into Ldo_single_div as well, because the
+        ! back-up step below is commented out.
+        /* NB: these are commented out in the V8-Sparc manual as well */
+        /* (I do not understand this) */
+        ! %o5 > %o3: went too far: back up 1 step
+        !       srl     %o5, 1, %o5
+        !       dec     %g7
+        ! do single-bit divide steps
+        !
+        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+        ! first divide step without thinking.  BUT, the others are conditional,
+        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+        ! order bit set in the first step, just falling into the regular
+        ! division loop will mess up the first time around.
+        ! So we unroll slightly...
+        Ldo_single_div:
+                ! If %g7 - 1 is negative there are no single-bit steps: go
+                ! straight to the ITER test of the regular loop.
+                subcc   %g7, 1, %g7
+                bl      Lend_regular_divide
+                 nop
+                ! Unconditional first step: R -= V, Q = 1.
+                sub     %o3, %o5, %o3
+                mov     1, %o2
+                b       Lend_single_divloop
+                 nop
+        ! One non-restoring step per pass.  The bl below tests the codes left
+        ! by the tst %o3 in the delay slot of the bge at the bottom of the
+        ! loop; the sll and srl in between leave the codes alone.
+        Lsingle_divloop:
+                sll     %o2, 1, %o2
+                bl      1f
+                 srl    %o5, 1, %o5
+                ! %o3 >= 0
+                sub     %o3, %o5, %o3
+                b       2f
+                 add    %o2, 1, %o2
+        1:      ! %o3 < 0
+                add     %o3, %o5, %o3
+                sub     %o2, 1, %o2
+        2:
+        Lend_single_divloop:
+                ! bge is decided by the subcc on %g7; the tst in its delay
+                ! slot then sets the codes from R for the next bl.
+                subcc   %g7, 1, %g7
+                bge     Lsingle_divloop
+                 tst    %o3
+                ! Annulled branch-always: the instruction after it is not
+                ! executed as a delay slot.
+                b,a     Lend_regular_divide
+Lnot_really_big:
+        ! Shift V left 4 bits per pass until V > R (unsigned).  The addcc in
+        ! the delay slot bumps ITER (%o4) on every pass, including the last.
+1:
+        sll     %o5, 4, %o5
+        cmp     %o5, %o3
+        bleu    1b
+         addcc  %o4, 1, %o4
+        ! be tests the codes left by that last addcc (ITER == 0); its delay
+        ! slot takes one off ITER whether or not the branch is taken.
+        be      Lgot_result
+         sub    %o4, 1, %o4
+        tst     %o3     ! set up for initial iteration
+        ! Ldivloop: each pass develops N = 4 quotient bits with an unrolled
+        ! non-restoring decision tree.  At every depth the sign of R picks
+        ! the side: V is halved in the delay slot, then R -= V if R was >= 0,
+        ! or R += V (at the L.d.k label) if R was negative.  In L.d.k, d is
+        ! the depth and k is 16 plus the accumulated bits so far.  Each leaf
+        ! adds its signed digit, (accumulated bits)*2 +/- 1, into Q, which
+        ! was shifted left 4 at the top of the pass.
+        ! On entry the condition codes must reflect R: they come from the
+        ! tst %o3 just above or from the one at Lend_regular_divide.
+Ldivloop:
+        sll     %o2, 4, %o2
+                ! depth 1, accumulated bits 0
+        bl      L.1.16
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 2, accumulated bits 1
+        bl      L.2.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 3
+        bl      L.3.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 7
+        bl      L.4.23
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2+1), %o2
+L.4.23:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2-1), %o2
+L.3.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 5
+        bl      L.4.21
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2+1), %o2
+L.4.21:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2-1), %o2
+L.2.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 1
+        bl      L.3.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 3
+        bl      L.4.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2+1), %o2
+L.4.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2-1), %o2
+L.3.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 1
+        bl      L.4.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2+1), %o2
+        
+L.4.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2-1), %o2
+L.1.16:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 2, accumulated bits -1
+        bl      L.2.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -1
+        bl      L.3.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -1
+        bl      L.4.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2+1), %o2
+L.4.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2-1), %o2
+L.3.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -3
+        bl      L.4.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2+1), %o2
+L.4.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2-1), %o2
+L.2.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -3
+        bl      L.3.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -5
+        bl      L.4.11
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2+1), %o2
+        
+L.4.11:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2-1), %o2
+L.3.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -7
+        bl      L.4.9
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2+1), %o2
+L.4.9:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2-1), %o2
+        9:
+Lend_regular_divide:
+        ! Another pass while ITER - 1 >= 0.  The tst in the delay slot sets
+        ! the codes from R, both for the next pass and for the bl,a below.
+        subcc   %o4, 1, %o4
+        bge     Ldivloop
+         tst    %o3
+        ! Annulled conditional branch: the add below is its delay slot and
+        ! runs only when the branch is taken, i.e. when R is negative.  It
+        ! adds the original divisor (%o1) back into R.
+        bl,a    Lgot_result
+        ! non-restoring fixup here (one instruction only!)
+        add     %o3, %o1, %o3
+Lgot_result:
+        ! Return R; the move into %o0 sits in the delay slot of retl.
+        retl
+         mov %o3, %o0
+        ! .urem_patch: %o0 = %o0 % %o1 (unsigned), computed with the udiv
+        ! and umul instructions instead of the software loop in .urem.
+        ! Writes %o2, %y and %o0; %o1 is only read.
+        ! NOTE(review): the name suggests this sequence is patched over .urem
+        ! at run time on CPUs with hardware divide -- confirm against the code
+        ! that references .urem_patch.
+        .globl  .urem_patch
+.urem_patch:
+        ! Zero %y, which udiv takes as the upper 32 bits of its dividend.
+        ! The three nops separate the write of %y from the udiv that reads
+        ! it.
+        wr      %g0, 0x0, %y
+        nop
+        nop
+        nop
+        udiv    %o0, %o1, %o2           ! %o2 = %o0 / %o1
+        umul    %o2, %o1, %o2           ! %o2 = quotient * divisor (low 32 bits)
+        retl
+         sub    %o0, %o2, %o0          ! delay slot: remainder = dividend - %o2

diff --git a/arch/sparc/lib/urem.S b/arch/sparc/lib/urem.S new file mode 100644 index 00000000000..77123eb83c4 --- /dev/null +++ b/arch/sparc/lib/urem.S
@@ -0,0 +1,357 @@
	1	/*
	2	* urem.S: This routine was taken from glibc-1.09 and is covered
	3	* by the GNU Library General Public License Version 2.
	4	*/
	5
	6	/* This file is generated from divrem.m4; DO NOT EDIT! */
	7	/*
	8	* Division and remainder, from Appendix E of the Sparc Version 8
	9	* Architecture Manual, with fixes from Gordon Irlam.
	10	*/
	11
	12	/*
	13	* Input: dividend and divisor in %o0 and %o1 respectively.
	14	*
	15	* m4 parameters:
	16	* NAME name of function to generate (here: .urem)
	17	* OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 (here: rem)
	18	* S S=true => signed; S=false => unsigned (here: false)
	19	*
	20	* Algorithm parameters:
	21	* N how many bits per iteration we try to get (4)
	22	* WORDSIZE total number of bits (32)
	23	*
	24	* Derived constants:
	25	* TOPBITS number of bits in the top decade of a number
	26	*
	27	* Important variables:
	28	* Q the partial quotient under development (initially 0)
	29	* R the remainder so far, initially the dividend
	30	* ITER number of main division loop iterations required;
	31	* equal to ceil(log2(quotient) / N). Note that this
	32	* is the log base (2^N) of the quotient.
	33	* V the current comparand, initially divisor*2^(ITER*N-1)
	34	*
	35	* Cost:
	36	* Current estimate for non-large dividend is
	37	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
	38	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
	39	* different path, as the upper bits of the quotient must be developed
	40	* one bit at a time.
	41	*/
	42
	43	.globl .urem
	44	.globl _Urem
	45	.urem:
	46	_Urem: /* needed for export */
	47
	48	! Ready to divide. Compute size of quotient; scale comparand.
	49	orcc %o1, %g0, %o5
	50	bne 1f
	51	mov %o0, %o3
	52
	53	! Divide by zero trap. If it returns, return 0 (about as
	54	! wrong as possible, but that is what SunOS does...).
	55	ta ST_DIV0
	56	retl
	57	clr %o0
	58
	59	1:
	60	cmp %o3, %o5 ! if %o1 exceeds %o0, done
	61	blu Lgot_result ! (and algorithm fails otherwise)
	62	clr %o2
	63
	64	sethi %hi(1 << (32 - 4 - 1)), %g1
	65
	66	cmp %o3, %g1
	67	blu Lnot_really_big
	68	clr %o4
	69
	70	! Here the dividend is >= 2**(31-N) or so. We must be careful here,
	71	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	72	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	73	! Compute ITER in an unorthodox manner: know we need to shift V into
	74	! the top decade: so do not even bother to compare to R.
	75	1:
	76	cmp %o5, %g1
	77	bgeu 3f
	78	mov 1, %g7
	79
	80	sll %o5, 4, %o5
	81
	82	b 1b
	83	add %o4, 1, %o4
	84
	85	! Now compute %g7.
	86	2:
	87	addcc %o5, %o5, %o5
	88	bcc Lnot_too_big
	89	add %g7, 1, %g7
	90
	91	! We get here if the %o1 overflowed while shifting.
	92	! This means that %o3 has the high-order bit set.
	93	! Restore %o5 and subtract from %o3.
	94	sll %g1, 4, %g1 ! high order bit
	95	srl %o5, 1, %o5 ! rest of %o5
	96	add %o5, %g1, %o5
	97
	98	b Ldo_single_div
	99	sub %g7, 1, %g7
	100
	101	Lnot_too_big:
	102	3:
	103	cmp %o5, %o3
	104	blu 2b
	105	nop
	106
	107	be Ldo_single_div
	108	nop
	109	/* NB: these are commented out in the V8-Sparc manual as well */
	110	/* (I do not understand this) */
	111	! %o5 > %o3: went too far: back up 1 step
	112	! srl %o5, 1, %o5
	113	! dec %g7
	114	! do single-bit divide steps
	115	!
	116	! We have to be careful here. We know that %o3 >= %o5, so we can do the
	117	! first divide step without thinking. BUT, the others are conditional,
	118	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
	119	! order bit set in the first step, just falling into the regular
	120	! division loop will mess up the first time around.
	121	! So we unroll slightly...
	122	Ldo_single_div:
	123	subcc %g7, 1, %g7
	124	bl Lend_regular_divide
	125	nop
	126
	127	sub %o3, %o5, %o3
	128	mov 1, %o2
	129
	130	b Lend_single_divloop
	131	nop
	132	Lsingle_divloop:
	133	sll %o2, 1, %o2
	134	bl 1f
	135	srl %o5, 1, %o5
	136	! %o3 >= 0
	137	sub %o3, %o5, %o3
	138	b 2f
	139	add %o2, 1, %o2
	140	1: ! %o3 < 0
	141	add %o3, %o5, %o3
	142	sub %o2, 1, %o2
	143	2:
	144	Lend_single_divloop:
	145	subcc %g7, 1, %g7
	146	bge Lsingle_divloop
	147	tst %o3
	148
	149	b,a Lend_regular_divide
	150
	151	Lnot_really_big:
	152	1:
	153	sll %o5, 4, %o5
	154
	155	cmp %o5, %o3
	156	bleu 1b
	157	addcc %o4, 1, %o4
	158
	159	be Lgot_result
	160	sub %o4, 1, %o4
	161
	162	tst %o3 ! set up for initial iteration
	163	Ldivloop:
	164	sll %o2, 4, %o2
	165	! depth 1, accumulated bits 0
	166	bl L.1.16
	167	srl %o5,1,%o5
	168	! remainder is positive
	169	subcc %o3,%o5,%o3
	170	! depth 2, accumulated bits 1
	171	bl L.2.17
	172	srl %o5,1,%o5
	173	! remainder is positive
	174	subcc %o3,%o5,%o3
	175	! depth 3, accumulated bits 3
	176	bl L.3.19
	177	srl %o5,1,%o5
	178	! remainder is positive
	179	subcc %o3,%o5,%o3
	180	! depth 4, accumulated bits 7
	181	bl L.4.23
	182	srl %o5,1,%o5
	183	! remainder is positive
	184	subcc %o3,%o5,%o3
	185	b 9f
	186	add %o2, (7*2+1), %o2
	187
	188	L.4.23:
	189	! remainder is negative
	190	addcc %o3,%o5,%o3
	191	b 9f
	192	add %o2, (7*2-1), %o2
	193
	194	L.3.19:
	195	! remainder is negative
	196	addcc %o3,%o5,%o3
	197	! depth 4, accumulated bits 5
	198	bl L.4.21
	199	srl %o5,1,%o5
	200	! remainder is positive
	201	subcc %o3,%o5,%o3
	202	b 9f
	203	add %o2, (5*2+1), %o2
	204
	205	L.4.21:
	206	! remainder is negative
	207	addcc %o3,%o5,%o3
	208	b 9f
	209	add %o2, (5*2-1), %o2
	210
	211	L.2.17:
	212	! remainder is negative
	213	addcc %o3,%o5,%o3
	214	! depth 3, accumulated bits 1
	215	bl L.3.17
	216	srl %o5,1,%o5
	217	! remainder is positive
	218	subcc %o3,%o5,%o3
	219	! depth 4, accumulated bits 3
	220	bl L.4.19
	221	srl %o5,1,%o5
	222	! remainder is positive
	223	subcc %o3,%o5,%o3
	224	b 9f
	225	add %o2, (3*2+1), %o2
	226
	227	L.4.19:
	228	! remainder is negative
	229	addcc %o3,%o5,%o3
	230	b 9f
	231	add %o2, (3*2-1), %o2
	232
	233	L.3.17:
	234	! remainder is negative
	235	addcc %o3,%o5,%o3
	236	! depth 4, accumulated bits 1
	237	bl L.4.17
	238	srl %o5,1,%o5
	239	! remainder is positive
	240	subcc %o3,%o5,%o3
	241	b 9f
	242	add %o2, (1*2+1), %o2
	243
	244	L.4.17:
	245	! remainder is negative
	246	addcc %o3,%o5,%o3
	247	b 9f
	248	add %o2, (1*2-1), %o2
	249
	250	L.1.16:
	251	! remainder is negative
	252	addcc %o3,%o5,%o3
	253	! depth 2, accumulated bits -1
	254	bl L.2.15
	255	srl %o5,1,%o5
	256	! remainder is positive
	257	subcc %o3,%o5,%o3
	258	! depth 3, accumulated bits -1
	259	bl L.3.15
	260	srl %o5,1,%o5
	261	! remainder is positive
	262	subcc %o3,%o5,%o3
	263	! depth 4, accumulated bits -1
	264	bl L.4.15
	265	srl %o5,1,%o5
	266	! remainder is positive
	267	subcc %o3,%o5,%o3
	268	b 9f
	269	add %o2, (-1*2+1), %o2
	270
	271	L.4.15:
	272	! remainder is negative
	273	addcc %o3,%o5,%o3
	274	b 9f
	275	add %o2, (-1*2-1), %o2
	276
	277	L.3.15:
	278	! remainder is negative
	279	addcc %o3,%o5,%o3
	280	! depth 4, accumulated bits -3
	281	bl L.4.13
	282	srl %o5,1,%o5
	283	! remainder is positive
	284	subcc %o3,%o5,%o3
	285	b 9f
	286	add %o2, (-3*2+1), %o2
	287
	288	L.4.13:
	289	! remainder is negative
	290	addcc %o3,%o5,%o3
	291	b 9f
	292	add %o2, (-3*2-1), %o2
	293
	294	L.2.15:
	295	! remainder is negative
	296	addcc %o3,%o5,%o3
	297	! depth 3, accumulated bits -3
	298	bl L.3.13
	299	srl %o5,1,%o5
	300	! remainder is positive
	301	subcc %o3,%o5,%o3
	302	! depth 4, accumulated bits -5
	303	bl L.4.11
	304	srl %o5,1,%o5
	305	! remainder is positive
	306	subcc %o3,%o5,%o3
	307	b 9f
	308	add %o2, (-5*2+1), %o2
	309
	310	L.4.11:
	311	! remainder is negative
	312	addcc %o3,%o5,%o3
	313	b 9f
	314	add %o2, (-5*2-1), %o2
	315
	316	L.3.13:
	317	! remainder is negative
	318	addcc %o3,%o5,%o3
	319	! depth 4, accumulated bits -7
	320	bl L.4.9
	321	srl %o5,1,%o5
	322	! remainder is positive
	323	subcc %o3,%o5,%o3
	324	b 9f
	325	add %o2, (-7*2+1), %o2
	326
	327	L.4.9:
	328	! remainder is negative
	329	addcc %o3,%o5,%o3
	330	b 9f
	331	add %o2, (-7*2-1), %o2
	332
	333	9:
	334	Lend_regular_divide:
	335	subcc %o4, 1, %o4
	336	bge Ldivloop
	337	tst %o3
	338
	339	bl,a Lgot_result
	340	! non-restoring fixup here (one instruction only!)
	341	add %o3, %o1, %o3
	342
	343	Lgot_result:
	344
	345	retl
	346	mov %o3, %o0
	347
	348	.globl .urem_patch
	349	.urem_patch:
	350	wr %g0, 0x0, %y
	351	nop
	352	nop
	353	nop
	354	udiv %o0, %o1, %o2
	355	umul %o2, %o1, %o2
	356	retl
	357	sub %o0, %o2, %o0