Added missing tegra files. (HEAD, master)

author: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-22 10:38:37 -0500
committer: Jonathan Herman <hermanjl@cs.unc.edu> 2013-01-22 10:38:37 -0500
commit: fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch)
tree: a57612d1888735a2ec7972891b68c1ac5ec8faea /arch/sparc/lib/udiv.S
parent: 8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff)
1 files changed, 357 insertions, 0 deletions
diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S
new file mode 100644
index 00000000000..2101405bdfc
--- /dev/null
+++ b/arch/sparc/lib/udiv.S
@@ -0,0 +1,357 @@
+/*
+ * udiv.S:      This routine was taken from glibc-1.09 and is covered
+ *              by the GNU Library General Public License Version 2.
+ */
+/* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters (shown here already substituted with this file's values):
+ *  NAME = .udiv        name of function to generate
+ *  OP = div            OP=div => %o0 / %o1; OP=rem => %o0 % %o1
+ *  S = false           S=true => signed; S=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N           how many bits per iteration we try to get (4)
+ *  WORDSIZE    total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS     number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q           the partial quotient under development (initially 0)
+ *  R           the remainder so far, initially the dividend
+ *  ITER        number of main division loop iterations required;
+ *              equal to ceil(log2(quotient) / N).  Note that this
+ *              is the log base (2^N) of the quotient.
+ *  V           the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+/*
+ * .udiv / _Udiv: software unsigned 32-bit divide (non-restoring).
+ *
+ * In:   %o0 = dividend, %o1 = divisor
+ * Out:  %o0 = quotient (0 if the divide-by-zero trap returns)
+ *
+ * Register roles inside the routine:
+ *   %o3  R, working remainder (starts as a copy of %o0)
+ *   %o5  V, comparand (starts as a copy of %o1, then scaled up)
+ *   %o2  Q, quotient being built
+ *   %o4  ITER, loop counter for the 4-bits-per-pass Ldivloop
+ *   %g7  count of single-bit steps used on the large-dividend path
+ *   %g1  1 << (32 - 4 - 1), the "large dividend" threshold
+ * Written: %o0, %o2-%o5, %g1, %g7 and the integer condition codes.
+ * %o1 is only read.
+ *
+ * Indented instructions that follow a branch (one extra space) are that
+ * branch's delay slot.
+ */
+        .globl .udiv
+        .globl _Udiv
+.udiv:
+_Udiv:  /* needed for export */
+        ! Ready to divide.  Compute size of quotient; scale comparand.
+        ! orcc copies the divisor into %o5 and sets the condition codes, so
+        ! the bne below is the "divisor != 0" test.  The delay slot copies the
+        ! dividend into %o3 whether or not the branch is taken.
+        orcc    %o1, %g0, %o5
+        bne     1f
+         mov    %o0, %o3
+                ! Divide by zero trap.  If it returns, return 0 (about as
+                ! wrong as possible, but that is what SunOS does...).
+                ta      ST_DIV0
+                retl
+                 clr    %o0
+1:
+        ! Delay slot clears Q, so the early exit to Lgot_result returns 0.
+        cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
+        blu     Lgot_result             ! (and algorithm fails otherwise)
+         clr    %o2
+        ! %g1 = 1 << 27.  Dividends below it (unsigned) take the
+        ! 4-bits-per-pass path at Lnot_really_big; ITER (%o4) starts at 0.
+        sethi   %hi(1 << (32 - 4 - 1)), %g1
+        cmp     %o3, %g1
+        blu     Lnot_really_big
+         clr    %o4
+        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+        ! Compute ITER in an unorthodox manner: know we need to shift V into
+        ! the top decade: so do not even bother to compare to R.
+        ! Loop: shift V left 4 bits per trip, counting trips in %o4, until
+        ! V >= %g1 (unsigned).  %g7 is set to 1 in the delay slot on every trip.
+        1:
+                cmp     %o5, %g1
+                bgeu    3f
+                 mov    1, %g7
+                sll     %o5, 4, %o5
+                b       1b
+                 add    %o4, 1, %o4
+        ! Now compute %g7.
+        ! Each trip doubles V (addcc sets carry if the doubling overflowed)
+        ! and counts the single-bit step in %g7 via the delay slot.
+        2:
+                addcc   %o5, %o5, %o5
+                bcc     Lnot_too_big
+                 add    %g7, 1, %g7
+                ! We get here if the %o1 overflowed while shifting.
+                ! This means that %o3 has the high-order bit set.
+                ! Restore %o5 and subtract from %o3.
+                ! %g1 << 4 is 1 << 31; adding it to V >> 1 rebuilds the value
+                ! V had before the overflowing doubling.  The delay slot of
+                ! the branch undoes the %g7 increment made above.
+                sll     %g1, 4, %g1     ! high order bit
+                srl     %o5, 1, %o5             ! rest of %o5
+                add     %o5, %g1, %o5
+                b       Ldo_single_div
+                 sub    %g7, 1, %g7
+        ! Keep doubling while V < R (unsigned).  V == R branches to
+        ! Ldo_single_div; V > R reaches the same label by fall-through,
+        ! because the "back up 1 step" code below is commented out.
+        Lnot_too_big:
+        3:
+                cmp     %o5, %o3
+                blu     2b
+                 nop
+                be      Ldo_single_div
+                 nop
+        /* NB: these are commented out in the V8-Sparc manual as well */
+        /* (I do not understand this) */
+        ! %o5 > %o3: went too far: back up 1 step
+        !       srl     %o5, 1, %o5
+        !       dec     %g7
+        ! do single-bit divide steps
+        !
+        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+        ! first divide step without thinking.  BUT, the others are conditional,
+        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+        ! order bit set in the first step, just falling into the regular
+        ! division loop will mess up the first time around.
+        ! So we unroll slightly...
+        ! If %g7 - 1 is negative there are no single-bit steps to do: go
+        ! straight to the 4-bit loop control.  Otherwise perform the first
+        ! step unconditionally (R -= V, Q = 1).
+        Ldo_single_div:
+                subcc   %g7, 1, %g7
+                bl      Lend_regular_divide
+                 nop
+                sub     %o3, %o5, %o3
+                mov     1, %o2
+                b       Lend_single_divloop
+                 nop
+        ! One quotient bit per trip.  The bl below tests the condition codes
+        ! left by the "tst %o3" in the delay slot of the bge at
+        ! Lend_single_divloop (sll does not change them).  V is halved in the
+        ! delay slot on both paths.
+        Lsingle_divloop:
+                sll     %o2, 1, %o2
+                bl      1f
+                 srl    %o5, 1, %o5
+                ! %o3 >= 0
+                sub     %o3, %o5, %o3
+                b       2f
+                 add    %o2, 1, %o2
+        1:      ! %o3 < 0
+                add     %o3, %o5, %o3
+                sub     %o2, 1, %o2
+        2:
+        Lend_single_divloop:
+                subcc   %g7, 1, %g7
+                bge     Lsingle_divloop
+                 tst    %o3
+                ! b,a: unconditional branch with the annul bit set, so the
+                ! instruction that follows (the sll at Lnot_really_big) is
+                ! not executed as a delay slot.
+                b,a     Lend_regular_divide
+        ! Small-dividend path: scale V up by 16 per trip while V <= R
+        ! (unsigned).  The addcc in the delay slot counts every trip in %o4,
+        ! including the final one whose branch is not taken.
+Lnot_really_big:
+1:
+        sll     %o5, 4, %o5
+        cmp     %o5, %o3
+        bleu    1b
+         addcc  %o4, 1, %o4
+        ! be tests the result of that last addcc (%o4 == 0 after the
+        ! increment).  Delay slot: %o4 -= 1; the first Ldivloop pass below is
+        ! entered by fall-through rather than through the subcc/bge at
+        ! Lend_regular_divide.
+        be      Lgot_result
+         sub    %o4, 1, %o4
+        tst     %o3     ! set up for initial iteration
+        ! Main loop: each pass shifts Q left 4 bits, then walks a 4-level
+        ! decision tree.  At every level the bl tests the sign left by the
+        ! previous subcc/addcc (or by the tst on entry), the delay slot halves
+        ! V, and R has V subtracted (R was >= 0) or added (R was < 0).
+        ! Labels are L.<depth>.<16 + accumulated bits>; each leaf adds
+        ! (accumulated bits * 2 +/- 1) to Q and joins at 9:.
+Ldivloop:
+        sll     %o2, 4, %o2
+                ! depth 1, accumulated bits 0
+        bl      L.1.16
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 2, accumulated bits 1
+        bl      L.2.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 3
+        bl      L.3.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 7
+        bl      L.4.23
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2+1), %o2
+L.4.23:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2-1), %o2
+L.3.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 5
+        bl      L.4.21
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2+1), %o2
+L.4.21:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2-1), %o2
+L.2.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 1
+        bl      L.3.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 3
+        bl      L.4.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2+1), %o2
+L.4.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2-1), %o2
+L.3.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 1
+        bl      L.4.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2+1), %o2
+L.4.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2-1), %o2
+L.1.16:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 2, accumulated bits -1
+        bl      L.2.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -1
+        bl      L.3.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -1
+        bl      L.4.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2+1), %o2
+L.4.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2-1), %o2
+L.3.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -3
+        bl      L.4.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2+1), %o2
+L.4.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2-1), %o2
+L.2.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -3
+        bl      L.3.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -5
+        bl      L.4.11
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2+1), %o2
+L.4.11:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2-1), %o2
+L.3.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -7
+        bl      L.4.9
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2+1), %o2
+L.4.9:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2-1), %o2
+        9:
+        ! Loop control: run another Ldivloop pass while %o4 - 1 >= 0.  The
+        ! "tst %o3" in the delay slot sets the condition codes used both by
+        ! the first bl of the next pass and by the bl,a below on exit.
+Lend_regular_divide:
+        subcc   %o4, 1, %o4
+        bge     Ldivloop
+         tst    %o3
+        ! bl,a: the annulled delay slot (the sub) executes only when the
+        ! branch is taken, i.e. only when the final remainder is negative;
+        ! otherwise it is skipped and control falls into Lgot_result.
+        bl,a    Lgot_result
+        ! non-restoring fixup here (one instruction only!)
+        sub     %o2, 1, %o2
+        ! Return Q in %o0 (moved in the retl delay slot).
+Lgot_result:
+        retl
+         mov %o2, %o0
+        ! .udiv_patch: same contract as .udiv (%o0 = %o0 / %o1, unsigned) but
+        ! implemented with the hardware udiv instruction.
+        ! NOTE(review): the name suggests this sequence is copied over .udiv
+        ! at boot on CPUs that implement udiv; the patching code is not in
+        ! this file -- confirm against the caller.
+        .globl  .udiv_patch
+.udiv_patch:
+        ! wr writes (%g0 xor 0) = 0 to %y, which udiv uses as the upper 32
+        ! bits of its 64-bit dividend.  The two nops and the retl keep three
+        ! instructions between the %y write and the udiv that reads it.
+        wr      %g0, 0x0, %y
+        nop
+        nop
+        ! The divide itself runs in the delay slot of the return.
+        retl
+         udiv   %o0, %o1, %o0
+        ! NOTE(review): trailing nop is never reached after the retl;
+        ! presumably padding to a fixed patch length -- confirm.
+        nop
author	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-22 10:38:37 -0500
committer	Jonathan Herman <hermanjl@cs.unc.edu>	2013-01-22 10:38:37 -0500
commit	fcc9d2e5a6c89d22b8b773a64fb4ad21ac318446 (patch)
tree	a57612d1888735a2ec7972891b68c1ac5ec8faea /arch/sparc/lib/udiv.S
parent	8dea78da5cee153b8af9c07a2745f6c55057fe12 (diff)

diff --git a/arch/sparc/lib/udiv.S b/arch/sparc/lib/udiv.S
new file mode 100644
index 00000000000..2101405bdfc
--- /dev/null
+++ b/arch/sparc/lib/udiv.S
@@ -0,0 +1,357 @@
	1	/*
	2	* udiv.S: This routine was taken from glibc-1.09 and is covered
	3	* by the GNU Library General Public License Version 2.
	4	*/
	5
	6
	7	/* This file is generated from divrem.m4; DO NOT EDIT! */
	8	/*
	9	* Division and remainder, from Appendix E of the Sparc Version 8
	10	* Architecture Manual, with fixes from Gordon Irlam.
	11	*/
	12
	13	/*
	14	* Input: dividend and divisor in %o0 and %o1 respectively.
	15	*
	16	* m4 parameters (shown here already substituted with this file's values):
	17	* NAME = .udiv name of function to generate
	18	* OP = div OP=div => %o0 / %o1; OP=rem => %o0 % %o1
	19	* S = false S=true => signed; S=false => unsigned
	20	*
	21	* Algorithm parameters:
	22	* N how many bits per iteration we try to get (4)
	23	* WORDSIZE total number of bits (32)
	24	*
	25	* Derived constants:
	26	* TOPBITS number of bits in the top decade of a number
	27	*
	28	* Important variables:
	29	* Q the partial quotient under development (initially 0)
	30	* R the remainder so far, initially the dividend
	31	* ITER number of main division loop iterations required;
	32	* equal to ceil(log2(quotient) / N). Note that this
	33	* is the log base (2^N) of the quotient.
	34	* V the current comparand, initially divisor*2^(ITER*N-1)
	35	*
	36	* Cost:
	37	* Current estimate for non-large dividend is
	38	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
	39	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
	40	* different path, as the upper bits of the quotient must be developed
	41	* one bit at a time.
	42	*/
	43
	44
	45	.globl .udiv
	46	.globl _Udiv
	47	.udiv:
	48	_Udiv: /* needed for export */
	49
	50	! Ready to divide. Compute size of quotient; scale comparand.
	51	orcc %o1, %g0, %o5
	52	bne 1f
	53	mov %o0, %o3
	54
	55	! Divide by zero trap. If it returns, return 0 (about as
	56	! wrong as possible, but that is what SunOS does...).
	57	ta ST_DIV0
	58	retl
	59	clr %o0
	60
	61	1:
	62	cmp %o3, %o5 ! if %o1 exceeds %o0, done
	63	blu Lgot_result ! (and algorithm fails otherwise)
	64	clr %o2
	65
	66	sethi %hi(1 << (32 - 4 - 1)), %g1
	67
	68	cmp %o3, %g1
	69	blu Lnot_really_big
	70	clr %o4
	71
	72	! Here the dividend is >= 2**(31-N) or so. We must be careful here,
	73	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	74	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	75	! Compute ITER in an unorthodox manner: know we need to shift V into
	76	! the top decade: so do not even bother to compare to R.
	77	1:
	78	cmp %o5, %g1
	79	bgeu 3f
	80	mov 1, %g7
	81
	82	sll %o5, 4, %o5
	83
	84	b 1b
	85	add %o4, 1, %o4
	86
	87	! Now compute %g7.
	88	2:
	89	addcc %o5, %o5, %o5
	90	bcc Lnot_too_big
	91	add %g7, 1, %g7
	92
	93	! We get here if the %o1 overflowed while shifting.
	94	! This means that %o3 has the high-order bit set.
	95	! Restore %o5 and subtract from %o3.
	96	sll %g1, 4, %g1 ! high order bit
	97	srl %o5, 1, %o5 ! rest of %o5
	98	add %o5, %g1, %o5
	99
	100	b Ldo_single_div
	101	sub %g7, 1, %g7
	102
	103	Lnot_too_big:
	104	3:
	105	cmp %o5, %o3
	106	blu 2b
	107	nop
	108
	109	be Ldo_single_div
	110	nop
	111	/* NB: these are commented out in the V8-Sparc manual as well */
	112	/* (I do not understand this) */
	113	! %o5 > %o3: went too far: back up 1 step
	114	! srl %o5, 1, %o5
	115	! dec %g7
	116	! do single-bit divide steps
	117	!
	118	! We have to be careful here. We know that %o3 >= %o5, so we can do the
	119	! first divide step without thinking. BUT, the others are conditional,
	120	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
	121	! order bit set in the first step, just falling into the regular
	122	! division loop will mess up the first time around.
	123	! So we unroll slightly...
	124	Ldo_single_div:
	125	subcc %g7, 1, %g7
	126	bl Lend_regular_divide
	127	nop
	128
	129	sub %o3, %o5, %o3
	130	mov 1, %o2
	131
	132	b Lend_single_divloop
	133	nop
	134	Lsingle_divloop:
	135	sll %o2, 1, %o2
	136	bl 1f
	137	srl %o5, 1, %o5
	138	! %o3 >= 0
	139	sub %o3, %o5, %o3
	140	b 2f
	141	add %o2, 1, %o2
	142	1: ! %o3 < 0
	143	add %o3, %o5, %o3
	144	sub %o2, 1, %o2
	145	2:
	146	Lend_single_divloop:
	147	subcc %g7, 1, %g7
	148	bge Lsingle_divloop
	149	tst %o3
	150
	151	b,a Lend_regular_divide
	152
	153	Lnot_really_big:
	154	1:
	155	sll %o5, 4, %o5
	156
	157	cmp %o5, %o3
	158	bleu 1b
	159	addcc %o4, 1, %o4
	160
	161	be Lgot_result
	162	sub %o4, 1, %o4
	163
	164	tst %o3 ! set up for initial iteration
	165	Ldivloop:
	166	sll %o2, 4, %o2
	167	! depth 1, accumulated bits 0
	168	bl L.1.16
	169	srl %o5,1,%o5
	170	! remainder is positive
	171	subcc %o3,%o5,%o3
	172	! depth 2, accumulated bits 1
	173	bl L.2.17
	174	srl %o5,1,%o5
	175	! remainder is positive
	176	subcc %o3,%o5,%o3
	177	! depth 3, accumulated bits 3
	178	bl L.3.19
	179	srl %o5,1,%o5
	180	! remainder is positive
	181	subcc %o3,%o5,%o3
	182	! depth 4, accumulated bits 7
	183	bl L.4.23
	184	srl %o5,1,%o5
	185	! remainder is positive
	186	subcc %o3,%o5,%o3
	187	b 9f
	188	add %o2, (7*2+1), %o2
	189
	190	L.4.23:
	191	! remainder is negative
	192	addcc %o3,%o5,%o3
	193	b 9f
	194	add %o2, (7*2-1), %o2
	195
	196	L.3.19:
	197	! remainder is negative
	198	addcc %o3,%o5,%o3
	199	! depth 4, accumulated bits 5
	200	bl L.4.21
	201	srl %o5,1,%o5
	202	! remainder is positive
	203	subcc %o3,%o5,%o3
	204	b 9f
	205	add %o2, (5*2+1), %o2
	206
	207	L.4.21:
	208	! remainder is negative
	209	addcc %o3,%o5,%o3
	210	b 9f
	211	add %o2, (5*2-1), %o2
	212
	213	L.2.17:
	214	! remainder is negative
	215	addcc %o3,%o5,%o3
	216	! depth 3, accumulated bits 1
	217	bl L.3.17
	218	srl %o5,1,%o5
	219	! remainder is positive
	220	subcc %o3,%o5,%o3
	221	! depth 4, accumulated bits 3
	222	bl L.4.19
	223	srl %o5,1,%o5
	224	! remainder is positive
	225	subcc %o3,%o5,%o3
	226	b 9f
	227	add %o2, (3*2+1), %o2
	228
	229	L.4.19:
	230	! remainder is negative
	231	addcc %o3,%o5,%o3
	232	b 9f
	233	add %o2, (3*2-1), %o2
	234
	235	L.3.17:
	236	! remainder is negative
	237	addcc %o3,%o5,%o3
	238	! depth 4, accumulated bits 1
	239	bl L.4.17
	240	srl %o5,1,%o5
	241	! remainder is positive
	242	subcc %o3,%o5,%o3
	243	b 9f
	244	add %o2, (1*2+1), %o2
	245
	246	L.4.17:
	247	! remainder is negative
	248	addcc %o3,%o5,%o3
	249	b 9f
	250	add %o2, (1*2-1), %o2
	251
	252	L.1.16:
	253	! remainder is negative
	254	addcc %o3,%o5,%o3
	255	! depth 2, accumulated bits -1
	256	bl L.2.15
	257	srl %o5,1,%o5
	258	! remainder is positive
	259	subcc %o3,%o5,%o3
	260	! depth 3, accumulated bits -1
	261	bl L.3.15
	262	srl %o5,1,%o5
	263	! remainder is positive
	264	subcc %o3,%o5,%o3
	265	! depth 4, accumulated bits -1
	266	bl L.4.15
	267	srl %o5,1,%o5
	268	! remainder is positive
	269	subcc %o3,%o5,%o3
	270	b 9f
	271	add %o2, (-1*2+1), %o2
	272
	273	L.4.15:
	274	! remainder is negative
	275	addcc %o3,%o5,%o3
	276	b 9f
	277	add %o2, (-1*2-1), %o2
	278
	279	L.3.15:
	280	! remainder is negative
	281	addcc %o3,%o5,%o3
	282	! depth 4, accumulated bits -3
	283	bl L.4.13
	284	srl %o5,1,%o5
	285	! remainder is positive
	286	subcc %o3,%o5,%o3
	287	b 9f
	288	add %o2, (-3*2+1), %o2
	289
	290	L.4.13:
	291	! remainder is negative
	292	addcc %o3,%o5,%o3
	293	b 9f
	294	add %o2, (-3*2-1), %o2
	295
	296	L.2.15:
	297	! remainder is negative
	298	addcc %o3,%o5,%o3
	299	! depth 3, accumulated bits -3
	300	bl L.3.13
	301	srl %o5,1,%o5
	302	! remainder is positive
	303	subcc %o3,%o5,%o3
	304	! depth 4, accumulated bits -5
	305	bl L.4.11
	306	srl %o5,1,%o5
	307	! remainder is positive
	308	subcc %o3,%o5,%o3
	309	b 9f
	310	add %o2, (-5*2+1), %o2
	311
	312	L.4.11:
	313	! remainder is negative
	314	addcc %o3,%o5,%o3
	315	b 9f
	316	add %o2, (-5*2-1), %o2
	317
	318	L.3.13:
	319	! remainder is negative
	320	addcc %o3,%o5,%o3
	321	! depth 4, accumulated bits -7
	322	bl L.4.9
	323	srl %o5,1,%o5
	324	! remainder is positive
	325	subcc %o3,%o5,%o3
	326	b 9f
	327	add %o2, (-7*2+1), %o2
	328
	329	L.4.9:
	330	! remainder is negative
	331	addcc %o3,%o5,%o3
	332	b 9f
	333	add %o2, (-7*2-1), %o2
	334
	335	9:
	336	Lend_regular_divide:
	337	subcc %o4, 1, %o4
	338	bge Ldivloop
	339	tst %o3
	340
	341	bl,a Lgot_result
	342	! non-restoring fixup here (one instruction only!)
	343	sub %o2, 1, %o2
	344
	345	Lgot_result:
	346
	347	retl
	348	mov %o2, %o0
	349
	350	.globl .udiv_patch
	351	.udiv_patch:
	352	wr %g0, 0x0, %y
	353	nop
	354	nop
	355	retl
	356	udiv %o0, %o1, %o0
	357	nop