1 files changed, 382 insertions, 0 deletions
diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S
new file mode 100644
index 000000000000..44508148d055
--- /dev/null
+++ b/arch/sparc/lib/rem.S
@@ -0,0 +1,382 @@
+/* $Id: rem.S,v 1.7 1996/09/30 02:22:34 davem Exp $
+ * rem.S:       This routine was taken from glibc-1.09 and is covered
+ *              by the GNU Library General Public License Version 2.
+ */
+/* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters (names appear already expanded to this file's values):
+ *  .rem        name of function to generate
+ *  rem         rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ *  true                true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N           how many bits per iteration we try to get (4)
+ *  WORDSIZE    total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS     number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q           the partial quotient under development (initially 0)
+ *  R           the remainder so far, initially the dividend
+ *  ITER        number of main division loop iterations required;
+ *              equal to ceil(log2(quotient) / N).  Note that this
+ *              is the log base (2^N) of the quotient.
+ *  V           the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *      ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+        .globl .rem     /* signed 32-bit remainder: returns %o0 % %o1 in %o0 */
+.rem:   /* instructions in branch delay slots are indented one extra space */
+        ! compute sign of result; if neither is negative, no problem
+        orcc    %o1, %o0, %g0   ! either negative?
+        bge     2f                      ! no, go do the divide
+         mov    %o0, %g2        ! compute sign in any case
+        tst     %o1     /* is the divisor the negative operand? */
+        bge     1f      /* divisor >= 0, so it is the dividend that is negative */
+         tst    %o0     /* delay slot: flags for the bge below */
+        ! %o1 is definitely negative; %o0 might also be negative
+        bge     2f                      ! if %o0 not negative...
+         sub    %g0, %o1, %o1   ! in any case, make %o1 nonneg
+1:      ! %o0 is negative, %o1 is nonnegative
+        sub     %g0, %o0, %o0   ! make %o0 nonnegative
+2:
+        ! Ready to divide.  Compute size of quotient; scale comparand.
+        orcc    %o1, %g0, %o5   /* %o5 = magnitude of divisor (V); Z set if it is zero */
+        bne     1f
+         mov    %o0, %o3       /* delay slot: %o3 = magnitude of dividend (R) */
+                ! Divide by zero trap.  If it returns, return 0 (about as
+                ! wrong as possible, but that is what SunOS does...).
+                ta      ST_DIV0        /* ST_DIV0 is not defined in this file */
+                retl
+                 clr    %o0    /* delay slot: return value 0 */
+1:
+        cmp     %o3, %o5                        ! if %o1 exceeds %o0, done
+        blu     Lgot_result             ! (and algorithm fails otherwise)
+         clr    %o2    /* delay slot: quotient accumulator Q = 0 */
+        sethi   %hi(1 << (32 - 4 - 1)), %g1    /* %g1 = 1 << 27; its low 10 bits are zero, so sethi alone builds it */
+        cmp     %o3, %g1
+        blu     Lnot_really_big        /* R < 2^27: go straight to the 4-bits-per-pass loop */
+         clr    %o4    /* delay slot: %o4 = 0, counter of 4-bit passes */
+        ! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+        ! as our usual N-at-a-shot divide step will cause overflow and havoc.
+        ! The number of bits in the result here is N*ITER+SC, where SC <= N.
+        ! Compute ITER in an unorthodox manner: know we need to shift V into
+        ! the top decade: so do not even bother to compare to R.
+        1:
+                cmp     %o5, %g1
+                bgeu    3f     /* V >= 2^27: stop shifting by 4 */
+                 mov    1, %g7 /* delay slot: %g7 = 1, counter of single-bit steps */
+                sll     %o5, 4, %o5    /* V <<= 4 */
+                b       1b
+                 add    %o4, 1, %o4    /* delay slot: one more 4-bit pass for Ldivloop later */
+        ! Now compute %g7.
+        2:
+                addcc   %o5, %o5, %o5  /* V <<= 1; carry set if a bit was shifted out */
+                bcc     Lnot_too_big
+                 add    %g7, 1, %g7    /* delay slot: count this single-bit shift */
+                ! We get here if the %o1 overflowed while shifting.
+                ! This means that %o3 has the high-order bit set.
+                ! Restore %o5 and subtract from %o3.
+                sll     %g1, 4, %g1     ! high order bit
+                srl     %o5, 1, %o5             ! rest of %o5
+                add     %o5, %g1, %o5
+                b       Ldo_single_div
+                 sub    %g7, 1, %g7    /* delay slot: take back the count of the shift that overflowed */
+        Lnot_too_big:
+        3:
+                cmp     %o5, %o3
+                blu     2b     /* V < R (unsigned): keep doubling V */
+                 nop
+                be      Ldo_single_div
+                 nop
+        /* NB: these are commented out in the V8-Sparc manual as well */
+        /* (I do not understand this) */
+        ! %o5 > %o3: went too far: back up 1 step
+        !       srl     %o5, 1, %o5
+        !       dec     %g7
+        ! do single-bit divide steps
+        !
+        ! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+        ! first divide step without thinking.  BUT, the others are conditional,
+        ! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+        ! order bit set in the first step, just falling into the regular
+        ! division loop will mess up the first time around.
+        ! So we unroll slightly...
+        Ldo_single_div:
+                subcc   %g7, 1, %g7
+                bl      Lend_regular_divide    /* no single-bit steps to perform */
+                 nop
+                sub     %o3, %o5, %o3  /* first step is unconditional: R -= V */
+                mov     1, %o2         /* Q = 1 */
+                b       Lend_single_divloop
+                 nop
+        Lsingle_divloop:
+                sll     %o2, 1, %o2    /* Q <<= 1; the bl below still sees the flags of "tst %o3" */
+                bl      1f
+                 srl    %o5, 1, %o5    /* delay slot: V >>= 1 on both paths */
+                ! %o3 >= 0
+                sub     %o3, %o5, %o3
+                b       2f
+                 add    %o2, 1, %o2    /* delay slot: Q += 1 */
+        1:      ! %o3 < 0
+                add     %o3, %o5, %o3
+                sub     %o2, 1, %o2    /* Q -= 1 */
+        2:
+        Lend_single_divloop:
+                subcc   %g7, 1, %g7
+                bge     Lsingle_divloop
+                 tst    %o3    /* delay slot: sign of R picks the branch of the next step */
+                b,a     Lend_regular_divide    /* annulled: the following instruction is not executed as a delay slot */
+Lnot_really_big:
+1:
+        sll     %o5, 4, %o5    /* V <<= 4 */
+        cmp     %o5, %o3
+        bleu    1b             /* repeat while V <= R (unsigned) */
+         addcc  %o4, 1, %o4    /* delay slot: runs on every pass, including the last one */
+        be      Lgot_result    /* tests Z from the addcc above, i.e. %o4 wrapped to 0 */
+         sub    %o4, 1, %o4    /* delay slot: take back the last pass's increment */
+        tst     %o3     ! set up for initial iteration
+Ldivloop:       /* one pass = four non-restoring steps, adding an odd digit in -15..15 to Q */
+        sll     %o2, 4, %o2    /* Q <<= 4 to make room for this pass's digit */
+                ! depth 1, accumulated bits 0
+        bl      L.1.16
+         srl    %o5,1,%o5      /* delay slot: V >>= 1 before either the subcc or the addcc */
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 2, accumulated bits 1
+        bl      L.2.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 3
+        bl      L.3.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 7
+        bl      L.4.23
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2+1), %o2      /* delay slot: digit for the path with four subtractions */
+        
+L.4.23:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (7*2-1), %o2
+        
+L.3.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 5
+        bl      L.4.21
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2+1), %o2
+        
+L.4.21:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (5*2-1), %o2
+        
+L.2.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits 1
+        bl      L.3.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 3
+        bl      L.4.19
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2+1), %o2
+L.4.19:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (3*2-1), %o2
+L.3.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits 1
+        bl      L.4.17
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2+1), %o2
+L.4.17:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (1*2-1), %o2
+L.1.16:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 2, accumulated bits -1
+        bl      L.2.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -1
+        bl      L.3.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -1
+        bl      L.4.15
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2+1), %o2
+L.4.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-1*2-1), %o2
+L.3.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -3
+        bl      L.4.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2+1), %o2
+L.4.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-3*2-1), %o2
+L.2.15:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 3, accumulated bits -3
+        bl      L.3.13
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -5
+        bl      L.4.11
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2+1), %o2
+L.4.11:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-5*2-1), %o2
+L.3.13:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+                        ! depth 4, accumulated bits -7
+        bl      L.4.9
+         srl    %o5,1,%o5
+        ! remainder is positive
+        subcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2+1), %o2
+L.4.9:
+        ! remainder is negative
+        addcc   %o3,%o5,%o3
+        b       9f
+         add    %o2, (-7*2-1), %o2     /* delay slot: digit for the path with four additions */
+        9:
+Lend_regular_divide:
+        subcc   %o4, 1, %o4
+        bge     Ldivloop       /* another 4-bit pass remains */
+         tst    %o3    /* delay slot: flags for Ldivloop's first bl, or for the bl,a below */
+        bl,a    Lgot_result    /* taken when R < 0; the annulled delay slot runs only in that case */
+        ! non-restoring fixup here (one instruction only!)
+        add     %o3, %o1, %o3  /* R += magnitude of divisor */
+Lgot_result:
+        ! check to see if answer should be < 0
+        tst     %g2    /* %g2 = dividend as passed in */
+        bl,a    1f
+         sub %g0, %o3, %o3     /* delay slot, executed only when the branch is taken: R = -R */
+1:
+        retl
+         mov %o3, %o0  /* delay slot: return R in %o0 */
+        .globl  .rem_patch     /* remainder via sdivcc/smul: %o0 = %o0 - (%o0 / %o1) * %o1 */
+.rem_patch:    /* NOTE(review): the name suggests this sequence is patched over .rem at run time - confirm with the users of this symbol */
+        sra     %o0, 0x1f, %o4 /* %o4 = sign bit of the dividend replicated across the word */
+        wr      %o4, 0x0, %y   /* %y = upper 32 bits of the 64-bit dividend used by sdivcc */
+        nop     /* three nops separate the write of %y from sdivcc; presumably covers the delayed write - confirm against the SPARC V8 manual */
+        nop
+        nop
+        sdivcc  %o0, %o1, %o2  /* %o2 = signed quotient; overflow flag set if it does not fit */
+        bvs,a   1f
+         xnor   %o2, %g0, %o2  /* delay slot, executed only on overflow: %o2 = ~%o2 */
+1:      smul    %o2, %o1, %o2  /* %o2 = quotient * divisor */
+        retl
+         sub    %o0, %o2, %o0  /* delay slot: remainder = dividend - quotient * divisor */
+        nop

diff --git a/arch/sparc/lib/rem.S b/arch/sparc/lib/rem.S new file mode 100644 index 000000000000..44508148d055 --- /dev/null +++ b/arch/sparc/lib/rem.S
@@ -0,0 +1,382 @@
	1	/* $Id: rem.S,v 1.7 1996/09/30 02:22:34 davem Exp $
	2	* rem.S: This routine was taken from glibc-1.09 and is covered
	3	* by the GNU Library General Public License Version 2.
	4	*/
	5
	6
	7	/* This file is generated from divrem.m4; DO NOT EDIT! */
	8	/*
	9	* Division and remainder, from Appendix E of the Sparc Version 8
	10	* Architecture Manual, with fixes from Gordon Irlam.
	11	*/
	12
	13	/*
	14	* Input: dividend and divisor in %o0 and %o1 respectively.
	15	*
	16	* m4 parameters (names appear already expanded to this file's values):
	17	* .rem name of function to generate
	18	* rem rem=div => %o0 / %o1; rem=rem => %o0 % %o1
	19	* true true=true => signed; true=false => unsigned
	20	*
	21	* Algorithm parameters:
	22	* N how many bits per iteration we try to get (4)
	23	* WORDSIZE total number of bits (32)
	24	*
	25	* Derived constants:
	26	* TOPBITS number of bits in the top decade of a number
	27	*
	28	* Important variables:
	29	* Q the partial quotient under development (initially 0)
	30	* R the remainder so far, initially the dividend
	31	* ITER number of main division loop iterations required;
	32	* equal to ceil(log2(quotient) / N). Note that this
	33	* is the log base (2^N) of the quotient.
	34	* V the current comparand, initially divisor*2^(ITER*N-1)
	35	*
	36	* Cost:
	37	* Current estimate for non-large dividend is
	38	* ceil(log2(quotient) / N) * (10 + 7N/2) + C
	39	* A large dividend is one greater than 2^(31-TOPBITS) and takes a
	40	* different path, as the upper bits of the quotient must be developed
	41	* one bit at a time.
	42	*/
	43
	44
	45	.globl .rem /* signed 32-bit remainder: returns %o0 % %o1 in %o0 */
	46	.rem: /* the instruction after each branch or retl is its delay slot */
	47	! compute sign of result; if neither is negative, no problem
	48	orcc %o1, %o0, %g0 ! either negative?
	49	bge 2f ! no, go do the divide
	50	mov %o0, %g2 ! compute sign in any case
	51
	52	tst %o1 /* is the divisor the negative operand? */
	53	bge 1f /* divisor >= 0, so it is the dividend that is negative */
	54	tst %o0 /* delay slot: flags for the bge below */
	55	! %o1 is definitely negative; %o0 might also be negative
	56	bge 2f ! if %o0 not negative...
	57	sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
	58	1: ! %o0 is negative, %o1 is nonnegative
	59	sub %g0, %o0, %o0 ! make %o0 nonnegative
	60	2:
	61
	62	! Ready to divide. Compute size of quotient; scale comparand.
	63	orcc %o1, %g0, %o5 /* %o5 = magnitude of divisor (V); Z set if it is zero */
	64	bne 1f
	65	mov %o0, %o3 /* delay slot: %o3 = magnitude of dividend (R) */
	66
	67	! Divide by zero trap. If it returns, return 0 (about as
	68	! wrong as possible, but that is what SunOS does...).
	69	ta ST_DIV0 /* ST_DIV0 is not defined in this file */
	70	retl
	71	clr %o0 /* delay slot: return value 0 */
	72
	73	1:
	74	cmp %o3, %o5 ! if %o1 exceeds %o0, done
	75	blu Lgot_result ! (and algorithm fails otherwise)
	76	clr %o2 /* delay slot: quotient accumulator Q = 0 */
	77
	78	sethi %hi(1 << (32 - 4 - 1)), %g1 /* %g1 = 1 << 27; its low 10 bits are zero, so sethi alone builds it */
	79
	80	cmp %o3, %g1
	81	blu Lnot_really_big /* R < 2^27: go straight to the 4-bits-per-pass loop */
	82	clr %o4 /* delay slot: %o4 = 0, counter of 4-bit passes */
	83
	84	! Here the dividend is >= 2**(31-N) or so. We must be careful here,
	85	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	86	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	87	! Compute ITER in an unorthodox manner: know we need to shift V into
	88	! the top decade: so do not even bother to compare to R.
	89	1:
	90	cmp %o5, %g1
	91	bgeu 3f /* V >= 2^27: stop shifting by 4 */
	92	mov 1, %g7 /* delay slot: %g7 = 1, counter of single-bit steps */
	93
	94	sll %o5, 4, %o5 /* V <<= 4 */
	95
	96	b 1b
	97	add %o4, 1, %o4 /* delay slot: one more 4-bit pass for Ldivloop later */
	98
	99	! Now compute %g7.
	100	2:
	101	addcc %o5, %o5, %o5 /* V <<= 1; carry set if a bit was shifted out */
	102
	103	bcc Lnot_too_big
	104	add %g7, 1, %g7 /* delay slot: count this single-bit shift */
	105
	106	! We get here if the %o1 overflowed while shifting.
	107	! This means that %o3 has the high-order bit set.
	108	! Restore %o5 and subtract from %o3.
	109	sll %g1, 4, %g1 ! high order bit
	110	srl %o5, 1, %o5 ! rest of %o5
	111	add %o5, %g1, %o5
	112
	113	b Ldo_single_div
	114	sub %g7, 1, %g7 /* delay slot: take back the count of the shift that overflowed */
	115
	116	Lnot_too_big:
	117	3:
	118	cmp %o5, %o3
	119	blu 2b /* V < R (unsigned): keep doubling V */
	120	nop
	121
	122	be Ldo_single_div
	123	nop
	124	/* NB: these are commented out in the V8-Sparc manual as well */
	125	/* (I do not understand this) */
	126	! %o5 > %o3: went too far: back up 1 step
	127	! srl %o5, 1, %o5
	128	! dec %g7
	129	! do single-bit divide steps
	130	!
	131	! We have to be careful here. We know that %o3 >= %o5, so we can do the
	132	! first divide step without thinking. BUT, the others are conditional,
	133	! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
	134	! order bit set in the first step, just falling into the regular
	135	! division loop will mess up the first time around.
	136	! So we unroll slightly...
	137	Ldo_single_div:
	138	subcc %g7, 1, %g7
	139	bl Lend_regular_divide /* no single-bit steps to perform */
	140	nop
	141
	142	sub %o3, %o5, %o3 /* first step is unconditional: R -= V */
	143	mov 1, %o2 /* Q = 1 */
	144
	145	b Lend_single_divloop
	146	nop
	147	Lsingle_divloop:
	148	sll %o2, 1, %o2 /* Q <<= 1; the bl below still sees the flags of "tst %o3" */
	149
	150	bl 1f
	151	srl %o5, 1, %o5 /* delay slot: V >>= 1 on both paths */
	152	! %o3 >= 0
	153	sub %o3, %o5, %o3
	154
	155	b 2f
	156	add %o2, 1, %o2 /* delay slot: Q += 1 */
	157	1: ! %o3 < 0
	158	add %o3, %o5, %o3
	159	sub %o2, 1, %o2 /* Q -= 1 */
	160	2:
	161	Lend_single_divloop:
	162	subcc %g7, 1, %g7
	163	bge Lsingle_divloop
	164	tst %o3 /* delay slot: sign of R picks the branch of the next step */
	165
	166	b,a Lend_regular_divide /* annulled: the following instruction is not executed as a delay slot */
	167
	168	Lnot_really_big:
	169	1:
	170	sll %o5, 4, %o5 /* V <<= 4 */
	171	cmp %o5, %o3
	172	bleu 1b /* repeat while V <= R (unsigned) */
	173	addcc %o4, 1, %o4 /* delay slot: runs on every pass, including the last one */
	174	be Lgot_result /* tests Z from the addcc above, i.e. %o4 wrapped to 0 */
	175	sub %o4, 1, %o4 /* delay slot: take back the last pass's increment */
	176
	177	tst %o3 ! set up for initial iteration
	178	Ldivloop: /* one pass = four non-restoring steps, adding an odd digit in -15..15 to Q */
	179	sll %o2, 4, %o2 /* Q <<= 4 to make room for this pass's digit */
	180	! depth 1, accumulated bits 0
	181	bl L.1.16
	182	srl %o5,1,%o5 /* delay slot: V >>= 1 before either the subcc or the addcc */
	183	! remainder is positive
	184	subcc %o3,%o5,%o3
	185	! depth 2, accumulated bits 1
	186	bl L.2.17
	187	srl %o5,1,%o5
	188	! remainder is positive
	189	subcc %o3,%o5,%o3
	190	! depth 3, accumulated bits 3
	191	bl L.3.19
	192	srl %o5,1,%o5
	193	! remainder is positive
	194	subcc %o3,%o5,%o3
	195	! depth 4, accumulated bits 7
	196	bl L.4.23
	197	srl %o5,1,%o5
	198	! remainder is positive
	199	subcc %o3,%o5,%o3
	200
	201	b 9f
	202	add %o2, (7*2+1), %o2 /* delay slot: digit for the path with four subtractions */
	203
	204	L.4.23:
	205	! remainder is negative
	206	addcc %o3,%o5,%o3
	207	b 9f
	208	add %o2, (7*2-1), %o2
	209
	210	L.3.19:
	211	! remainder is negative
	212	addcc %o3,%o5,%o3
	213	! depth 4, accumulated bits 5
	214	bl L.4.21
	215	srl %o5,1,%o5
	216	! remainder is positive
	217	subcc %o3,%o5,%o3
	218	b 9f
	219	add %o2, (5*2+1), %o2
	220
	221	L.4.21:
	222	! remainder is negative
	223	addcc %o3,%o5,%o3
	224	b 9f
	225	add %o2, (5*2-1), %o2
	226
	227	L.2.17:
	228	! remainder is negative
	229	addcc %o3,%o5,%o3
	230	! depth 3, accumulated bits 1
	231	bl L.3.17
	232	srl %o5,1,%o5
	233	! remainder is positive
	234	subcc %o3,%o5,%o3
	235	! depth 4, accumulated bits 3
	236	bl L.4.19
	237	srl %o5,1,%o5
	238	! remainder is positive
	239	subcc %o3,%o5,%o3
	240	b 9f
	241	add %o2, (3*2+1), %o2
	242
	243	L.4.19:
	244	! remainder is negative
	245	addcc %o3,%o5,%o3
	246	b 9f
	247	add %o2, (3*2-1), %o2
	248
	249	L.3.17:
	250	! remainder is negative
	251	addcc %o3,%o5,%o3
	252	! depth 4, accumulated bits 1
	253	bl L.4.17
	254	srl %o5,1,%o5
	255	! remainder is positive
	256	subcc %o3,%o5,%o3
	257	b 9f
	258	add %o2, (1*2+1), %o2
	259
	260	L.4.17:
	261	! remainder is negative
	262	addcc %o3,%o5,%o3
	263	b 9f
	264	add %o2, (1*2-1), %o2
	265
	266	L.1.16:
	267	! remainder is negative
	268	addcc %o3,%o5,%o3
	269	! depth 2, accumulated bits -1
	270	bl L.2.15
	271	srl %o5,1,%o5
	272	! remainder is positive
	273	subcc %o3,%o5,%o3
	274	! depth 3, accumulated bits -1
	275	bl L.3.15
	276	srl %o5,1,%o5
	277	! remainder is positive
	278	subcc %o3,%o5,%o3
	279	! depth 4, accumulated bits -1
	280	bl L.4.15
	281	srl %o5,1,%o5
	282	! remainder is positive
	283	subcc %o3,%o5,%o3
	284	b 9f
	285	add %o2, (-1*2+1), %o2
	286
	287	L.4.15:
	288	! remainder is negative
	289	addcc %o3,%o5,%o3
	290	b 9f
	291	add %o2, (-1*2-1), %o2
	292
	293	L.3.15:
	294	! remainder is negative
	295	addcc %o3,%o5,%o3
	296	! depth 4, accumulated bits -3
	297	bl L.4.13
	298	srl %o5,1,%o5
	299	! remainder is positive
	300	subcc %o3,%o5,%o3
	301	b 9f
	302	add %o2, (-3*2+1), %o2
	303
	304	L.4.13:
	305	! remainder is negative
	306	addcc %o3,%o5,%o3
	307	b 9f
	308	add %o2, (-3*2-1), %o2
	309
	310	L.2.15:
	311	! remainder is negative
	312	addcc %o3,%o5,%o3
	313	! depth 3, accumulated bits -3
	314	bl L.3.13
	315	srl %o5,1,%o5
	316	! remainder is positive
	317	subcc %o3,%o5,%o3
	318	! depth 4, accumulated bits -5
	319	bl L.4.11
	320	srl %o5,1,%o5
	321	! remainder is positive
	322	subcc %o3,%o5,%o3
	323	b 9f
	324	add %o2, (-5*2+1), %o2
	325
	326	L.4.11:
	327	! remainder is negative
	328	addcc %o3,%o5,%o3
	329	b 9f
	330	add %o2, (-5*2-1), %o2
	331
	332
	333	L.3.13:
	334	! remainder is negative
	335	addcc %o3,%o5,%o3
	336	! depth 4, accumulated bits -7
	337	bl L.4.9
	338	srl %o5,1,%o5
	339	! remainder is positive
	340	subcc %o3,%o5,%o3
	341	b 9f
	342	add %o2, (-7*2+1), %o2
	343
	344	L.4.9:
	345	! remainder is negative
	346	addcc %o3,%o5,%o3
	347	b 9f
	348	add %o2, (-7*2-1), %o2 /* delay slot: digit for the path with four additions */
	349
	350	9:
	351	Lend_regular_divide:
	352	subcc %o4, 1, %o4
	353	bge Ldivloop /* another 4-bit pass remains */
	354	tst %o3 /* delay slot: flags for Ldivloop's first bl, or for the bl,a below */
	355
	356	bl,a Lgot_result /* taken when R < 0; the annulled delay slot runs only in that case */
	357	! non-restoring fixup here (one instruction only!)
	358	add %o3, %o1, %o3 /* R += magnitude of divisor */
	359
	360	Lgot_result:
	361	! check to see if answer should be < 0
	362	tst %g2 /* %g2 = dividend as passed in */
	363	bl,a 1f
	364	sub %g0, %o3, %o3 /* delay slot, executed only when the branch is taken: R = -R */
	365	1:
	366	retl
	367	mov %o3, %o0 /* delay slot: return R in %o0 */
	368
	369	.globl .rem_patch /* remainder via sdivcc/smul: %o0 = %o0 - (%o0 / %o1) * %o1 */
	370	.rem_patch: /* NOTE(review): the name suggests this sequence is patched over .rem at run time - confirm with the users of this symbol */
	371	sra %o0, 0x1f, %o4 /* %o4 = sign bit of the dividend replicated across the word */
	372	wr %o4, 0x0, %y /* %y = upper 32 bits of the 64-bit dividend used by sdivcc */
	373	nop /* three nops separate the write of %y from sdivcc; presumably covers the delayed write - confirm against the SPARC V8 manual */
	374	nop
	375	nop
	376	sdivcc %o0, %o1, %o2 /* %o2 = signed quotient; overflow flag set if it does not fit */
	377	bvs,a 1f
	378	xnor %o2, %g0, %o2 /* delay slot, executed only on overflow: %o2 = ~%o2 */
	379	1: smul %o2, %o1, %o2 /* %o2 = quotient * divisor */
	380	retl
	381	sub %o0, %o2, %o0 /* delay slot: remainder = dividend - quotient * divisor */
	382	nop