Blackfin arch: add assembly function for doing 64bit unsigned division

Signed-off-by: Mike Frysinger <michael.frysinger@analog.com> Signed-off-by: Bryan Wu <bryan.wu@analog.com>
author: Mike Frysinger <michael.frysinger@analog.com> 2007-10-21 10:57:36 -0400
committer: Bryan Wu <bryan.wu@analog.com> 2007-10-21 10:57:36 -0400
commit: b0a68dc07ec395d44849ce98eb417713ca333410 (patch)
tree: 5e469225188d63fd296fc04d0c04e9580d8e47db /arch/blackfin/lib
parent: 1c668d82465cd5c17030c0f69561841374380ac8 (diff)
2 files changed, 376 insertions, 1 deletions
diff --git a/arch/blackfin/lib/Makefile b/arch/blackfin/lib/Makefile
index 635288fc5f54..bfdad52c570b 100644
--- a/arch/blackfin/lib/Makefile
+++ b/arch/blackfin/lib/Makefile
@@ -4,7 +4,7 @@
 lib-y := \
        ashldi3.o ashrdi3.o lshrdi3.o \
-        muldi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
+        muldi3.o divsi3.o udivsi3.o udivdi3.o modsi3.o umodsi3.o \
        checksum.o memcpy.o memset.o memcmp.o memchr.o memmove.o \
        strcmp.o strcpy.o strncmp.o strncpy.o \
        umulsi3_highpart.o smulsi3_highpart.o \
diff --git a/arch/blackfin/lib/udivdi3.S b/arch/blackfin/lib/udivdi3.S
new file mode 100644
index 000000000000..ad1ebee675e1
--- /dev/null
+++ b/arch/blackfin/lib/udivdi3.S
@@ -0,0 +1,375 @@
+/*
+ * udivdi3.S - unsigned long long division
+ *
+ * Copyright 2003-2007 Analog Devices Inc.
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPLv2 or later.
+ */
+#include <linux/linkage.h>
+#define CARRY AC0
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+ENTRY(___udivdi3)
+   R3 = [SP + 12];
+   [--SP] = (R7:4, P5:3);
+   /* Attempt to use divide primitive first; these will handle
+   **  most cases, and they're quick - avoids stalls incurred by
+   ** testing for identities.
+   */
+   R4 = R2 | R3;
+   CC = R4 == 0;
+   IF CC JUMP .LDIV_BY_ZERO;
+   R4.H = 0x8000;
+   R4 >>>= 16;                  // R4 now 0xFFFF8000
+   R5 = R0 | R2;                // If either dividend or
+   R4 = R5 & R4;                // divisor have bits in
+   CC = R4;                     // top half or low half's sign
+   IF CC JUMP .LIDENTS;          // bit, skip builtins.
+   R4 = R1 | R3;                // Also check top halves
+   CC = R4;
+   IF CC JUMP .LIDENTS;
+   /* Can use the builtins. */
+   AQ = CC;                     // Clear AQ (CC==0)
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   DIVQ(R0, R2);
+   R0 = R0.L (Z);
+   R1 = 0;
+   (R7:4, P5:3) = [SP++];
+   RTS;
+.LIDENTS:
+   /* Test for common identities. Value to be returned is
+   ** placed in R6,R7.
+   */
+                                // Check for 0/y, return 0
+   R4 = R0 | R1;
+   CC = R4 == 0;
+   IF CC JUMP .LRETURN_R0;
+                                // Check for x/x, return 1
+   R6 = R0 - R2;                // If x == y, then both R6 and R7 will be zero
+   R7 = R1 - R3;
+   R4 = R6 | R7;                // making R4 zero.
+   R6 += 1;                     // which would now make R6:R7==1.
+   CC = R4 == 0;
+   IF CC JUMP .LRETURN_IDENT;
+                                // Check for x/1, return x
+   R6 = R0;
+   R7 = R1;
+   CC = R3 == 0;
+   IF !CC JUMP .Lnexttest;
+   CC = R2 == 1;
+   IF CC JUMP .LRETURN_IDENT;
+.Lnexttest:
+   R4.L = ONES R2;              // check for div by power of two which
+   R5.L = ONES R3;              // can be done using a shift
+   R6 = PACK (R5.L, R4.L);
+   CC = R6 == 1;
+   IF CC JUMP .Lpower_of_two_upper_zero;
+   R6 = PACK (R4.L, R5.L);
+   CC = R6 == 1;
+   IF CC JUMP .Lpower_of_two_lower_zero;
+                                // Check for x < y, return 0
+   R6 = 0;
+   R7 = R6;
+   CC = R1 < R3 (IU);
+   IF CC JUMP .LRETURN_IDENT;
+   CC = R1 == R3;
+   IF !CC JUMP .Lno_idents;
+   CC = R0 < R2 (IU);
+   IF CC JUMP .LRETURN_IDENT;
+.Lno_idents:                    // Idents don't match. Go for the full operation
+   // If X, or X and Y have high bit set, it'll affect the
+   // results, so shift right one to stop this. Note: we've already
+   // checked that X >= Y, so Y's msb won't be set unless X's
+   // is.
+   R4 = 0;
+   CC = R1 < 0;
+   IF !CC JUMP .Lx_msb_clear;
+   CC = !CC;                   // 1 -> 0;
+   R1 = ROT R1 BY -1;          // Shift X >> 1
+   R0 = ROT R0 BY -1;          // lsb -> CC
+   BITSET(R4,31);              // to record only x msb was set
+   CC = R3 < 0;
+   IF !CC JUMP .Ly_msb_clear;
+   CC = !CC;
+   R3 = ROT R3 BY -1;          // Shift Y >> 1
+   R2 = ROT R2 BY -1;
+   BITCLR(R4,31);              // clear bit to record only x msb was set
+.Ly_msb_clear:
+.Lx_msb_clear:
+   // Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
+   // were lost, so we should shift result left by one.
+   [--SP] = R4;                // save for later
+   // In the loop that follows, each iteration we add
+   // either Y' or -Y' to the Remainder. We compute the
+   // negated Y', and store, for convenience. Y' goes
+   // into P0:P1, while -Y' goes into P2:P3.
+   P0 = R2;
+   P1 = R3;
+   R2 = -R2;
+   CC = CARRY;
+   CC = !CC;
+   R4 = CC;
+   R3 = -R3;
+   R3 = R3 - R4;
+   R6 = 0;                     // remainder = 0
+   R7 = R6;
+   [--SP] = R2; P2 = SP;
+   [--SP] = R3; P3 = SP;
+   [--SP] = R6; P5 = SP;       // AQ = 0
+   [--SP] = P1;
+   /* In the loop that follows, we use the following
+   ** register assignments:
+   ** R0,R1 X, workspace
+   ** R2,R3 Y, workspace
+   ** R4,R5 partial Div
+   ** R6,R7 partial remainder
+   ** P5 AQ
+   ** The remainder and div form a 128-bit number, with
+   ** the remainder in the high 64-bits.
+   */
+   R4 = R0;                    // Div = X'
+   R5 = R1;
+   R3 = 0;
+   P4 = 64;                    // Iterate once per bit
+   LSETUP(.LULST,.LULEND) LC0 = P4;
+.LULST:
+        /* Shift Div and remainder up by one. The bit shifted
+        ** out of the top of the quotient is shifted into the bottom
+        ** of the remainder.
+        */
+        CC = R3;
+        R4 = ROT R4 BY 1;
+        R5 = ROT R5 BY 1 ||        // low q to high q
+             R2 = [P5];            // load saved AQ
+        R6 = ROT R6 BY 1 ||        // high q to low r
+             R0 = [P2];            // load -Y'
+        R7 = ROT R7 BY 1 ||        // low r to high r
+             R1 = [P3];
+                                   // Assume add -Y'
+        CC = R2 < 0;               // But if AQ is set...
+        IF CC R0 = P0;             // then add Y' instead
+        IF CC R1 = P1;
+        R6 = R6 + R0;              // Rem += (Y' or -Y')
+        CC = CARRY;
+        R0 = CC;
+        R7 = R7 + R1;
+        R7 = R7 + R0 (NS) ||
+             R1 = [SP];
+                                   // Set the next AQ bit
+        R1 = R7 ^ R1;              // from Remainder and Y'
+        R1 = R1 >> 31 ||           // Negate AQ's value, and
+             [P5] = R1;            // save next AQ
+        BITTGL(R1, 0);             // add neg AQ  to the Div
+.LULEND: R4 = R4 + R1;
+   R6 = [SP + 16];
+   R0 = R4;
+   R1 = R5;
+   CC = BITTST(R6,30);         // Just set CC=0
+   R4 = ROT R0 BY 1;           // but if we had to shift X,
+   R5 = ROT R1 BY 1;           // and didn't shift any bits out,
+   CC = BITTST(R6,31);         // then the result will be half as
+   IF CC R0 = R4;              // much as required, so shift left
+   IF CC R1 = R5;              // one space.
+   SP += 20;
+   (R7:4, P5:3) = [SP++];
+   RTS;
+.Lpower_of_two:
+   /* Y has a single bit set, which means it's a power of two.
+   ** That means we can perform the division just by shifting
+   ** X to the right the appropriate number of bits
+   */
+   /* signbits returns the number of sign bits, minus one.
+   ** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
+   ** to shift right n-signbits spaces. It also means 0x80000000
+   ** is a special case, because that *also* gives a signbits of 0
+   */
+.Lpower_of_two_lower_zero:
+   R7 = 0;
+   R6 = R1 >> 31;
+   CC = R3 < 0;
+   IF CC JUMP .LRETURN_IDENT;
+   R2.L = SIGNBITS R3;
+   R2 = R2.L (Z);
+   R2 += -62;
+   (R7:4, P5:3) = [SP++];
+   JUMP ___lshftli;
+.Lpower_of_two_upper_zero:
+   CC = R2 < 0;
+   IF CC JUMP .Lmaxint_shift;
+   R2.L = SIGNBITS R2;
+   R2 = R2.L (Z);
+   R2 += -30;
+   (R7:4, P5:3) = [SP++];
+   JUMP ___lshftli;
+.Lmaxint_shift:
+   R2 = -31;
+   (R7:4, P5:3) = [SP++];
+   JUMP ___lshftli;
+.LRETURN_IDENT:
+   R0 = R6;
+   R1 = R7;
+.LRETURN_R0:
+   (R7:4, P5:3) = [SP++];
+   RTS;
+.LDIV_BY_ZERO:
+   R0 = ~R2;
+   R1 = R0;
+   (R7:4, P5:3) = [SP++];
+   RTS;
+ENDPROC(___udivdi3)
+ENTRY(___lshftli)
+        CC = R2 == 0;
+        IF CC JUMP .Lfinished;  // nothing to do
+        CC = R2 < 0;
+        IF CC JUMP .Lrshift;
+        R3 = 64;
+        CC = R2 < R3;
+        IF !CC JUMP .Lretzero;
+        // We're shifting left, and it's less than 64 bits, so
+        // a valid result will be returned.
+        R3 >>= 1;       // R3 now 32
+        CC = R2 < R3;
+        IF !CC JUMP .Lzerohalf;
+        // We're shifting left, between 1 and 31 bits, which means
+        // some of the low half will be shifted into the high half.
+        // Work out how much.
+        R3 = R3 - R2;
+        // Save that much data from the bottom half.
+        P1 = R7;
+        R7 = R0;
+        R7 >>= R3;
+        // Adjust both parts of the parameter.
+        R0 <<= R2;
+        R1 <<= R2;
+        // And include the bits moved across.
+        R1 = R1 | R7;
+        R7 = P1;
+        RTS;
+.Lzerohalf:
+        // We're shifting left, between 32 and 63 bits, so the
+        // bottom half will become zero, and the top half will
+        // lose some bits. How many?
+        R2 = R2 - R3;   // N - 32
+        R1 = LSHIFT R0 BY R2.L;
+        R0 = R0 - R0;
+        RTS;
+.Lretzero:
+        R0 = R0 - R0;
+        R1 = R0;
+.Lfinished:
+        RTS;
+.Lrshift:
+        // We're shifting right, but by how much?
+        R2 = -R2;
+        R3 = 64;
+        CC = R2 < R3;
+        IF !CC JUMP .Lretzero;
+        // Shifting right less than 64 bits, so some result bits will
+        // be retained.
+        R3 >>= 1;       // R3 now 32
+        CC = R2 < R3;
+        IF !CC JUMP .Lsignhalf;
+        // Shifting right between 1 and 31 bits, so need to copy
+        // data across words.
+        P1 = R7;
+        R3 = R3 - R2;
+        R7 = R1;
+        R7 <<= R3;
+        R1 >>= R2;
+        R0 >>= R2;
+        R0 = R7 | R0;
+        R7 = P1;
+        RTS;
+.Lsignhalf:
+        // Shifting right between 32 and 63 bits, so the top half
+        // will become all zero-bits, and the bottom half is some
+        // of the top half. But how much?
+        R2 = R2 - R3;
+        R0 = R1;
+        R0 >>= R2;
+        R1 = 0;
+        RTS;
+ENDPROC(___lshftli)
author	Mike Frysinger <michael.frysinger@analog.com>	2007-10-21 10:57:36 -0400
committer	Bryan Wu <bryan.wu@analog.com>	2007-10-21 10:57:36 -0400
commit	b0a68dc07ec395d44849ce98eb417713ca333410 (patch)
tree	5e469225188d63fd296fc04d0c04e9580d8e47db /arch/blackfin/lib
parent	1c668d82465cd5c17030c0f69561841374380ac8 (diff)

diff --git a/arch/blackfin/lib/Makefile b/arch/blackfin/lib/Makefile index 635288fc5f54..bfdad52c570b 100644 --- a/arch/blackfin/lib/Makefile +++ b/arch/blackfin/lib/Makefile
@@ -4,7 +4,7 @@
4		4
5	lib-y := \	5	lib-y := \
6	ashldi3.o ashrdi3.o lshrdi3.o \	6	ashldi3.o ashrdi3.o lshrdi3.o \
7	muldi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \	7	muldi3.o divsi3.o udivsi3.o udivdi3.o modsi3.o umodsi3.o \
8	checksum.o memcpy.o memset.o memcmp.o memchr.o memmove.o \	8	checksum.o memcpy.o memset.o memcmp.o memchr.o memmove.o \
9	strcmp.o strcpy.o strncmp.o strncpy.o \	9	strcmp.o strcpy.o strncmp.o strncpy.o \
10	umulsi3_highpart.o smulsi3_highpart.o \	10	umulsi3_highpart.o smulsi3_highpart.o \


diff --git a/arch/blackfin/lib/udivdi3.S b/arch/blackfin/lib/udivdi3.S new file mode 100644 index 000000000000..ad1ebee675e1 --- /dev/null +++ b/arch/blackfin/lib/udivdi3.S
@@ -0,0 +1,375 @@
		1	/*
		2	* udivdi3.S - unsigned long long division
		3	*
		4	* Copyright 2003-2007 Analog Devices Inc.
		5	* Enter bugs at http://blackfin.uclinux.org/
		6	*
		7	* Licensed under the GPLv2 or later.
		8	*/
		9
		10	#include <linux/linkage.h>
		11
		12	#define CARRY AC0
		13
		14	#ifdef CONFIG_ARITHMETIC_OPS_L1
		15	.section .l1.text
		16	#else
		17	.text
		18	#endif
		19
		20
		21	ENTRY(___udivdi3)
		22	R3 = [SP + 12];
		23	[--SP] = (R7:4, P5:3);
		24
		25	/* Attempt to use divide primitive first; these will handle
		26	** most cases, and they're quick - avoids stalls incurred by
		27	** testing for identities.
		28	*/
		29
		30	R4 = R2 \| R3;
		31	CC = R4 == 0;
		32	IF CC JUMP .LDIV_BY_ZERO;
		33
		34	R4.H = 0x8000;
		35	R4 >>>= 16; // R4 now 0xFFFF8000
		36	R5 = R0 \| R2; // If either dividend or
		37	R4 = R5 & R4; // divisor have bits in
		38	CC = R4; // top half or low half's sign
		39	IF CC JUMP .LIDENTS; // bit, skip builtins.
		40	R4 = R1 \| R3; // Also check top halves
		41	CC = R4;
		42	IF CC JUMP .LIDENTS;
		43
		44	/* Can use the builtins. */
		45
		46	AQ = CC; // Clear AQ (CC==0)
		47	DIVQ(R0, R2);
		48	DIVQ(R0, R2);
		49	DIVQ(R0, R2);
		50	DIVQ(R0, R2);
		51	DIVQ(R0, R2);
		52	DIVQ(R0, R2);
		53	DIVQ(R0, R2);
		54	DIVQ(R0, R2);
		55	DIVQ(R0, R2);
		56	DIVQ(R0, R2);
		57	DIVQ(R0, R2);
		58	DIVQ(R0, R2);
		59	DIVQ(R0, R2);
		60	DIVQ(R0, R2);
		61	DIVQ(R0, R2);
		62	DIVQ(R0, R2);
		63	DIVQ(R0, R2);
		64	R0 = R0.L (Z);
		65	R1 = 0;
		66	(R7:4, P5:3) = [SP++];
		67	RTS;
		68
		69	.LIDENTS:
		70	/* Test for common identities. Value to be returned is
		71	** placed in R6,R7.
		72	*/
		73	// Check for 0/y, return 0
		74	R4 = R0 \| R1;
		75	CC = R4 == 0;
		76	IF CC JUMP .LRETURN_R0;
		77
		78	// Check for x/x, return 1
		79	R6 = R0 - R2; // If x == y, then both R6 and R7 will be zero
		80	R7 = R1 - R3;
		81	R4 = R6 \| R7; // making R4 zero.
		82	R6 += 1; // which would now make R6:R7==1.
		83	CC = R4 == 0;
		84	IF CC JUMP .LRETURN_IDENT;
		85
		86	// Check for x/1, return x
		87	R6 = R0;
		88	R7 = R1;
		89	CC = R3 == 0;
		90	IF !CC JUMP .Lnexttest;
		91	CC = R2 == 1;
		92	IF CC JUMP .LRETURN_IDENT;
		93
		94	.Lnexttest:
		95	R4.L = ONES R2; // check for div by power of two which
		96	R5.L = ONES R3; // can be done using a shift
		97	R6 = PACK (R5.L, R4.L);
		98	CC = R6 == 1;
		99	IF CC JUMP .Lpower_of_two_upper_zero;
		100	R6 = PACK (R4.L, R5.L);
		101	CC = R6 == 1;
		102	IF CC JUMP .Lpower_of_two_lower_zero;
		103
		104	// Check for x < y, return 0
		105	R6 = 0;
		106	R7 = R6;
		107	CC = R1 < R3 (IU);
		108	IF CC JUMP .LRETURN_IDENT;
		109	CC = R1 == R3;
		110	IF !CC JUMP .Lno_idents;
		111	CC = R0 < R2 (IU);
		112	IF CC JUMP .LRETURN_IDENT;
		113
		114	.Lno_idents: // Idents don't match. Go for the full operation
		115
		116
		117	// If X, or X and Y have high bit set, it'll affect the
		118	// results, so shift right one to stop this. Note: we've already
		119	// checked that X >= Y, so Y's msb won't be set unless X's
		120	// is.
		121
		122	R4 = 0;
		123	CC = R1 < 0;
		124	IF !CC JUMP .Lx_msb_clear;
		125	CC = !CC; // 1 -> 0;
		126	R1 = ROT R1 BY -1; // Shift X >> 1
		127	R0 = ROT R0 BY -1; // lsb -> CC
		128	BITSET(R4,31); // to record only x msb was set
		129	CC = R3 < 0;
		130	IF !CC JUMP .Ly_msb_clear;
		131	CC = !CC;
		132	R3 = ROT R3 BY -1; // Shift Y >> 1
		133	R2 = ROT R2 BY -1;
		134	BITCLR(R4,31); // clear bit to record only x msb was set
		135
		136	.Ly_msb_clear:
		137	.Lx_msb_clear:
		138	// Bit 31 in R4 indicates X msb set, but Y msb wasn't, and no bits
		139	// were lost, so we should shift result left by one.
		140
		141	[--SP] = R4; // save for later
		142
		143	// In the loop that follows, each iteration we add
		144	// either Y' or -Y' to the Remainder. We compute the
		145	// negated Y', and store, for convenience. Y' goes
		146	// into P0:P1, while -Y' goes into P2:P3.
		147
		148	P0 = R2;
		149	P1 = R3;
		150	R2 = -R2;
		151	CC = CARRY;
		152	CC = !CC;
		153	R4 = CC;
		154	R3 = -R3;
		155	R3 = R3 - R4;
		156
		157	R6 = 0; // remainder = 0
		158	R7 = R6;
		159
		160	[--SP] = R2; P2 = SP;
		161	[--SP] = R3; P3 = SP;
		162	[--SP] = R6; P5 = SP; // AQ = 0
		163	[--SP] = P1;
		164
		165	/* In the loop that follows, we use the following
		166	** register assignments:
		167	** R0,R1 X, workspace
		168	** R2,R3 Y, workspace
		169	** R4,R5 partial Div
		170	** R6,R7 partial remainder
		171	** P5 AQ
		172	** The remainder and div form a 128-bit number, with
		173	** the remainder in the high 64-bits.
		174	*/
		175	R4 = R0; // Div = X'
		176	R5 = R1;
		177	R3 = 0;
		178
		179	P4 = 64; // Iterate once per bit
		180	LSETUP(.LULST,.LULEND) LC0 = P4;
		181	.LULST:
		182	/* Shift Div and remainder up by one. The bit shifted
		183	** out of the top of the quotient is shifted into the bottom
		184	** of the remainder.
		185	*/
		186	CC = R3;
		187	R4 = ROT R4 BY 1;
		188	R5 = ROT R5 BY 1 \|\| // low q to high q
		189	R2 = [P5]; // load saved AQ
		190	R6 = ROT R6 BY 1 \|\| // high q to low r
		191	R0 = [P2]; // load -Y'
		192	R7 = ROT R7 BY 1 \|\| // low r to high r
		193	R1 = [P3];
		194
		195	// Assume add -Y'
		196	CC = R2 < 0; // But if AQ is set...
		197	IF CC R0 = P0; // then add Y' instead
		198	IF CC R1 = P1;
		199
		200	R6 = R6 + R0; // Rem += (Y' or -Y')
		201	CC = CARRY;
		202	R0 = CC;
		203	R7 = R7 + R1;
		204	R7 = R7 + R0 (NS) \|\|
		205	R1 = [SP];
		206	// Set the next AQ bit
		207	R1 = R7 ^ R1; // from Remainder and Y'
		208	R1 = R1 >> 31 \|\| // Negate AQ's value, and
		209	[P5] = R1; // save next AQ
		210	BITTGL(R1, 0); // add neg AQ to the Div
		211	.LULEND: R4 = R4 + R1;
		212
		213	R6 = [SP + 16];
		214
		215	R0 = R4;
		216	R1 = R5;
		217	CC = BITTST(R6,30); // Just set CC=0
		218	R4 = ROT R0 BY 1; // but if we had to shift X,
		219	R5 = ROT R1 BY 1; // and didn't shift any bits out,
		220	CC = BITTST(R6,31); // then the result will be half as
		221	IF CC R0 = R4; // much as required, so shift left
		222	IF CC R1 = R5; // one space.
		223
		224	SP += 20;
		225	(R7:4, P5:3) = [SP++];
		226	RTS;
		227
		228	.Lpower_of_two:
		229	/* Y has a single bit set, which means it's a power of two.
		230	** That means we can perform the division just by shifting
		231	** X to the right the appropriate number of bits
		232	*/
		233
		234	/* signbits returns the number of sign bits, minus one.
		235	** 1=>30, 2=>29, ..., 0x40000000=>0. Which means we need
		236	** to shift right n-signbits spaces. It also means 0x80000000
		237	** is a special case, because that also gives a signbits of 0
		238	*/
		239	.Lpower_of_two_lower_zero:
		240	R7 = 0;
		241	R6 = R1 >> 31;
		242	CC = R3 < 0;
		243	IF CC JUMP .LRETURN_IDENT;
		244
		245	R2.L = SIGNBITS R3;
		246	R2 = R2.L (Z);
		247	R2 += -62;
		248	(R7:4, P5:3) = [SP++];
		249	JUMP ___lshftli;
		250
		251	.Lpower_of_two_upper_zero:
		252	CC = R2 < 0;
		253	IF CC JUMP .Lmaxint_shift;
		254
		255	R2.L = SIGNBITS R2;
		256	R2 = R2.L (Z);
		257	R2 += -30;
		258	(R7:4, P5:3) = [SP++];
		259	JUMP ___lshftli;
		260
		261	.Lmaxint_shift:
		262	R2 = -31;
		263	(R7:4, P5:3) = [SP++];
		264	JUMP ___lshftli;
		265
		266	.LRETURN_IDENT:
		267	R0 = R6;
		268	R1 = R7;
		269	.LRETURN_R0:
		270	(R7:4, P5:3) = [SP++];
		271	RTS;
		272	.LDIV_BY_ZERO:
		273	R0 = ~R2;
		274	R1 = R0;
		275	(R7:4, P5:3) = [SP++];
		276	RTS;
		277
		278	ENDPROC(___udivdi3)
		279
		280
		281	ENTRY(___lshftli)
		282	CC = R2 == 0;
		283	IF CC JUMP .Lfinished; // nothing to do
		284	CC = R2 < 0;
		285	IF CC JUMP .Lrshift;
		286	R3 = 64;
		287	CC = R2 < R3;
		288	IF !CC JUMP .Lretzero;
		289
		290	// We're shifting left, and it's less than 64 bits, so
		291	// a valid result will be returned.
		292
		293	R3 >>= 1; // R3 now 32
		294	CC = R2 < R3;
		295
		296	IF !CC JUMP .Lzerohalf;
		297
		298	// We're shifting left, between 1 and 31 bits, which means
		299	// some of the low half will be shifted into the high half.
		300	// Work out how much.
		301
		302	R3 = R3 - R2;
		303
		304	// Save that much data from the bottom half.
		305
		306	P1 = R7;
		307	R7 = R0;
		308	R7 >>= R3;
		309
		310	// Adjust both parts of the parameter.
		311
		312	R0 <<= R2;
		313	R1 <<= R2;
		314
		315	// And include the bits moved across.
		316
		317	R1 = R1 \| R7;
		318	R7 = P1;
		319	RTS;
		320
		321	.Lzerohalf:
		322	// We're shifting left, between 32 and 63 bits, so the
		323	// bottom half will become zero, and the top half will
		324	// lose some bits. How many?
		325
		326	R2 = R2 - R3; // N - 32
		327	R1 = LSHIFT R0 BY R2.L;
		328	R0 = R0 - R0;
		329	RTS;
		330
		331	.Lretzero:
		332	R0 = R0 - R0;
		333	R1 = R0;
		334	.Lfinished:
		335	RTS;
		336
		337	.Lrshift:
		338	// We're shifting right, but by how much?
		339	R2 = -R2;
		340	R3 = 64;
		341	CC = R2 < R3;
		342	IF !CC JUMP .Lretzero;
		343
		344	// Shifting right less than 64 bits, so some result bits will
		345	// be retained.
		346
		347	R3 >>= 1; // R3 now 32
		348	CC = R2 < R3;
		349	IF !CC JUMP .Lsignhalf;
		350
		351	// Shifting right between 1 and 31 bits, so need to copy
		352	// data across words.
		353
		354	P1 = R7;
		355	R3 = R3 - R2;
		356	R7 = R1;
		357	R7 <<= R3;
		358	R1 >>= R2;
		359	R0 >>= R2;
		360	R0 = R7 \| R0;
		361	R7 = P1;
		362	RTS;
		363
		364	.Lsignhalf:
		365	// Shifting right between 32 and 63 bits, so the top half
		366	// will become all zero-bits, and the bottom half is some
		367	// of the top half. But how much?
		368
		369	R2 = R2 - R3;
		370	R0 = R1;
		371	R0 >>= R2;
		372	R1 = 0;
		373	RTS;
		374
		375	ENDPROC(___lshftli)