diff options
author | Michal Simek <monstr@monstr.eu> | 2010-08-12 08:28:53 -0400 |
---|---|---|
committer | Michal Simek <monstr@monstr.eu> | 2010-10-21 01:51:42 -0400 |
commit | 4e07dba7cb8c9c76a52d0e32b69f13bb583a9674 (patch) | |
tree | 2f12579e642d5eea3c28f4b7c51a35a465b5e2a2 /arch/microblaze/lib | |
parent | cec051671dc2bb72cc2870aa149d7101ea494b8b (diff) |
microblaze: Add libgcc function directly to kernel
Replaced libgcc functions with asm optimized implementation.
Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch/microblaze/lib')
-rw-r--r-- | arch/microblaze/lib/Makefile | 10 | ||||
-rw-r--r-- | arch/microblaze/lib/ashldi3.c | 29 | ||||
-rw-r--r-- | arch/microblaze/lib/ashrdi3.c | 31 | ||||
-rw-r--r-- | arch/microblaze/lib/divsi3.S | 73 | ||||
-rw-r--r-- | arch/microblaze/lib/libgcc.h | 25 | ||||
-rw-r--r-- | arch/microblaze/lib/lshrdi3.c | 29 | ||||
-rw-r--r-- | arch/microblaze/lib/modsi3.S | 73 | ||||
-rw-r--r-- | arch/microblaze/lib/muldi3.S | 121 | ||||
-rw-r--r-- | arch/microblaze/lib/mulsi3.S | 46 | ||||
-rw-r--r-- | arch/microblaze/lib/udivsi3.S | 84 | ||||
-rw-r--r-- | arch/microblaze/lib/umodsi3.S | 86 |
11 files changed, 607 insertions, 0 deletions
diff --git a/arch/microblaze/lib/Makefile b/arch/microblaze/lib/Makefile index 4dfe47d3cd91..f1fcbff3da25 100644 --- a/arch/microblaze/lib/Makefile +++ b/arch/microblaze/lib/Makefile | |||
@@ -11,3 +11,13 @@ lib-y += memcpy.o memmove.o | |||
11 | endif | 11 | endif |
12 | 12 | ||
13 | lib-y += uaccess_old.o | 13 | lib-y += uaccess_old.o |
14 | |||
15 | lib-y += ashldi3.o | ||
16 | lib-y += ashrdi3.o | ||
17 | lib-y += divsi3.o | ||
18 | lib-y += lshrdi3.o | ||
19 | lib-y += modsi3.o | ||
20 | lib-y += muldi3.o | ||
21 | lib-y += mulsi3.o | ||
22 | lib-y += udivsi3.o | ||
23 | lib-y += umodsi3.o | ||
diff --git a/arch/microblaze/lib/ashldi3.c b/arch/microblaze/lib/ashldi3.c new file mode 100644 index 000000000000..beb80f316095 --- /dev/null +++ b/arch/microblaze/lib/ashldi3.c | |||
@@ -0,0 +1,29 @@ | |||
1 | #include <linux/module.h> | ||
2 | |||
3 | #include "libgcc.h" | ||
4 | /* 64-bit left shift: returns u << b, computed on the two word halves of u. */ | ||
5 | long long __ashldi3(long long u, word_type b) | ||
6 | { | ||
7 | DWunion in, out; | ||
8 | word_type rest; | ||
9 | |||
10 | if (!b) | ||
11 | return u; /* shifting by zero leaves u untouched */ | ||
12 | |||
13 | in.ll = u; | ||
14 | rest = 32 - b; /* positive only while the shift stays below one word */ | ||
15 | if (rest > 0) { | ||
16 | /* b < 32: bits pushed out of the low word are OR-ed into the high word */ | ||
17 | const unsigned int spill = (unsigned int) in.s.low >> rest; | ||
18 | out.s.high = ((unsigned int) in.s.high << b) | spill; | ||
19 | out.s.low = (unsigned int) in.s.low << b; | ||
20 | } else { | ||
21 | /* b >= 32: the low word moves into the high word, the low word empties */ | ||
22 | out.s.high = (unsigned int) in.s.low << -rest; | ||
23 | out.s.low = 0; | ||
24 | } | ||
25 | |||
26 | return out.ll; | ||
27 | } | ||
28 | |||
29 | EXPORT_SYMBOL(__ashldi3); | ||
diff --git a/arch/microblaze/lib/ashrdi3.c b/arch/microblaze/lib/ashrdi3.c new file mode 100644 index 000000000000..c884a912b660 --- /dev/null +++ b/arch/microblaze/lib/ashrdi3.c | |||
@@ -0,0 +1,31 @@ | |||
1 | #include <linux/module.h> | ||
2 | |||
3 | #include "libgcc.h" | ||
4 | /* 64-bit arithmetic right shift: returns u >> b, keeping the sign of u. */ | ||
5 | long long __ashrdi3(long long u, word_type b) | ||
6 | { | ||
7 | DWunion in, out; | ||
8 | word_type rest; | ||
9 | |||
10 | if (!b) | ||
11 | return u; /* shifting by zero leaves u untouched */ | ||
12 | |||
13 | in.ll = u; | ||
14 | rest = 32 - b; /* positive only while the shift stays below one word */ | ||
15 | |||
16 | if (rest > 0) { | ||
17 | /* b < 32: bits pushed out of the high word are OR-ed into the low word */ | ||
18 | const unsigned int spill = (unsigned int) in.s.high << rest; | ||
19 | |||
20 | out.s.low = ((unsigned int) in.s.low >> b) | spill; | ||
21 | out.s.high = in.s.high >> b; | ||
22 | } else { | ||
23 | /* b >= 32: low word comes from the high word; high word becomes 1..1 or 0..0 */ | ||
24 | out.s.low = in.s.high >> -rest; | ||
25 | out.s.high = in.s.high >> 31; | ||
26 | } | ||
27 | |||
28 | return out.ll; | ||
29 | } | ||
30 | |||
31 | EXPORT_SYMBOL(__ashrdi3); | ||
diff --git a/arch/microblaze/lib/divsi3.S b/arch/microblaze/lib/divsi3.S new file mode 100644 index 000000000000..595b02d6e86b --- /dev/null +++ b/arch/microblaze/lib/divsi3.S | |||
@@ -0,0 +1,73 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Signed divide operation for 32 bit integers. | ||
5 | * Input : Dividend in Reg r5 | ||
6 | * Divisor in Reg r6 | ||
7 | * Output: Result in Reg r3 (0 when the divisor or the dividend is 0) | ||
8 | */ | ||
9 | .text | ||
10 | .globl __divsi3 | ||
11 | .type __divsi3, @function | ||
12 | .ent __divsi3 | ||
13 | __divsi3: | ||
14 | .frame r1, 0, r15 | ||
15 | |||
16 | addik r1, r1, -16 /* make room to save r28-r31 */ | ||
17 | swi r28, r1, 0 | ||
18 | swi r29, r1, 4 | ||
19 | swi r30, r1, 8 | ||
20 | swi r31, r1, 12 | ||
21 | |||
22 | beqi r6, div_by_zero /* div_by_zero - division error, returns 0 */ | ||
23 | beqi r5, result_is_zero /* result is zero */ | ||
24 | bgeid r5, r5_pos | ||
25 | xor r28, r5, r6 /* delay slot: get the sign of the result */ | ||
26 | rsubi r5, r5, 0 /* make r5 positive */ | ||
27 | r5_pos: | ||
28 | bgei r6, r6_pos | ||
29 | rsubi r6, r6, 0 /* make r6 positive */ | ||
30 | r6_pos: | ||
31 | addik r30, r0, 0 /* clear mod */ | ||
32 | addik r3, r0, 0 /* clear div */ | ||
33 | addik r29, r0, 32 /* initialize the loop count */ | ||
34 | |||
35 | /* first part: try to find the first '1' in r5 */ | ||
36 | div0: | ||
37 | blti r5, div2 /* this traps r5 == 0x80000000 */ | ||
38 | div1: | ||
39 | add r5, r5, r5 /* left shift logical r5 */ | ||
40 | bgtid r5, div1 | ||
41 | addik r29, r29, -1 /* delay slot: one bit less left to process */ | ||
42 | div2: | ||
43 | /* left shift logical r5 get the '1' into the carry */ | ||
44 | add r5, r5, r5 | ||
45 | addc r30, r30, r30 /* move that bit into the mod register */ | ||
46 | rsub r31, r6, r30 /* try to subtract (r30 - r6) */ | ||
47 | blti r31, mod_too_small | ||
48 | /* move the r31 to mod since the result was positive */ | ||
49 | or r30, r0, r31 | ||
50 | addik r3, r3, 1 | ||
51 | mod_too_small: | ||
52 | addik r29, r29, -1 | ||
53 | beqi r29, loop_end | ||
54 | add r3, r3, r3 /* shift in the '1' into div */ | ||
55 | bri div2 /* div2 */ | ||
56 | loop_end: | ||
57 | bgei r28, return_here /* sign in r28 is positive: r3 is already the result */ | ||
58 | brid return_here | ||
59 | rsubi r3, r3, 0 /* delay slot: negate the result */ | ||
60 | div_by_zero: | ||
61 | result_is_zero: | ||
62 | or r3, r0, r0 /* set result to 0 */ | ||
63 | return_here: | ||
64 | /* restore the registers saved on entry (r28-r31); r3 holds the result */ | ||
65 | lwi r28, r1, 0 | ||
66 | lwi r29, r1, 4 | ||
67 | lwi r30, r1, 8 | ||
68 | lwi r31, r1, 12 | ||
69 | rtsd r15, 8 | ||
70 | addik r1, r1, 16 /* delay slot: release the stack frame */ | ||
71 | |||
72 | .size __divsi3, . - __divsi3 | ||
73 | .end __divsi3 | ||
diff --git a/arch/microblaze/lib/libgcc.h b/arch/microblaze/lib/libgcc.h new file mode 100644 index 000000000000..05909d58e2fe --- /dev/null +++ b/arch/microblaze/lib/libgcc.h | |||
@@ -0,0 +1,25 @@ | |||
1 | #ifndef __ASM_LIBGCC_H | ||
2 | #define __ASM_LIBGCC_H | ||
3 | |||
4 | #include <asm/byteorder.h> | ||
5 | |||
6 | /* Integer type with the width of one machine word (GCC "word" mode). */ | ||
7 | typedef int word_type __attribute__ ((mode (__word__))); | ||
8 | |||
9 | /* The two int halves of a long long, declared in the byte order's memory order. */ | ||
10 | struct DWstruct { | ||
11 | #if defined(__BIG_ENDIAN) | ||
12 | int high, low; | ||
13 | #elif defined(__LITTLE_ENDIAN) | ||
14 | int low, high; | ||
15 | #else | ||
16 | #error I feel sick. | ||
17 | #endif | ||
18 | }; | ||
19 | |||
20 | /* Access a 64-bit value either as its two halves (s) or as a whole (ll). */ | ||
21 | typedef union { | ||
22 | struct DWstruct s; | ||
23 | long long ll; | ||
24 | } DWunion; | ||
25 | #endif /* __ASM_LIBGCC_H */ | ||
diff --git a/arch/microblaze/lib/lshrdi3.c b/arch/microblaze/lib/lshrdi3.c new file mode 100644 index 000000000000..dcf8d6810b7c --- /dev/null +++ b/arch/microblaze/lib/lshrdi3.c | |||
@@ -0,0 +1,29 @@ | |||
1 | #include <linux/module.h> | ||
2 | |||
3 | #include "libgcc.h" | ||
4 | /* 64-bit logical right shift: returns u >> b with zeros shifted in at the top. */ | ||
5 | long long __lshrdi3(long long u, word_type b) | ||
6 | { | ||
7 | DWunion in, out; | ||
8 | word_type rest; | ||
9 | |||
10 | if (!b) | ||
11 | return u; /* shifting by zero leaves u untouched */ | ||
12 | |||
13 | in.ll = u; | ||
14 | rest = 32 - b; /* positive only while the shift stays below one word */ | ||
15 | if (rest > 0) { | ||
16 | /* b < 32: bits pushed out of the high word are OR-ed into the low word */ | ||
17 | const unsigned int spill = (unsigned int) in.s.high << rest; | ||
18 | out.s.low = ((unsigned int) in.s.low >> b) | spill; | ||
19 | out.s.high = (unsigned int) in.s.high >> b; | ||
20 | } else { | ||
21 | /* b >= 32: the high word moves into the low word, the high word empties */ | ||
22 | out.s.low = (unsigned int) in.s.high >> -rest; | ||
23 | out.s.high = 0; | ||
24 | } | ||
25 | |||
26 | return out.ll; | ||
27 | } | ||
28 | |||
29 | EXPORT_SYMBOL(__lshrdi3); | ||
diff --git a/arch/microblaze/lib/modsi3.S b/arch/microblaze/lib/modsi3.S new file mode 100644 index 000000000000..84e0bee6e8c7 --- /dev/null +++ b/arch/microblaze/lib/modsi3.S | |||
@@ -0,0 +1,73 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Signed modulo operation for 32 bit integers. | ||
5 | * Input : op1 in Reg r5 | ||
6 | * op2 in Reg r6 | ||
7 | * Output: op1 mod op2 in Reg r3; the result takes the sign of op1 | ||
8 | * and is 0 when op1 or op2 is 0. r28-r31 are saved and restored. | ||
9 | */ | ||
10 | .text | ||
11 | .globl __modsi3 | ||
12 | .type __modsi3, @function | ||
13 | .ent __modsi3 | ||
14 | |||
15 | __modsi3: | ||
16 | .frame r1, 0, r15 | ||
17 | |||
18 | addik r1, r1, -16 /* make room to save r28-r31 */ | ||
19 | swi r28, r1, 0 | ||
20 | swi r29, r1, 4 | ||
21 | swi r30, r1, 8 | ||
22 | swi r31, r1, 12 | ||
23 | |||
24 | beqi r6, div_by_zero /* div_by_zero - division error, returns 0 */ | ||
25 | beqi r5, result_is_zero /* result is zero */ | ||
26 | bgeid r5, r5_pos | ||
27 | /* delay slot: get the sign of the result [depends only on the first arg] */ | ||
28 | add r28, r5, r0 | ||
29 | rsubi r5, r5, 0 /* make r5 positive */ | ||
30 | r5_pos: | ||
31 | bgei r6, r6_pos | ||
32 | rsubi r6, r6, 0 /* make r6 positive */ | ||
33 | r6_pos: | ||
34 | addik r3, r0, 0 /* clear mod */ | ||
35 | addik r30, r0, 0 /* clear div */ | ||
36 | bltid r5, div2 /* r5 still negative (0x80000000): top bit already set, skip the search (it would shift r5 to 0 and never end) */ | ||
37 | addik r29, r0, 32 /* delay slot: initialize the loop count */ | ||
38 | div1: | ||
39 | add r5, r5, r5 /* left shift logical r5 until the first '1' reaches bit 31 */ | ||
40 | bgeid r5, div1 | ||
41 | addik r29, r29, -1 /* delay slot: one bit less left to process */ | ||
42 | div2: | ||
43 | /* left shift logical r5 get the '1' into the carry */ | ||
44 | add r5, r5, r5 | ||
45 | addc r3, r3, r3 /* move that bit into the mod register */ | ||
46 | rsub r31, r6, r3 /* try to subtract (r3 - r6) */ | ||
47 | blti r31, mod_too_small | ||
48 | /* move the r31 to mod since the result was positive */ | ||
49 | or r3, r0, r31 | ||
50 | addik r30, r30, 1 | ||
51 | mod_too_small: | ||
52 | addik r29, r29, -1 | ||
53 | beqi r29, loop_end | ||
54 | add r30, r30, r30 /* shift in the '1' into div */ | ||
55 | bri div2 /* div2 */ | ||
56 | loop_end: | ||
57 | bgei r28, return_here /* op1 was not negative: r3 is already the result */ | ||
58 | brid return_here | ||
59 | rsubi r3, r3, 0 /* delay slot: negate the result */ | ||
60 | div_by_zero: | ||
61 | result_is_zero: | ||
62 | or r3, r0, r0 /* set result to 0 [both mod as well as div are 0] */ | ||
63 | return_here: | ||
64 | /* restore the registers saved on entry (r28-r31); r3 holds the result */ | ||
65 | lwi r28, r1, 0 | ||
66 | lwi r29, r1, 4 | ||
67 | lwi r30, r1, 8 | ||
68 | lwi r31, r1, 12 | ||
69 | rtsd r15, 8 | ||
70 | addik r1, r1, 16 /* delay slot: release the stack frame */ | ||
71 | |||
72 | .size __modsi3, . - __modsi3 | ||
73 | .end __modsi3 | ||
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S new file mode 100644 index 000000000000..ceeaa8c407f2 --- /dev/null +++ b/arch/microblaze/lib/muldi3.S | |||
@@ -0,0 +1,121 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Multiply operation for 64 bit integers, for devices with hard multiply | ||
5 | * Input : Operand1[H] in Reg r5 | ||
6 | * Operand1[L] in Reg r6 | ||
7 | * Operand2[H] in Reg r7 | ||
8 | * Operand2[L] in Reg r8 | ||
9 | * Output: Result[H] in Reg r3 | ||
10 | * Result[L] in Reg r4 | ||
11 | * | ||
12 | * Explanation: | ||
13 | * | ||
14 | * Both input numbers are split into 16 bit parts as follows | ||
15 | * op1 = A B C D | ||
16 | * op2 = E F G H | ||
17 | * result = D * H | ||
18 | * + (C * H + D * G) << 16 | ||
19 | * + (B * H + C * G + D * F) << 32 | ||
20 | * + (A * H + B * G + C * F + D * E) << 48 | ||
21 | * | ||
22 | * Only 64 bits of the output are considered | ||
23 | */ | ||
24 | |||
25 | .text | ||
26 | .globl __muldi3 | ||
27 | .type __muldi3, @function | ||
28 | .ent __muldi3 | ||
29 | |||
30 | __muldi3: | ||
31 | addi r1, r1, -40 /* 40-byte frame: r20-r27 at 0..28, result scratch at 32 and 36 */ | ||
32 | |||
33 | /* Save the input operands on the caller's stack (offsets 44..56 lie above this 40-byte frame) */ | ||
34 | swi r5, r1, 44 | ||
35 | swi r6, r1, 48 | ||
36 | swi r7, r1, 52 | ||
37 | swi r8, r1, 56 | ||
38 | |||
39 | /* Store all the callee saved registers */ | ||
40 | sw r20, r1, r0 | ||
41 | swi r21, r1, 4 | ||
42 | swi r22, r1, 8 | ||
43 | swi r23, r1, 12 | ||
44 | swi r24, r1, 16 | ||
45 | swi r25, r1, 20 | ||
46 | swi r26, r1, 24 | ||
47 | swi r27, r1, 28 | ||
48 | |||
49 | /* Load all the 16 bit values for A thru H. NOTE(review): these halfword offsets pick the high half at the lower address, i.e. they assume big-endian word layout - confirm before building little-endian */ | ||
50 | lhui r20, r1, 44 /* A */ | ||
51 | lhui r21, r1, 46 /* B */ | ||
52 | lhui r22, r1, 48 /* C */ | ||
53 | lhui r23, r1, 50 /* D */ | ||
54 | lhui r24, r1, 52 /* E */ | ||
55 | lhui r25, r1, 54 /* F */ | ||
56 | lhui r26, r1, 56 /* G */ | ||
57 | lhui r27, r1, 58 /* H */ | ||
58 | |||
59 | /* D * H ==> LSB of the result on stack ==> Store1 */ | ||
60 | mul r9, r23, r27 | ||
61 | swi r9, r1, 36 /* Pos2 and Pos3 */ | ||
62 | |||
63 | /* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */ | ||
64 | /* Store the carry generated in position 2 for Pos 3 */ | ||
65 | lhui r11, r1, 36 /* Pos2 */ | ||
66 | mul r9, r22, r27 /* C * H */ | ||
67 | mul r10, r23, r26 /* D * G */ | ||
68 | add r9, r9, r10 | ||
69 | addc r12, r0, r0 /* r12 = carry out of the add above */ | ||
70 | add r9, r9, r11 | ||
71 | addc r12, r12, r0 /* Store the Carry */ | ||
72 | shi r9, r1, 36 /* Store Pos2 */ | ||
73 | swi r9, r1, 32 | ||
74 | lhui r11, r1, 32 | ||
75 | shi r11, r1, 34 /* Store Pos1 */ | ||
76 | |||
77 | /* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */ | ||
78 | mul r9, r21, r27 /* B * H */ | ||
79 | mul r10, r22, r26 /* C * G */ | ||
80 | mul r7, r23, r25 /* D * F */ | ||
81 | add r9, r9, r11 | ||
82 | add r9, r9, r10 | ||
83 | add r9, r9, r7 | ||
84 | swi r9, r1, 32 /* Pos0 and Pos1 */ | ||
85 | |||
86 | /* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */ | ||
87 | lhui r11, r1, 32 /* Pos0 */ | ||
88 | mul r9, r20, r27 /* A * H */ | ||
89 | mul r10, r21, r26 /* B * G */ | ||
90 | mul r7, r22, r25 /* C * F */ | ||
91 | mul r8, r23, r24 /* D * E */ | ||
92 | add r9, r9, r11 | ||
93 | add r9, r9, r10 | ||
94 | add r9, r9, r7 | ||
95 | add r9, r9, r8 | ||
96 | sext16 r9, r9 /* Sign extend the MSB */ | ||
97 | shi r9, r1, 32 | ||
98 | |||
99 | /* Add the saved carry (r12) to Pos0, then move results to r3 and r4 */ | ||
100 | lhui r3, r1, 32 | ||
101 | add r3, r3, r12 | ||
102 | shi r3, r1, 32 | ||
103 | lwi r3, r1, 32 /* Hi Part */ | ||
104 | lwi r4, r1, 36 /* Lo Part */ | ||
105 | |||
106 | /* Restore Callee saved registers */ | ||
107 | lw r20, r1, r0 | ||
108 | lwi r21, r1, 4 | ||
109 | lwi r22, r1, 8 | ||
110 | lwi r23, r1, 12 | ||
111 | lwi r24, r1, 16 | ||
112 | lwi r25, r1, 20 | ||
113 | lwi r26, r1, 24 | ||
114 | lwi r27, r1, 28 | ||
115 | |||
116 | /* Restore Frame and return */ | ||
117 | rtsd r15, 8 | ||
118 | addi r1, r1, 40 | ||
119 | |||
120 | .size __muldi3, . - __muldi3 | ||
121 | .end __muldi3 | ||
diff --git a/arch/microblaze/lib/mulsi3.S b/arch/microblaze/lib/mulsi3.S new file mode 100644 index 000000000000..90bd7b93afe6 --- /dev/null +++ b/arch/microblaze/lib/mulsi3.S | |||
@@ -0,0 +1,46 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Multiply operation for 32 bit integers (shift-and-add loop, no mul instruction used). | ||
5 | * Input : Operand1 in Reg r5 | ||
6 | * Operand2 in Reg r6 | ||
7 | * Output: Result [op1 * op2] in Reg r3; r4 and r7 are used as scratch | ||
8 | */ | ||
9 | .text | ||
10 | .globl __mulsi3 | ||
11 | .type __mulsi3, @function | ||
12 | .ent __mulsi3 | ||
13 | |||
14 | __mulsi3: | ||
15 | .frame r1, 0, r15 | ||
16 | add r3, r0, r0 /* clear the accumulator */ | ||
17 | beqi r5, result_is_zero /* multiply by zero */ | ||
18 | beqi r6, result_is_zero /* multiply by zero */ | ||
19 | bgeid r5, r5_pos | ||
20 | xor r4, r5, r6 /* delay slot: get the sign of the result */ | ||
21 | rsubi r5, r5, 0 /* make r5 positive */ | ||
22 | r5_pos: | ||
23 | bgei r6, r6_pos | ||
24 | rsubi r6, r6, 0 /* make r6 positive */ | ||
25 | r6_pos: | ||
26 | bri l1 | ||
27 | l2: | ||
28 | add r5, r5, r5 /* r5 <<= 1 for the next bit of r6 */ | ||
29 | l1: | ||
30 | srl r6, r6 /* shift the lowest bit of r6 out into the carry */ | ||
31 | addc r7, r0, r0 /* r7 = that bit */ | ||
32 | beqi r7, l2 /* bit clear: nothing to add for this position */ | ||
33 | bneid r6, l2 /* bit set: add, and continue while r6 has bits left */ | ||
34 | add r3, r3, r5 /* delay slot: accumulate the shifted r5 */ | ||
35 | blti r4, negateresult /* operands had different signs */ | ||
36 | rtsd r15, 8 | ||
37 | nop | ||
38 | negateresult: | ||
39 | rtsd r15, 8 | ||
40 | rsub r3, r3, r0 /* delay slot: r3 = 0 - r3 */ | ||
41 | result_is_zero: | ||
42 | rtsd r15, 8 | ||
43 | addi r3, r0, 0 /* delay slot: result is 0 */ | ||
44 | |||
45 | .size __mulsi3, . - __mulsi3 | ||
46 | .end __mulsi3 | ||
diff --git a/arch/microblaze/lib/udivsi3.S b/arch/microblaze/lib/udivsi3.S new file mode 100644 index 000000000000..64cf57e4bb85 --- /dev/null +++ b/arch/microblaze/lib/udivsi3.S | |||
@@ -0,0 +1,84 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Unsigned divide operation for 32 bit integers. | ||
5 | * Input : Dividend in Reg r5 | ||
6 | * Divisor in Reg r6 | ||
7 | * Output: Result in Reg r3 (0 when the divisor or the dividend is 0) | ||
8 | */ | ||
9 | |||
10 | .text | ||
11 | .globl __udivsi3 | ||
12 | .type __udivsi3, @function | ||
13 | .ent __udivsi3 | ||
14 | |||
15 | __udivsi3: | ||
16 | |||
17 | .frame r1, 0, r15 | ||
18 | |||
19 | addik r1, r1, -12 /* make room to save r29-r31 */ | ||
20 | swi r29, r1, 0 | ||
21 | swi r30, r1, 4 | ||
22 | swi r31, r1, 8 | ||
23 | |||
24 | beqi r6, div_by_zero /* div_by_zero - division error, returns 0 */ | ||
25 | beqid r5, result_is_zero /* result is zero */ | ||
26 | addik r30, r0, 0 /* delay slot: clear mod */ | ||
27 | addik r29, r0, 32 /* initialize the loop count */ | ||
28 | |||
29 | /* check if r6 and r5 are equal - if yes, return 1 */ | ||
30 | rsub r18, r5, r6 | ||
31 | beqid r18, return_here | ||
32 | addik r3, r0, 1 /* delay slot: result 1 for the equal case */ | ||
33 | |||
34 | /* check if (uns)r6 is greater than (uns)r5. in that case, just return 0 */ | ||
35 | xor r18, r5, r6 | ||
36 | bgeid r18, 16 /* same top bit: relative branch, +16 bytes should land on the rsub below (4-byte instructions) */ | ||
37 | add r3, r0, r0 /* we would anyways clear r3 */ | ||
38 | blti r6, return_here /* r6[bit 31 = 1] hence is greater */ | ||
39 | bri checkr6 | ||
40 | rsub r18, r6, r5 /* microblazecmp: r18 = r5 - r6 */ | ||
41 | blti r18, return_here | ||
42 | |||
43 | /* if r6 [bit 31] is set, then return result as 1 */ | ||
44 | checkr6: | ||
45 | bgti r6, div0 | ||
46 | brid return_here | ||
47 | addik r3, r0, 1 /* delay slot: result is 1 */ | ||
48 | |||
49 | /* first part: try to find the first '1' in r5 */ | ||
50 | div0: | ||
51 | blti r5, div2 /* top bit of r5 already set: skip the search */ | ||
52 | div1: | ||
53 | add r5, r5, r5 /* left shift logical r5 */ | ||
54 | bgtid r5, div1 | ||
55 | addik r29, r29, -1 /* delay slot: one bit less left to process */ | ||
56 | div2: | ||
57 | /* left shift logical r5 get the '1' into the carry */ | ||
58 | add r5, r5, r5 | ||
59 | addc r30, r30, r30 /* move that bit into the mod register */ | ||
60 | rsub r31, r6, r30 /* try to subtract (r30 - r6) */ | ||
61 | blti r31, mod_too_small | ||
62 | /* move the r31 to mod since the result was positive */ | ||
63 | or r30, r0, r31 | ||
64 | addik r3, r3, 1 | ||
65 | mod_too_small: | ||
66 | addik r29, r29, -1 | ||
67 | beqi r29, loop_end | ||
68 | add r3, r3, r3 /* shift in the '1' into div */ | ||
69 | bri div2 /* div2 */ | ||
70 | loop_end: | ||
71 | bri return_here | ||
72 | div_by_zero: | ||
73 | result_is_zero: | ||
74 | or r3, r0, r0 /* set result to 0 */ | ||
75 | return_here: | ||
76 | /* restore the registers saved on entry (r29-r31); r3 holds the result */ | ||
77 | lwi r29, r1, 0 | ||
78 | lwi r30, r1, 4 | ||
79 | lwi r31, r1, 8 | ||
80 | rtsd r15, 8 | ||
81 | addik r1, r1, 12 /* delay slot: release the stack frame */ | ||
82 | |||
83 | .size __udivsi3, . - __udivsi3 | ||
84 | .end __udivsi3 | ||
diff --git a/arch/microblaze/lib/umodsi3.S b/arch/microblaze/lib/umodsi3.S new file mode 100644 index 000000000000..17d16bafae58 --- /dev/null +++ b/arch/microblaze/lib/umodsi3.S | |||
@@ -0,0 +1,86 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Unsigned modulo operation for 32 bit integers. | ||
5 | * Input : op1 in Reg r5 | ||
6 | * op2 in Reg r6 | ||
7 | * Output: op1 mod op2 in Reg r3 (0 when op1 or op2 is 0) | ||
8 | */ | ||
9 | |||
10 | .text | ||
11 | .globl __umodsi3 | ||
12 | .type __umodsi3, @function | ||
13 | .ent __umodsi3 | ||
14 | |||
15 | __umodsi3: | ||
16 | .frame r1, 0, r15 | ||
17 | |||
18 | addik r1, r1, -12 /* make room to save r29-r31 */ | ||
19 | swi r29, r1, 0 | ||
20 | swi r30, r1, 4 | ||
21 | swi r31, r1, 8 | ||
22 | |||
23 | beqi r6, div_by_zero /* div_by_zero - division error, returns 0 */ | ||
24 | beqid r5, result_is_zero /* result is zero */ | ||
25 | addik r3, r0, 0 /* delay slot: clear mod (r3 accumulates the remainder) */ | ||
26 | addik r30, r0, 0 /* clear div (quotient bits are collected in r30) */ | ||
27 | addik r29, r0, 32 /* initialize the loop count */ | ||
28 | |||
29 | /* check if r6 and r5 are equal - if yes, return 0 */ | ||
30 | rsub r18, r5, r6 | ||
31 | beqi r18, return_here | ||
32 | |||
33 | /* check if (uns)r6 is greater than (uns)r5. in that case, just return r5 */ | ||
34 | xor r18, r5, r6 | ||
35 | bgeid r18, 16 /* same top bit: relative branch, +16 bytes should land on the rsub below (4-byte instructions) */ | ||
36 | addik r3, r5, 0 /* delay slot: tentative result r5 */ | ||
37 | blti r6, return_here /* only r6 has bit 31 set, so r6 > r5 */ | ||
38 | bri $lcheckr6 | ||
39 | rsub r18, r5, r6 /* microblazecmp: r18 = r6 - r5 */ | ||
40 | bgti r18, return_here | ||
41 | |||
42 | /* if r6 [bit 31] is set, then return result as r5-r6 */ | ||
43 | $lcheckr6: | ||
44 | bgtid r6, div0 | ||
45 | addik r3, r0, 0 /* delay slot: reset mod to 0 */ | ||
46 | addik r18, r0, 0x7fffffff | ||
47 | and r5, r5, r18 /* drop bit 31 from both operands before subtracting */ | ||
48 | and r6, r6, r18 | ||
49 | brid return_here | ||
50 | rsub r3, r6, r5 /* delay slot: r3 = r5 - r6 */ | ||
51 | /* first part: try to find the first '1' in the r5 */ | ||
52 | div0: | ||
53 | blti r5, div2 /* top bit of r5 already set: skip the search */ | ||
54 | div1: | ||
55 | add r5, r5, r5 /* left shift logical r5 */ | ||
56 | bgeid r5, div1 | ||
57 | addik r29, r29, -1 /* delay slot: one bit less left to process */ | ||
58 | div2: | ||
59 | /* left shift logical r5 get the '1' into the carry */ | ||
60 | add r5, r5, r5 | ||
61 | addc r3, r3, r3 /* move that bit into the mod register */ | ||
62 | rsub r31, r6, r3 /* try to subtract (r3 - r6) */ | ||
63 | blti r31, mod_too_small | ||
64 | /* move the r31 to mod since the result was positive */ | ||
65 | or r3, r0, r31 | ||
66 | addik r30, r30, 1 | ||
67 | mod_too_small: | ||
68 | addik r29, r29, -1 | ||
69 | beqi r29, loop_end | ||
70 | add r30, r30, r30 /* shift in the '1' into div */ | ||
71 | bri div2 /* div2 */ | ||
72 | loop_end: | ||
73 | bri return_here | ||
74 | div_by_zero: | ||
75 | result_is_zero: | ||
76 | or r3, r0, r0 /* set result to 0 */ | ||
77 | return_here: | ||
78 | /* restore the registers saved on entry (r29-r31); r3 holds the result */ | ||
79 | lwi r29, r1, 0 | ||
80 | lwi r30, r1, 4 | ||
81 | lwi r31, r1, 8 | ||
82 | rtsd r15, 8 | ||
83 | addik r1, r1, 12 /* delay slot: release the stack frame */ | ||
84 | |||
85 | .size __umodsi3, . - __umodsi3 | ||
86 | .end __umodsi3 | ||