diff options
author | Michal Simek <monstr@monstr.eu> | 2010-12-07 05:55:06 -0500 |
---|---|---|
committer | Michal Simek <monstr@monstr.eu> | 2011-01-03 04:30:31 -0500 |
commit | 3370d82f3b3ff04d082a9c343a80019282e41261 (patch) | |
tree | 4466336416d76206c29cfb78b05c21648845b33c /arch/microblaze/lib | |
parent | 17b931468729df6921981700bf18c75609c2f6af (diff) |
microblaze: Fix __muldi3 function for little-endian.
__muldi3 was written for big-endian platforms.
The code contained half-word read/write instructions, which
are not compatible with little-endian CPUs.
The asm __muldi3 implementation is replaced by a C version.
Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch/microblaze/lib')
-rw-r--r-- | arch/microblaze/lib/muldi3.S | 121 | ||||
-rw-r--r-- | arch/microblaze/lib/muldi3.c | 60 |
2 files changed, 60 insertions, 121 deletions
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S deleted file mode 100644 index ceeaa8c407f2..000000000000 --- a/arch/microblaze/lib/muldi3.S +++ /dev/null | |||
@@ -1,121 +0,0 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Multiply operation for 64 bit integers, for devices with hard multiply | ||
5 | * Input : Operand1[H] in Reg r5 | ||
6 | * Operand1[L] in Reg r6 | ||
7 | * Operand2[H] in Reg r7 | ||
8 | * Operand2[L] in Reg r8 | ||
9 | * Output: Result[H] in Reg r3 | ||
10 | * Result[L] in Reg r4 | ||
11 | * | ||
12 | * Explanation: | ||
13 | * | ||
14 | * Both input numbers are split into 16-bit parts as follows | ||
15 | * op1 = A B C D | ||
16 | * op2 = E F G H | ||
17 | * result = D * H | ||
18 | * + (C * H + D * G) << 16 | ||
19 | * + (B * H + C * G + D * F) << 32 | ||
20 | * + (A * H + B * G + C * F + D * E) << 48 | ||
21 | * | ||
22 | * Only 64 bits of the output are considered | ||
23 | */ | ||
24 | |||
25 | .text | ||
26 | .globl __muldi3 | ||
27 | .type __muldi3, @function | ||
28 | .ent __muldi3 | ||
29 | |||
30 | __muldi3: | ||
31 | addi r1, r1, -40 /* r1 -= 40: open a 40-byte frame (undone by the final addi r1, r1, 40) */ | ||
32 | |||
33 | /* Save the input operands at offsets 44..56, i.e. above the 40-byte frame just opened (caller's stack) */ | ||
34 | swi r5, r1, 44 | ||
35 | swi r6, r1, 48 | ||
36 | swi r7, r1, 52 | ||
37 | swi r8, r1, 56 | ||
38 | |||
39 | /* Store all the callee saved registers (r20-r27) at offsets 0..28 of the new frame */ | ||
40 | sw r20, r1, r0 | ||
41 | swi r21, r1, 4 | ||
42 | swi r22, r1, 8 | ||
43 | swi r23, r1, 12 | ||
44 | swi r24, r1, 16 | ||
45 | swi r25, r1, 20 | ||
46 | swi r26, r1, 24 | ||
47 | swi r27, r1, 28 | ||
48 | |||
49 | /* Load all the 16 bit values for A thru H with half-word loads from the words saved above; this offset-to-piece mapping assumes big-endian byte order */ | ||
50 | lhui r20, r1, 44 /* A */ | ||
51 | lhui r21, r1, 46 /* B */ | ||
52 | lhui r22, r1, 48 /* C */ | ||
53 | lhui r23, r1, 50 /* D */ | ||
54 | lhui r24, r1, 52 /* E */ | ||
55 | lhui r25, r1, 54 /* F */ | ||
56 | lhui r26, r1, 56 /* G */ | ||
57 | lhui r27, r1, 58 /* H */ | ||
58 | |||
59 | /* D * H ==> LSB of the result on stack ==> Store1 */ | ||
60 | mul r9, r23, r27 | ||
61 | swi r9, r1, 36 /* Pos2 and Pos3 */ | ||
62 | |||
63 | /* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */ | ||
64 | /* Store the carry generated in position 2 for Pos 3 */ | ||
65 | lhui r11, r1, 36 /* Pos2 */ | ||
66 | mul r9, r22, r27 /* C * H */ | ||
67 | mul r10, r23, r26 /* D * G */ | ||
68 | add r9, r9, r10 | ||
69 | addc r12, r0, r0 /* capture the carry of C * H + D * G in r12 */ | ||
70 | add r9, r9, r11 | ||
71 | addc r12, r12, r0 /* Store the Carry */ | ||
72 | shi r9, r1, 36 /* Store Pos2 */ | ||
73 | swi r9, r1, 32 /* spill the sum so its upper half-word can be reloaded below */ | ||
74 | lhui r11, r1, 32 | ||
75 | shi r11, r1, 34 /* Store Pos1 */ | ||
76 | |||
77 | /* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */ | ||
78 | mul r9, r21, r27 /* B * H */ | ||
79 | mul r10, r22, r26 /* C * G */ | ||
80 | mul r7, r23, r25 /* D * F */ | ||
81 | add r9, r9, r11 | ||
82 | add r9, r9, r10 | ||
83 | add r9, r9, r7 | ||
84 | swi r9, r1, 32 /* Pos0 and Pos1 */ | ||
85 | |||
86 | /* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */ | ||
87 | lhui r11, r1, 32 /* Pos0 */ | ||
88 | mul r9, r20, r27 /* A * H */ | ||
89 | mul r10, r21, r26 /* B * G */ | ||
90 | mul r7, r22, r25 /* C * F */ | ||
91 | mul r8, r23, r24 /* D * E */ | ||
92 | add r9, r9, r11 | ||
93 | add r9, r9, r10 | ||
94 | add r9, r9, r7 | ||
95 | add r9, r9, r8 | ||
96 | sext16 r9, r9 /* Sign extend the MSB */ | ||
97 | shi r9, r1, 32 | ||
98 | |||
99 | /* Move results to r3 and r4 */ | ||
100 | lhui r3, r1, 32 | ||
101 | add r3, r3, r12 /* fold the saved carry (r12) into the half-word at offset 32 */ | ||
102 | shi r3, r1, 32 | ||
103 | lwi r3, r1, 32 /* Hi Part */ | ||
104 | lwi r4, r1, 36 /* Lo Part */ | ||
105 | |||
106 | /* Restore Callee saved registers */ | ||
107 | lw r20, r1, r0 | ||
108 | lwi r21, r1, 4 | ||
109 | lwi r22, r1, 8 | ||
110 | lwi r23, r1, 12 | ||
111 | lwi r24, r1, 16 | ||
112 | lwi r25, r1, 20 | ||
113 | lwi r26, r1, 24 | ||
114 | lwi r27, r1, 28 | ||
115 | |||
116 | /* Restore Frame and return */ | ||
117 | rtsd r15, 8 /* NOTE(review): the addi below presumably executes in the rtsd delay slot - confirm against the MicroBlaze ISA */ | ||
118 | addi r1, r1, 40 | ||
119 | |||
120 | .size __muldi3, . - __muldi3 | ||
121 | .end __muldi3 | ||
diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c new file mode 100644 index 000000000000..d4860e154d29 --- /dev/null +++ b/arch/microblaze/lib/muldi3.c | |||
@@ -0,0 +1,60 @@ | |||
1 | #include <linux/module.h> | ||
2 | |||
3 | #include "libgcc.h" | ||
4 | |||
5 | #define DWtype long long /* double-word type: operands and result of __muldi3 */ | ||
6 | #define UWtype unsigned long /* unsigned single word; NOTE(review): W_TYPE_SIZE below assumes this is 32 bits wide on the target - confirm */ | ||
7 | #define UHWtype unsigned short /* unsigned half word; holds the halves produced by __ll_lowpart/__ll_highpart in umul_ppmm */ | ||
8 | |||
9 | #define W_TYPE_SIZE 32 /* word width in bits used by the __ll_* helpers */ | ||
10 | |||
11 | #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) /* 1 << 16: numeric base of one half word */ | ||
12 | #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) /* low W_TYPE_SIZE / 2 bits of t */ | ||
13 | #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* t shifted right by W_TYPE_SIZE / 2 bits */ | ||
14 | |||
15 | /* If we still don't have umul_ppmm, define it using plain C: umul_ppmm(w1, w0, u, v) multiplies the words u and v via four half-word partial products and stores the high word of the product in w1 and the low word in w0. */ | ||
16 | #if !defined(umul_ppmm) | ||
17 | #define umul_ppmm(w1, w0, u, v) \ | ||
18 | do { \ | ||
19 | UWtype __x0, __x1, __x2, __x3; \ | ||
20 | UHWtype __ul, __vl, __uh, __vh; \ | ||
21 | \ | ||
22 | __ul = __ll_lowpart(u); \ | ||
23 | __uh = __ll_highpart(u); \ | ||
24 | __vl = __ll_lowpart(v); \ | ||
25 | __vh = __ll_highpart(v); \ | ||
26 | \ | ||
27 | __x0 = (UWtype) __ul * __vl; /* low(u) * low(v) */ \ | ||
28 | __x1 = (UWtype) __ul * __vh; /* low(u) * high(v) */ \ | ||
29 | __x2 = (UWtype) __uh * __vl; /* high(u) * low(v) */ \ | ||
30 | __x3 = (UWtype) __uh * __vh; /* high(u) * high(v) */ \ | ||
31 | \ | ||
32 | __x1 += __ll_highpart(__x0); /* this can't give carry */\ | ||
33 | __x1 += __x2; /* but this indeed can */ \ | ||
34 | if (__x1 < __x2) /* did we get it? unsigned wrap-around means the sum carried */ \ | ||
35 | __x3 += __ll_B; /* yes, add it in the proper pos: the carry has weight __ll_B in the high word */ \ | ||
36 | \ | ||
37 | (w1) = __x3 + __ll_highpart(__x1); /* high word of the product */ \ | ||
38 | (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\ | ||
39 | } while (0) | ||
40 | #endif | ||
41 | |||
42 | #if !defined(__umulsidi3) /* fallback: multiply two words into a double word using umul_ppmm */ | ||
43 | #define __umulsidi3(u, v) ({ \ | ||
44 | DWunion __w; /* DWunion is not defined in this file; presumably it comes from "libgcc.h" - confirm */ \ | ||
45 | umul_ppmm(__w.s.high, __w.s.low, u, v); /* fill the high and low words of __w */ \ | ||
46 | __w.ll; /* value of the statement expression: the combined double word */ \ | ||
47 | }) | ||
48 | #endif | ||
49 | |||
50 | DWtype __muldi3(DWtype u, DWtype v) /* double-word multiply: full product of the two low words, plus both cross products added into the high word */ | ||
51 | { | ||
52 | const DWunion uu = {.ll = u}; /* view u as its .s.high / .s.low words */ | ||
53 | const DWunion vv = {.ll = v}; /* view v as its .s.high / .s.low words */ | ||
54 | DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)}; /* double-word product of the two low words */ | ||
55 | |||
56 | w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high /* the cross products only affect the high word; */ | ||
57 | + (UWtype) uu.s.high * (UWtype) vv.s.low); /* high(u) * high(v) is never computed, as it lies wholly above the double-word result */ | ||
58 | |||
59 | return w.ll; | ||
60 | } | ||