aboutsummaryrefslogtreecommitdiffstats
path: root/arch/microblaze/lib
diff options
context:
space:
mode:
authorMichal Simek <monstr@monstr.eu>2010-12-07 05:55:06 -0500
committerMichal Simek <monstr@monstr.eu>2011-01-03 04:30:31 -0500
commit3370d82f3b3ff04d082a9c343a80019282e41261 (patch)
tree4466336416d76206c29cfb78b05c21648845b33c /arch/microblaze/lib
parent17b931468729df6921981700bf18c75609c2f6af (diff)
microblaze: Fix __muldi3 function for little-endian.
__muldi3 was written for big endian platforms. Code contained half word read/write instructions which are not compatible with little endian cpu. Asm __muldi3 implementation is replaced by C version. Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch/microblaze/lib')
-rw-r--r--arch/microblaze/lib/muldi3.S121
-rw-r--r--arch/microblaze/lib/muldi3.c60
2 files changed, 60 insertions, 121 deletions
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S
deleted file mode 100644
index ceeaa8c407f2..000000000000
--- a/arch/microblaze/lib/muldi3.S
+++ /dev/null
@@ -1,121 +0,0 @@
1#include <linux/linkage.h>
2
3/*
4 * Multiply operation for 64 bit integers, for devices with hard multiply
5 * Input : Operand1[H] in Reg r5
6 * Operand1[L] in Reg r6
7 * Operand2[H] in Reg r7
8 * Operand2[L] in Reg r8
9 * Output: Result[H] in Reg r3
10 * Result[L] in Reg r4
11 *
12 * Explaination:
13 *
14 * Both the input numbers are divided into 16 bit number as follows
15 * op1 = A B C D
16 * op2 = E F G H
17 * result = D * H
18 * + (C * H + D * G) << 16
19 * + (B * H + C * G + D * F) << 32
20 * + (A * H + B * G + C * F + D * E) << 48
21 *
22 * Only 64 bits of the output are considered
23 */
24
25 .text
26 .globl __muldi3
27 .type __muldi3, @function
28 .ent __muldi3
29
30__muldi3:
31 addi r1, r1, -40
32
33/* Save the input operands on the caller's stack */
34 swi r5, r1, 44
35 swi r6, r1, 48
36 swi r7, r1, 52
37 swi r8, r1, 56
38
39/* Store all the callee saved registers */
40 sw r20, r1, r0
41 swi r21, r1, 4
42 swi r22, r1, 8
43 swi r23, r1, 12
44 swi r24, r1, 16
45 swi r25, r1, 20
46 swi r26, r1, 24
47 swi r27, r1, 28
48
49/* Load all the 16 bit values for A thru H */
50 lhui r20, r1, 44 /* A */
51 lhui r21, r1, 46 /* B */
52 lhui r22, r1, 48 /* C */
53 lhui r23, r1, 50 /* D */
54 lhui r24, r1, 52 /* E */
55 lhui r25, r1, 54 /* F */
56 lhui r26, r1, 56 /* G */
57 lhui r27, r1, 58 /* H */
58
59/* D * H ==> LSB of the result on stack ==> Store1 */
60 mul r9, r23, r27
61 swi r9, r1, 36 /* Pos2 and Pos3 */
62
63/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
64/* Store the carry generated in position 2 for Pos 3 */
65 lhui r11, r1, 36 /* Pos2 */
66 mul r9, r22, r27 /* C * H */
67 mul r10, r23, r26 /* D * G */
68 add r9, r9, r10
69 addc r12, r0, r0
70 add r9, r9, r11
71 addc r12, r12, r0 /* Store the Carry */
72 shi r9, r1, 36 /* Store Pos2 */
73 swi r9, r1, 32
74 lhui r11, r1, 32
75 shi r11, r1, 34 /* Store Pos1 */
76
77/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
78 mul r9, r21, r27 /* B * H */
79 mul r10, r22, r26 /* C * G */
80 mul r7, r23, r25 /* D * F */
81 add r9, r9, r11
82 add r9, r9, r10
83 add r9, r9, r7
84 swi r9, r1, 32 /* Pos0 and Pos1 */
85
86/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
87 lhui r11, r1, 32 /* Pos0 */
88 mul r9, r20, r27 /* A * H */
89 mul r10, r21, r26 /* B * G */
90 mul r7, r22, r25 /* C * F */
91 mul r8, r23, r24 /* D * E */
92 add r9, r9, r11
93 add r9, r9, r10
94 add r9, r9, r7
95 add r9, r9, r8
96 sext16 r9, r9 /* Sign extend the MSB */
97 shi r9, r1, 32
98
99/* Move results to r3 and r4 */
100 lhui r3, r1, 32
101 add r3, r3, r12
102 shi r3, r1, 32
103 lwi r3, r1, 32 /* Hi Part */
104 lwi r4, r1, 36 /* Lo Part */
105
106/* Restore Callee saved registers */
107 lw r20, r1, r0
108 lwi r21, r1, 4
109 lwi r22, r1, 8
110 lwi r23, r1, 12
111 lwi r24, r1, 16
112 lwi r25, r1, 20
113 lwi r26, r1, 24
114 lwi r27, r1, 28
115
116/* Restore Frame and return */
117 rtsd r15, 8
118 addi r1, r1, 40
119
120.size __muldi3, . - __muldi3
121.end __muldi3
diff --git a/arch/microblaze/lib/muldi3.c b/arch/microblaze/lib/muldi3.c
new file mode 100644
index 000000000000..d4860e154d29
--- /dev/null
+++ b/arch/microblaze/lib/muldi3.c
@@ -0,0 +1,60 @@
1#include <linux/module.h>
2
3#include "libgcc.h"
4
5#define DWtype long long
6#define UWtype unsigned long
7#define UHWtype unsigned short
8
9#define W_TYPE_SIZE 32
10
11#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
12#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
13#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
14
15/* If we still don't have umul_ppmm, define it using plain C. */
16#if !defined(umul_ppmm)
17#define umul_ppmm(w1, w0, u, v) \
18 do { \
19 UWtype __x0, __x1, __x2, __x3; \
20 UHWtype __ul, __vl, __uh, __vh; \
21 \
22 __ul = __ll_lowpart(u); \
23 __uh = __ll_highpart(u); \
24 __vl = __ll_lowpart(v); \
25 __vh = __ll_highpart(v); \
26 \
27 __x0 = (UWtype) __ul * __vl; \
28 __x1 = (UWtype) __ul * __vh; \
29 __x2 = (UWtype) __uh * __vl; \
30 __x3 = (UWtype) __uh * __vh; \
31 \
32 __x1 += __ll_highpart(__x0); /* this can't give carry */\
33 __x1 += __x2; /* but this indeed can */ \
34 if (__x1 < __x2) /* did we get it? */ \
35 __x3 += __ll_B; /* yes, add it in the proper pos */ \
36 \
37 (w1) = __x3 + __ll_highpart(__x1); \
38 (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
39 } while (0)
40#endif
41
42#if !defined(__umulsidi3)
43#define __umulsidi3(u, v) ({ \
44 DWunion __w; \
45 umul_ppmm(__w.s.high, __w.s.low, u, v); \
46 __w.ll; \
47 })
48#endif
49
50DWtype __muldi3(DWtype u, DWtype v)
51{
52 const DWunion uu = {.ll = u};
53 const DWunion vv = {.ll = v};
54 DWunion w = {.ll = __umulsidi3(uu.s.low, vv.s.low)};
55
56 w.s.high += ((UWtype) uu.s.low * (UWtype) vv.s.high
57 + (UWtype) uu.s.high * (UWtype) vv.s.low);
58
59 return w.ll;
60}