diff options
author | Michal Simek <monstr@monstr.eu> | 2010-08-12 08:28:53 -0400 |
---|---|---|
committer | Michal Simek <monstr@monstr.eu> | 2010-10-21 01:51:42 -0400 |
commit | 4e07dba7cb8c9c76a52d0e32b69f13bb583a9674 (patch) | |
tree | 2f12579e642d5eea3c28f4b7c51a35a465b5e2a2 /arch/microblaze/lib/muldi3.S | |
parent | cec051671dc2bb72cc2870aa149d7101ea494b8b (diff) |
microblaze: Add libgcc function directly to kernel
Replaced libgcc functions with asm optimized implementation.
Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch/microblaze/lib/muldi3.S')
-rw-r--r-- | arch/microblaze/lib/muldi3.S | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S new file mode 100644 index 00000000000..ceeaa8c407f --- /dev/null +++ b/arch/microblaze/lib/muldi3.S | |||
@@ -0,0 +1,121 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | |||
3 | /* | ||
4 | * Multiply operation for 64 bit integers, for devices with hard multiply | ||
5 | * Input : Operand1[H] in Reg r5 | ||
6 | * Operand1[L] in Reg r6 | ||
7 | * Operand2[H] in Reg r7 | ||
8 | * Operand2[L] in Reg r8 | ||
9 | * Output: Result[H] in Reg r3 | ||
10 | * Result[L] in Reg r4 | ||
11 | * | ||
12 | * Explaination: | ||
13 | * | ||
14 | * Both the input numbers are divided into 16 bit number as follows | ||
15 | * op1 = A B C D | ||
16 | * op2 = E F G H | ||
17 | * result = D * H | ||
18 | * + (C * H + D * G) << 16 | ||
19 | * + (B * H + C * G + D * F) << 32 | ||
20 | * + (A * H + B * G + C * F + D * E) << 48 | ||
21 | * | ||
22 | * Only 64 bits of the output are considered | ||
23 | */ | ||
24 | |||
25 | .text | ||
26 | .globl __muldi3 | ||
27 | .type __muldi3, @function | ||
28 | .ent __muldi3 | ||
29 | |||
30 | __muldi3: | ||
31 | addi r1, r1, -40 | ||
32 | |||
33 | /* Save the input operands on the caller's stack */ | ||
34 | swi r5, r1, 44 | ||
35 | swi r6, r1, 48 | ||
36 | swi r7, r1, 52 | ||
37 | swi r8, r1, 56 | ||
38 | |||
39 | /* Store all the callee saved registers */ | ||
40 | sw r20, r1, r0 | ||
41 | swi r21, r1, 4 | ||
42 | swi r22, r1, 8 | ||
43 | swi r23, r1, 12 | ||
44 | swi r24, r1, 16 | ||
45 | swi r25, r1, 20 | ||
46 | swi r26, r1, 24 | ||
47 | swi r27, r1, 28 | ||
48 | |||
49 | /* Load all the 16 bit values for A thru H */ | ||
50 | lhui r20, r1, 44 /* A */ | ||
51 | lhui r21, r1, 46 /* B */ | ||
52 | lhui r22, r1, 48 /* C */ | ||
53 | lhui r23, r1, 50 /* D */ | ||
54 | lhui r24, r1, 52 /* E */ | ||
55 | lhui r25, r1, 54 /* F */ | ||
56 | lhui r26, r1, 56 /* G */ | ||
57 | lhui r27, r1, 58 /* H */ | ||
58 | |||
59 | /* D * H ==> LSB of the result on stack ==> Store1 */ | ||
60 | mul r9, r23, r27 | ||
61 | swi r9, r1, 36 /* Pos2 and Pos3 */ | ||
62 | |||
63 | /* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */ | ||
64 | /* Store the carry generated in position 2 for Pos 3 */ | ||
65 | lhui r11, r1, 36 /* Pos2 */ | ||
66 | mul r9, r22, r27 /* C * H */ | ||
67 | mul r10, r23, r26 /* D * G */ | ||
68 | add r9, r9, r10 | ||
69 | addc r12, r0, r0 | ||
70 | add r9, r9, r11 | ||
71 | addc r12, r12, r0 /* Store the Carry */ | ||
72 | shi r9, r1, 36 /* Store Pos2 */ | ||
73 | swi r9, r1, 32 | ||
74 | lhui r11, r1, 32 | ||
75 | shi r11, r1, 34 /* Store Pos1 */ | ||
76 | |||
77 | /* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */ | ||
78 | mul r9, r21, r27 /* B * H */ | ||
79 | mul r10, r22, r26 /* C * G */ | ||
80 | mul r7, r23, r25 /* D * F */ | ||
81 | add r9, r9, r11 | ||
82 | add r9, r9, r10 | ||
83 | add r9, r9, r7 | ||
84 | swi r9, r1, 32 /* Pos0 and Pos1 */ | ||
85 | |||
86 | /* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */ | ||
87 | lhui r11, r1, 32 /* Pos0 */ | ||
88 | mul r9, r20, r27 /* A * H */ | ||
89 | mul r10, r21, r26 /* B * G */ | ||
90 | mul r7, r22, r25 /* C * F */ | ||
91 | mul r8, r23, r24 /* D * E */ | ||
92 | add r9, r9, r11 | ||
93 | add r9, r9, r10 | ||
94 | add r9, r9, r7 | ||
95 | add r9, r9, r8 | ||
96 | sext16 r9, r9 /* Sign extend the MSB */ | ||
97 | shi r9, r1, 32 | ||
98 | |||
99 | /* Move results to r3 and r4 */ | ||
100 | lhui r3, r1, 32 | ||
101 | add r3, r3, r12 | ||
102 | shi r3, r1, 32 | ||
103 | lwi r3, r1, 32 /* Hi Part */ | ||
104 | lwi r4, r1, 36 /* Lo Part */ | ||
105 | |||
106 | /* Restore Callee saved registers */ | ||
107 | lw r20, r1, r0 | ||
108 | lwi r21, r1, 4 | ||
109 | lwi r22, r1, 8 | ||
110 | lwi r23, r1, 12 | ||
111 | lwi r24, r1, 16 | ||
112 | lwi r25, r1, 20 | ||
113 | lwi r26, r1, 24 | ||
114 | lwi r27, r1, 28 | ||
115 | |||
116 | /* Restore Frame and return */ | ||
117 | rtsd r15, 8 | ||
118 | addi r1, r1, 40 | ||
119 | |||
120 | .size __muldi3, . - __muldi3 | ||
121 | .end __muldi3 | ||