aboutsummaryrefslogtreecommitdiffstats
path: root/arch/microblaze/lib/muldi3.S
diff options
context:
space:
mode:
authorMichal Simek <monstr@monstr.eu>2010-08-12 08:28:53 -0400
committerMichal Simek <monstr@monstr.eu>2010-10-21 01:51:42 -0400
commit4e07dba7cb8c9c76a52d0e32b69f13bb583a9674 (patch)
tree2f12579e642d5eea3c28f4b7c51a35a465b5e2a2 /arch/microblaze/lib/muldi3.S
parentcec051671dc2bb72cc2870aa149d7101ea494b8b (diff)
microblaze: Add libgcc function directly to kernel
Replaced libgcc functions with asm optimized implementation. Signed-off-by: Michal Simek <monstr@monstr.eu>
Diffstat (limited to 'arch/microblaze/lib/muldi3.S')
-rw-r--r--arch/microblaze/lib/muldi3.S121
1 files changed, 121 insertions, 0 deletions
diff --git a/arch/microblaze/lib/muldi3.S b/arch/microblaze/lib/muldi3.S
new file mode 100644
index 00000000000..ceeaa8c407f
--- /dev/null
+++ b/arch/microblaze/lib/muldi3.S
@@ -0,0 +1,121 @@
1#include <linux/linkage.h>
2
3/*
4 * Multiply operation for 64 bit integers, for devices with hard multiply
5 * Input : Operand1[H] in Reg r5
6 * Operand1[L] in Reg r6
7 * Operand2[H] in Reg r7
8 * Operand2[L] in Reg r8
9 * Output: Result[H] in Reg r3
10 * Result[L] in Reg r4
11 *
12 * Explaination:
13 *
14 * Both the input numbers are divided into 16 bit number as follows
15 * op1 = A B C D
16 * op2 = E F G H
17 * result = D * H
18 * + (C * H + D * G) << 16
19 * + (B * H + C * G + D * F) << 32
20 * + (A * H + B * G + C * F + D * E) << 48
21 *
22 * Only 64 bits of the output are considered
23 */
24
25 .text
26 .globl __muldi3
27 .type __muldi3, @function
28 .ent __muldi3
29
30__muldi3:
31 addi r1, r1, -40
32
33/* Save the input operands on the caller's stack */
34 swi r5, r1, 44
35 swi r6, r1, 48
36 swi r7, r1, 52
37 swi r8, r1, 56
38
39/* Store all the callee saved registers */
40 sw r20, r1, r0
41 swi r21, r1, 4
42 swi r22, r1, 8
43 swi r23, r1, 12
44 swi r24, r1, 16
45 swi r25, r1, 20
46 swi r26, r1, 24
47 swi r27, r1, 28
48
49/* Load all the 16 bit values for A thru H */
50 lhui r20, r1, 44 /* A */
51 lhui r21, r1, 46 /* B */
52 lhui r22, r1, 48 /* C */
53 lhui r23, r1, 50 /* D */
54 lhui r24, r1, 52 /* E */
55 lhui r25, r1, 54 /* F */
56 lhui r26, r1, 56 /* G */
57 lhui r27, r1, 58 /* H */
58
59/* D * H ==> LSB of the result on stack ==> Store1 */
60 mul r9, r23, r27
61 swi r9, r1, 36 /* Pos2 and Pos3 */
62
63/* Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 */
64/* Store the carry generated in position 2 for Pos 3 */
65 lhui r11, r1, 36 /* Pos2 */
66 mul r9, r22, r27 /* C * H */
67 mul r10, r23, r26 /* D * G */
68 add r9, r9, r10
69 addc r12, r0, r0
70 add r9, r9, r11
71 addc r12, r12, r0 /* Store the Carry */
72 shi r9, r1, 36 /* Store Pos2 */
73 swi r9, r1, 32
74 lhui r11, r1, 32
75 shi r11, r1, 34 /* Store Pos1 */
76
77/* Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 */
78 mul r9, r21, r27 /* B * H */
79 mul r10, r22, r26 /* C * G */
80 mul r7, r23, r25 /* D * F */
81 add r9, r9, r11
82 add r9, r9, r10
83 add r9, r9, r7
84 swi r9, r1, 32 /* Pos0 and Pos1 */
85
86/* Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0 */
87 lhui r11, r1, 32 /* Pos0 */
88 mul r9, r20, r27 /* A * H */
89 mul r10, r21, r26 /* B * G */
90 mul r7, r22, r25 /* C * F */
91 mul r8, r23, r24 /* D * E */
92 add r9, r9, r11
93 add r9, r9, r10
94 add r9, r9, r7
95 add r9, r9, r8
96 sext16 r9, r9 /* Sign extend the MSB */
97 shi r9, r1, 32
98
99/* Move results to r3 and r4 */
100 lhui r3, r1, 32
101 add r3, r3, r12
102 shi r3, r1, 32
103 lwi r3, r1, 32 /* Hi Part */
104 lwi r4, r1, 36 /* Lo Part */
105
106/* Restore Callee saved registers */
107 lw r20, r1, r0
108 lwi r21, r1, 4
109 lwi r22, r1, 8
110 lwi r23, r1, 12
111 lwi r24, r1, 16
112 lwi r25, r1, 20
113 lwi r26, r1, 24
114 lwi r27, r1, 28
115
116/* Restore Frame and return */
117 rtsd r15, 8
118 addi r1, r1, 40
119
120.size __muldi3, . - __muldi3
121.end __muldi3