diff options
Diffstat (limited to 'arch/ia64/lib/xor.S')
-rw-r--r-- | arch/ia64/lib/xor.S | 184 |
1 files changed, 184 insertions, 0 deletions
diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S new file mode 100644 index 000000000000..54e3f7eab8e9 --- /dev/null +++ b/arch/ia64/lib/xor.S | |||
@@ -0,0 +1,184 @@ | |||
1 | /* | ||
2 | * arch/ia64/lib/xor.S | ||
3 | * | ||
4 | * Optimized RAID-5 checksumming functions for IA-64. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * (for example /usr/src/linux/COPYING); if not, write to the Free | ||
13 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
14 | */ | ||
15 | |||
16 | #include <asm/asmmacro.h> | ||
17 | |||
/*
 * xor_ia64_2: XOR two buffers together, 8 bytes per loop iteration, and
 * write the result back over the first buffer.
 *
 *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, so it is presumably a byte count (TODO confirm with caller)
 *   in1 - first source buffer and also the destination (r8 and r16 both
 *         start at in1)
 *   in2 - second source buffer
 *
 * The loop is software-pipelined with br.ctop over 6+2 stages:
 *   stage 0   (p[0])   load one word from each source
 *   stage 6   (p[6])   XOR the words that were loaded six rotations earlier
 *   stage 6+1 (p[6+1]) store the XOR result produced one rotation earlier
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is loaded into ar.lc. Any remainder
 * in0 & 7 is ignored. Callers presumably always pass a positive multiple
 * of 8 - verify.
 */
18 | GLOBAL_ENTRY(xor_ia64_2) | ||
19 | .prologue | ||
20 | .fframe 0 | ||
21 | .save ar.pfs, r31 | ||
/* 3 inputs, 0 locals, 13 outputs, 16 rotating registers; old ar.pfs kept in r31 */
22 | alloc r31 = ar.pfs, 3, 0, 13, 16 | ||
23 | .save ar.lc, r30 | ||
/* ar.lc and pr are overwritten for the loop below; keep copies in r30/r29 */
24 | mov r30 = ar.lc | ||
25 | .save pr, r29 | ||
26 | mov r29 = pr | ||
27 | ;; | ||
28 | .body | ||
/* r8 = store pointer (the result overwrites the in1 buffer) */
29 | mov r8 = in1 | ||
/* epilogue count = number of pipeline stages (6+2) */
30 | mov ar.ec = 6 + 2 | ||
/* in0 = number of 8-byte words */
31 | shr in0 = in0, 3 | ||
32 | ;; | ||
/* loop count register holds (iterations - 1) */
33 | adds in0 = -1, in0 | ||
/* r16/r17 = load pointers for the two sources */
34 | mov r16 = in1 | ||
35 | mov r17 = in2 | ||
36 | ;; | ||
37 | mov ar.lc = in0 | ||
/* set only bit 16 of the rotating predicates, i.e. enable stage 0 (p[0]) only */
38 | mov pr.rot = 1 << 16 | ||
39 | ;; | ||
/* name the rotating registers: 7 + 7 + 2 = 16, matching the alloc above */
40 | .rotr s1[6+1], s2[6+1], d[2] | ||
41 | .rotp p[6+2] | ||
42 | 0: | ||
/* stage 0: fetch the next word from each source, post-incrementing by 8 */
43 | (p[0]) ld8.nta s1[0] = [r16], 8 | ||
44 | (p[0]) ld8.nta s2[0] = [r17], 8 | ||
/* stage 6: s1[6]/s2[6] are the words loaded six rotations ago */
45 | (p[6]) xor d[0] = s1[6], s2[6] | ||
/* stage 7: d[1] is the d[0] written one rotation ago */
46 | (p[6+1])st8.nta [r8] = d[1], 8 | ||
/* NOTE(review): nop.f is presumably bundle padding - confirm */
47 | nop.f 0 | ||
/* rotate registers/predicates and loop until the pipeline has drained */
48 | br.ctop.dptk.few 0b | ||
49 | ;; | ||
/* restore the loop count and predicate registers saved in the prologue */
50 | mov ar.lc = r30 | ||
51 | mov pr = r29, -1 | ||
52 | br.ret.sptk.few rp | ||
53 | END(xor_ia64_2) | ||
54 | |||
/*
 * xor_ia64_3: XOR three buffers together, 8 bytes per loop iteration, and
 * write the result back over the first buffer.
 *
 *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, so it is presumably a byte count (TODO confirm with caller)
 *   in1 - first source buffer and also the destination (r8 and r16 both
 *         start at in1)
 *   in2 - second source buffer
 *   in3 - third source buffer
 *
 * The loop is software-pipelined with br.ctop over 6+2 stages. Loads are
 * issued in stage 0, the two XORs run in stage 6 on the words loaded six
 * rotations earlier, and the store in stage 6+1 writes the result produced
 * one rotation earlier.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is loaded into ar.lc. Any remainder
 * in0 & 7 is ignored. Callers presumably always pass a positive multiple
 * of 8 - verify.
 */
55 | GLOBAL_ENTRY(xor_ia64_3) | ||
56 | .prologue | ||
57 | .fframe 0 | ||
58 | .save ar.pfs, r31 | ||
/* 4 inputs, 0 locals, 20 outputs, 24 rotating registers; old ar.pfs kept in r31 */
59 | alloc r31 = ar.pfs, 4, 0, 20, 24 | ||
60 | .save ar.lc, r30 | ||
/* ar.lc and pr are overwritten for the loop below; keep copies in r30/r29 */
61 | mov r30 = ar.lc | ||
62 | .save pr, r29 | ||
63 | mov r29 = pr | ||
64 | ;; | ||
65 | .body | ||
/* r8 = store pointer (the result overwrites the in1 buffer) */
66 | mov r8 = in1 | ||
/* epilogue count = number of pipeline stages (6+2) */
67 | mov ar.ec = 6 + 2 | ||
/* in0 = number of 8-byte words */
68 | shr in0 = in0, 3 | ||
69 | ;; | ||
/* loop count register holds (iterations - 1) */
70 | adds in0 = -1, in0 | ||
/* r16/r17/r18 = load pointers for the three sources */
71 | mov r16 = in1 | ||
72 | mov r17 = in2 | ||
73 | ;; | ||
74 | mov r18 = in3 | ||
75 | mov ar.lc = in0 | ||
/* set only bit 16 of the rotating predicates, i.e. enable stage 0 (p[0]) only */
76 | mov pr.rot = 1 << 16 | ||
77 | ;; | ||
/* name the rotating registers: 3 * 7 + 2 = 23 of the 24 allocated above */
78 | .rotr s1[6+1], s2[6+1], s3[6+1], d[2] | ||
79 | .rotp p[6+2] | ||
80 | 0: | ||
/* stage 0: fetch the next word from the first two sources */
81 | (p[0]) ld8.nta s1[0] = [r16], 8 | ||
82 | (p[0]) ld8.nta s2[0] = [r17], 8 | ||
/* stage 6: partial result from the words loaded six rotations ago */
83 | (p[6]) xor d[0] = s1[6], s2[6] | ||
/* stop: the second xor below reads the d[0] written just above */
84 | ;; | ||
85 | (p[0]) ld8.nta s3[0] = [r18], 8 | ||
/* stage 7: d[1] is the finished d[0] from one rotation ago */
86 | (p[6+1])st8.nta [r8] = d[1], 8 | ||
/* stage 6: fold the third source into the partial result */
87 | (p[6]) xor d[0] = d[0], s3[6] | ||
/* rotate registers/predicates and loop until the pipeline has drained */
88 | br.ctop.dptk.few 0b | ||
89 | ;; | ||
/* restore the loop count and predicate registers saved in the prologue */
90 | mov ar.lc = r30 | ||
91 | mov pr = r29, -1 | ||
92 | br.ret.sptk.few rp | ||
93 | END(xor_ia64_3) | ||
94 | |||
/*
 * xor_ia64_4: XOR four buffers together, 8 bytes per loop iteration, and
 * write the result back over the first buffer.
 *
 *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, so it is presumably a byte count (TODO confirm with caller)
 *   in1 - first source buffer and also the destination (r8 and r16 both
 *         start at in1)
 *   in2, in3, in4 - remaining source buffers
 *
 * The loop is software-pipelined with br.ctop over 6+2 stages. Loads are
 * issued in stage 0, the XORs run in stage 6 on the words loaded six
 * rotations earlier, and the store in stage 6+1 writes the result produced
 * one rotation earlier. r20 is a non-rotating temporary that is written
 * and consumed within the same iteration.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is loaded into ar.lc. Any remainder
 * in0 & 7 is ignored. Callers presumably always pass a positive multiple
 * of 8 - verify.
 */
95 | GLOBAL_ENTRY(xor_ia64_4) | ||
96 | .prologue | ||
97 | .fframe 0 | ||
98 | .save ar.pfs, r31 | ||
/* 5 inputs, 0 locals, 27 outputs, 32 rotating registers; old ar.pfs kept in r31 */
99 | alloc r31 = ar.pfs, 5, 0, 27, 32 | ||
100 | .save ar.lc, r30 | ||
/* ar.lc and pr are overwritten for the loop below; keep copies in r30/r29 */
101 | mov r30 = ar.lc | ||
102 | .save pr, r29 | ||
103 | mov r29 = pr | ||
104 | ;; | ||
105 | .body | ||
/* r8 = store pointer (the result overwrites the in1 buffer) */
106 | mov r8 = in1 | ||
/* epilogue count = number of pipeline stages (6+2) */
107 | mov ar.ec = 6 + 2 | ||
/* in0 = number of 8-byte words */
108 | shr in0 = in0, 3 | ||
109 | ;; | ||
/* loop count register holds (iterations - 1) */
110 | adds in0 = -1, in0 | ||
/* r16..r19 = load pointers for the four sources */
111 | mov r16 = in1 | ||
112 | mov r17 = in2 | ||
113 | ;; | ||
114 | mov r18 = in3 | ||
115 | mov ar.lc = in0 | ||
/* set only bit 16 of the rotating predicates, i.e. enable stage 0 (p[0]) only */
116 | mov pr.rot = 1 << 16 | ||
117 | mov r19 = in4 | ||
118 | ;; | ||
/* name the rotating registers: 4 * 7 + 2 = 30 of the 32 allocated above */
119 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] | ||
120 | .rotp p[6+2] | ||
121 | 0: | ||
/* stage 0 loads interleaved with the two independent stage 6 XORs */
122 | (p[0]) ld8.nta s1[0] = [r16], 8 | ||
123 | (p[0]) ld8.nta s2[0] = [r17], 8 | ||
124 | (p[6]) xor d[0] = s1[6], s2[6] | ||
125 | (p[0]) ld8.nta s3[0] = [r18], 8 | ||
126 | (p[0]) ld8.nta s4[0] = [r19], 8 | ||
/* second partial result goes to the static temporary r20 */
127 | (p[6]) xor r20 = s3[6], s4[6] | ||
/* stop: the xor below reads d[0] and r20 written just above */
128 | ;; | ||
/* stage 7: d[1] is the finished d[0] from one rotation ago */
129 | (p[6+1])st8.nta [r8] = d[1], 8 | ||
/* stage 6: combine the two partial results */
130 | (p[6]) xor d[0] = d[0], r20 | ||
/* rotate registers/predicates and loop until the pipeline has drained */
131 | br.ctop.dptk.few 0b | ||
132 | ;; | ||
/* restore the loop count and predicate registers saved in the prologue */
133 | mov ar.lc = r30 | ||
134 | mov pr = r29, -1 | ||
135 | br.ret.sptk.few rp | ||
136 | END(xor_ia64_4) | ||
137 | |||
/*
 * xor_ia64_5: XOR five buffers together, 8 bytes per loop iteration, and
 * write the result back over the first buffer.
 *
 *   in0 - length; it is shifted right by 3 to obtain the number of 8-byte
 *         words, so it is presumably a byte count (TODO confirm with caller)
 *   in1 - first source buffer and also the destination (r8 and r16 both
 *         start at in1)
 *   in2, in3, in4, in5 - remaining source buffers
 *
 * The loop is software-pipelined with br.ctop over 6+2 stages. Loads are
 * issued in stage 0, the XORs run in stage 6 on the words loaded six
 * rotations earlier, and the store in stage 6+1 writes the result produced
 * one rotation earlier. r21 is a non-rotating temporary that is written
 * and consumed within the same iteration.
 *
 * NOTE(review): there is no guard for in0 < 8. In that case
 * (in0 >> 3) - 1 is -1 and that value is loaded into ar.lc. Any remainder
 * in0 & 7 is ignored. Callers presumably always pass a positive multiple
 * of 8 - verify.
 */
138 | GLOBAL_ENTRY(xor_ia64_5) | ||
139 | .prologue | ||
140 | .fframe 0 | ||
141 | .save ar.pfs, r31 | ||
/* 6 inputs, 0 locals, 34 outputs, 40 rotating registers; old ar.pfs kept in r31 */
142 | alloc r31 = ar.pfs, 6, 0, 34, 40 | ||
143 | .save ar.lc, r30 | ||
/* ar.lc and pr are overwritten for the loop below; keep copies in r30/r29 */
144 | mov r30 = ar.lc | ||
145 | .save pr, r29 | ||
146 | mov r29 = pr | ||
147 | ;; | ||
148 | .body | ||
/* r8 = store pointer (the result overwrites the in1 buffer) */
149 | mov r8 = in1 | ||
/* epilogue count = number of pipeline stages (6+2) */
150 | mov ar.ec = 6 + 2 | ||
/* in0 = number of 8-byte words */
151 | shr in0 = in0, 3 | ||
152 | ;; | ||
/* loop count register holds (iterations - 1) */
153 | adds in0 = -1, in0 | ||
/* r16..r20 = load pointers for the five sources */
154 | mov r16 = in1 | ||
155 | mov r17 = in2 | ||
156 | ;; | ||
157 | mov r18 = in3 | ||
158 | mov ar.lc = in0 | ||
/* set only bit 16 of the rotating predicates, i.e. enable stage 0 (p[0]) only */
159 | mov pr.rot = 1 << 16 | ||
160 | mov r19 = in4 | ||
161 | mov r20 = in5 | ||
162 | ;; | ||
/* name the rotating registers: 5 * 7 + 2 = 37 of the 40 allocated above */
163 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] | ||
164 | .rotp p[6+2] | ||
165 | 0: | ||
/* stage 0 loads interleaved with the two independent stage 6 XORs */
166 | (p[0]) ld8.nta s1[0] = [r16], 8 | ||
167 | (p[0]) ld8.nta s2[0] = [r17], 8 | ||
168 | (p[6]) xor d[0] = s1[6], s2[6] | ||
169 | (p[0]) ld8.nta s3[0] = [r18], 8 | ||
170 | (p[0]) ld8.nta s4[0] = [r19], 8 | ||
/* second partial result goes to the static temporary r21 */
171 | (p[6]) xor r21 = s3[6], s4[6] | ||
/* stop: the xor below reads d[0] and r21 written just above */
172 | ;; | ||
173 | (p[0]) ld8.nta s5[0] = [r20], 8 | ||
/* stage 7: d[1] is the finished d[0] from one rotation ago */
174 | (p[6+1])st8.nta [r8] = d[1], 8 | ||
/* stage 6: combine the two partial results */
175 | (p[6]) xor d[0] = d[0], r21 | ||
/* stop: the final xor reads the d[0] written just above */
176 | ;; | ||
/* stage 6: fold in the fifth source to finish this word */
177 | (p[6]) xor d[0] = d[0], s5[6] | ||
/* NOTE(review): nop.f is presumably bundle padding - confirm */
178 | nop.f 0 | ||
/* rotate registers/predicates and loop until the pipeline has drained */
179 | br.ctop.dptk.few 0b | ||
180 | ;; | ||
/* restore the loop count and predicate registers saved in the prologue */
181 | mov ar.lc = r30 | ||
182 | mov pr = r29, -1 | ||
183 | br.ret.sptk.few rp | ||
184 | END(xor_ia64_5) | ||