diff options
Diffstat (limited to 'include/asm-alpha/xor.h')
-rw-r--r-- | include/asm-alpha/xor.h | 855 |
1 files changed, 855 insertions, 0 deletions
diff --git a/include/asm-alpha/xor.h b/include/asm-alpha/xor.h new file mode 100644 index 000000000000..5ee1c2bc0499 --- /dev/null +++ b/include/asm-alpha/xor.h | |||
@@ -0,0 +1,855 @@ | |||
1 | /* | ||
2 | * include/asm-alpha/xor.h | ||
3 | * | ||
4 | * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | * You should have received a copy of the GNU General Public License | ||
12 | * (for example /usr/src/linux/COPYING); if not, write to the Free | ||
13 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
14 | */ | ||
15 | |||
16 | extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *); | ||
17 | extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *, | ||
18 | unsigned long *); | ||
19 | extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *, | ||
20 | unsigned long *, unsigned long *); | ||
21 | extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *, | ||
22 | unsigned long *, unsigned long *, unsigned long *); | ||
23 | |||
24 | extern void xor_alpha_prefetch_2(unsigned long, unsigned long *, | ||
25 | unsigned long *); | ||
26 | extern void xor_alpha_prefetch_3(unsigned long, unsigned long *, | ||
27 | unsigned long *, unsigned long *); | ||
28 | extern void xor_alpha_prefetch_4(unsigned long, unsigned long *, | ||
29 | unsigned long *, unsigned long *, | ||
30 | unsigned long *); | ||
31 | extern void xor_alpha_prefetch_5(unsigned long, unsigned long *, | ||
32 | unsigned long *, unsigned long *, | ||
33 | unsigned long *, unsigned long *); | ||
34 | |||
35 | asm(" \n\ | ||
36 | .text \n\ | ||
37 | .align 3 \n\ | ||
38 | .ent xor_alpha_2 \n\ | ||
39 | xor_alpha_2: \n\ | ||
40 | .prologue 0 \n\ | ||
41 | srl $16, 6, $16 \n\ | ||
42 | .align 4 \n\ | ||
43 | 2: \n\ | ||
44 | ldq $0,0($17) \n\ | ||
45 | ldq $1,0($18) \n\ | ||
46 | ldq $2,8($17) \n\ | ||
47 | ldq $3,8($18) \n\ | ||
48 | \n\ | ||
49 | ldq $4,16($17) \n\ | ||
50 | ldq $5,16($18) \n\ | ||
51 | ldq $6,24($17) \n\ | ||
52 | ldq $7,24($18) \n\ | ||
53 | \n\ | ||
54 | ldq $19,32($17) \n\ | ||
55 | ldq $20,32($18) \n\ | ||
56 | ldq $21,40($17) \n\ | ||
57 | ldq $22,40($18) \n\ | ||
58 | \n\ | ||
59 | ldq $23,48($17) \n\ | ||
60 | ldq $24,48($18) \n\ | ||
61 | ldq $25,56($17) \n\ | ||
62 | xor $0,$1,$0 # 7 cycles from $1 load \n\ | ||
63 | \n\ | ||
64 | ldq $27,56($18) \n\ | ||
65 | xor $2,$3,$2 \n\ | ||
66 | stq $0,0($17) \n\ | ||
67 | xor $4,$5,$4 \n\ | ||
68 | \n\ | ||
69 | stq $2,8($17) \n\ | ||
70 | xor $6,$7,$6 \n\ | ||
71 | stq $4,16($17) \n\ | ||
72 | xor $19,$20,$19 \n\ | ||
73 | \n\ | ||
74 | stq $6,24($17) \n\ | ||
75 | xor $21,$22,$21 \n\ | ||
76 | stq $19,32($17) \n\ | ||
77 | xor $23,$24,$23 \n\ | ||
78 | \n\ | ||
79 | stq $21,40($17) \n\ | ||
80 | xor $25,$27,$25 \n\ | ||
81 | stq $23,48($17) \n\ | ||
82 | subq $16,1,$16 \n\ | ||
83 | \n\ | ||
84 | stq $25,56($17) \n\ | ||
85 | addq $17,64,$17 \n\ | ||
86 | addq $18,64,$18 \n\ | ||
87 | bgt $16,2b \n\ | ||
88 | \n\ | ||
89 | ret \n\ | ||
90 | .end xor_alpha_2 \n\ | ||
91 | \n\ | ||
92 | .align 3 \n\ | ||
93 | .ent xor_alpha_3 \n\ | ||
94 | xor_alpha_3: \n\ | ||
95 | .prologue 0 \n\ | ||
96 | srl $16, 6, $16 \n\ | ||
97 | .align 4 \n\ | ||
98 | 3: \n\ | ||
99 | ldq $0,0($17) \n\ | ||
100 | ldq $1,0($18) \n\ | ||
101 | ldq $2,0($19) \n\ | ||
102 | ldq $3,8($17) \n\ | ||
103 | \n\ | ||
104 | ldq $4,8($18) \n\ | ||
105 | ldq $6,16($17) \n\ | ||
106 | ldq $7,16($18) \n\ | ||
107 | ldq $21,24($17) \n\ | ||
108 | \n\ | ||
109 | ldq $22,24($18) \n\ | ||
110 | ldq $24,32($17) \n\ | ||
111 | ldq $25,32($18) \n\ | ||
112 | ldq $5,8($19) \n\ | ||
113 | \n\ | ||
114 | ldq $20,16($19) \n\ | ||
115 | ldq $23,24($19) \n\ | ||
116 | ldq $27,32($19) \n\ | ||
117 | nop \n\ | ||
118 | \n\ | ||
119 | xor $0,$1,$1 # 8 cycles from $0 load \n\ | ||
120 | xor $3,$4,$4 # 6 cycles from $4 load \n\ | ||
121 | xor $6,$7,$7 # 6 cycles from $7 load \n\ | ||
122 | xor $21,$22,$22 # 5 cycles from $22 load \n\ | ||
123 | \n\ | ||
124 | xor $1,$2,$2 # 9 cycles from $2 load \n\ | ||
125 | xor $24,$25,$25 # 5 cycles from $25 load \n\ | ||
126 | stq $2,0($17) \n\ | ||
127 | xor $4,$5,$5 # 6 cycles from $5 load \n\ | ||
128 | \n\ | ||
129 | stq $5,8($17) \n\ | ||
130 | xor $7,$20,$20 # 7 cycles from $20 load \n\ | ||
131 | stq $20,16($17) \n\ | ||
132 | xor $22,$23,$23 # 7 cycles from $23 load \n\ | ||
133 | \n\ | ||
134 | stq $23,24($17) \n\ | ||
135 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | ||
136 | stq $27,32($17) \n\ | ||
137 | nop \n\ | ||
138 | \n\ | ||
139 | ldq $0,40($17) \n\ | ||
140 | ldq $1,40($18) \n\ | ||
141 | ldq $3,48($17) \n\ | ||
142 | ldq $4,48($18) \n\ | ||
143 | \n\ | ||
144 | ldq $6,56($17) \n\ | ||
145 | ldq $7,56($18) \n\ | ||
146 | ldq $2,40($19) \n\ | ||
147 | ldq $5,48($19) \n\ | ||
148 | \n\ | ||
149 | ldq $20,56($19) \n\ | ||
150 | xor $0,$1,$1 # 4 cycles from $1 load \n\ | ||
151 | xor $3,$4,$4 # 5 cycles from $4 load \n\ | ||
152 | xor $6,$7,$7 # 5 cycles from $7 load \n\ | ||
153 | \n\ | ||
154 | xor $1,$2,$2 # 4 cycles from $2 load \n\ | ||
155 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | ||
156 | stq $2,40($17) \n\ | ||
157 | xor $7,$20,$20 # 4 cycles from $20 load \n\ | ||
158 | \n\ | ||
159 | stq $5,48($17) \n\ | ||
160 | subq $16,1,$16 \n\ | ||
161 | stq $20,56($17) \n\ | ||
162 | addq $19,64,$19 \n\ | ||
163 | \n\ | ||
164 | addq $18,64,$18 \n\ | ||
165 | addq $17,64,$17 \n\ | ||
166 | bgt $16,3b \n\ | ||
167 | ret \n\ | ||
168 | .end xor_alpha_3 \n\ | ||
169 | \n\ | ||
170 | .align 3 \n\ | ||
171 | .ent xor_alpha_4 \n\ | ||
172 | xor_alpha_4: \n\ | ||
173 | .prologue 0 \n\ | ||
174 | srl $16, 6, $16 \n\ | ||
175 | .align 4 \n\ | ||
176 | 4: \n\ | ||
177 | ldq $0,0($17) \n\ | ||
178 | ldq $1,0($18) \n\ | ||
179 | ldq $2,0($19) \n\ | ||
180 | ldq $3,0($20) \n\ | ||
181 | \n\ | ||
182 | ldq $4,8($17) \n\ | ||
183 | ldq $5,8($18) \n\ | ||
184 | ldq $6,8($19) \n\ | ||
185 | ldq $7,8($20) \n\ | ||
186 | \n\ | ||
187 | ldq $21,16($17) \n\ | ||
188 | ldq $22,16($18) \n\ | ||
189 | ldq $23,16($19) \n\ | ||
190 | ldq $24,16($20) \n\ | ||
191 | \n\ | ||
192 | ldq $25,24($17) \n\ | ||
193 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | ||
194 | ldq $27,24($18) \n\ | ||
195 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | ||
196 | \n\ | ||
197 | ldq $0,24($19) \n\ | ||
198 | xor $1,$3,$3 \n\ | ||
199 | ldq $1,24($20) \n\ | ||
200 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | ||
201 | \n\ | ||
202 | stq $3,0($17) \n\ | ||
203 | xor $6,$7,$7 \n\ | ||
204 | xor $21,$22,$22 # 7 cycles from $22 load \n\ | ||
205 | xor $5,$7,$7 \n\ | ||
206 | \n\ | ||
207 | stq $7,8($17) \n\ | ||
208 | xor $23,$24,$24 # 7 cycles from $24 load \n\ | ||
209 | ldq $2,32($17) \n\ | ||
210 | xor $22,$24,$24 \n\ | ||
211 | \n\ | ||
212 | ldq $3,32($18) \n\ | ||
213 | ldq $4,32($19) \n\ | ||
214 | ldq $5,32($20) \n\ | ||
215 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | ||
216 | \n\ | ||
217 | ldq $6,40($17) \n\ | ||
218 | ldq $7,40($18) \n\ | ||
219 | ldq $21,40($19) \n\ | ||
220 | ldq $22,40($20) \n\ | ||
221 | \n\ | ||
222 | stq $24,16($17) \n\ | ||
223 | xor $0,$1,$1 # 9 cycles from $1 load \n\ | ||
224 | xor $2,$3,$3 # 5 cycles from $3 load \n\ | ||
225 | xor $27,$1,$1 \n\ | ||
226 | \n\ | ||
227 | stq $1,24($17) \n\ | ||
228 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | ||
229 | ldq $23,48($17) \n\ | ||
230 | ldq $24,48($18) \n\ | ||
231 | \n\ | ||
232 | ldq $25,48($19) \n\ | ||
233 | xor $3,$5,$5 \n\ | ||
234 | ldq $27,48($20) \n\ | ||
235 | ldq $0,56($17) \n\ | ||
236 | \n\ | ||
237 | ldq $1,56($18) \n\ | ||
238 | ldq $2,56($19) \n\ | ||
239 | xor $6,$7,$7 # 8 cycles from $6 load \n\ | ||
240 | ldq $3,56($20) \n\ | ||
241 | \n\ | ||
242 | stq $5,32($17) \n\ | ||
243 | xor $21,$22,$22 # 8 cycles from $22 load \n\ | ||
244 | xor $7,$22,$22 \n\ | ||
245 | xor $23,$24,$24 # 5 cycles from $24 load \n\ | ||
246 | \n\ | ||
247 | stq $22,40($17) \n\ | ||
248 | xor $25,$27,$27 # 5 cycles from $27 load \n\ | ||
249 | xor $24,$27,$27 \n\ | ||
250 | xor $0,$1,$1 # 5 cycles from $1 load \n\ | ||
251 | \n\ | ||
252 | stq $27,48($17) \n\ | ||
253 | xor $2,$3,$3 # 4 cycles from $3 load \n\ | ||
254 | xor $1,$3,$3 \n\ | ||
255 | subq $16,1,$16 \n\ | ||
256 | \n\ | ||
257 | stq $3,56($17) \n\ | ||
258 | addq $20,64,$20 \n\ | ||
259 | addq $19,64,$19 \n\ | ||
260 | addq $18,64,$18 \n\ | ||
261 | \n\ | ||
262 | addq $17,64,$17 \n\ | ||
263 | bgt $16,4b \n\ | ||
264 | ret \n\ | ||
265 | .end xor_alpha_4 \n\ | ||
266 | \n\ | ||
267 | .align 3 \n\ | ||
268 | .ent xor_alpha_5 \n\ | ||
269 | xor_alpha_5: \n\ | ||
270 | .prologue 0 \n\ | ||
271 | srl $16, 6, $16 \n\ | ||
272 | .align 4 \n\ | ||
273 | 5: \n\ | ||
274 | ldq $0,0($17) \n\ | ||
275 | ldq $1,0($18) \n\ | ||
276 | ldq $2,0($19) \n\ | ||
277 | ldq $3,0($20) \n\ | ||
278 | \n\ | ||
279 | ldq $4,0($21) \n\ | ||
280 | ldq $5,8($17) \n\ | ||
281 | ldq $6,8($18) \n\ | ||
282 | ldq $7,8($19) \n\ | ||
283 | \n\ | ||
284 | ldq $22,8($20) \n\ | ||
285 | ldq $23,8($21) \n\ | ||
286 | ldq $24,16($17) \n\ | ||
287 | ldq $25,16($18) \n\ | ||
288 | \n\ | ||
289 | ldq $27,16($19) \n\ | ||
290 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | ||
291 | ldq $28,16($20) \n\ | ||
292 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | ||
293 | \n\ | ||
294 | ldq $0,16($21) \n\ | ||
295 | xor $1,$3,$3 \n\ | ||
296 | ldq $1,24($17) \n\ | ||
297 | xor $3,$4,$4 # 7 cycles from $4 load \n\ | ||
298 | \n\ | ||
299 | stq $4,0($17) \n\ | ||
300 | xor $5,$6,$6 # 7 cycles from $6 load \n\ | ||
301 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | ||
302 | xor $6,$23,$23 # 7 cycles from $23 load \n\ | ||
303 | \n\ | ||
304 | ldq $2,24($18) \n\ | ||
305 | xor $22,$23,$23 \n\ | ||
306 | ldq $3,24($19) \n\ | ||
307 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | ||
308 | \n\ | ||
309 | stq $23,8($17) \n\ | ||
310 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | ||
311 | ldq $4,24($20) \n\ | ||
312 | xor $28,$0,$0 # 7 cycles from $0 load \n\ | ||
313 | \n\ | ||
314 | ldq $5,24($21) \n\ | ||
315 | xor $27,$0,$0 \n\ | ||
316 | ldq $6,32($17) \n\ | ||
317 | ldq $7,32($18) \n\ | ||
318 | \n\ | ||
319 | stq $0,16($17) \n\ | ||
320 | xor $1,$2,$2 # 6 cycles from $2 load \n\ | ||
321 | ldq $22,32($19) \n\ | ||
322 | xor $3,$4,$4 # 4 cycles from $4 load \n\ | ||
323 | \n\ | ||
324 | ldq $23,32($20) \n\ | ||
325 | xor $2,$4,$4 \n\ | ||
326 | ldq $24,32($21) \n\ | ||
327 | ldq $25,40($17) \n\ | ||
328 | \n\ | ||
329 | ldq $27,40($18) \n\ | ||
330 | ldq $28,40($19) \n\ | ||
331 | ldq $0,40($20) \n\ | ||
332 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | ||
333 | \n\ | ||
334 | stq $5,24($17) \n\ | ||
335 | xor $6,$7,$7 # 7 cycles from $7 load \n\ | ||
336 | ldq $1,40($21) \n\ | ||
337 | ldq $2,48($17) \n\ | ||
338 | \n\ | ||
339 | ldq $3,48($18) \n\ | ||
340 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | ||
341 | ldq $4,48($19) \n\ | ||
342 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | ||
343 | \n\ | ||
344 | ldq $5,48($20) \n\ | ||
345 | xor $22,$24,$24 \n\ | ||
346 | ldq $6,48($21) \n\ | ||
347 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | ||
348 | \n\ | ||
349 | stq $24,32($17) \n\ | ||
350 | xor $27,$28,$28 # 8 cycles from $28 load \n\ | ||
351 | ldq $7,56($17) \n\ | ||
352 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | ||
353 | \n\ | ||
354 | ldq $22,56($18) \n\ | ||
355 | ldq $23,56($19) \n\ | ||
356 | ldq $24,56($20) \n\ | ||
357 | ldq $25,56($21) \n\ | ||
358 | \n\ | ||
359 | xor $28,$1,$1 \n\ | ||
360 | xor $2,$3,$3 # 9 cycles from $3 load \n\ | ||
361 | xor $3,$4,$4 # 9 cycles from $4 load \n\ | ||
362 | xor $5,$6,$6 # 8 cycles from $6 load \n\ | ||
363 | \n\ | ||
364 | stq $1,40($17) \n\ | ||
365 | xor $4,$6,$6 \n\ | ||
366 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | ||
367 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | ||
368 | \n\ | ||
369 | stq $6,48($17) \n\ | ||
370 | xor $22,$24,$24 \n\ | ||
371 | subq $16,1,$16 \n\ | ||
372 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | ||
373 | \n\ | ||
374 | stq $25,56($17) \n\ | ||
375 | addq $21,64,$21 \n\ | ||
376 | addq $20,64,$20 \n\ | ||
377 | addq $19,64,$19 \n\ | ||
378 | \n\ | ||
379 | addq $18,64,$18 \n\ | ||
380 | addq $17,64,$17 \n\ | ||
381 | bgt $16,5b \n\ | ||
382 | ret \n\ | ||
383 | .end xor_alpha_5 \n\ | ||
384 | \n\ | ||
385 | .align 3 \n\ | ||
386 | .ent xor_alpha_prefetch_2 \n\ | ||
387 | xor_alpha_prefetch_2: \n\ | ||
388 | .prologue 0 \n\ | ||
389 | srl $16, 6, $16 \n\ | ||
390 | \n\ | ||
391 | ldq $31, 0($17) \n\ | ||
392 | ldq $31, 0($18) \n\ | ||
393 | \n\ | ||
394 | ldq $31, 64($17) \n\ | ||
395 | ldq $31, 64($18) \n\ | ||
396 | \n\ | ||
397 | ldq $31, 128($17) \n\ | ||
398 | ldq $31, 128($18) \n\ | ||
399 | \n\ | ||
400 | ldq $31, 192($17) \n\ | ||
401 | ldq $31, 192($18) \n\ | ||
402 | .align 4 \n\ | ||
403 | 2: \n\ | ||
404 | ldq $0,0($17) \n\ | ||
405 | ldq $1,0($18) \n\ | ||
406 | ldq $2,8($17) \n\ | ||
407 | ldq $3,8($18) \n\ | ||
408 | \n\ | ||
409 | ldq $4,16($17) \n\ | ||
410 | ldq $5,16($18) \n\ | ||
411 | ldq $6,24($17) \n\ | ||
412 | ldq $7,24($18) \n\ | ||
413 | \n\ | ||
414 | ldq $19,32($17) \n\ | ||
415 | ldq $20,32($18) \n\ | ||
416 | ldq $21,40($17) \n\ | ||
417 | ldq $22,40($18) \n\ | ||
418 | \n\ | ||
419 | ldq $23,48($17) \n\ | ||
420 | ldq $24,48($18) \n\ | ||
421 | ldq $25,56($17) \n\ | ||
422 | ldq $27,56($18) \n\ | ||
423 | \n\ | ||
424 | ldq $31,256($17) \n\ | ||
425 | xor $0,$1,$0 # 8 cycles from $1 load \n\ | ||
426 | ldq $31,256($18) \n\ | ||
427 | xor $2,$3,$2 \n\ | ||
428 | \n\ | ||
429 | stq $0,0($17) \n\ | ||
430 | xor $4,$5,$4 \n\ | ||
431 | stq $2,8($17) \n\ | ||
432 | xor $6,$7,$6 \n\ | ||
433 | \n\ | ||
434 | stq $4,16($17) \n\ | ||
435 | xor $19,$20,$19 \n\ | ||
436 | stq $6,24($17) \n\ | ||
437 | xor $21,$22,$21 \n\ | ||
438 | \n\ | ||
439 | stq $19,32($17) \n\ | ||
440 | xor $23,$24,$23 \n\ | ||
441 | stq $21,40($17) \n\ | ||
442 | xor $25,$27,$25 \n\ | ||
443 | \n\ | ||
444 | stq $23,48($17) \n\ | ||
445 | subq $16,1,$16 \n\ | ||
446 | stq $25,56($17) \n\ | ||
447 | addq $17,64,$17 \n\ | ||
448 | \n\ | ||
449 | addq $18,64,$18 \n\ | ||
450 | bgt $16,2b \n\ | ||
451 | ret \n\ | ||
452 | .end xor_alpha_prefetch_2 \n\ | ||
453 | \n\ | ||
454 | .align 3 \n\ | ||
455 | .ent xor_alpha_prefetch_3 \n\ | ||
456 | xor_alpha_prefetch_3: \n\ | ||
457 | .prologue 0 \n\ | ||
458 | srl $16, 6, $16 \n\ | ||
459 | \n\ | ||
460 | ldq $31, 0($17) \n\ | ||
461 | ldq $31, 0($18) \n\ | ||
462 | ldq $31, 0($19) \n\ | ||
463 | \n\ | ||
464 | ldq $31, 64($17) \n\ | ||
465 | ldq $31, 64($18) \n\ | ||
466 | ldq $31, 64($19) \n\ | ||
467 | \n\ | ||
468 | ldq $31, 128($17) \n\ | ||
469 | ldq $31, 128($18) \n\ | ||
470 | ldq $31, 128($19) \n\ | ||
471 | \n\ | ||
472 | ldq $31, 192($17) \n\ | ||
473 | ldq $31, 192($18) \n\ | ||
474 | ldq $31, 192($19) \n\ | ||
475 | .align 4 \n\ | ||
476 | 3: \n\ | ||
477 | ldq $0,0($17) \n\ | ||
478 | ldq $1,0($18) \n\ | ||
479 | ldq $2,0($19) \n\ | ||
480 | ldq $3,8($17) \n\ | ||
481 | \n\ | ||
482 | ldq $4,8($18) \n\ | ||
483 | ldq $6,16($17) \n\ | ||
484 | ldq $7,16($18) \n\ | ||
485 | ldq $21,24($17) \n\ | ||
486 | \n\ | ||
487 | ldq $22,24($18) \n\ | ||
488 | ldq $24,32($17) \n\ | ||
489 | ldq $25,32($18) \n\ | ||
490 | ldq $5,8($19) \n\ | ||
491 | \n\ | ||
492 | ldq $20,16($19) \n\ | ||
493 | ldq $23,24($19) \n\ | ||
494 | ldq $27,32($19) \n\ | ||
495 | nop \n\ | ||
496 | \n\ | ||
497 | xor $0,$1,$1 # 8 cycles from $0 load \n\ | ||
498 | xor $3,$4,$4 # 7 cycles from $4 load \n\ | ||
499 | xor $6,$7,$7 # 6 cycles from $7 load \n\ | ||
500 | xor $21,$22,$22 # 5 cycles from $22 load \n\ | ||
501 | \n\ | ||
502 | xor $1,$2,$2 # 9 cycles from $2 load \n\ | ||
503 | xor $24,$25,$25 # 5 cycles from $25 load \n\ | ||
504 | stq $2,0($17) \n\ | ||
505 | xor $4,$5,$5 # 6 cycles from $5 load \n\ | ||
506 | \n\ | ||
507 | stq $5,8($17) \n\ | ||
508 | xor $7,$20,$20 # 7 cycles from $20 load \n\ | ||
509 | stq $20,16($17) \n\ | ||
510 | xor $22,$23,$23 # 7 cycles from $23 load \n\ | ||
511 | \n\ | ||
512 | stq $23,24($17) \n\ | ||
513 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | ||
514 | stq $27,32($17) \n\ | ||
515 | nop \n\ | ||
516 | \n\ | ||
517 | ldq $0,40($17) \n\ | ||
518 | ldq $1,40($18) \n\ | ||
519 | ldq $3,48($17) \n\ | ||
520 | ldq $4,48($18) \n\ | ||
521 | \n\ | ||
522 | ldq $6,56($17) \n\ | ||
523 | ldq $7,56($18) \n\ | ||
524 | ldq $2,40($19) \n\ | ||
525 | ldq $5,48($19) \n\ | ||
526 | \n\ | ||
527 | ldq $20,56($19) \n\ | ||
528 | ldq $31,256($17) \n\ | ||
529 | ldq $31,256($18) \n\ | ||
530 | ldq $31,256($19) \n\ | ||
531 | \n\ | ||
532 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | ||
533 | xor $3,$4,$4 # 5 cycles from $4 load \n\ | ||
534 | xor $6,$7,$7 # 5 cycles from $7 load \n\ | ||
535 | xor $1,$2,$2 # 4 cycles from $2 load \n\ | ||
536 | \n\ | ||
537 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | ||
538 | xor $7,$20,$20 # 4 cycles from $20 load \n\ | ||
539 | stq $2,40($17) \n\ | ||
540 | subq $16,1,$16 \n\ | ||
541 | \n\ | ||
542 | stq $5,48($17) \n\ | ||
543 | addq $19,64,$19 \n\ | ||
544 | stq $20,56($17) \n\ | ||
545 | addq $18,64,$18 \n\ | ||
546 | \n\ | ||
547 | addq $17,64,$17 \n\ | ||
548 | bgt $16,3b \n\ | ||
549 | ret \n\ | ||
550 | .end xor_alpha_prefetch_3 \n\ | ||
551 | \n\ | ||
552 | .align 3 \n\ | ||
553 | .ent xor_alpha_prefetch_4 \n\ | ||
554 | xor_alpha_prefetch_4: \n\ | ||
555 | .prologue 0 \n\ | ||
556 | srl $16, 6, $16 \n\ | ||
557 | \n\ | ||
558 | ldq $31, 0($17) \n\ | ||
559 | ldq $31, 0($18) \n\ | ||
560 | ldq $31, 0($19) \n\ | ||
561 | ldq $31, 0($20) \n\ | ||
562 | \n\ | ||
563 | ldq $31, 64($17) \n\ | ||
564 | ldq $31, 64($18) \n\ | ||
565 | ldq $31, 64($19) \n\ | ||
566 | ldq $31, 64($20) \n\ | ||
567 | \n\ | ||
568 | ldq $31, 128($17) \n\ | ||
569 | ldq $31, 128($18) \n\ | ||
570 | ldq $31, 128($19) \n\ | ||
571 | ldq $31, 128($20) \n\ | ||
572 | \n\ | ||
573 | ldq $31, 192($17) \n\ | ||
574 | ldq $31, 192($18) \n\ | ||
575 | ldq $31, 192($19) \n\ | ||
576 | ldq $31, 192($20) \n\ | ||
577 | .align 4 \n\ | ||
578 | 4: \n\ | ||
579 | ldq $0,0($17) \n\ | ||
580 | ldq $1,0($18) \n\ | ||
581 | ldq $2,0($19) \n\ | ||
582 | ldq $3,0($20) \n\ | ||
583 | \n\ | ||
584 | ldq $4,8($17) \n\ | ||
585 | ldq $5,8($18) \n\ | ||
586 | ldq $6,8($19) \n\ | ||
587 | ldq $7,8($20) \n\ | ||
588 | \n\ | ||
589 | ldq $21,16($17) \n\ | ||
590 | ldq $22,16($18) \n\ | ||
591 | ldq $23,16($19) \n\ | ||
592 | ldq $24,16($20) \n\ | ||
593 | \n\ | ||
594 | ldq $25,24($17) \n\ | ||
595 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | ||
596 | ldq $27,24($18) \n\ | ||
597 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | ||
598 | \n\ | ||
599 | ldq $0,24($19) \n\ | ||
600 | xor $1,$3,$3 \n\ | ||
601 | ldq $1,24($20) \n\ | ||
602 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | ||
603 | \n\ | ||
604 | stq $3,0($17) \n\ | ||
605 | xor $6,$7,$7 \n\ | ||
606 | xor $21,$22,$22 # 7 cycles from $22 load \n\ | ||
607 | xor $5,$7,$7 \n\ | ||
608 | \n\ | ||
609 | stq $7,8($17) \n\ | ||
610 | xor $23,$24,$24 # 7 cycles from $24 load \n\ | ||
611 | ldq $2,32($17) \n\ | ||
612 | xor $22,$24,$24 \n\ | ||
613 | \n\ | ||
614 | ldq $3,32($18) \n\ | ||
615 | ldq $4,32($19) \n\ | ||
616 | ldq $5,32($20) \n\ | ||
617 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | ||
618 | \n\ | ||
619 | ldq $6,40($17) \n\ | ||
620 | ldq $7,40($18) \n\ | ||
621 | ldq $21,40($19) \n\ | ||
622 | ldq $22,40($20) \n\ | ||
623 | \n\ | ||
624 | stq $24,16($17) \n\ | ||
625 | xor $0,$1,$1 # 9 cycles from $1 load \n\ | ||
626 | xor $2,$3,$3 # 5 cycles from $3 load \n\ | ||
627 | xor $27,$1,$1 \n\ | ||
628 | \n\ | ||
629 | stq $1,24($17) \n\ | ||
630 | xor $4,$5,$5 # 5 cycles from $5 load \n\ | ||
631 | ldq $23,48($17) \n\ | ||
632 | xor $3,$5,$5 \n\ | ||
633 | \n\ | ||
634 | ldq $24,48($18) \n\ | ||
635 | ldq $25,48($19) \n\ | ||
636 | ldq $27,48($20) \n\ | ||
637 | ldq $0,56($17) \n\ | ||
638 | \n\ | ||
639 | ldq $1,56($18) \n\ | ||
640 | ldq $2,56($19) \n\ | ||
641 | ldq $3,56($20) \n\ | ||
642 | xor $6,$7,$7 # 8 cycles from $6 load \n\ | ||
643 | \n\ | ||
644 | ldq $31,256($17) \n\ | ||
645 | xor $21,$22,$22 # 8 cycles from $22 load \n\ | ||
646 | ldq $31,256($18) \n\ | ||
647 | xor $7,$22,$22 \n\ | ||
648 | \n\ | ||
649 | ldq $31,256($19) \n\ | ||
650 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | ||
651 | ldq $31,256($20) \n\ | ||
652 | xor $25,$27,$27 # 6 cycles from $27 load \n\ | ||
653 | \n\ | ||
654 | stq $5,32($17) \n\ | ||
655 | xor $24,$27,$27 \n\ | ||
656 | xor $0,$1,$1 # 7 cycles from $1 load \n\ | ||
657 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | ||
658 | \n\ | ||
659 | stq $22,40($17) \n\ | ||
660 | xor $1,$3,$3 \n\ | ||
661 | stq $27,48($17) \n\ | ||
662 | subq $16,1,$16 \n\ | ||
663 | \n\ | ||
664 | stq $3,56($17) \n\ | ||
665 | addq $20,64,$20 \n\ | ||
666 | addq $19,64,$19 \n\ | ||
667 | addq $18,64,$18 \n\ | ||
668 | \n\ | ||
669 | addq $17,64,$17 \n\ | ||
670 | bgt $16,4b \n\ | ||
671 | ret \n\ | ||
672 | .end xor_alpha_prefetch_4 \n\ | ||
673 | \n\ | ||
674 | .align 3 \n\ | ||
675 | .ent xor_alpha_prefetch_5 \n\ | ||
676 | xor_alpha_prefetch_5: \n\ | ||
677 | .prologue 0 \n\ | ||
678 | srl $16, 6, $16 \n\ | ||
679 | \n\ | ||
680 | ldq $31, 0($17) \n\ | ||
681 | ldq $31, 0($18) \n\ | ||
682 | ldq $31, 0($19) \n\ | ||
683 | ldq $31, 0($20) \n\ | ||
684 | ldq $31, 0($21) \n\ | ||
685 | \n\ | ||
686 | ldq $31, 64($17) \n\ | ||
687 | ldq $31, 64($18) \n\ | ||
688 | ldq $31, 64($19) \n\ | ||
689 | ldq $31, 64($20) \n\ | ||
690 | ldq $31, 64($21) \n\ | ||
691 | \n\ | ||
692 | ldq $31, 128($17) \n\ | ||
693 | ldq $31, 128($18) \n\ | ||
694 | ldq $31, 128($19) \n\ | ||
695 | ldq $31, 128($20) \n\ | ||
696 | ldq $31, 128($21) \n\ | ||
697 | \n\ | ||
698 | ldq $31, 192($17) \n\ | ||
699 | ldq $31, 192($18) \n\ | ||
700 | ldq $31, 192($19) \n\ | ||
701 | ldq $31, 192($20) \n\ | ||
702 | ldq $31, 192($21) \n\ | ||
703 | .align 4 \n\ | ||
704 | 5: \n\ | ||
705 | ldq $0,0($17) \n\ | ||
706 | ldq $1,0($18) \n\ | ||
707 | ldq $2,0($19) \n\ | ||
708 | ldq $3,0($20) \n\ | ||
709 | \n\ | ||
710 | ldq $4,0($21) \n\ | ||
711 | ldq $5,8($17) \n\ | ||
712 | ldq $6,8($18) \n\ | ||
713 | ldq $7,8($19) \n\ | ||
714 | \n\ | ||
715 | ldq $22,8($20) \n\ | ||
716 | ldq $23,8($21) \n\ | ||
717 | ldq $24,16($17) \n\ | ||
718 | ldq $25,16($18) \n\ | ||
719 | \n\ | ||
720 | ldq $27,16($19) \n\ | ||
721 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | ||
722 | ldq $28,16($20) \n\ | ||
723 | xor $2,$3,$3 # 6 cycles from $3 load \n\ | ||
724 | \n\ | ||
725 | ldq $0,16($21) \n\ | ||
726 | xor $1,$3,$3 \n\ | ||
727 | ldq $1,24($17) \n\ | ||
728 | xor $3,$4,$4 # 7 cycles from $4 load \n\ | ||
729 | \n\ | ||
730 | stq $4,0($17) \n\ | ||
731 | xor $5,$6,$6 # 7 cycles from $6 load \n\ | ||
732 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | ||
733 | xor $6,$23,$23 # 7 cycles from $23 load \n\ | ||
734 | \n\ | ||
735 | ldq $2,24($18) \n\ | ||
736 | xor $22,$23,$23 \n\ | ||
737 | ldq $3,24($19) \n\ | ||
738 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | ||
739 | \n\ | ||
740 | stq $23,8($17) \n\ | ||
741 | xor $25,$27,$27 # 8 cycles from $27 load \n\ | ||
742 | ldq $4,24($20) \n\ | ||
743 | xor $28,$0,$0 # 7 cycles from $0 load \n\ | ||
744 | \n\ | ||
745 | ldq $5,24($21) \n\ | ||
746 | xor $27,$0,$0 \n\ | ||
747 | ldq $6,32($17) \n\ | ||
748 | ldq $7,32($18) \n\ | ||
749 | \n\ | ||
750 | stq $0,16($17) \n\ | ||
751 | xor $1,$2,$2 # 6 cycles from $2 load \n\ | ||
752 | ldq $22,32($19) \n\ | ||
753 | xor $3,$4,$4 # 4 cycles from $4 load \n\ | ||
754 | \n\ | ||
755 | ldq $23,32($20) \n\ | ||
756 | xor $2,$4,$4 \n\ | ||
757 | ldq $24,32($21) \n\ | ||
758 | ldq $25,40($17) \n\ | ||
759 | \n\ | ||
760 | ldq $27,40($18) \n\ | ||
761 | ldq $28,40($19) \n\ | ||
762 | ldq $0,40($20) \n\ | ||
763 | xor $4,$5,$5 # 7 cycles from $5 load \n\ | ||
764 | \n\ | ||
765 | stq $5,24($17) \n\ | ||
766 | xor $6,$7,$7 # 7 cycles from $7 load \n\ | ||
767 | ldq $1,40($21) \n\ | ||
768 | ldq $2,48($17) \n\ | ||
769 | \n\ | ||
770 | ldq $3,48($18) \n\ | ||
771 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | ||
772 | ldq $4,48($19) \n\ | ||
773 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | ||
774 | \n\ | ||
775 | ldq $5,48($20) \n\ | ||
776 | xor $22,$24,$24 \n\ | ||
777 | ldq $6,48($21) \n\ | ||
778 | xor $25,$27,$27 # 7 cycles from $27 load \n\ | ||
779 | \n\ | ||
780 | stq $24,32($17) \n\ | ||
781 | xor $27,$28,$28 # 8 cycles from $28 load \n\ | ||
782 | ldq $7,56($17) \n\ | ||
783 | xor $0,$1,$1 # 6 cycles from $1 load \n\ | ||
784 | \n\ | ||
785 | ldq $22,56($18) \n\ | ||
786 | ldq $23,56($19) \n\ | ||
787 | ldq $24,56($20) \n\ | ||
788 | ldq $25,56($21) \n\ | ||
789 | \n\ | ||
790 | ldq $31,256($17) \n\ | ||
791 | xor $28,$1,$1 \n\ | ||
792 | ldq $31,256($18) \n\ | ||
793 | xor $2,$3,$3 # 9 cycles from $3 load \n\ | ||
794 | \n\ | ||
795 | ldq $31,256($19) \n\ | ||
796 | xor $3,$4,$4 # 9 cycles from $4 load \n\ | ||
797 | ldq $31,256($20) \n\ | ||
798 | xor $5,$6,$6 # 8 cycles from $6 load \n\ | ||
799 | \n\ | ||
800 | stq $1,40($17) \n\ | ||
801 | xor $4,$6,$6 \n\ | ||
802 | xor $7,$22,$22 # 7 cycles from $22 load \n\ | ||
803 | xor $23,$24,$24 # 6 cycles from $24 load \n\ | ||
804 | \n\ | ||
805 | stq $6,48($17) \n\ | ||
806 | xor $22,$24,$24 \n\ | ||
807 | ldq $31,256($21) \n\ | ||
808 | xor $24,$25,$25 # 8 cycles from $25 load \n\ | ||
809 | \n\ | ||
810 | stq $25,56($17) \n\ | ||
811 | subq $16,1,$16 \n\ | ||
812 | addq $21,64,$21 \n\ | ||
813 | addq $20,64,$20 \n\ | ||
814 | \n\ | ||
815 | addq $19,64,$19 \n\ | ||
816 | addq $18,64,$18 \n\ | ||
817 | addq $17,64,$17 \n\ | ||
818 | bgt $16,5b \n\ | ||
819 | \n\ | ||
820 | ret \n\ | ||
821 | .end xor_alpha_prefetch_5 \n\ | ||
822 | "); | ||
823 | |||
824 | static struct xor_block_template xor_block_alpha = { | ||
825 | .name = "alpha", | ||
826 | .do_2 = xor_alpha_2, | ||
827 | .do_3 = xor_alpha_3, | ||
828 | .do_4 = xor_alpha_4, | ||
829 | .do_5 = xor_alpha_5, | ||
830 | }; | ||
831 | |||
832 | static struct xor_block_template xor_block_alpha_prefetch = { | ||
833 | .name = "alpha prefetch", | ||
834 | .do_2 = xor_alpha_prefetch_2, | ||
835 | .do_3 = xor_alpha_prefetch_3, | ||
836 | .do_4 = xor_alpha_prefetch_4, | ||
837 | .do_5 = xor_alpha_prefetch_5, | ||
838 | }; | ||
839 | |||
840 | /* For grins, also test the generic routines. */ | ||
841 | #include <asm-generic/xor.h> | ||
842 | |||
843 | #undef XOR_TRY_TEMPLATES | ||
844 | #define XOR_TRY_TEMPLATES \ | ||
845 | do { \ | ||
846 | xor_speed(&xor_block_8regs); \ | ||
847 | xor_speed(&xor_block_32regs); \ | ||
848 | xor_speed(&xor_block_alpha); \ | ||
849 | xor_speed(&xor_block_alpha_prefetch); \ | ||
850 | } while (0) | ||
851 | |||
852 | /* Force the use of alpha_prefetch if EV6, as it is significantly | ||
853 | faster in the cold cache case. */ | ||
854 | #define XOR_SELECT_TEMPLATE(FASTEST) \ | ||
855 | (implver() == IMPLVER_EV6 ? &xor_block_alpha_prefetch : FASTEST) | ||