diff options
Diffstat (limited to 'arch/powerpc/lib/copyuser_64.S')
-rw-r--r-- | arch/powerpc/lib/copyuser_64.S | 576 |
1 files changed, 576 insertions, 0 deletions
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S new file mode 100644 index 000000000000..a0b3fbbd6fb1 --- /dev/null +++ b/arch/powerpc/lib/copyuser_64.S | |||
@@ -0,0 +1,576 @@ | |||
1 | /* | ||
2 | * arch/ppc64/lib/copyuser.S | ||
3 | * | ||
4 | * Copyright (C) 2002 Paul Mackerras, IBM Corp. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/ppc_asm.h> | ||
13 | |||
14 | .align 7 /* powerpc gas takes a power of two here: 128-byte alignment */ | ||
15 | _GLOBAL(__copy_tofrom_user) /* r3 = dest, r4 = src, r5 = byte count; returns in r3 the number of bytes not copied (0 on success) */ | ||
16 | /* first check for a whole page copy on a page boundary */ | ||
17 | cmpldi cr1,r5,16 /* cr1 = count vs 16 (unsigned), tested by blt cr1 below */ | ||
18 | cmpdi cr6,r5,4096 /* cr6.eq = count is exactly 4096 */ | ||
19 | or r0,r3,r4 /* combine the address bits of dest and src */ | ||
20 | neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ | ||
21 | andi. r0,r0,4095 /* cr0.eq = both pointers are 4096-byte aligned */ | ||
22 | std r3,-24(r1) /* stash the original dest for the fault handlers */ | ||
23 | crand cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq = aligned AND count == 4096 */ | ||
24 | std r4,-16(r1) /* stash the original src */ | ||
25 | std r5,-8(r1) /* stash the original count */ | ||
26 | dcbt 0,r4 /* cache-touch hint for the start of the source */ | ||
27 | beq .Lcopy_page | ||
28 | andi. r6,r6,7 /* r6 = bytes to reach 8-byte dest alignment; cr0.eq = none */ | ||
29 | mtcrf 0x01,r5 /* cr7 = low four bits of the count */ | ||
30 | blt cr1,.Lshort_copy /* fewer than 16 bytes */ | ||
31 | bne .Ldst_unaligned | ||
32 | .Ldst_aligned: /* dest is 8-byte aligned; expects r5 = bytes left, cr7 = low four bits of r5, cr1 = r5 vs 16 */ | ||
33 | andi. r0,r4,7 /* r0 = src offset within a doubleword */ | ||
34 | addi r3,r3,-16 /* bias dest by -16 for the 8(r3)/16(r3) stores below */ | ||
35 | bne .Lsrc_unaligned | ||
36 | srdi r7,r5,4 /* r7 = number of 16-byte pairs */ | ||
37 | 20: ld r9,0(r4) | ||
38 | addi r4,r4,-8 /* bias src to match the update-form loads */ | ||
39 | mtctr r7 | ||
40 | andi. r5,r5,7 /* r5 = tail bytes; cr0.eq = no tail (used by beq+ below) */ | ||
41 | bf cr7*4+0,22f /* 8s bit of count clear: even number of doublewords */ | ||
42 | addi r3,r3,8 /* odd doubleword: shift both biases by 8 ... */ | ||
43 | addi r4,r4,8 | ||
44 | mr r8,r9 /* ... and queue the first doubleword in r8 */ | ||
45 | blt cr1,72f /* fewer than 16 bytes: only that one doubleword */ | ||
46 | 21: ld r9,8(r4) /* main loop: 16 bytes per iteration, loads run ahead of stores */ | ||
47 | 70: std r8,8(r3) | ||
48 | 22: ldu r8,16(r4) | ||
49 | 71: stdu r9,16(r3) | ||
50 | bdnz 21b | ||
51 | 72: std r8,8(r3) /* last whole doubleword */ | ||
52 | beq+ 3f /* cr0 from the andi. above: no tail bytes */ | ||
53 | addi r3,r3,16 /* r3 = first byte not yet written */ | ||
54 | 23: ld r9,8(r4) /* doubleword containing the tail bytes */ | ||
55 | .Ldo_tail: /* store the remaining 4/2/1 bytes, taken from the high-order end of r9, as flagged in cr7 */ | ||
56 | bf cr7*4+1,1f | ||
57 | rotldi r9,r9,32 /* bring the next 4 bytes into the low word */ | ||
58 | 73: stw r9,0(r3) | ||
59 | addi r3,r3,4 | ||
60 | 1: bf cr7*4+2,2f | ||
61 | rotldi r9,r9,16 /* next 2 bytes into the low halfword */ | ||
62 | 74: sth r9,0(r3) | ||
63 | addi r3,r3,2 | ||
64 | 2: bf cr7*4+3,3f | ||
65 | rotldi r9,r9,8 /* next byte into the low byte */ | ||
66 | 75: stb r9,0(r3) | ||
67 | 3: li r3,0 /* success: nothing left uncopied */ | ||
68 | blr | ||
69 | |||
70 | .Lsrc_unaligned: /* dest is 8-byte aligned, src is not (r0 = src & 7): read aligned source doublewords and merge neighbours with shifts */ | ||
71 | srdi r6,r5,3 /* r6 = whole doublewords to copy */ | ||
72 | addi r5,r5,-16 | ||
73 | subf r4,r0,r4 /* round src down to an 8-byte boundary */ | ||
74 | srdi r7,r5,4 /* loop count for ctr, from the count reduced by 16 above */ | ||
75 | sldi r10,r0,3 /* r10 = left shift in bits (8 * src offset) */ | ||
76 | cmpldi cr6,r6,3 /* cr6 = doubleword count vs 3, tested by blt/beq cr6 below */ | ||
77 | andi. r5,r5,7 /* tail byte count; cr0.eq = no tail (used by bne 6f) */ | ||
78 | mtctr r7 | ||
79 | subfic r11,r10,64 /* r11 = matching right shift (64 - r10) */ | ||
80 | add r5,r5,r0 /* tail bytes + src offset, compared with 8 at 6: */ | ||
81 | bt cr7*4+0,28f /* 8s bit of count set: odd number of doublewords */ | ||
82 | |||
83 | 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ | ||
84 | 25: ld r0,8(r4) | ||
85 | sld r6,r9,r10 | ||
86 | 26: ldu r9,16(r4) | ||
87 | srd r7,r0,r11 | ||
88 | sld r8,r0,r10 | ||
89 | or r7,r7,r6 /* r7 = first merged doubleword */ | ||
90 | blt cr6,79f /* fewer than 3 doublewords: skip the loop */ | ||
91 | 27: ld r0,8(r4) | ||
92 | b 2f | ||
93 | |||
94 | 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ | ||
95 | 29: ldu r9,8(r4) | ||
96 | sld r8,r0,r10 | ||
97 | addi r3,r3,-8 /* extra -8 bias for the odd doubleword */ | ||
98 | blt cr6,5f /* fewer than 3 doublewords: only the one at 5: */ | ||
99 | 30: ld r0,8(r4) | ||
100 | srd r12,r9,r11 | ||
101 | sld r6,r9,r10 | ||
102 | 31: ldu r9,16(r4) | ||
103 | or r12,r8,r12 | ||
104 | srd r7,r0,r11 | ||
105 | sld r8,r0,r10 | ||
106 | addi r3,r3,16 | ||
107 | beq cr6,78f /* exactly 3 doublewords: skip the loop */ | ||
108 | |||
109 | 1: or r7,r7,r6 /* main loop: two merged doublewords stored per iteration */ | ||
110 | 32: ld r0,8(r4) | ||
111 | 76: std r12,8(r3) | ||
112 | 2: srd r12,r9,r11 | ||
113 | sld r6,r9,r10 | ||
114 | 33: ldu r9,16(r4) | ||
115 | or r12,r8,r12 | ||
116 | 77: stdu r7,16(r3) | ||
117 | srd r7,r0,r11 | ||
118 | sld r8,r0,r10 | ||
119 | bdnz 1b | ||
120 | |||
121 | 78: std r12,8(r3) | ||
122 | or r7,r7,r6 | ||
123 | 79: std r7,16(r3) | ||
124 | 5: srd r12,r9,r11 /* final whole doubleword */ | ||
125 | or r12,r8,r12 | ||
126 | 80: std r12,24(r3) | ||
127 | bne 6f /* cr0 from the andi. above: tail bytes remain */ | ||
128 | li r3,0 /* success: nothing left uncopied */ | ||
129 | blr | ||
130 | 6: cmpwi cr1,r5,8 /* do the tail bytes extend past the doubleword already in r9? */ | ||
131 | addi r3,r3,32 /* r3 = first byte not yet written */ | ||
132 | sld r9,r9,r10 /* left-justify the leftover source bytes */ | ||
133 | ble cr1,.Ldo_tail | ||
134 | 34: ld r0,8(r4) /* one more source doubleword is needed */ | ||
135 | srd r7,r0,r11 | ||
136 | or r9,r7,r9 | ||
137 | b .Ldo_tail | ||
138 | |||
139 | .Ldst_unaligned: /* copy the 1-7 bytes in r6 that bring dest to an 8-byte boundary, then continue at .Ldst_aligned */ | ||
140 | mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ | ||
141 | subf r5,r6,r5 /* r5 = bytes left once the head has been copied */ | ||
142 | li r7,0 /* r7 = running offset into the head */ | ||
143 | cmpldi cr1,r5,16 /* cr1 = remaining count vs 16 for blt cr1 in .Ldst_aligned; the operand is a CR field, so name it cr1 and not GPR r1 */ | ||
144 | bf cr7*4+3,1f /* 1s bit clear: no single byte */ | ||
145 | 35: lbz r0,0(r4) | ||
146 | 81: stb r0,0(r3) | ||
147 | addi r7,r7,1 | ||
148 | 1: bf cr7*4+2,2f /* 2s bit clear: no halfword */ | ||
149 | 36: lhzx r0,r7,r4 | ||
150 | 82: sthx r0,r7,r3 | ||
151 | addi r7,r7,2 | ||
152 | 2: bf cr7*4+1,3f /* 4s bit clear: no word */ | ||
153 | 37: lwzx r0,r7,r4 | ||
154 | 83: stwx r0,r7,r3 | ||
155 | 3: mtcrf 0x01,r5 /* cr7 = low four bits of the remaining count */ | ||
156 | add r4,r6,r4 /* step src past the head */ | ||
157 | add r3,r6,r3 /* step dest past the head: now 8-byte aligned */ | ||
158 | b .Ldst_aligned | ||
159 | |||
160 | .Lshort_copy: /* fewer than 16 bytes: copy 8, 4, 2 and 1 bytes as flagged by cr7 (low four bits of the count) */ | ||
161 | bf cr7*4+0,1f /* 8s bit clear: no 8-byte piece */ | ||
162 | 38: lwz r0,0(r4) /* the 8-byte piece is moved as two words */ | ||
163 | 39: lwz r9,4(r4) | ||
164 | addi r4,r4,8 | ||
165 | 84: stw r0,0(r3) | ||
166 | 85: stw r9,4(r3) | ||
167 | addi r3,r3,8 | ||
168 | 1: bf cr7*4+1,2f /* 4s bit clear: no word */ | ||
169 | 40: lwz r0,0(r4) | ||
170 | addi r4,r4,4 | ||
171 | 86: stw r0,0(r3) | ||
172 | addi r3,r3,4 | ||
173 | 2: bf cr7*4+2,3f /* 2s bit clear: no halfword */ | ||
174 | 41: lhz r0,0(r4) | ||
175 | addi r4,r4,2 | ||
176 | 87: sth r0,0(r3) | ||
177 | addi r3,r3,2 | ||
178 | 3: bf cr7*4+3,4f /* 1s bit clear: no byte */ | ||
179 | 42: lbz r0,0(r4) | ||
180 | 88: stb r0,0(r3) | ||
181 | 4: li r3,0 /* success: nothing left uncopied */ | ||
182 | blr | ||
183 | |||
184 | /* | ||
185 | * exception handlers follow | ||
186 | * we have to return the number of bytes not copied | ||
187 | * for an exception on a load, we set the rest of the destination to 0 | ||
188 | */ | ||
189 | |||
190 | 136: | ||
191 | 137: | ||
192 | add r3,r3,r7 /* loads 36/37 were working at offset r7 from r3 */ | ||
193 | b 1f | ||
194 | 130: | ||
195 | 131: | ||
196 | addi r3,r3,8 /* falls through two more addi: +24 in total */ | ||
197 | 120: | ||
198 | 122: | ||
199 | 124: | ||
200 | 125: | ||
201 | 126: | ||
202 | 127: | ||
203 | 128: | ||
204 | 129: | ||
205 | 133: | ||
206 | addi r3,r3,8 /* falls through one more addi: +16 in total */ | ||
207 | 121: | ||
208 | 132: | ||
209 | addi r3,r3,8 /* +8 */ | ||
210 | 123: /* the labels from here down to 142 need no adjustment of r3 */ | ||
211 | 134: | ||
212 | 135: | ||
213 | 138: | ||
214 | 139: | ||
215 | 140: | ||
216 | 141: | ||
217 | 142: | ||
218 | |||
219 | /* | ||
220 | * here we have had a fault on a load and r3 points to the first | ||
221 | * unmodified byte of the destination | ||
222 | */ | ||
223 | 1: ld r6,-24(r1) /* r6 = original dest */ | ||
224 | ld r4,-16(r1) /* r4 = original src */ | ||
225 | ld r5,-8(r1) /* r5 = original count */ | ||
226 | subf r6,r6,r3 /* r6 = bytes already copied */ | ||
227 | add r4,r4,r6 /* r4 = src address of the first uncopied byte */ | ||
228 | subf r5,r6,r5 /* #bytes left to go */ | ||
229 | |||
230 | /* | ||
231 | * first see if we can copy any more bytes before hitting another exception | ||
232 | */ | ||
233 | mtctr r5 /* ctr counts the bytes still to copy */ | ||
234 | 43: lbz r0,0(r4) /* byte-at-a-time retry; a fault on this load is fixed up at 143 */ | ||
235 | addi r4,r4,1 | ||
236 | 89: stb r0,0(r3) | ||
237 | addi r3,r3,1 | ||
238 | bdnz 43b | ||
239 | li r3,0 /* huh? all copied successfully this time? */ | ||
240 | blr | ||
241 | |||
242 | /* | ||
243 | * here we have trapped again, need to clear ctr bytes starting at r3 | ||
244 | */ | ||
245 | 143: mfctr r5 /* r5 = bytes that were still to be copied */ | ||
246 | li r0,0 /* zero value for the clearing stores */ | ||
247 | mr r4,r3 /* r4 = destination cursor */ | ||
248 | mr r3,r5 /* return the number of bytes not copied */ | ||
249 | 1: andi. r9,r4,7 /* clear single bytes until the cursor is 8-byte aligned */ | ||
250 | beq 3f | ||
251 | 90: stb r0,0(r4) | ||
252 | addic. r5,r5,-1 | ||
253 | addi r4,r4,1 | ||
254 | bne 1b | ||
255 | blr /* count ran out before alignment was reached */ | ||
256 | 3: cmpldi cr1,r5,8 /* at least one whole doubleword left? */ | ||
257 | srdi r9,r5,3 /* r9 = doublewords to clear */ | ||
258 | andi. r5,r5,7 /* r5 = leftover bytes; cr0.eq = none */ | ||
259 | blt cr1,93f | ||
260 | mtctr r9 | ||
261 | 91: std r0,0(r4) | ||
262 | addi r4,r4,8 | ||
263 | bdnz 91b | ||
264 | 93: beqlr /* cr0 from the andi. above: return if no leftover bytes */ | ||
265 | mtctr r5 | ||
266 | 92: stb r0,0(r4) | ||
267 | addi r4,r4,1 | ||
268 | bdnz 92b | ||
269 | blr | ||
270 | |||
271 | /* | ||
272 | * exception handlers for stores: we just need to work out how many bytes weren't copied. | ||
273 | * Label NNN is the fixup for the store at label NN = NNN - 100; the addi chain advances r3 to the address of the faulting store, i.e. the first byte not written. | ||
274 | */ | ||
275 | 182: | ||
276 | 183: | ||
277 | add r3,r3,r7 /* stores 82/83 address r3 + r7 */ | ||
278 | b 1f | ||
279 | 180: | ||
280 | addi r3,r3,8 /* store 80 is at 24(r3): 8+8+4+4 */ | ||
281 | 171: | ||
282 | 177: | ||
283 | 179: /* store 79 is std r7,16(r3), so it needs +16 like 71/77; with no adjustment the result was 16 too large */ | ||
284 | addi r3,r3,8 /* stores 71, 77 and 79 are at 16(r3): 8+4+4 */ | ||
285 | 170: | ||
286 | 172: | ||
287 | 176: | ||
288 | 178: | ||
289 | addi r3,r3,4 /* stores 70, 72, 76 and 78 are at 8(r3): 4+4 */ | ||
290 | 185: | ||
291 | addi r3,r3,4 /* store 85 is at 4(r3) */ | ||
292 | 173: /* the stores below all address 0(r3): no adjustment */ | ||
293 | 174: | ||
294 | 175: | ||
295 | 181: | ||
296 | 184: | ||
297 | 186: | ||
298 | 187: | ||
299 | 188: | ||
300 | 189: | ||
301 | 1: | ||
302 | ld r6,-24(r1) /* r6 = original dest */ | ||
303 | ld r5,-8(r1) /* r5 = original count */ | ||
304 | add r6,r6,r5 /* r6 = one past the end of the destination */ | ||
305 | subf r3,r3,r6 /* #bytes not copied */ | ||
306 | 190: /* 190-192: fault while zeroing the destination (stores 90-92); r3 was already set to the count before those stores */ | ||
307 | 191: | ||
308 | 192: | ||
309 | blr /* #bytes not copied in r3 */ | ||
310 | |||
311 | .section __ex_table,"a" /* pairs of addresses: a load or store above, then the fixup label to resume at if it faults */ | ||
312 | .align 3 /* 8-byte alignment for the .llong entries */ | ||
313 | .llong 20b,120b /* loads are labelled 20-43 and stores 70-92; each fixup label is the access label + 100 */ | ||
314 | .llong 21b,121b | ||
315 | .llong 70b,170b | ||
316 | .llong 22b,122b | ||
317 | .llong 71b,171b | ||
318 | .llong 72b,172b | ||
319 | .llong 23b,123b | ||
320 | .llong 73b,173b | ||
321 | .llong 74b,174b | ||
322 | .llong 75b,175b | ||
323 | .llong 24b,124b | ||
324 | .llong 25b,125b | ||
325 | .llong 26b,126b | ||
326 | .llong 27b,127b | ||
327 | .llong 28b,128b | ||
328 | .llong 29b,129b | ||
329 | .llong 30b,130b | ||
330 | .llong 31b,131b | ||
331 | .llong 32b,132b | ||
332 | .llong 76b,176b | ||
333 | .llong 33b,133b | ||
334 | .llong 77b,177b | ||
335 | .llong 78b,178b | ||
336 | .llong 79b,179b | ||
337 | .llong 80b,180b | ||
338 | .llong 34b,134b | ||
339 | .llong 35b,135b | ||
340 | .llong 81b,181b | ||
341 | .llong 36b,136b | ||
342 | .llong 82b,182b | ||
343 | .llong 37b,137b | ||
344 | .llong 83b,183b | ||
345 | .llong 38b,138b | ||
346 | .llong 39b,139b | ||
347 | .llong 84b,184b | ||
348 | .llong 85b,185b | ||
349 | .llong 40b,140b | ||
350 | .llong 86b,186b | ||
351 | .llong 41b,141b | ||
352 | .llong 87b,187b | ||
353 | .llong 42b,142b | ||
354 | .llong 88b,188b | ||
355 | .llong 43b,143b /* fault in the byte retry loop: zero the rest of the destination */ | ||
356 | .llong 89b,189b | ||
357 | .llong 90b,190b /* 90-92 are the zeroing stores; their fixups just return */ | ||
358 | .llong 91b,191b | ||
359 | .llong 92b,192b | ||
360 | |||
361 | .text /* back to code; this table must stay ahead of .Lcopy_page, which reuses the numeric labels 20-91 */ | ||
362 | |||
363 | /* | ||
364 | * Routine to copy a whole page of data, optimized for POWER4. | ||
365 | * On POWER4 it is more than 50% faster than the simple loop | ||
366 | * above (following the .Ldst_aligned label) but it runs slightly | ||
367 | * slower on POWER3. | ||
368 | */ | ||
369 | .Lcopy_page: /* entered only when count == 4096 and both pointers are 4096-byte aligned */ | ||
370 | std r31,-32(1) /* save r20-r31 below r1, under the saved r3/r4/r5; they are reloaded at 9: and at 100: */ | ||
371 | std r30,-40(1) | ||
372 | std r29,-48(1) | ||
373 | std r28,-56(1) | ||
374 | std r27,-64(1) | ||
375 | std r26,-72(1) | ||
376 | std r25,-80(1) | ||
377 | std r24,-88(1) | ||
378 | std r23,-96(1) | ||
379 | std r22,-104(1) | ||
380 | std r21,-112(1) | ||
381 | std r20,-120(1) | ||
382 | li r5,4096/32 - 1 /* 127 = number of 32-byte groups in the page, minus one */ | ||
383 | addi r3,r3,-8 /* bias dest: the stores address 8(3) and up */ | ||
384 | li r0,5 /* inner loop count */ | ||
385 | 0: addi r5,r5,-24 /* each outer pass copies 24 groups (768 bytes) as six streams 128 bytes apart */ | ||
386 | mtctr r0 | ||
387 | 20: ld r22,640(4) /* first doubleword of each of the six streams */ | ||
388 | 21: ld r21,512(4) | ||
389 | 22: ld r20,384(4) | ||
390 | 23: ld r11,256(4) | ||
391 | 24: ld r9,128(4) | ||
392 | 25: ld r7,0(4) | ||
393 | 26: ld r25,648(4) /* second doubleword of each stream */ | ||
394 | 27: ld r24,520(4) | ||
395 | 28: ld r23,392(4) | ||
396 | 29: ld r10,264(4) | ||
397 | 30: ld r8,136(4) | ||
398 | 31: ldu r6,8(4) | ||
399 | cmpwi r5,24 /* sets cr0 for the bge 0b after the inner loop; nothing in between writes cr0 */ | ||
400 | 1: /* inner loop: 24 bytes per stream per iteration, loads kept ahead of stores */ | ||
401 | 32: std r22,648(3) | ||
402 | 33: std r21,520(3) | ||
403 | 34: std r20,392(3) | ||
404 | 35: std r11,264(3) | ||
405 | 36: std r9,136(3) | ||
406 | 37: std r7,8(3) | ||
407 | 38: ld r28,648(4) | ||
408 | 39: ld r27,520(4) | ||
409 | 40: ld r26,392(4) | ||
410 | 41: ld r31,264(4) | ||
411 | 42: ld r30,136(4) | ||
412 | 43: ld r29,8(4) | ||
413 | 44: std r25,656(3) | ||
414 | 45: std r24,528(3) | ||
415 | 46: std r23,400(3) | ||
416 | 47: std r10,272(3) | ||
417 | 48: std r8,144(3) | ||
418 | 49: std r6,16(3) | ||
419 | 50: ld r22,656(4) | ||
420 | 51: ld r21,528(4) | ||
421 | 52: ld r20,400(4) | ||
422 | 53: ld r11,272(4) | ||
423 | 54: ld r9,144(4) | ||
424 | 55: ld r7,16(4) | ||
425 | 56: std r28,664(3) | ||
426 | 57: std r27,536(3) | ||
427 | 58: std r26,408(3) | ||
428 | 59: std r31,280(3) | ||
429 | 60: std r30,152(3) | ||
430 | 61: stdu r29,24(3) | ||
431 | 62: ld r25,664(4) | ||
432 | 63: ld r24,536(4) | ||
433 | 64: ld r23,408(4) | ||
434 | 65: ld r10,280(4) | ||
435 | 66: ld r8,152(4) | ||
436 | 67: ldu r6,24(4) | ||
437 | bdnz 1b | ||
438 | 68: std r22,648(3) /* store the doublewords loaded at 50-55 in the last iteration */ | ||
439 | 69: std r21,520(3) | ||
440 | 70: std r20,392(3) | ||
441 | 71: std r11,264(3) | ||
442 | 72: std r9,136(3) | ||
443 | 73: std r7,8(3) | ||
444 | 74: addi r4,r4,640 /* src advanced 128 during the pass; skip the other five streams */ | ||
445 | 75: addi r3,r3,648 /* dest advanced 120 during the pass; 120 + 648 = 768 */ | ||
446 | bge 0b /* another pass while r5 was still >= 24 at the cmpwi */ | ||
447 | mtctr r5 /* groups left minus one, copied by the simple loop at 3: */ | ||
448 | 76: ld r7,0(4) | ||
449 | 77: ld r8,8(4) | ||
450 | 78: ldu r9,16(4) | ||
451 | 3: /* 32 bytes per iteration */ | ||
452 | 79: ld r10,8(4) | ||
453 | 80: std r7,8(3) | ||
454 | 81: ld r7,16(4) | ||
455 | 82: std r8,16(3) | ||
456 | 83: ld r8,24(4) | ||
457 | 84: std r9,24(3) | ||
458 | 85: ldu r9,32(4) | ||
459 | 86: stdu r10,32(3) | ||
460 | bdnz 3b | ||
461 | 4: /* final 32 bytes */ | ||
462 | 87: ld r10,8(4) | ||
463 | 88: std r7,8(3) | ||
464 | 89: std r8,16(3) | ||
465 | 90: std r9,24(3) | ||
466 | 91: std r10,32(3) | ||
467 | 9: ld r20,-120(1) /* reload the saved r20-r31 */ | ||
468 | ld r21,-112(1) | ||
469 | ld r22,-104(1) | ||
470 | ld r23,-96(1) | ||
471 | ld r24,-88(1) | ||
472 | ld r25,-80(1) | ||
473 | ld r26,-72(1) | ||
474 | ld r27,-64(1) | ||
475 | ld r28,-56(1) | ||
476 | ld r29,-48(1) | ||
477 | ld r30,-40(1) | ||
478 | ld r31,-32(1) | ||
479 | li r3,0 /* success: nothing left uncopied */ | ||
480 | blr | ||
481 | |||
482 | /* On a fault anywhere in .Lcopy_page: reload r20-r31 and the original dest/src, then redo the copy as an ordinary 4096-byte copy through .Ldst_aligned, whose own fixups compute the return value. */ | ||
483 | /* .Ldst_aligned reads the low four bits of the count from cr7, but the page-copy path branched away before the entry code's mtcrf, so cr7 is loaded from r5 here. */ | ||
484 | /* cr1 still holds the entry comparison of the count with 16; nothing in .Lcopy_page writes it. */ | ||
485 | 100: ld r20,-120(1) | ||
486 | ld r21,-112(1) | ||
487 | ld r22,-104(1) | ||
488 | ld r23,-96(1) | ||
489 | ld r24,-88(1) | ||
490 | ld r25,-80(1) | ||
491 | ld r26,-72(1) | ||
492 | ld r27,-64(1) | ||
493 | ld r28,-56(1) | ||
494 | ld r29,-48(1) | ||
495 | ld r30,-40(1) | ||
496 | ld r31,-32(1) | ||
497 | ld r3,-24(r1) /* r3 = original dest */ | ||
498 | ld r4,-16(r1) /* r4 = original src */ | ||
499 | li r5,4096 /* the page-copy path is only taken for a count of 4096 */ | ||
500 | mtcrf 0x01,r5 /* cr7 = low four bits of the count (all zero), as .Ldst_aligned expects */ | ||
501 | b .Ldst_aligned | ||
502 | |||
503 | .section __ex_table,"a" /* fixup pairs for .Lcopy_page: every labelled instruction there resumes at 100 */ | ||
504 | .align 3 /* 8-byte alignment for the .llong entries */ | ||
505 | .llong 20b,100b /* 20b-91b resolve to the labels in .Lcopy_page, the nearest definitions above this point */ | ||
506 | .llong 21b,100b | ||
507 | .llong 22b,100b | ||
508 | .llong 23b,100b | ||
509 | .llong 24b,100b | ||
510 | .llong 25b,100b | ||
511 | .llong 26b,100b | ||
512 | .llong 27b,100b | ||
513 | .llong 28b,100b | ||
514 | .llong 29b,100b | ||
515 | .llong 30b,100b | ||
516 | .llong 31b,100b | ||
517 | .llong 32b,100b | ||
518 | .llong 33b,100b | ||
519 | .llong 34b,100b | ||
520 | .llong 35b,100b | ||
521 | .llong 36b,100b | ||
522 | .llong 37b,100b | ||
523 | .llong 38b,100b | ||
524 | .llong 39b,100b | ||
525 | .llong 40b,100b | ||
526 | .llong 41b,100b | ||
527 | .llong 42b,100b | ||
528 | .llong 43b,100b | ||
529 | .llong 44b,100b | ||
530 | .llong 45b,100b | ||
531 | .llong 46b,100b | ||
532 | .llong 47b,100b | ||
533 | .llong 48b,100b | ||
534 | .llong 49b,100b | ||
535 | .llong 50b,100b | ||
536 | .llong 51b,100b | ||
537 | .llong 52b,100b | ||
538 | .llong 53b,100b | ||
539 | .llong 54b,100b | ||
540 | .llong 55b,100b | ||
541 | .llong 56b,100b | ||
542 | .llong 57b,100b | ||
543 | .llong 58b,100b | ||
544 | .llong 59b,100b | ||
545 | .llong 60b,100b | ||
546 | .llong 61b,100b | ||
547 | .llong 62b,100b | ||
548 | .llong 63b,100b | ||
549 | .llong 64b,100b | ||
550 | .llong 65b,100b | ||
551 | .llong 66b,100b | ||
552 | .llong 67b,100b | ||
553 | .llong 68b,100b | ||
554 | .llong 69b,100b | ||
555 | .llong 70b,100b | ||
556 | .llong 71b,100b | ||
557 | .llong 72b,100b | ||
558 | .llong 73b,100b | ||
559 | .llong 74b,100b /* 74 and 75 label addi instructions, which access no memory */ | ||
560 | .llong 75b,100b | ||
561 | .llong 76b,100b | ||
562 | .llong 77b,100b | ||
563 | .llong 78b,100b | ||
564 | .llong 79b,100b | ||
565 | .llong 80b,100b | ||
566 | .llong 81b,100b | ||
567 | .llong 82b,100b | ||
568 | .llong 83b,100b | ||
569 | .llong 84b,100b | ||
570 | .llong 85b,100b | ||
571 | .llong 86b,100b | ||
572 | .llong 87b,100b | ||
573 | .llong 88b,100b | ||
574 | .llong 89b,100b | ||
575 | .llong 90b,100b | ||
576 | .llong 91b,100b | ||