diff options
Diffstat (limited to 'arch/powerpc/lib/copyuser_power7.S')
-rw-r--r-- | arch/powerpc/lib/copyuser_power7.S | 157 |
1 files changed, 108 insertions, 49 deletions
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 497db7b23bb1..f9ede7c6606e 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S | |||
@@ -19,9 +19,6 @@ | |||
19 | */ | 19 | */ |
20 | #include <asm/ppc_asm.h> | 20 | #include <asm/ppc_asm.h> |
21 | 21 | ||
22 | #define STACKFRAMESIZE 256 | ||
23 | #define STK_REG(i) (112 + ((i)-14)*8) | ||
24 | |||
25 | .macro err1 | 22 | .macro err1 |
26 | 100: | 23 | 100: |
27 | .section __ex_table,"a" | 24 | .section __ex_table,"a" |
@@ -57,26 +54,26 @@ | |||
57 | 54 | ||
58 | 55 | ||
59 | .Ldo_err4: | 56 | .Ldo_err4: |
60 | ld r16,STK_REG(r16)(r1) | 57 | ld r16,STK_REG(R16)(r1) |
61 | ld r15,STK_REG(r15)(r1) | 58 | ld r15,STK_REG(R15)(r1) |
62 | ld r14,STK_REG(r14)(r1) | 59 | ld r14,STK_REG(R14)(r1) |
63 | .Ldo_err3: | 60 | .Ldo_err3: |
64 | bl .exit_vmx_copy | 61 | bl .exit_vmx_usercopy |
65 | ld r0,STACKFRAMESIZE+16(r1) | 62 | ld r0,STACKFRAMESIZE+16(r1) |
66 | mtlr r0 | 63 | mtlr r0 |
67 | b .Lexit | 64 | b .Lexit |
68 | #endif /* CONFIG_ALTIVEC */ | 65 | #endif /* CONFIG_ALTIVEC */ |
69 | 66 | ||
70 | .Ldo_err2: | 67 | .Ldo_err2: |
71 | ld r22,STK_REG(r22)(r1) | 68 | ld r22,STK_REG(R22)(r1) |
72 | ld r21,STK_REG(r21)(r1) | 69 | ld r21,STK_REG(R21)(r1) |
73 | ld r20,STK_REG(r20)(r1) | 70 | ld r20,STK_REG(R20)(r1) |
74 | ld r19,STK_REG(r19)(r1) | 71 | ld r19,STK_REG(R19)(r1) |
75 | ld r18,STK_REG(r18)(r1) | 72 | ld r18,STK_REG(R18)(r1) |
76 | ld r17,STK_REG(r17)(r1) | 73 | ld r17,STK_REG(R17)(r1) |
77 | ld r16,STK_REG(r16)(r1) | 74 | ld r16,STK_REG(R16)(r1) |
78 | ld r15,STK_REG(r15)(r1) | 75 | ld r15,STK_REG(R15)(r1) |
79 | ld r14,STK_REG(r14)(r1) | 76 | ld r14,STK_REG(R14)(r1) |
80 | .Lexit: | 77 | .Lexit: |
81 | addi r1,r1,STACKFRAMESIZE | 78 | addi r1,r1,STACKFRAMESIZE |
82 | .Ldo_err1: | 79 | .Ldo_err1: |
@@ -137,15 +134,15 @@ err1; stw r0,0(r3) | |||
137 | 134 | ||
138 | mflr r0 | 135 | mflr r0 |
139 | stdu r1,-STACKFRAMESIZE(r1) | 136 | stdu r1,-STACKFRAMESIZE(r1) |
140 | std r14,STK_REG(r14)(r1) | 137 | std r14,STK_REG(R14)(r1) |
141 | std r15,STK_REG(r15)(r1) | 138 | std r15,STK_REG(R15)(r1) |
142 | std r16,STK_REG(r16)(r1) | 139 | std r16,STK_REG(R16)(r1) |
143 | std r17,STK_REG(r17)(r1) | 140 | std r17,STK_REG(R17)(r1) |
144 | std r18,STK_REG(r18)(r1) | 141 | std r18,STK_REG(R18)(r1) |
145 | std r19,STK_REG(r19)(r1) | 142 | std r19,STK_REG(R19)(r1) |
146 | std r20,STK_REG(r20)(r1) | 143 | std r20,STK_REG(R20)(r1) |
147 | std r21,STK_REG(r21)(r1) | 144 | std r21,STK_REG(R21)(r1) |
148 | std r22,STK_REG(r22)(r1) | 145 | std r22,STK_REG(R22)(r1) |
149 | std r0,STACKFRAMESIZE+16(r1) | 146 | std r0,STACKFRAMESIZE+16(r1) |
150 | 147 | ||
151 | srdi r6,r5,7 | 148 | srdi r6,r5,7 |
@@ -192,15 +189,15 @@ err2; std r21,120(r3) | |||
192 | 189 | ||
193 | clrldi r5,r5,(64-7) | 190 | clrldi r5,r5,(64-7) |
194 | 191 | ||
195 | ld r14,STK_REG(r14)(r1) | 192 | ld r14,STK_REG(R14)(r1) |
196 | ld r15,STK_REG(r15)(r1) | 193 | ld r15,STK_REG(R15)(r1) |
197 | ld r16,STK_REG(r16)(r1) | 194 | ld r16,STK_REG(R16)(r1) |
198 | ld r17,STK_REG(r17)(r1) | 195 | ld r17,STK_REG(R17)(r1) |
199 | ld r18,STK_REG(r18)(r1) | 196 | ld r18,STK_REG(R18)(r1) |
200 | ld r19,STK_REG(r19)(r1) | 197 | ld r19,STK_REG(R19)(r1) |
201 | ld r20,STK_REG(r20)(r1) | 198 | ld r20,STK_REG(R20)(r1) |
202 | ld r21,STK_REG(r21)(r1) | 199 | ld r21,STK_REG(R21)(r1) |
203 | ld r22,STK_REG(r22)(r1) | 200 | ld r22,STK_REG(R22)(r1) |
204 | addi r1,r1,STACKFRAMESIZE | 201 | addi r1,r1,STACKFRAMESIZE |
205 | 202 | ||
206 | /* Up to 127B to go */ | 203 | /* Up to 127B to go */ |
@@ -290,7 +287,7 @@ err1; stb r0,0(r3) | |||
290 | mflr r0 | 287 | mflr r0 |
291 | std r0,16(r1) | 288 | std r0,16(r1) |
292 | stdu r1,-STACKFRAMESIZE(r1) | 289 | stdu r1,-STACKFRAMESIZE(r1) |
293 | bl .enter_vmx_copy | 290 | bl .enter_vmx_usercopy |
294 | cmpwi r3,0 | 291 | cmpwi r3,0 |
295 | ld r0,STACKFRAMESIZE+16(r1) | 292 | ld r0,STACKFRAMESIZE+16(r1) |
296 | ld r3,STACKFRAMESIZE+48(r1) | 293 | ld r3,STACKFRAMESIZE+48(r1) |
@@ -298,6 +295,68 @@ err1; stb r0,0(r3) | |||
298 | ld r5,STACKFRAMESIZE+64(r1) | 295 | ld r5,STACKFRAMESIZE+64(r1) |
299 | mtlr r0 | 296 | mtlr r0 |
300 | 297 | ||
298 | /* | ||
299 | * We prefetch both the source and destination using enhanced touch | ||
300 | * instructions. We use a stream ID of 0 for the load side and | ||
301 | * 1 for the store side. cr0 must survive until the beq below. | ||
302 | */ | ||
303 | clrrdi r6,r4,7 | ||
304 | clrrdi r9,r3,7 | ||
305 | ori r9,r9,1 /* stream=1 */ | ||
306 | |||
307 | srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */ | ||
308 | cmpldi cr1,r7,0x3FF /* cr1: cr0 holds the cmpwi r3,0 result */ | ||
309 | ble cr1,1f | ||
310 | li r7,0x3FF | ||
311 | 1: lis r0,0x0E00 /* depth=7 */ | ||
312 | sldi r7,r7,7 | ||
313 | or r7,r7,r0 | ||
314 | ori r10,r7,1 /* stream=1 */ | ||
315 | |||
316 | lis r8,0x8000 /* GO=1 */ | ||
317 | clrldi r8,r8,32 | ||
318 | |||
319 | .machine push | ||
320 | .machine "power4" | ||
321 | dcbt r0,r6,0b01000 | ||
322 | dcbt r0,r7,0b01010 | ||
323 | dcbtst r0,r9,0b01000 | ||
324 | dcbtst r0,r10,0b01010 | ||
325 | eieio | ||
326 | dcbt r0,r8,0b01010 /* GO */ | ||
327 | .machine pop | ||
328 | |||
329 | /* | ||
330 | * NOTE(review): this repeats the enhanced touch prefetch setup directly | ||
331 | * above (stream ID 0 for the load side, 1 for the store side). It looks | ||
332 | * like a merge artifact; confirm and keep only one copy. | ||
333 | */ | ||
334 | clrrdi r6,r4,7 | ||
335 | clrrdi r9,r3,7 | ||
336 | ori r9,r9,1 /* stream=1 */ | ||
337 | |||
338 | srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */ | ||
339 | cmpldi cr1,r7,0x3FF | ||
340 | ble cr1,1f | ||
341 | li r7,0x3FF | ||
342 | 1: lis r0,0x0E00 /* depth=7 */ | ||
343 | sldi r7,r7,7 | ||
344 | or r7,r7,r0 | ||
345 | ori r10,r7,1 /* stream=1 */ | ||
346 | |||
347 | lis r8,0x8000 /* GO=1 */ | ||
348 | clrldi r8,r8,32 | ||
349 | |||
350 | .machine push | ||
351 | .machine "power4" | ||
352 | dcbt r0,r6,0b01000 | ||
353 | dcbt r0,r7,0b01010 | ||
354 | dcbtst r0,r9,0b01000 | ||
355 | dcbtst r0,r10,0b01010 | ||
356 | eieio | ||
357 | dcbt r0,r8,0b01010 /* GO */ | ||
358 | .machine pop | ||
359 | |||
301 | beq .Lunwind_stack_nonvmx_copy | 360 | beq .Lunwind_stack_nonvmx_copy |
302 | 361 | ||
303 | /* | 362 | /* |
@@ -378,9 +437,9 @@ err3; stvx vr0,r3,r11 | |||
378 | 7: sub r5,r5,r6 | 437 | 7: sub r5,r5,r6 |
379 | srdi r6,r5,7 | 438 | srdi r6,r5,7 |
380 | 439 | ||
381 | std r14,STK_REG(r14)(r1) | 440 | std r14,STK_REG(R14)(r1) |
382 | std r15,STK_REG(r15)(r1) | 441 | std r15,STK_REG(R15)(r1) |
383 | std r16,STK_REG(r16)(r1) | 442 | std r16,STK_REG(R16)(r1) |
384 | 443 | ||
385 | li r12,64 | 444 | li r12,64 |
386 | li r14,80 | 445 | li r14,80 |
@@ -415,9 +474,9 @@ err4; stvx vr0,r3,r16 | |||
415 | addi r3,r3,128 | 474 | addi r3,r3,128 |
416 | bdnz 8b | 475 | bdnz 8b |
417 | 476 | ||
418 | ld r14,STK_REG(r14)(r1) | 477 | ld r14,STK_REG(R14)(r1) |
419 | ld r15,STK_REG(r15)(r1) | 478 | ld r15,STK_REG(R15)(r1) |
420 | ld r16,STK_REG(r16)(r1) | 479 | ld r16,STK_REG(R16)(r1) |
421 | 480 | ||
422 | /* Up to 127B to go */ | 481 | /* Up to 127B to go */ |
423 | clrldi r5,r5,(64-7) | 482 | clrldi r5,r5,(64-7) |
@@ -476,7 +535,7 @@ err3; lbz r0,0(r4) | |||
476 | err3; stb r0,0(r3) | 535 | err3; stb r0,0(r3) |
477 | 536 | ||
478 | 15: addi r1,r1,STACKFRAMESIZE | 537 | 15: addi r1,r1,STACKFRAMESIZE |
479 | b .exit_vmx_copy /* tail call optimise */ | 538 | b .exit_vmx_usercopy /* tail call optimise */ |
480 | 539 | ||
481 | .Lvmx_unaligned_copy: | 540 | .Lvmx_unaligned_copy: |
482 | /* Get the destination 16B aligned */ | 541 | /* Get the destination 16B aligned */ |
@@ -563,9 +622,9 @@ err3; stvx vr11,r3,r11 | |||
563 | 7: sub r5,r5,r6 | 622 | 7: sub r5,r5,r6 |
564 | srdi r6,r5,7 | 623 | srdi r6,r5,7 |
565 | 624 | ||
566 | std r14,STK_REG(r14)(r1) | 625 | std r14,STK_REG(R14)(r1) |
567 | std r15,STK_REG(r15)(r1) | 626 | std r15,STK_REG(R15)(r1) |
568 | std r16,STK_REG(r16)(r1) | 627 | std r16,STK_REG(R16)(r1) |
569 | 628 | ||
570 | li r12,64 | 629 | li r12,64 |
571 | li r14,80 | 630 | li r14,80 |
@@ -608,9 +667,9 @@ err4; stvx vr15,r3,r16 | |||
608 | addi r3,r3,128 | 667 | addi r3,r3,128 |
609 | bdnz 8b | 668 | bdnz 8b |
610 | 669 | ||
611 | ld r14,STK_REG(r14)(r1) | 670 | ld r14,STK_REG(R14)(r1) |
612 | ld r15,STK_REG(r15)(r1) | 671 | ld r15,STK_REG(R15)(r1) |
613 | ld r16,STK_REG(r16)(r1) | 672 | ld r16,STK_REG(R16)(r1) |
614 | 673 | ||
615 | /* Up to 127B to go */ | 674 | /* Up to 127B to go */ |
616 | clrldi r5,r5,(64-7) | 675 | clrldi r5,r5,(64-7) |
@@ -679,5 +738,5 @@ err3; lbz r0,0(r4) | |||
679 | err3; stb r0,0(r3) | 738 | err3; stb r0,0(r3) |
680 | 739 | ||
681 | 15: addi r1,r1,STACKFRAMESIZE | 740 | 15: addi r1,r1,STACKFRAMESIZE |
682 | b .exit_vmx_copy /* tail call optimise */ | 741 | b .exit_vmx_usercopy /* tail call optimise */ |
683 | #endif /* CONFIG_ALTIVEC */ | 742 | #endif /* CONFIG_ALTIVEC */ |