aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/lib/copyuser_power7.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/lib/copyuser_power7.S')
-rw-r--r--arch/powerpc/lib/copyuser_power7.S157
1 files changed, 108 insertions, 49 deletions
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 497db7b23bb1..f9ede7c6606e 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -19,9 +19,6 @@
19 */ 19 */
20#include <asm/ppc_asm.h> 20#include <asm/ppc_asm.h>
21 21
22#define STACKFRAMESIZE 256
23#define STK_REG(i) (112 + ((i)-14)*8)
24
25 .macro err1 22 .macro err1
26100: 23100:
27 .section __ex_table,"a" 24 .section __ex_table,"a"
@@ -57,26 +54,26 @@
57 54
58 55
59.Ldo_err4: 56.Ldo_err4:
60 ld r16,STK_REG(r16)(r1) 57 ld r16,STK_REG(R16)(r1)
61 ld r15,STK_REG(r15)(r1) 58 ld r15,STK_REG(R15)(r1)
62 ld r14,STK_REG(r14)(r1) 59 ld r14,STK_REG(R14)(r1)
63.Ldo_err3: 60.Ldo_err3:
64 bl .exit_vmx_copy 61 bl .exit_vmx_usercopy
65 ld r0,STACKFRAMESIZE+16(r1) 62 ld r0,STACKFRAMESIZE+16(r1)
66 mtlr r0 63 mtlr r0
67 b .Lexit 64 b .Lexit
68#endif /* CONFIG_ALTIVEC */ 65#endif /* CONFIG_ALTIVEC */
69 66
70.Ldo_err2: 67.Ldo_err2:
71 ld r22,STK_REG(r22)(r1) 68 ld r22,STK_REG(R22)(r1)
72 ld r21,STK_REG(r21)(r1) 69 ld r21,STK_REG(R21)(r1)
73 ld r20,STK_REG(r20)(r1) 70 ld r20,STK_REG(R20)(r1)
74 ld r19,STK_REG(r19)(r1) 71 ld r19,STK_REG(R19)(r1)
75 ld r18,STK_REG(r18)(r1) 72 ld r18,STK_REG(R18)(r1)
76 ld r17,STK_REG(r17)(r1) 73 ld r17,STK_REG(R17)(r1)
77 ld r16,STK_REG(r16)(r1) 74 ld r16,STK_REG(R16)(r1)
78 ld r15,STK_REG(r15)(r1) 75 ld r15,STK_REG(R15)(r1)
79 ld r14,STK_REG(r14)(r1) 76 ld r14,STK_REG(R14)(r1)
80.Lexit: 77.Lexit:
81 addi r1,r1,STACKFRAMESIZE 78 addi r1,r1,STACKFRAMESIZE
82.Ldo_err1: 79.Ldo_err1:
@@ -137,15 +134,15 @@ err1; stw r0,0(r3)
137 134
138 mflr r0 135 mflr r0
139 stdu r1,-STACKFRAMESIZE(r1) 136 stdu r1,-STACKFRAMESIZE(r1)
140 std r14,STK_REG(r14)(r1) 137 std r14,STK_REG(R14)(r1)
141 std r15,STK_REG(r15)(r1) 138 std r15,STK_REG(R15)(r1)
142 std r16,STK_REG(r16)(r1) 139 std r16,STK_REG(R16)(r1)
143 std r17,STK_REG(r17)(r1) 140 std r17,STK_REG(R17)(r1)
144 std r18,STK_REG(r18)(r1) 141 std r18,STK_REG(R18)(r1)
145 std r19,STK_REG(r19)(r1) 142 std r19,STK_REG(R19)(r1)
146 std r20,STK_REG(r20)(r1) 143 std r20,STK_REG(R20)(r1)
147 std r21,STK_REG(r21)(r1) 144 std r21,STK_REG(R21)(r1)
148 std r22,STK_REG(r22)(r1) 145 std r22,STK_REG(R22)(r1)
149 std r0,STACKFRAMESIZE+16(r1) 146 std r0,STACKFRAMESIZE+16(r1)
150 147
151 srdi r6,r5,7 148 srdi r6,r5,7
@@ -192,15 +189,15 @@ err2; std r21,120(r3)
192 189
193 clrldi r5,r5,(64-7) 190 clrldi r5,r5,(64-7)
194 191
195 ld r14,STK_REG(r14)(r1) 192 ld r14,STK_REG(R14)(r1)
196 ld r15,STK_REG(r15)(r1) 193 ld r15,STK_REG(R15)(r1)
197 ld r16,STK_REG(r16)(r1) 194 ld r16,STK_REG(R16)(r1)
198 ld r17,STK_REG(r17)(r1) 195 ld r17,STK_REG(R17)(r1)
199 ld r18,STK_REG(r18)(r1) 196 ld r18,STK_REG(R18)(r1)
200 ld r19,STK_REG(r19)(r1) 197 ld r19,STK_REG(R19)(r1)
201 ld r20,STK_REG(r20)(r1) 198 ld r20,STK_REG(R20)(r1)
202 ld r21,STK_REG(r21)(r1) 199 ld r21,STK_REG(R21)(r1)
203 ld r22,STK_REG(r22)(r1) 200 ld r22,STK_REG(R22)(r1)
204 addi r1,r1,STACKFRAMESIZE 201 addi r1,r1,STACKFRAMESIZE
205 202
206 /* Up to 127B to go */ 203 /* Up to 127B to go */
@@ -290,7 +287,7 @@ err1; stb r0,0(r3)
290 mflr r0 287 mflr r0
291 std r0,16(r1) 288 std r0,16(r1)
292 stdu r1,-STACKFRAMESIZE(r1) 289 stdu r1,-STACKFRAMESIZE(r1)
293 bl .enter_vmx_copy 290 bl .enter_vmx_usercopy
294 cmpwi r3,0 291 cmpwi r3,0
295 ld r0,STACKFRAMESIZE+16(r1) 292 ld r0,STACKFRAMESIZE+16(r1)
296 ld r3,STACKFRAMESIZE+48(r1) 293 ld r3,STACKFRAMESIZE+48(r1)
@@ -298,6 +295,68 @@ err1; stb r0,0(r3)
298 ld r5,STACKFRAMESIZE+64(r1) 295 ld r5,STACKFRAMESIZE+64(r1)
299 mtlr r0 296 mtlr r0
300 297
298 /*
299 * We prefetch both the source and destination using enhanced touch
300 * instructions. We use a stream ID of 0 for the load side and
301 * 1 for the store side.
302 */
303 clrrdi r6,r4,7
304 clrrdi r9,r3,7
305 ori r9,r9,1 /* stream=1 */
306
307 srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
308 cmpldi cr1,r7,0x3FF /* cr1: keep cr0 from cmpwi r3,0 for the beq below */
309 ble cr1,1f
310 li r7,0x3FF
3111: lis r0,0x0E00 /* depth=7 */
312 sldi r7,r7,7
313 or r7,r7,r0
314 ori r10,r7,1 /* stream=1 */
315
316 lis r8,0x8000 /* GO=1 */
317 clrldi r8,r8,32
318
319.machine push
320.machine "power4"
321 dcbt r0,r6,0b01000
322 dcbt r0,r7,0b01010
323 dcbtst r0,r9,0b01000
324 dcbtst r0,r10,0b01010
325 eieio
326 dcbt r0,r8,0b01010 /* GO */
327.machine pop
328
329 /*
330 * We prefetch both the source and destination using enhanced touch
331 * instructions. We use a stream ID of 0 for the load side and
332 * 1 for the store side.
333 */
334 clrrdi r6,r4,7
335 clrrdi r9,r3,7
336 ori r9,r9,1 /* stream=1 */
337
338 srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
339 cmpldi cr1,r7,0x3FF
340 ble cr1,1f
341 li r7,0x3FF
3421: lis r0,0x0E00 /* depth=7 */
343 sldi r7,r7,7
344 or r7,r7,r0
345 ori r10,r7,1 /* stream=1 */
346
347 lis r8,0x8000 /* GO=1 */
348 clrldi r8,r8,32
349
350.machine push
351.machine "power4"
352 dcbt r0,r6,0b01000
353 dcbt r0,r7,0b01010
354 dcbtst r0,r9,0b01000
355 dcbtst r0,r10,0b01010
356 eieio
357 dcbt r0,r8,0b01010 /* GO */
358.machine pop
359
301 beq .Lunwind_stack_nonvmx_copy 360 beq .Lunwind_stack_nonvmx_copy
302 361
303 /* 362 /*
@@ -378,9 +437,9 @@ err3; stvx vr0,r3,r11
3787: sub r5,r5,r6 4377: sub r5,r5,r6
379 srdi r6,r5,7 438 srdi r6,r5,7
380 439
381 std r14,STK_REG(r14)(r1) 440 std r14,STK_REG(R14)(r1)
382 std r15,STK_REG(r15)(r1) 441 std r15,STK_REG(R15)(r1)
383 std r16,STK_REG(r16)(r1) 442 std r16,STK_REG(R16)(r1)
384 443
385 li r12,64 444 li r12,64
386 li r14,80 445 li r14,80
@@ -415,9 +474,9 @@ err4; stvx vr0,r3,r16
415 addi r3,r3,128 474 addi r3,r3,128
416 bdnz 8b 475 bdnz 8b
417 476
418 ld r14,STK_REG(r14)(r1) 477 ld r14,STK_REG(R14)(r1)
419 ld r15,STK_REG(r15)(r1) 478 ld r15,STK_REG(R15)(r1)
420 ld r16,STK_REG(r16)(r1) 479 ld r16,STK_REG(R16)(r1)
421 480
422 /* Up to 127B to go */ 481 /* Up to 127B to go */
423 clrldi r5,r5,(64-7) 482 clrldi r5,r5,(64-7)
@@ -476,7 +535,7 @@ err3; lbz r0,0(r4)
476err3; stb r0,0(r3) 535err3; stb r0,0(r3)
477 536
47815: addi r1,r1,STACKFRAMESIZE 53715: addi r1,r1,STACKFRAMESIZE
479 b .exit_vmx_copy /* tail call optimise */ 538 b .exit_vmx_usercopy /* tail call optimise */
480 539
481.Lvmx_unaligned_copy: 540.Lvmx_unaligned_copy:
482 /* Get the destination 16B aligned */ 541 /* Get the destination 16B aligned */
@@ -563,9 +622,9 @@ err3; stvx vr11,r3,r11
5637: sub r5,r5,r6 6227: sub r5,r5,r6
564 srdi r6,r5,7 623 srdi r6,r5,7
565 624
566 std r14,STK_REG(r14)(r1) 625 std r14,STK_REG(R14)(r1)
567 std r15,STK_REG(r15)(r1) 626 std r15,STK_REG(R15)(r1)
568 std r16,STK_REG(r16)(r1) 627 std r16,STK_REG(R16)(r1)
569 628
570 li r12,64 629 li r12,64
571 li r14,80 630 li r14,80
@@ -608,9 +667,9 @@ err4; stvx vr15,r3,r16
608 addi r3,r3,128 667 addi r3,r3,128
609 bdnz 8b 668 bdnz 8b
610 669
611 ld r14,STK_REG(r14)(r1) 670 ld r14,STK_REG(R14)(r1)
612 ld r15,STK_REG(r15)(r1) 671 ld r15,STK_REG(R15)(r1)
613 ld r16,STK_REG(r16)(r1) 672 ld r16,STK_REG(R16)(r1)
614 673
615 /* Up to 127B to go */ 674 /* Up to 127B to go */
616 clrldi r5,r5,(64-7) 675 clrldi r5,r5,(64-7)
@@ -679,5 +738,5 @@ err3; lbz r0,0(r4)
679err3; stb r0,0(r3) 738err3; stb r0,0(r3)
680 739
68115: addi r1,r1,STACKFRAMESIZE 74015: addi r1,r1,STACKFRAMESIZE
682 b .exit_vmx_copy /* tail call optimise */ 741 b .exit_vmx_usercopy /* tail call optimise */
683#endif /* CONFIG_ALTIVEC */ 742#endif /* CONFIG_ALTIVEC */