diff options
-rw-r--r-- | arch/powerpc/lib/copypage_64.S | 198 |
1 files changed, 93 insertions, 105 deletions
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index f9837f44ac0b..75f3267fdc30 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2002 Paul Mackerras, IBM Corp. | 2 | * Copyright (C) 2008 Mark Nelson, IBM Corp. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public License | 5 | * modify it under the terms of the GNU General Public License |
@@ -8,112 +8,100 @@ | |||
8 | */ | 8 | */ |
9 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
10 | #include <asm/ppc_asm.h> | 10 | #include <asm/ppc_asm.h> |
11 | #include <asm/asm-offsets.h> | ||
12 | |||
13 | .section ".toc","aw" | ||
14 | PPC64_CACHES: | ||
15 | .tc ppc64_caches[TC],ppc64_caches | ||
16 | .section ".text" | ||
17 | |||
11 | 18 | ||
12 | _GLOBAL(copy_4K_page) | 19 | _GLOBAL(copy_4K_page) |
13 | std r31,-8(1) | 20 | li r5,4096 /* 4K page size */ |
14 | std r30,-16(1) | 21 | BEGIN_FTR_SECTION |
15 | std r29,-24(1) | 22 | ld r10,PPC64_CACHES@toc(r2) |
16 | std r28,-32(1) | 23 | lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */ |
17 | std r27,-40(1) | 24 | lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */ |
18 | std r26,-48(1) | 25 | li r9,0 |
19 | std r25,-56(1) | 26 | srd r8,r5,r11 |
20 | std r24,-64(1) | 27 | |
21 | std r23,-72(1) | 28 | mtctr r8 |
22 | std r22,-80(1) | 29 | setup: |
23 | std r21,-88(1) | 30 | dcbt r9,r4 |
24 | std r20,-96(1) | 31 | dcbz r9,r3 |
25 | li r5,4096/32 - 1 | 32 | add r9,r9,r12 |
33 | bdnz setup | ||
34 | END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) | ||
26 | addi r3,r3,-8 | 35 | addi r3,r3,-8 |
27 | li r12,5 | 36 | srdi r8,r5,7 /* page is copied in 128 byte strides */ |
28 | 0: addi r5,r5,-24 | 37 | addi r8,r8,-1 /* one stride copied outside loop */ |
29 | mtctr r12 | 38 | |
30 | ld r22,640(4) | 39 | mtctr r8 |
31 | ld r21,512(4) | 40 | |
32 | ld r20,384(4) | 41 | ld r5,0(r4) |
33 | ld r11,256(4) | 42 | ld r6,8(r4) |
34 | ld r9,128(4) | 43 | ld r7,16(r4) |
35 | ld r7,0(4) | 44 | ldu r8,24(r4) |
36 | ld r25,648(4) | 45 | 1: std r5,8(r3) |
37 | ld r24,520(4) | 46 | ld r9,8(r4) |
38 | ld r23,392(4) | 47 | std r6,16(r3) |
39 | ld r10,264(4) | 48 | ld r10,16(r4) |
40 | ld r8,136(4) | 49 | std r7,24(r3) |
41 | ldu r6,8(4) | 50 | ld r11,24(r4) |
42 | cmpwi r5,24 | 51 | std r8,32(r3) |
43 | 1: std r22,648(3) | 52 | ld r12,32(r4) |
44 | std r21,520(3) | 53 | std r9,40(r3) |
45 | std r20,392(3) | 54 | ld r5,40(r4) |
46 | std r11,264(3) | 55 | std r10,48(r3) |
47 | std r9,136(3) | 56 | ld r6,48(r4) |
48 | std r7,8(3) | 57 | std r11,56(r3) |
49 | ld r28,648(4) | 58 | ld r7,56(r4) |
50 | ld r27,520(4) | 59 | std r12,64(r3) |
51 | ld r26,392(4) | 60 | ld r8,64(r4) |
52 | ld r31,264(4) | 61 | std r5,72(r3) |
53 | ld r30,136(4) | 62 | ld r9,72(r4) |
54 | ld r29,8(4) | 63 | std r6,80(r3) |
55 | std r25,656(3) | 64 | ld r10,80(r4) |
56 | std r24,528(3) | 65 | std r7,88(r3) |
57 | std r23,400(3) | 66 | ld r11,88(r4) |
58 | std r10,272(3) | 67 | std r8,96(r3) |
59 | std r8,144(3) | 68 | ld r12,96(r4) |
60 | std r6,16(3) | 69 | std r9,104(r3) |
61 | ld r22,656(4) | 70 | ld r5,104(r4) |
62 | ld r21,528(4) | 71 | std r10,112(r3) |
63 | ld r20,400(4) | 72 | ld r6,112(r4) |
64 | ld r11,272(4) | 73 | std r11,120(r3) |
65 | ld r9,144(4) | 74 | ld r7,120(r4) |
66 | ld r7,16(4) | 75 | stdu r12,128(r3) |
67 | std r28,664(3) | 76 | ldu r8,128(r4) |
68 | std r27,536(3) | ||
69 | std r26,408(3) | ||
70 | std r31,280(3) | ||
71 | std r30,152(3) | ||
72 | stdu r29,24(3) | ||
73 | ld r25,664(4) | ||
74 | ld r24,536(4) | ||
75 | ld r23,408(4) | ||
76 | ld r10,280(4) | ||
77 | ld r8,152(4) | ||
78 | ldu r6,24(4) | ||
79 | bdnz 1b | 77 | bdnz 1b |
80 | std r22,648(3) | 78 | |
81 | std r21,520(3) | 79 | std r5,8(r3) |
82 | std r20,392(3) | 80 | ld r9,8(r4) |
83 | std r11,264(3) | 81 | std r6,16(r3) |
84 | std r9,136(3) | 82 | ld r10,16(r4) |
85 | std r7,8(3) | 83 | std r7,24(r3) |
86 | addi r4,r4,640 | 84 | ld r11,24(r4) |
87 | addi r3,r3,648 | 85 | std r8,32(r3) |
88 | bge 0b | 86 | ld r12,32(r4) |
89 | mtctr r5 | 87 | std r9,40(r3) |
90 | ld r7,0(4) | 88 | ld r5,40(r4) |
91 | ld r8,8(4) | 89 | std r10,48(r3) |
92 | ldu r9,16(4) | 90 | ld r6,48(r4) |
93 | 3: ld r10,8(4) | 91 | std r11,56(r3) |
94 | std r7,8(3) | 92 | ld r7,56(r4) |
95 | ld r7,16(4) | 93 | std r12,64(r3) |
96 | std r8,16(3) | 94 | ld r8,64(r4) |
97 | ld r8,24(4) | 95 | std r5,72(r3) |
98 | std r9,24(3) | 96 | ld r9,72(r4) |
99 | ldu r9,32(4) | 97 | std r6,80(r3) |
100 | stdu r10,32(3) | 98 | ld r10,80(r4) |
101 | bdnz 3b | 99 | std r7,88(r3) |
102 | 4: ld r10,8(4) | 100 | ld r11,88(r4) |
103 | std r7,8(3) | 101 | std r8,96(r3) |
104 | std r8,16(3) | 102 | ld r12,96(r4) |
105 | std r9,24(3) | 103 | std r9,104(r3) |
106 | std r10,32(3) | 104 | std r10,112(r3) |
107 | 9: ld r20,-96(1) | 105 | std r11,120(r3) |
108 | ld r21,-88(1) | 106 | std r12,128(r3) |
109 | ld r22,-80(1) | ||
110 | ld r23,-72(1) | ||
111 | ld r24,-64(1) | ||
112 | ld r25,-56(1) | ||
113 | ld r26,-48(1) | ||
114 | ld r27,-40(1) | ||
115 | ld r28,-32(1) | ||
116 | ld r29,-24(1) | ||
117 | ld r30,-16(1) | ||
118 | ld r31,-8(1) | ||
119 | blr | 107 | blr |