Diffstat (limited to 'arch/powerpc/lib')

 -rw-r--r--  arch/powerpc/lib/Makefile                   7
 -rw-r--r--  arch/powerpc/lib/alloc.c                    8
 -rw-r--r--  arch/powerpc/lib/checksum_64.S            482
 -rw-r--r--  arch/powerpc/lib/checksum_wrappers_64.c   102
 -rw-r--r--  arch/powerpc/lib/copy_32.S                  2
 -rw-r--r--  arch/powerpc/lib/copypage_64.S              7
 -rw-r--r--  arch/powerpc/lib/devres.c                   6
 -rw-r--r--  arch/powerpc/lib/feature-fixups-test.S     19
 -rw-r--r--  arch/powerpc/lib/hweight_64.S             110
 -rw-r--r--  arch/powerpc/lib/ldstfp.S                  36
 -rw-r--r--  arch/powerpc/lib/locks.c                    4
 -rw-r--r--  arch/powerpc/lib/sstep.c                   70

 12 files changed, 655 insertions, 198 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5bb89c828070..166a6a0ad544 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -4,9 +4,7 @@
 
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS		+= -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
 
 CFLAGS_REMOVE_code-patching.o = -pg
 CFLAGS_REMOVE_feature-fixups.o = -pg
@@ -17,7 +15,8 @@ obj-$(CONFIG_PPC32)	+= div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o mem_64.o string.o
+			   memcpy_64.o usercopy_64.o mem_64.o string.o \
+			   checksum_wrappers_64.o hweight_64.o
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index f53e09c7dac7..13b676c20d12 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -6,14 +6,6 @@
 
 #include <asm/system.h>
 
-void * __init_refok alloc_maybe_bootmem(size_t size, gfp_t mask)
-{
-	if (mem_init_done)
-		return kmalloc(size, mask);
-	else
-		return alloc_bootmem(size);
-}
-
 void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
 {
 	void *p;
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index ef96c6c58efc..18245af38aea 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -65,165 +65,393 @@ _GLOBAL(csum_tcpudp_magic)
 	srwi	r3,r3,16
 	blr
 
+#define STACKFRAMESIZE 256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
 /*
  * Computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit).
  *
- * This code assumes at least halfword alignment, though the length
- * can be any number of bytes.  The sum is accumulated in r5.
- *
  * csum_partial(r3=buff, r4=len, r5=sum)
  */
 _GLOBAL(csum_partial)
-	subi	r3,r3,8		/* we'll offset by 8 for the loads */
-	srdi.	r6,r4,3		/* divide by 8 for doubleword count */
-	addic	r5,r5,0		/* clear carry */
-	beq	3f		/* if we're doing < 8 bytes */
-	andi.	r0,r3,2		/* aligned on a word boundary already? */
-	beq+	1f
-	lhz	r6,8(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
-	subi	r4,r4,2
-	addc	r5,r5,r6
-	srdi.	r6,r4,3		/* recompute number of doublewords */
-	beq	3f		/* any left? */
-1:	mtctr	r6
-2:	ldu	r6,8(r3)	/* main sum loop */
-	adde	r5,r5,r6
-	bdnz	2b
-	andi.	r4,r4,7		/* compute bytes left to sum after doublewords */
-3:	cmpwi	0,r4,4		/* is at least a full word left? */
-	blt	4f
-	lwz	r6,8(r3)	/* sum this word */
+	addic	r0,r5,0			/* clear carry */
+
+	srdi.	r6,r4,3			/* less than 8 bytes? */
+	beq	.Lcsum_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	beq	.Lcsum_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+	lhz	r6,0(r3)		/* align to doubleword */
+	subi	r4,r4,2
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	bdnz	1b
+
+.Lcsum_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r4,7
+	beq	.Lcsum_tail_doublewords		/* len < 128 */
+
+	srdi	r6,r4,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
+	 * the XER dependency. This means the fastest this loop can go is
+	 * 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+
+	adde	r0,r0,r11
+
+	adde	r0,r0,r12
+
+	adde	r0,r0,r14
+
+	adde	r0,r0,r15
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+	bdnz	2b
+
+
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+	adde	r0,r0,r11
+	adde	r0,r0,r12
+	adde	r0,r0,r14
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r4,r4,63
+
+.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r4,3
+	beq	.Lcsum_tail_word
+
+	mtctr	r6
+3:
+	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+	bdnz	3b
+
+	andi.	r4,r4,7
+
+.Lcsum_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r4,2
+	beq	.Lcsum_tail_halfword
+
+	lwz	r6,0(r3)
 	addi	r3,r3,4
+	adde	r0,r0,r6
 	subi	r4,r4,4
-	adde	r5,r5,r6
-4:	cmpwi	0,r4,2		/* is at least a halfword left? */
-	blt+	5f
-	lhz	r6,8(r3)	/* sum this halfword */
-	addi	r3,r3,2
-	subi	r4,r4,2
-	adde	r5,r5,r6
-5:	cmpwi	0,r4,1		/* is at least a byte left? */
-	bne+	6f
-	lbz	r6,8(r3)	/* sum this byte */
-	slwi	r6,r6,8		/* this byte is assumed to be the upper byte of a halfword */
-	adde	r5,r5,r6
-6:	addze	r5,r5		/* add in final carry */
-	rldicl	r4,r5,32,0	/* fold two 32-bit halves together */
-	add	r3,r4,r5
-	srdi	r3,r3,32
-	blr
+
+.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r4,1
+	beq	.Lcsum_tail_byte
+
+	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	subi	r4,r4,2
+
+.Lcsum_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r4,1
+	beq	.Lcsum_finish
+
+	lbz	r6,0(r3)
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+
+.Lcsum_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+
+	.macro source
+100:
+	.section __ex_table,"a"
+	.align 3
+	.llong 100b,.Lsrc_error
+	.previous
+	.endm
+
+	.macro dest
+200:
+	.section __ex_table,"a"
+	.align 3
+	.llong 200b,.Ldest_error
+	.previous
+	.endm
 
 /*
  * Computes the checksum of a memory block at src, length len,
  * and adds in "sum" (32-bit), while copying the block to dst.
  * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * This code needs to be reworked to take advantage of 64 bit sum+copy.
- * However, due to tokenring halfword alignment problems this will be very
- * tricky.  For now we'll leave it until we instrument it somehow.
+ * to *src_err or *dst_err respectively. The caller must take any action
+ * required in this case (zeroing memory, recalculating partial checksum etc).
  *
  * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
  */
 _GLOBAL(csum_partial_copy_generic)
-	addic	r0,r6,0
-	subi	r3,r3,4
-	subi	r4,r4,4
-	srwi.	r6,r5,2
-	beq	3f		/* if we're doing < 4 bytes */
-	andi.	r9,r4,2		/* Align dst to longword boundary */
-	beq+	1f
-81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
+	addic	r0,r6,0			/* clear carry */
+
+	srdi.	r6,r5,3			/* less than 8 bytes? */
+	beq	.Lcopy_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 *
+	 * If the source and destination are relatively unaligned we only
+	 * align the source. This keeps things simple.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	beq	.Lcopy_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+source;	lhz	r6,0(r3)		/* align to doubleword */
 	subi	r5,r5,2
-91:	sth	r6,4(r4)
-	addi	r4,r4,2
-	addc	r0,r0,r6
-	srwi.	r6,r5,2		/* # words to do */
-	beq	3f
-1:	mtctr	r6
-82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
-92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
-	adde	r0,r0,r6
-	bdnz	82b
-	andi.	r5,r5,3
-3:	cmpwi	0,r5,2
-	blt+	4f
-83:	lhz	r6,4(r3)
 	addi	r3,r3,2
-	subi	r5,r5,2
-93:	sth	r6,4(r4)
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
 	addi	r4,r4,2
+	bdnz	1b
+
+.Lcopy_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r5,7
+	beq	.Lcopy_tail_doublewords		/* len < 128 */
+
+	srdi	r6,r5,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
+	 * the XER dependency. This means the fastest this loop can go is
+	 * 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
 	adde	r0,r0,r6
-4:	cmpwi	0,r5,1
-	bne+	5f
-84:	lbz	r6,4(r3)
-94:	stb	r6,4(r4)
-	slwi	r6,r6,8		/* Upper byte of word */
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+	bdnz	2b
+
+
 	adde	r0,r0,r6
-5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
-	rldicl	r4,r3,32,0	/* fold 64 bit value */
-	add	r3,r4,r3
-	srdi	r3,r3,32
-	blr
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
 
-/* These shouldn't go in the fixup section, since that would
-   cause the ex_table addresses to get out of order. */
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r5,r5,63
+
+.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r5,3
+	beq	.Lcopy_tail_word
 
-	.globl src_error_1
-src_error_1:
-	li	r6,0
-	subi	r5,r5,2
-95:	sth	r6,4(r4)
-	addi	r4,r4,2
-	srwi.	r6,r5,2
-	beq	3f
 	mtctr	r6
-	.globl src_error_2
-src_error_2:
-	li	r6,0
-96:	stwu	r6,4(r4)
-	bdnz	96b
-3:	andi.	r5,r5,3
-	beq	src_error
-	.globl src_error_3
-src_error_3:
-	li	r6,0
-	mtctr	r5
-	addi	r4,r4,3
-97:	stbu	r6,1(r4)
-	bdnz	97b
-	.globl src_error
-src_error:
+3:
+source;	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+dest;	std	r6,0(r4)
+	addi	r4,r4,8
+	bdnz	3b
+
+	andi.	r5,r5,7
+
+.Lcopy_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r5,2
+	beq	.Lcopy_tail_halfword
+
+source;	lwz	r6,0(r3)
+	addi	r3,r3,4
+	adde	r0,r0,r6
+dest;	stw	r6,0(r4)
+	addi	r4,r4,4
+	subi	r5,r5,4
+
+.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r5,1
+	beq	.Lcopy_tail_byte
+
+source;	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
+	addi	r4,r4,2
+	subi	r5,r5,2
+
+.Lcopy_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r5,1
+	beq	.Lcopy_finish
+
+source;	lbz	r6,0(r3)
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+dest;	stb	r6,0(r4)
+
+.Lcopy_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+.Lsrc_error:
 	cmpdi	0,r7,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r7)
-1:	addze	r3,r0
 	blr
 
-	.globl dst_error
-dst_error:
+.Ldest_error:
 	cmpdi	0,r8,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r8)
-1:	addze	r3,r0
 	blr
-
-	.section __ex_table,"a"
-	.align	3
-	.llong	81b,src_error_1
-	.llong	91b,dst_error
-	.llong	82b,src_error_2
-	.llong	92b,dst_error
-	.llong	83b,src_error_3
-	.llong	93b,dst_error
-	.llong	84b,src_error_3
-	.llong	94b,dst_error
-	.llong	95b,dst_error
-	.llong	96b,dst_error
-	.llong	97b,dst_error
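Both routines above accumulate into a full 64-bit register with carry (adde), then fold to 32 bits at the end. The three-instruction fold (rldicl/add/srdi) adds the two 32-bit halves in 64-bit arithmetic so the end-around carry lands in the high word. A C model of that fold (an illustrative sketch, not kernel source; the function name is made up):

    #include <stdint.h>

    /* Model of the .Lcsum_finish fold:
     * rldicl r4,r0,32,0  is a 64-bit rotate by 32,
     * add r3,r4,r0       sums the two halves (the carry out of the low
     *                    word lands in the high word of the result),
     * srdi r3,r3,32      keeps that high word as the 32-bit partial sum.
     */
    static uint32_t csum_fold64(uint64_t sum)
    {
            uint64_t rot = (sum << 32) | (sum >> 32);
            return (uint32_t)((rot + sum) >> 32);
    }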
diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c
new file mode 100644
index 000000000000..769b817fbb32
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers_64.c
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+			       int len, __wsum sum, int *err_ptr)
+{
+	unsigned int csum;
+
+	might_sleep();
+
+	*err_ptr = 0;
+
+	if (!len) {
+		csum = 0;
+		goto out;
+	}
+
+	if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
+		*err_ptr = -EFAULT;
+		csum = (__force unsigned int)sum;
+		goto out;
+	}
+
+	csum = csum_partial_copy_generic((void __force *)src, dst,
+					 len, sum, err_ptr, NULL);
+
+	if (unlikely(*err_ptr)) {
+		int missing = __copy_from_user(dst, src, len);
+
+		if (missing) {
+			memset(dst + len - missing, 0, missing);
+			*err_ptr = -EFAULT;
+		} else {
+			*err_ptr = 0;
+		}
+
+		csum = csum_partial(dst, len, sum);
+	}
+
+out:
+	return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_from_user);
+
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
+			     __wsum sum, int *err_ptr)
+{
+	unsigned int csum;
+
+	might_sleep();
+
+	*err_ptr = 0;
+
+	if (!len) {
+		csum = 0;
+		goto out;
+	}
+
+	if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) {
+		*err_ptr = -EFAULT;
+		csum = -1; /* invalid checksum */
+		goto out;
+	}
+
+	csum = csum_partial_copy_generic(src, (void __force *)dst,
+					 len, sum, NULL, err_ptr);
+
+	if (unlikely(*err_ptr)) {
+		csum = csum_partial(src, len, sum);
+
+		if (copy_to_user(dst, src, len)) {
+			*err_ptr = -EFAULT;
+			csum = -1; /* invalid checksum */
+		}
+	}
+
+out:
+	return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_to_user);
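Note the error contract these wrappers implement: the assembly routine bails out on a fault without cleaning up, so csum_and_copy_from_user() falls back to __copy_from_user() (which zero-fills the uncopied tail) and recomputes the checksum over the destination buffer. A hypothetical caller sketch (the function name and seed argument are illustrative):

    /* Checksum len bytes of user data while copying them into dst. */
    static int copy_and_check(const void __user *src, void *dst, int len,
                              __wsum seed)
    {
            int err;
            __wsum csum = csum_and_copy_from_user(src, dst, len, seed, &err);

            if (err)
                    return err;     /* -EFAULT; dst tail already zeroed */
            return csum_fold(csum); /* fold the 32-bit partial sum to 16 bits */
    }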
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 74a7f4130b4c..55f19f9fd708 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -62,7 +62,7 @@
 
 	.text
 	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
-	.stabs	"copy32.S",N_SO,0,0,0f
+	.stabs	"copy_32.S",N_SO,0,0,0f
 0:
 
 CACHELINE_BYTES = L1_CACHE_BYTES
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index 4d4eeb900486..53dcb6b1b708 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -6,6 +6,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
@@ -15,9 +16,9 @@ PPC64_CACHES:
 	.tc		ppc64_caches[TC],ppc64_caches
 	.section	".text"
 
-
-_GLOBAL(copy_4K_page)
-	li	r5,4096		/* 4K page size */
+_GLOBAL(copy_page)
+	lis	r5,PAGE_SIZE@h
+	ori	r5,r5,PAGE_SIZE@l
 BEGIN_FTR_SECTION
 	ld	r10,PPC64_CACHES@toc(r2)
 	lwz	r11,DCACHEL1LOGLINESIZE(r10)	/* log2 of cache line size */
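The switch from li to a lis/ori pair matters because li takes only a 16-bit signed immediate: 4096 fits, but a 64K PAGE_SIZE (CONFIG_PPC_64K_PAGES) does not. A C model of how the pair materialises the constant (an illustrative sketch; the function name is made up):

    #include <stdint.h>

    /* lis r5,X@h loads the high halfword of X shifted left 16;
     * ori r5,r5,X@l ORs in the low halfword. */
    static uint32_t lis_ori(uint32_t x)
    {
            uint32_t r5 = (x >> 16) << 16;  /* lis  r5,x@h */
            r5 |= x & 0xffff;               /* ori  r5,r5,x@l */
            return r5;
    }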
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
index deac4d30daf4..e91615abae66 100644
--- a/arch/powerpc/lib/devres.c
+++ b/arch/powerpc/lib/devres.c
@@ -9,11 +9,11 @@
 
 #include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
 #include <linux/gfp.h>
-#include <linux/io.h>		/* ioremap_flags() */
+#include <linux/io.h>		/* ioremap_prot() */
 #include <linux/module.h>	/* EXPORT_SYMBOL() */
 
 /**
- * devm_ioremap_prot - Managed ioremap_flags()
+ * devm_ioremap_prot - Managed ioremap_prot()
  * @dev: Generic device to remap IO address for
  * @offset: BUS offset to map
  * @size: Size of map
@@ -31,7 +31,7 @@ void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
 	if (!ptr)
 		return NULL;
 
-	addr = ioremap_flags(offset, size, flags);
+	addr = ioremap_prot(offset, size, flags);
 	if (addr) {
 		*ptr = addr;
 		devres_add(dev, ptr);
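Being devres-managed, the mapping from devm_ioremap_prot() is torn down automatically when the device is unbound, so no explicit iounmap() is needed. A hypothetical probe sketch (the driver name, resource handling, and flag choice are illustrative):

    static int foo_probe(struct platform_device *pdev)
    {
            struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            void __iomem *regs;

            if (!res)
                    return -ENODEV;
            regs = devm_ioremap_prot(&pdev->dev, res->start, resource_size(res),
                                     _PAGE_NO_CACHE | _PAGE_GUARDED);
            if (!regs)
                    return -ENOMEM;
            /* no iounmap(): devres releases the mapping on unbind */
            return 0;
    }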
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index cb737484c5aa..f4613118132e 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -172,6 +172,25 @@ globl(ftr_fixup_test6_expected)
 3:	or	3,3,3
 
 
+#if 0
+/* Test that if we have a larger else case the assembler spots it and
+ * reports an error. #if 0'ed so as not to break the build normally.
+ */
+ftr_fixup_test7:
+	or	1,1,1
+BEGIN_FTR_SECTION
+	or	2,2,2
+	or	2,2,2
+	or	2,2,2
+FTR_SECTION_ELSE
+	or	3,3,3
+	or	3,3,3
+	or	3,3,3
+	or	3,3,3
+ALT_FTR_SECTION_END(0, 1)
+	or	1,1,1
+#endif
+
 #define MAKE_MACRO_TEST(TYPE)					\
 globl(ftr_fixup_test_ ##TYPE##_macros)				\
 	or	1,1,1;						\
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
new file mode 100644
index 000000000000..fda27868cf8c
--- /dev/null
+++ b/arch/powerpc/lib/hweight_64.S
@@ -0,0 +1,110 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+/* Note: This code relies on -mminimal-toc */
+
+_GLOBAL(__arch_hweight8)
+BEGIN_FTR_SECTION
+	b .__sw_hweight8
+	nop
+	nop
+FTR_SECTION_ELSE
+	PPC_POPCNTB(r3,r3)
+	clrldi	r3,r3,64-8
+	blr
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight16)
+BEGIN_FTR_SECTION
+	b .__sw_hweight16
+	nop
+	nop
+	nop
+	nop
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(50)
+	PPC_POPCNTB(r3,r3)
+	srdi	r4,r3,8
+	add	r3,r4,r3
+	clrldi	r3,r3,64-8
+	blr
+  FTR_SECTION_ELSE_NESTED(50)
+	clrlwi	r3,r3,16
+	PPC_POPCNTW(r3,r3)
+	clrldi	r3,r3,64-8
+	blr
+  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight32)
+BEGIN_FTR_SECTION
+	b .__sw_hweight32
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(51)
+	PPC_POPCNTB(r3,r3)
+	srdi	r4,r3,16
+	add	r3,r4,r3
+	srdi	r4,r3,8
+	add	r3,r4,r3
+	clrldi	r3,r3,64-8
+	blr
+  FTR_SECTION_ELSE_NESTED(51)
+	PPC_POPCNTW(r3,r3)
+	clrldi	r3,r3,64-8
+	blr
+  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+
+_GLOBAL(__arch_hweight64)
+BEGIN_FTR_SECTION
+	b .__sw_hweight64
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+FTR_SECTION_ELSE
+  BEGIN_FTR_SECTION_NESTED(52)
+	PPC_POPCNTB(r3,r3)
+	srdi	r4,r3,32
+	add	r3,r4,r3
+	srdi	r4,r3,16
+	add	r3,r4,r3
+	srdi	r4,r3,8
+	add	r3,r4,r3
+	clrldi	r3,r3,64-8
+	blr
+  FTR_SECTION_ELSE_NESTED(52)
+	PPC_POPCNTD(r3,r3)
+	clrldi	r3,r3,64-8
+	blr
+  ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
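popcntb computes eight independent per-byte population counts; the srdi/add ladder that follows sums those byte counts, and clrldi keeps the low 8 bits. A C model of the 64-bit POPCNTB path (a sketch; popcntb is modeled bytewise with a GCC builtin, and the function name is made up):

    #include <stdint.h>

    static unsigned int hweight64_popcntb(uint64_t x)
    {
            uint64_t r = 0;
            int i;

            /* popcntb r3,r3: one popcount per byte lane of the source */
            for (i = 0; i < 64; i += 8)
                    r |= (uint64_t)__builtin_popcountll((x >> i) & 0xff) << i;

            r += r >> 32;           /* srdi r4,r3,32; add r3,r4,r3 */
            r += r >> 16;           /* srdi r4,r3,16; add r3,r4,r3 */
            r += r >> 8;            /* srdi r4,r3,8;  add r3,r4,r3 */
            return r & 0xff;        /* clrldi r3,r3,64-8 */
    }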
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index f6448636baf5..6a85380520b6 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -17,6 +17,8 @@
 #include <asm/asm-offsets.h>
 #include <linux/errno.h>
 
+#ifdef CONFIG_PPC_FPU
+
 #define STKFRM	(PPC_MIN_STKFRM + 16)
 
 .macro	extab	instr,handler
@@ -81,7 +83,7 @@ _GLOBAL(do_lfs)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -93,7 +95,7 @@ _GLOBAL(do_lfs)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -108,7 +110,7 @@ _GLOBAL(do_lfd)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -120,7 +122,7 @@ _GLOBAL(do_lfd)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -135,7 +137,7 @@ _GLOBAL(do_stfs)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -147,7 +149,7 @@ _GLOBAL(do_stfs)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -162,7 +164,7 @@ _GLOBAL(do_stfd)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -174,7 +176,7 @@ _GLOBAL(do_stfd)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -229,7 +231,7 @@ _GLOBAL(do_lvx)
 	oris	r7,r6,MSR_VEC@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stvx	vr0,r1,r8
@@ -241,7 +243,7 @@ _GLOBAL(do_lvx)
 	lvx	vr0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -257,7 +259,7 @@ _GLOBAL(do_stvx)
 	oris	r7,r6,MSR_VEC@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stvx	vr0,r1,r8
@@ -269,7 +271,7 @@ _GLOBAL(do_stvx)
 	lvx	vr0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -325,7 +327,7 @@ _GLOBAL(do_lxvd2x)
 	oris	r7,r6,MSR_VSX@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	STXVD2X(0,r1,r8)
@@ -337,7 +339,7 @@ _GLOBAL(do_lxvd2x)
 	LXVD2X(0,r1,r8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -353,7 +355,7 @@ _GLOBAL(do_stxvd2x)
 	oris	r7,r6,MSR_VSX@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	STXVD2X(0,r1,r8)
@@ -365,7 +367,7 @@ _GLOBAL(do_stxvd2x)
 	LXVD2X(0,r1,r8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -373,3 +375,5 @@ _GLOBAL(do_stxvd2x)
 	extab	2b,3b
 
 #endif /* CONFIG_VSX */
+
+#endif /* CONFIG_PPC_FPU */
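The mtmsrd to MTMSRD() change is what lets this file build for 32-bit kernels, where sstep.o/ldstfp.o are also linked for kprobes, xmon and hw-breakpoint: mtmsrd is a 64-bit-only instruction. The macro in asm/ppc_asm.h picks the right opcode per configuration, roughly as below (paraphrased, not a verbatim quote of the header):

    #ifdef CONFIG_PPC64
    #define MTMSRD(r)	mtmsrd	r	/* 64-bit MSR update */
    #else
    #define MTMSRD(r)	mtmsr	r	/* 32-bit equivalent */
    #endif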
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 58e14fba11b1..9b8182e82166 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -34,7 +34,7 @@ void __spin_yield(arch_spinlock_t *lock)
 		return;
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = lppaca[holder_cpu].yield_count;
+	yield_count = lppaca_of(holder_cpu).yield_count;
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
@@ -65,7 +65,7 @@ void __rw_yield(arch_rwlock_t *rw)
 		return;		/* no write lock at present */
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = lppaca[holder_cpu].yield_count;
+	yield_count = lppaca_of(holder_cpu).yield_count;
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
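lppaca_of() replaces direct indexing of one big static lppaca[] array so that lppaca structs can be allocated dynamically per CPU. The accessor's rough shape (an assumption about the companion patch, not quoted from this series) is a dereference through the per-CPU paca:

    /* Roughly: each CPU's paca carries a pointer to its own lppaca. */
    #define lppaca_of(cpu)	(*paca[cpu].lppaca_ptr)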
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e0a9858d537e..9a52349874ee 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
+#include <linux/prefetch.h>
 #include <asm/sstep.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
@@ -30,6 +31,7 @@ extern char system_call_common[];
 #define XER_OV		0x40000000U
 #define XER_CA		0x20000000U
 
+#ifdef CONFIG_PPC_FPU
 /*
  * Functions in ldstfp.S
  */
@@ -41,6 +43,19 @@ extern int do_lvx(int rn, unsigned long ea);
 extern int do_stvx(int rn, unsigned long ea);
 extern int do_lxvd2x(int rn, unsigned long ea);
 extern int do_stxvd2x(int rn, unsigned long ea);
+#endif
+
+/*
+ * Emulate the truncation of 64 bit values in 32-bit mode.
+ */
+static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
+{
+#ifdef __powerpc64__
+	if ((msr & MSR_64BIT) == 0)
+		val &= 0xffffffffUL;
+#endif
+	return val;
+}
 
 /*
  * Determine whether a conditional branch instruction would branch.
@@ -88,11 +103,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs
 		if (instr & 0x04000000)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 #ifdef __powerpc64__
@@ -111,9 +123,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *regs
 		if ((instr & 3) == 1)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 #endif /* __powerpc64 */
 
@@ -134,11 +145,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs
 		if (do_update)		/* update forms */
 			regs->gpr[ra] = ea;
 	}
-#ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
-		ea &= 0xffffffffUL;
-#endif
-	return ea;
+
+	return truncate_if_32bit(regs->msr, ea);
 }
 
 /*
@@ -290,6 +298,7 @@ static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb,
 	return write_mem_unaligned(val, ea, nb, regs);
 }
 
+#ifdef CONFIG_PPC_FPU
 /*
  * Check the address and alignment, and call func to do the actual
  * load or store.
@@ -351,6 +360,7 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
 	}
 	return err;
 }
+#endif
 
 #ifdef CONFIG_ALTIVEC
 /* For Altivec/VMX, no need to worry about alignment */
@@ -462,7 +472,7 @@ static void __kprobes set_cr0(struct pt_regs *regs, int rd)
 
 	regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF))
+	if (!(regs->msr & MSR_64BIT))
 		val = (int) val;
 #endif
 	if (val < 0)
@@ -483,7 +493,7 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
 		++val;
 	regs->gpr[rd] = val;
 #ifdef __powerpc64__
-	if (!(regs->msr & MSR_SF)) {
+	if (!(regs->msr & MSR_64BIT)) {
 		val = (unsigned int) val;
 		val1 = (unsigned int) val1;
 	}
@@ -566,8 +576,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		if ((instr & 2) == 0)
 			imm += regs->nip;
 		regs->nip += 4;
-		if ((regs->msr & MSR_SF) == 0)
-			regs->nip &= 0xffffffffUL;
+		regs->nip = truncate_if_32bit(regs->msr, regs->nip);
 		if (instr & 1)
 			regs->link = regs->nip;
 		if (branch_taken(instr, regs))
@@ -600,13 +609,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			imm -= 0x04000000;
 		if ((instr & 2) == 0)
 			imm += regs->nip;
-		if (instr & 1) {
-			regs->link = regs->nip + 4;
-			if ((regs->msr & MSR_SF) == 0)
-				regs->link &= 0xffffffffUL;
-		}
-		if ((regs->msr & MSR_SF) == 0)
-			imm &= 0xffffffffUL;
+		if (instr & 1)
+			regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
+		imm = truncate_if_32bit(regs->msr, imm);
 		regs->nip = imm;
 		return 1;
 	case 19:
@@ -614,11 +619,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		case 16:	/* bclr */
 		case 528:	/* bcctr */
 			imm = (instr & 0x400)? regs->ctr: regs->link;
-			regs->nip += 4;
-			if ((regs->msr & MSR_SF) == 0) {
-				regs->nip &= 0xffffffffUL;
-				imm &= 0xffffffffUL;
-			}
+			regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+			imm = truncate_if_32bit(regs->msr, imm);
 			if (instr & 1)
 				regs->link = regs->nip;
 			if (branch_taken(instr, regs))
@@ -1393,6 +1395,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			regs->gpr[rd] = byterev_4(val);
 		goto ldst_done;
 
+#ifdef CONFIG_PPC_FPU
 	case 535:	/* lfsx */
 	case 567:	/* lfsux */
 		if (!(regs->msr & MSR_FP))
@@ -1424,6 +1427,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		ea = xform_ea(instr, regs, u);
 		err = do_fp_store(rd, do_stfd, ea, 8, regs);
 		goto ldst_done;
+#endif
 
 #ifdef __powerpc64__
 	case 660:	/* stdbrx */
@@ -1534,6 +1538,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		} while (++rd < 32);
 		goto instr_done;
 
+#ifdef CONFIG_PPC_FPU
 	case 48:	/* lfs */
 	case 49:	/* lfsu */
 		if (!(regs->msr & MSR_FP))
@@ -1565,6 +1570,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		ea = dform_ea(instr, regs);
 		err = do_fp_store(rd, do_stfd, ea, 8, regs);
 		goto ldst_done;
+#endif
 
 #ifdef __powerpc64__
 	case 58:	/* ld[u], lwa */
@@ -1608,11 +1614,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		return 0;	/* invoke DSI if -EFAULT? */
 	}
 instr_done:
-	regs->nip += 4;
-#ifdef __powerpc64__
-	if ((regs->msr & MSR_SF) == 0)
-		regs->nip &= 0xffffffffUL;
-#endif
+	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
 	return 1;
 
 logical_done:
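truncate_if_32bit() centralises the rule that with MSR_64BIT (i.e. MSR_SF) clear, effective addresses and the next-instruction pointer wrap at 4GB. A standalone model showing the effect (illustrative; the mask value mirrors MSR_SF being bit 63):

    #include <stdio.h>

    #define MSR_64BIT (1ul << 63)	/* illustrative stand-in for MSR_SF */

    /* local model of the helper for illustration */
    static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
    {
            if ((msr & MSR_64BIT) == 0)
                    val &= 0xffffffffUL;
            return val;
    }

    int main(void)
    {
            unsigned long nip = 0xfffffffcUL;	/* last word below 4GB */

            /* the incremented NIP wraps only in 32-bit mode */
            printf("%lx\n", truncate_if_32bit(MSR_64BIT, nip + 4)); /* 100000000 */
            printf("%lx\n", truncate_if_32bit(0, nip + 4));         /* 0 */
            return 0;
    }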