Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--  arch/powerpc/lib/Makefile                   7
-rw-r--r--  arch/powerpc/lib/checksum_64.S            482
-rw-r--r--  arch/powerpc/lib/checksum_wrappers_64.c   102
-rw-r--r--  arch/powerpc/lib/copy_32.S                  2
-rw-r--r--  arch/powerpc/lib/ldstfp.S                  36
-rw-r--r--  arch/powerpc/lib/locks.c                    4
-rw-r--r--  arch/powerpc/lib/sstep.c                    8
7 files changed, 491 insertions(+), 150 deletions(-)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5bb89c82807..889f2bc106d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -4,9 +4,7 @@
 
 subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS += -mno-minimal-toc
-endif
+ccflags-$(CONFIG_PPC64)	:= -mno-minimal-toc
 
 CFLAGS_REMOVE_code-patching.o = -pg
 CFLAGS_REMOVE_feature-fixups.o = -pg
@@ -17,7 +15,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o mem_64.o string.o
+			   memcpy_64.o usercopy_64.o mem_64.o string.o \
+			   checksum_wrappers_64.o
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index ef96c6c58ef..18245af38ae 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -65,165 +65,393 @@ _GLOBAL(csum_tcpudp_magic)
 	srwi	r3,r3,16
 	blr
 
+#define STACKFRAMESIZE 256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
 /*
  * Computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit).
  *
- * This code assumes at least halfword alignment, though the length
- * can be any number of bytes. The sum is accumulated in r5.
- *
  * csum_partial(r3=buff, r4=len, r5=sum)
  */
 _GLOBAL(csum_partial)
-	subi	r3,r3,8		/* we'll offset by 8 for the loads */
-	srdi.	r6,r4,3		/* divide by 8 for doubleword count */
-	addic	r5,r5,0		/* clear carry */
-	beq	3f		/* if we're doing < 8 bytes */
-	andi.	r0,r3,2		/* aligned on a word boundary already? */
-	beq+	1f
-	lhz	r6,8(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
-	subi	r4,r4,2
-	addc	r5,r5,r6
-	srdi.	r6,r4,3		/* recompute number of doublewords */
-	beq	3f		/* any left? */
-1:	mtctr	r6
-2:	ldu	r6,8(r3)	/* main sum loop */
-	adde	r5,r5,r6
-	bdnz	2b
-	andi.	r4,r4,7		/* compute bytes left to sum after doublewords */
-3:	cmpwi	0,r4,4		/* is at least a full word left? */
-	blt	4f
-	lwz	r6,8(r3)	/* sum this word */
+	addic	r0,r5,0			/* clear carry */
+
+	srdi.	r6,r4,3			/* less than 8 bytes? */
+	beq	.Lcsum_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	beq	.Lcsum_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+	lhz	r6,0(r3)		/* align to doubleword */
+	subi	r4,r4,2
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	bdnz	1b
+
+.Lcsum_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r4,7
+	beq	.Lcsum_tail_doublewords	/* len < 128 */
+
+	srdi	r6,r4,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
+	 * the XER dependency. This means the fastest this loop can go is
+	 * 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+
+	adde	r0,r0,r11
+
+	adde	r0,r0,r12
+
+	adde	r0,r0,r14
+
+	adde	r0,r0,r15
+	ld	r6,0(r3)
+	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+	ld	r10,16(r3)
+	ld	r11,24(r3)
+	bdnz	2b
+
+
+	adde	r0,r0,r6
+	ld	r12,32(r3)
+	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+	ld	r15,48(r3)
+	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+	adde	r0,r0,r11
+	adde	r0,r0,r12
+	adde	r0,r0,r14
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r4,r4,63
+
+.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r4,3
+	beq	.Lcsum_tail_word
+
+	mtctr	r6
+3:
+	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+	bdnz	3b
+
+	andi.	r4,r4,7
+
+.Lcsum_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r4,2
+	beq	.Lcsum_tail_halfword
+
+	lwz	r6,0(r3)
 	addi	r3,r3,4
+	adde	r0,r0,r6
 	subi	r4,r4,4
-	adde	r5,r5,r6
-4:	cmpwi	0,r4,2		/* is at least a halfword left? */
-	blt+	5f
-	lhz	r6,8(r3)	/* sum this halfword */
-	addi	r3,r3,2
-	subi	r4,r4,2
-	adde	r5,r5,r6
-5:	cmpwi	0,r4,1		/* is at least a byte left? */
-	bne+	6f
-	lbz	r6,8(r3)	/* sum this byte */
-	slwi	r6,r6,8		/* this byte is assumed to be the upper byte of a halfword */
-	adde	r5,r5,r6
-6:	addze	r5,r5		/* add in final carry */
-	rldicl	r4,r5,32,0	/* fold two 32-bit halves together */
-	add	r3,r4,r5
-	srdi	r3,r3,32
-	blr
+
+.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r4,1
+	beq	.Lcsum_tail_byte
+
+	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+	subi	r4,r4,2
+
+.Lcsum_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r4,1
+	beq	.Lcsum_finish
+
+	lbz	r6,0(r3)
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+
+.Lcsum_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+
+	.macro source
+100:
+	.section __ex_table,"a"
+	.align 3
+	.llong 100b,.Lsrc_error
+	.previous
+	.endm
+
+	.macro dest
+200:
+	.section __ex_table,"a"
+	.align 3
+	.llong 200b,.Ldest_error
+	.previous
+	.endm
 
 /*
  * Computes the checksum of a memory block at src, length len,
  * and adds in "sum" (32-bit), while copying the block to dst.
  * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * This code needs to be reworked to take advantage of 64 bit sum+copy.
- * However, due to tokenring halfword alignment problems this will be very
- * tricky. For now we'll leave it until we instrument it somehow.
+ * to *src_err or *dst_err respectively. The caller must take any action
+ * required in this case (zeroing memory, recalculating partial checksum etc).
  *
  * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
  */
 _GLOBAL(csum_partial_copy_generic)
-	addic	r0,r6,0
-	subi	r3,r3,4
-	subi	r4,r4,4
-	srwi.	r6,r5,2
-	beq	3f		/* if we're doing < 4 bytes */
-	andi.	r9,r4,2		/* Align dst to longword boundary */
-	beq+	1f
-81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
+	addic	r0,r6,0			/* clear carry */
+
+	srdi.	r6,r5,3			/* less than 8 bytes? */
+	beq	.Lcopy_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 *
+	 * If the source and destination are relatively unaligned we only
+	 * align the source. This keeps things simple.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	beq	.Lcopy_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+source;	lhz	r6,0(r3)		/* align to doubleword */
 	subi	r5,r5,2
-91:	sth	r6,4(r4)
-	addi	r4,r4,2
-	addc	r0,r0,r6
-	srwi.	r6,r5,2		/* # words to do */
-	beq	3f
-1:	mtctr	r6
-82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
-92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
-	adde	r0,r0,r6
-	bdnz	82b
-	andi.	r5,r5,3
-3:	cmpwi	0,r5,2
-	blt+	4f
-83:	lhz	r6,4(r3)
 	addi	r3,r3,2
-	subi	r5,r5,2
-93:	sth	r6,4(r4)
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
 	addi	r4,r4,2
+	bdnz	1b
+
+.Lcopy_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r5,7
+	beq	.Lcopy_tail_doublewords	/* len < 128 */
+
+	srdi	r6,r5,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
+	 * the XER dependency. This means the fastest this loop can go is
+	 * 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
 	adde	r0,r0,r6
-4:	cmpwi	0,r5,1
-	bne+	5f
-84:	lbz	r6,4(r3)
-94:	stb	r6,4(r4)
-	slwi	r6,r6,8		/* Upper byte of word */
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+	bdnz	2b
+
+
 	adde	r0,r0,r6
-5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
-	rldicl	r4,r3,32,0	/* fold 64 bit value */
-	add	r3,r4,r3
-	srdi	r3,r3,32
-	blr
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
 
-/* These shouldn't go in the fixup section, since that would
-   cause the ex_table addresses to get out of order. */
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r5,r5,63
+
+.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r5,3
+	beq	.Lcopy_tail_word
 
-	.globl src_error_1
-src_error_1:
-	li	r6,0
-	subi	r5,r5,2
-95:	sth	r6,4(r4)
-	addi	r4,r4,2
-	srwi.	r6,r5,2
-	beq	3f
 	mtctr	r6
-	.globl src_error_2
-src_error_2:
-	li	r6,0
-96:	stwu	r6,4(r4)
-	bdnz	96b
-3:	andi.	r5,r5,3
-	beq	src_error
-	.globl src_error_3
-src_error_3:
-	li	r6,0
-	mtctr	r5
-	addi	r4,r4,3
-97:	stbu	r6,1(r4)
-	bdnz	97b
-	.globl src_error
-src_error:
+3:
+source;	ld	r6,0(r3)
+	addi	r3,r3,8
+	adde	r0,r0,r6
+dest;	std	r6,0(r4)
+	addi	r4,r4,8
+	bdnz	3b
+
+	andi.	r5,r5,7
+
+.Lcopy_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r5,2
+	beq	.Lcopy_tail_halfword
+
+source;	lwz	r6,0(r3)
+	addi	r3,r3,4
+	adde	r0,r0,r6
+dest;	stw	r6,0(r4)
+	addi	r4,r4,4
+	subi	r5,r5,4
+
+.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r5,1
+	beq	.Lcopy_tail_byte
+
+source;	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
+	addi	r4,r4,2
+	subi	r5,r5,2
+
+.Lcopy_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r5,1
+	beq	.Lcopy_finish
+
+source;	lbz	r6,0(r3)
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+dest;	stb	r6,0(r4)
+
+.Lcopy_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+.Lsrc_error:
 	cmpdi	0,r7,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r7)
-1:	addze	r3,r0
 	blr
 
-	.globl dst_error
-dst_error:
+.Ldest_error:
 	cmpdi	0,r8,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r8)
-1:	addze	r3,r0
 	blr
-
-.section __ex_table,"a"
-	.align	3
-	.llong	81b,src_error_1
-	.llong	91b,dst_error
-	.llong	82b,src_error_2
-	.llong	92b,dst_error
-	.llong	83b,src_error_3
-	.llong	93b,dst_error
-	.llong	84b,src_error_3
-	.llong	94b,dst_error
-	.llong	95b,dst_error
-	.llong	96b,dst_error
-	.llong	97b,dst_error
diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c
new file mode 100644
index 00000000000..769b817fbb3
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers_64.c
@@ -0,0 +1,102 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+			       int len, __wsum sum, int *err_ptr)
+{
+	unsigned int csum;
+
+	might_sleep();
+
+	*err_ptr = 0;
+
+	if (!len) {
+		csum = 0;
+		goto out;
+	}
+
+	if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
+		*err_ptr = -EFAULT;
+		csum = (__force unsigned int)sum;
+		goto out;
+	}
+
+	csum = csum_partial_copy_generic((void __force *)src, dst,
+					 len, sum, err_ptr, NULL);
+
+	if (unlikely(*err_ptr)) {
+		int missing = __copy_from_user(dst, src, len);
+
+		if (missing) {
+			memset(dst + len - missing, 0, missing);
+			*err_ptr = -EFAULT;
+		} else {
+			*err_ptr = 0;
+		}
+
+		csum = csum_partial(dst, len, sum);
+	}
+
+out:
+	return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_from_user);
+
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
+			     __wsum sum, int *err_ptr)
+{
+	unsigned int csum;
+
+	might_sleep();
+
+	*err_ptr = 0;
+
+	if (!len) {
+		csum = 0;
+		goto out;
+	}
+
+	if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) {
+		*err_ptr = -EFAULT;
+		csum = -1; /* invalid checksum */
+		goto out;
+	}
+
+	csum = csum_partial_copy_generic(src, (void __force *)dst,
+					 len, sum, NULL, err_ptr);
+
+	if (unlikely(*err_ptr)) {
+		csum = csum_partial(src, len, sum);
+
+		if (copy_to_user(dst, src, len)) {
+			*err_ptr = -EFAULT;
+			csum = -1; /* invalid checksum */
+		}
+	}
+
+out:
+	return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 74a7f4130b4..55f19f9fd70 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -62,7 +62,7 @@
 
 	.text
 	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
-	.stabs	"copy32.S",N_SO,0,0,0f
+	.stabs	"copy_32.S",N_SO,0,0,0f
 0:
 
 CACHELINE_BYTES = L1_CACHE_BYTES
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index f6448636baf..6a85380520b 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -17,6 +17,8 @@
 #include <asm/asm-offsets.h>
 #include <linux/errno.h>
 
+#ifdef CONFIG_PPC_FPU
+
 #define STKFRM	(PPC_MIN_STKFRM + 16)
 
 	.macro	extab	instr,handler
@@ -81,7 +83,7 @@ _GLOBAL(do_lfs)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -93,7 +95,7 @@ _GLOBAL(do_lfs)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -108,7 +110,7 @@ _GLOBAL(do_lfd)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -120,7 +122,7 @@ _GLOBAL(do_lfd)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -135,7 +137,7 @@ _GLOBAL(do_stfs)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -147,7 +149,7 @@ _GLOBAL(do_stfs)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -162,7 +164,7 @@ _GLOBAL(do_stfd)
 	mfmsr	r6
 	ori	r7,r6,MSR_FP
 	cmpwi	cr7,r3,0
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stfd	fr0,STKFRM-16(r1)
@@ -174,7 +176,7 @@ _GLOBAL(do_stfd)
 	lfd	fr0,STKFRM-16(r1)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -229,7 +231,7 @@ _GLOBAL(do_lvx)
 	oris	r7,r6,MSR_VEC@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stvx	vr0,r1,r8
@@ -241,7 +243,7 @@ _GLOBAL(do_lvx)
 	lvx	vr0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -257,7 +259,7 @@ _GLOBAL(do_stvx)
 	oris	r7,r6,MSR_VEC@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	stvx	vr0,r1,r8
@@ -269,7 +271,7 @@ _GLOBAL(do_stvx)
 	lvx	vr0,r1,r8
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -325,7 +327,7 @@ _GLOBAL(do_lxvd2x)
 	oris	r7,r6,MSR_VSX@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	STXVD2X(0,r1,r8)
@@ -337,7 +339,7 @@ _GLOBAL(do_lxvd2x)
 	LXVD2X(0,r1,r8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -353,7 +355,7 @@ _GLOBAL(do_stxvd2x)
 	oris	r7,r6,MSR_VSX@h
 	cmpwi	cr7,r3,0
 	li	r8,STKFRM-16
-	mtmsrd	r7
+	MTMSRD(r7)
 	isync
 	beq	cr7,1f
 	STXVD2X(0,r1,r8)
@@ -365,7 +367,7 @@ _GLOBAL(do_stxvd2x)
 	LXVD2X(0,r1,r8)
 4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
 	mtlr	r0
-	mtmsrd	r6
+	MTMSRD(r6)
 	isync
 	mr	r3,r9
 	addi	r1,r1,STKFRM
@@ -373,3 +375,5 @@ _GLOBAL(do_stxvd2x)
 	extab	2b,3b
 
 #endif /* CONFIG_VSX */
+
+#endif /* CONFIG_PPC_FPU */
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 58e14fba11b..9b8182e8216 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -34,7 +34,7 @@ void __spin_yield(arch_spinlock_t *lock)
 		return;
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = lppaca[holder_cpu].yield_count;
+	yield_count = lppaca_of(holder_cpu).yield_count;
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
@@ -65,7 +65,7 @@ void __rw_yield(arch_rwlock_t *rw)
 		return;		/* no write lock at present */
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = lppaca[holder_cpu].yield_count;
+	yield_count = lppaca_of(holder_cpu).yield_count;
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e0a9858d537..ae5189ab004 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -30,6 +30,7 @@ extern char system_call_common[];
 #define XER_OV		0x40000000U
 #define XER_CA		0x20000000U
 
+#ifdef CONFIG_PPC_FPU
 /*
  * Functions in ldstfp.S
  */
@@ -41,6 +42,7 @@ extern int do_lvx(int rn, unsigned long ea);
 extern int do_stvx(int rn, unsigned long ea);
 extern int do_lxvd2x(int rn, unsigned long ea);
 extern int do_stxvd2x(int rn, unsigned long ea);
+#endif
 
 /*
  * Determine whether a conditional branch instruction would branch.
@@ -290,6 +292,7 @@ static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb,
 	return write_mem_unaligned(val, ea, nb, regs);
 }
 
+#ifdef CONFIG_PPC_FPU
 /*
  * Check the address and alignment, and call func to do the actual
  * load or store.
@@ -351,6 +354,7 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
 	}
 	return err;
 }
+#endif
 
 #ifdef CONFIG_ALTIVEC
 /* For Altivec/VMX, no need to worry about alignment */
@@ -1393,6 +1397,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		regs->gpr[rd] = byterev_4(val);
 		goto ldst_done;
 
+#ifdef CONFIG_PPC_FPU
 	case 535:	/* lfsx */
 	case 567:	/* lfsux */
 		if (!(regs->msr & MSR_FP))
@@ -1424,6 +1429,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		ea = xform_ea(instr, regs, u);
 		err = do_fp_store(rd, do_stfd, ea, 8, regs);
 		goto ldst_done;
+#endif
 
 #ifdef __powerpc64__
 	case 660:	/* stdbrx */
@@ -1534,6 +1540,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		} while (++rd < 32);
 		goto instr_done;
 
+#ifdef CONFIG_PPC_FPU
 	case 48:	/* lfs */
 	case 49:	/* lfsu */
 		if (!(regs->msr & MSR_FP))
@@ -1565,6 +1572,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		ea = dform_ea(instr, regs);
 		err = do_fp_store(rd, do_stfd, ea, 8, regs);
 		goto ldst_done;
+#endif
 
 #ifdef __powerpc64__
 	case 58:	/* ld[u], lwa */