-rw-r--r--  arch/powerpc/include/asm/checksum.h      |   7
-rw-r--r--  arch/powerpc/lib/Makefile                |   3
-rw-r--r--  arch/powerpc/lib/checksum_64.S           | 289
-rw-r--r--  arch/powerpc/lib/checksum_wrappers_64.c  |  65
4 files changed, 276 insertions, 88 deletions
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 7cdf358337cf..9ea58c0e7cfb 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -52,12 +52,19 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 extern __wsum csum_partial_copy_generic(const void *src, void *dst,
					int len, __wsum sum,
					int *src_err, int *dst_err);
+
+#ifdef __powerpc64__
+#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
+				      int len, __wsum sum, int *err_ptr);
+#else
 /*
  * the same as csum_partial, but copies from src to dst while it
  * checksums.
  */
 #define csum_partial_copy_from_user(src, dst, len, sum, errp)   \
 	csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL)
+#endif
 
 #define csum_partial_copy_nocheck(src, dst, len, sum)   \
 	csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL)
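
Note on the header change above: defining _HAVE_ARCH_COPY_AND_CSUM_FROM_USER tells the generic networking checksum code that 64-bit powerpc now provides its own csum_and_copy_from_user() rather than the generic copy-then-checksum fallback. For orientation only, a hedged sketch of what that generic fallback roughly looks like (paraphrased from include/net/checksum.h of this era; the function name below is illustrative and nothing here is part of the patch):

static inline __wsum
sketch_generic_csum_and_copy_from_user(const void __user *src, void *dst,
				       int len, __wsum sum, int *err_ptr)
{
	/* Verify the user range, then copy and checksum in a single pass. */
	if (access_ok(VERIFY_READ, src, len))
		return csum_partial_copy_from_user(src, dst, len, sum, err_ptr);

	if (len)
		*err_ptr = -EFAULT;

	return sum;
}
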
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5bb89c828070..ad4a36848f25 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -17,7 +17,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o mem_64.o string.o
+			   memcpy_64.o usercopy_64.o mem_64.o string.o \
+			   checksum_wrappers_64.o
 obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
 obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 404d5a6e3387..18245af38aea 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -228,115 +228,230 @@ _GLOBAL(csum_partial)
 	srdi	r3,r3,32
 	blr
 
+
+	.macro source
+100:
+	.section __ex_table,"a"
+	.align 3
+	.llong 100b,.Lsrc_error
+	.previous
+	.endm
+
+	.macro dest
+200:
+	.section __ex_table,"a"
+	.align 3
+	.llong 200b,.Ldest_error
+	.previous
+	.endm
+
 /*
  * Computes the checksum of a memory block at src, length len,
  * and adds in "sum" (32-bit), while copying the block to dst.
  * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * This code needs to be reworked to take advantage of 64 bit sum+copy.
- * However, due to tokenring halfword alignment problems this will be very
- * tricky. For now we'll leave it until we instrument it somehow.
+ * to *src_err or *dst_err respectively. The caller must take any action
+ * required in this case (zeroing memory, recalculating partial checksum etc).
  *
  * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
  */
 _GLOBAL(csum_partial_copy_generic)
-	addic	r0,r6,0
-	subi	r3,r3,4
-	subi	r4,r4,4
-	srwi.	r6,r5,2
-	beq	3f		/* if we're doing < 4 bytes */
-	andi.	r9,r4,2		/* Align dst to longword boundary */
-	beq+	1f
-81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
+	addic	r0,r6,0			/* clear carry */
+
+	srdi.	r6,r5,3			/* less than 8 bytes? */
+	beq	.Lcopy_tail_word
+
+	/*
+	 * If only halfword aligned, align to a double word. Since odd
+	 * aligned addresses should be rare and they would require more
+	 * work to calculate the correct checksum, we ignore that case
+	 * and take the potential slowdown of unaligned loads.
+	 *
+	 * If the source and destination are relatively unaligned we only
+	 * align the source. This keeps things simple.
+	 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	beq	.Lcopy_aligned
+
+	li	r7,4
+	sub	r6,r7,r6
+	mtctr	r6
+
+1:
+source;	lhz	r6,0(r3)		/* align to doubleword */
 	subi	r5,r5,2
-91:	sth	r6,4(r4)
-	addi	r4,r4,2
-	addc	r0,r0,r6
-	srwi.	r6,r5,2		/* # words to do */
-	beq	3f
-1:	mtctr	r6
-82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
-92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
-	adde	r0,r0,r6
-	bdnz	82b
-	andi.	r5,r5,3
-3:	cmpwi	0,r5,2
-	blt+	4f
-83:	lhz	r6,4(r3)
 	addi	r3,r3,2
-	subi	r5,r5,2
-93:	sth	r6,4(r4)
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
 	addi	r4,r4,2
+	bdnz	1b
+
+.Lcopy_aligned:
+	/*
+	 * We unroll the loop such that each iteration is 64 bytes with an
+	 * entry and exit limb of 64 bytes, meaning a minimum size of
+	 * 128 bytes.
+	 */
+	srdi.	r6,r5,7
+	beq	.Lcopy_tail_doublewords		/* len < 128 */
+
+	srdi	r6,r5,6
+	subi	r6,r6,1
+	mtctr	r6
+
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(r14)(r1)
+	std	r15,STK_REG(r15)(r1)
+	std	r16,STK_REG(r16)(r1)
+
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+
+	/*
+	 * On POWER6 and POWER7 back to back addes take 2 cycles because of
+	 * the XER dependency. This means the fastest this loop can go is
+	 * 16 cycles per iteration. The scheduling of the loop below has
+	 * been shown to hit this on both POWER6 and POWER7.
+	 */
+	.align 5
+2:
+	adde	r0,r0,r6
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+source;	ld	r6,0(r3)
+source;	ld	r9,8(r3)
+
+	adde	r0,r0,r16
+source;	ld	r10,16(r3)
+source;	ld	r11,24(r3)
+	bdnz	2b
+
+
 	adde	r0,r0,r6
-4:	cmpwi	0,r5,1
-	bne+	5f
-84:	lbz	r6,4(r3)
-94:	stb	r6,4(r4)
-	slwi	r6,r6,8		/* Upper byte of word */
+source;	ld	r12,32(r3)
+source;	ld	r14,40(r3)
+
+	adde	r0,r0,r9
+source;	ld	r15,48(r3)
+source;	ld	r16,56(r3)
+	addi	r3,r3,64
+
+	adde	r0,r0,r10
+dest;	std	r6,0(r4)
+dest;	std	r9,8(r4)
+
+	adde	r0,r0,r11
+dest;	std	r10,16(r4)
+dest;	std	r11,24(r4)
+
+	adde	r0,r0,r12
+dest;	std	r12,32(r4)
+dest;	std	r14,40(r4)
+
+	adde	r0,r0,r14
+dest;	std	r15,48(r4)
+dest;	std	r16,56(r4)
+	addi	r4,r4,64
+
+	adde	r0,r0,r15
+	adde	r0,r0,r16
+
+	ld	r14,STK_REG(r14)(r1)
+	ld	r15,STK_REG(r15)(r1)
+	ld	r16,STK_REG(r16)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	andi.	r5,r5,63
+
+.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
+	srdi.	r6,r5,3
+	beq	.Lcopy_tail_word
+
+	mtctr	r6
+3:
+source;	ld	r6,0(r3)
+	addi	r3,r3,8
 	adde	r0,r0,r6
-5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
-	rldicl	r4,r3,32,0	/* fold 64 bit value */
-	add	r3,r4,r3
-	srdi	r3,r3,32
-	blr
+dest;	std	r6,0(r4)
+	addi	r4,r4,8
+	bdnz	3b
 
-/* These shouldn't go in the fixup section, since that would
-   cause the ex_table addresses to get out of order. */
+	andi.	r5,r5,7
 
-	.globl src_error_1
-src_error_1:
-	li	r6,0
-	subi	r5,r5,2
-95:	sth	r6,4(r4)
+.Lcopy_tail_word:			/* Up to 7 bytes to go */
+	srdi.	r6,r5,2
+	beq	.Lcopy_tail_halfword
+
+source;	lwz	r6,0(r3)
+	addi	r3,r3,4
+	adde	r0,r0,r6
+dest;	stw	r6,0(r4)
+	addi	r4,r4,4
+	subi	r5,r5,4
+
+.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
+	srdi.	r6,r5,1
+	beq	.Lcopy_tail_byte
+
+source;	lhz	r6,0(r3)
+	addi	r3,r3,2
+	adde	r0,r0,r6
+dest;	sth	r6,0(r4)
 	addi	r4,r4,2
-	srwi.	r6,r5,2
-	beq	3f
-	mtctr	r6
-	.globl src_error_2
-src_error_2:
-	li	r6,0
-96:	stwu	r6,4(r4)
-	bdnz	96b
-3:	andi.	r5,r5,3
-	beq	src_error
-	.globl src_error_3
-src_error_3:
-	li	r6,0
-	mtctr	r5
-	addi	r4,r4,3
-97:	stbu	r6,1(r4)
-	bdnz	97b
-	.globl src_error
-src_error:
+	subi	r5,r5,2
+
+.Lcopy_tail_byte:			/* Up to 1 byte to go */
+	andi.	r6,r5,1
+	beq	.Lcopy_finish
+
+source;	lbz	r6,0(r3)
+	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
+	adde	r0,r0,r9
+dest;	stb	r6,0(r4)
+
+.Lcopy_finish:
+	addze	r0,r0			/* add in final carry */
+	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
+	add	r3,r4,r0
+	srdi	r3,r3,32
+	blr
+
+.Lsrc_error:
 	cmpdi	0,r7,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r7)
-1:	addze	r3,r0
 	blr
 
-	.globl dst_error
-dst_error:
+.Ldest_error:
 	cmpdi	0,r8,0
-	beq	1f
+	beqlr
 	li	r6,-EFAULT
 	stw	r6,0(r8)
-1:	addze	r3,r0
 	blr
-
-.section __ex_table,"a"
-	.align	3
-	.llong	81b,src_error_1
-	.llong	91b,dst_error
-	.llong	82b,src_error_2
-	.llong	92b,dst_error
-	.llong	83b,src_error_3
-	.llong	93b,dst_error
-	.llong	84b,src_error_3
-	.llong	94b,dst_error
-	.llong	95b,dst_error
-	.llong	96b,dst_error
-	.llong	97b,dst_error
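
Note on the epilogue above: .Lcopy_finish adds the last carry into the 64-bit running sum (addze), rotates the sum by 32 bits (rldicl r4,r0,32,0), then adds the two halves and keeps the upper word (add/srdi), the usual fold down to a 32-bit partial checksum. An illustrative C equivalent, included only to make the register sequence easier to follow (an assumption for clarity, not part of the patch):

static inline unsigned int fold_to_32bit(unsigned long sum64)
{
	/* rldicl r4,r0,32,0: rotate the 64-bit sum by 32 bits */
	unsigned long rotated = (sum64 << 32) | (sum64 >> 32);

	/* add r3,r4,r0; srdi r3,r3,32: add the halves, return the top word */
	return (unsigned int)((sum64 + rotated) >> 32);
}
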
diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c
new file mode 100644
index 000000000000..614cff1a8e0e
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers_64.c
@@ -0,0 +1,65 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+			       int len, __wsum sum, int *err_ptr)
+{
+	unsigned int csum;
+
+	might_sleep();
+
+	*err_ptr = 0;
+
+	if (!len) {
+		csum = 0;
+		goto out;
+	}
+
+	if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
+		*err_ptr = -EFAULT;
+		csum = (__force unsigned int)sum;
+		goto out;
+	}
+
+	csum = csum_partial_copy_generic((void __force *)src, dst,
+					 len, sum, err_ptr, NULL);
+
+	if (unlikely(*err_ptr)) {
+		int missing = __copy_from_user(dst, src, len);
+
+		if (missing) {
+			memset(dst + len - missing, 0, missing);
+			*err_ptr = -EFAULT;
+		} else {
+			*err_ptr = 0;
+		}
+
+		csum = csum_partial(dst, len, sum);
+	}
+
+out:
+	return (__force __wsum)csum;
+}
+EXPORT_SYMBOL(csum_and_copy_from_user);
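
Usage note for the wrapper above (the calling function below is hypothetical and only illustrates the contract): when the assembly copy faults, the wrapper retries with __copy_from_user(), zero-fills whatever could not be copied, and recomputes the checksum over dst, so a caller only needs to check the error pointer:

static int example_copy_and_csum(const void __user *usrc, void *kdst,
				 int len, __wsum *csump)
{
	int err = 0;

	*csump = csum_and_copy_from_user(usrc, kdst, len, *csump, &err);

	/*
	 * Even when err is -EFAULT, kdst has been fully written (zeroes past
	 * the faulting point) and *csump is consistent with its contents.
	 */
	return err;
}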