diff options
Diffstat (limited to 'arch/sparc/lib')
-rw-r--r-- | arch/sparc/lib/Makefile | 3 | ||||
-rw-r--r-- | arch/sparc/lib/NG2memcpy.S | 46 | ||||
-rw-r--r-- | arch/sparc/lib/NG4clear_page.S | 29 | ||||
-rw-r--r-- | arch/sparc/lib/NG4copy_from_user.S | 30 | ||||
-rw-r--r-- | arch/sparc/lib/NG4copy_page.S | 57 | ||||
-rw-r--r-- | arch/sparc/lib/NG4copy_to_user.S | 39 | ||||
-rw-r--r-- | arch/sparc/lib/NG4memcpy.S | 360 | ||||
-rw-r--r-- | arch/sparc/lib/NG4memset.S | 105 | ||||
-rw-r--r-- | arch/sparc/lib/NG4patch.S | 54 | ||||
-rw-r--r-- | arch/sparc/lib/NGpage.S | 2 | ||||
-rw-r--r-- | arch/sparc/lib/ksyms.c | 4 |
11 files changed, 706 insertions, 23 deletions
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index dff4096f3dec..8410065f2862 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile | |||
@@ -32,6 +32,9 @@ lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o | |||
32 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o | 32 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o |
33 | lib-$(CONFIG_SPARC64) += NG2patch.o | 33 | lib-$(CONFIG_SPARC64) += NG2patch.o |
34 | 34 | ||
35 | lib-$(CONFIG_SPARC64) += NG4memcpy.o NG4copy_from_user.o NG4copy_to_user.o | ||
36 | lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o | ||
37 | |||
35 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o | 38 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o |
36 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o | 39 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o |
37 | 40 | ||
diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index 03eadf66b0d3..2c20ad63ddbf 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S | |||
@@ -14,7 +14,7 @@ | |||
14 | #define FPRS_FEF 0x04 | 14 | #define FPRS_FEF 0x04 |
15 | #ifdef MEMCPY_DEBUG | 15 | #ifdef MEMCPY_DEBUG |
16 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ | 16 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \ |
17 | clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0; | 17 | clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; |
18 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | 18 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs |
19 | #else | 19 | #else |
20 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs | 20 | #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs |
@@ -182,13 +182,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
182 | cmp %g2, 0 | 182 | cmp %g2, 0 |
183 | tne %xcc, 5 | 183 | tne %xcc, 5 |
184 | PREAMBLE | 184 | PREAMBLE |
185 | mov %o0, GLOBAL_SPARE | 185 | mov %o0, %o3 |
186 | cmp %o2, 0 | 186 | cmp %o2, 0 |
187 | be,pn %XCC, 85f | 187 | be,pn %XCC, 85f |
188 | or %o0, %o1, %o3 | 188 | or %o0, %o1, GLOBAL_SPARE |
189 | cmp %o2, 16 | 189 | cmp %o2, 16 |
190 | blu,a,pn %XCC, 80f | 190 | blu,a,pn %XCC, 80f |
191 | or %o3, %o2, %o3 | 191 | or GLOBAL_SPARE, %o2, GLOBAL_SPARE |
192 | 192 | ||
193 | /* 2 blocks (128 bytes) is the minimum we can do the block | 193 | /* 2 blocks (128 bytes) is the minimum we can do the block |
194 | * copy with. We need to ensure that we'll iterate at least | 194 | * copy with. We need to ensure that we'll iterate at least |
@@ -202,7 +202,7 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
202 | */ | 202 | */ |
203 | cmp %o2, (4 * 64) | 203 | cmp %o2, (4 * 64) |
204 | blu,pt %XCC, 75f | 204 | blu,pt %XCC, 75f |
205 | andcc %o3, 0x7, %g0 | 205 | andcc GLOBAL_SPARE, 0x7, %g0 |
206 | 206 | ||
207 | /* %o0: dst | 207 | /* %o0: dst |
208 | * %o1: src | 208 | * %o1: src |
@@ -404,13 +404,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
404 | * over. If anything is left, we copy it one byte at a time. | 404 | * over. If anything is left, we copy it one byte at a time. |
405 | */ | 405 | */ |
406 | brz,pt %o2, 85f | 406 | brz,pt %o2, 85f |
407 | sub %o0, %o1, %o3 | 407 | sub %o0, %o1, GLOBAL_SPARE |
408 | ba,a,pt %XCC, 90f | 408 | ba,a,pt %XCC, 90f |
409 | 409 | ||
410 | .align 64 | 410 | .align 64 |
411 | 75: /* 16 < len <= 64 */ | 411 | 75: /* 16 < len <= 64 */ |
412 | bne,pn %XCC, 75f | 412 | bne,pn %XCC, 75f |
413 | sub %o0, %o1, %o3 | 413 | sub %o0, %o1, GLOBAL_SPARE |
414 | 414 | ||
415 | 72: | 415 | 72: |
416 | andn %o2, 0xf, %o4 | 416 | andn %o2, 0xf, %o4 |
@@ -420,9 +420,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
420 | add %o1, 0x08, %o1 | 420 | add %o1, 0x08, %o1 |
421 | EX_LD(LOAD(ldx, %o1, %g1)) | 421 | EX_LD(LOAD(ldx, %o1, %g1)) |
422 | sub %o1, 0x08, %o1 | 422 | sub %o1, 0x08, %o1 |
423 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 423 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) |
424 | add %o1, 0x8, %o1 | 424 | add %o1, 0x8, %o1 |
425 | EX_ST(STORE(stx, %g1, %o1 + %o3)) | 425 | EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) |
426 | bgu,pt %XCC, 1b | 426 | bgu,pt %XCC, 1b |
427 | add %o1, 0x8, %o1 | 427 | add %o1, 0x8, %o1 |
428 | 73: andcc %o2, 0x8, %g0 | 428 | 73: andcc %o2, 0x8, %g0 |
@@ -430,14 +430,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
430 | nop | 430 | nop |
431 | sub %o2, 0x8, %o2 | 431 | sub %o2, 0x8, %o2 |
432 | EX_LD(LOAD(ldx, %o1, %o5)) | 432 | EX_LD(LOAD(ldx, %o1, %o5)) |
433 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | 433 | EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) |
434 | add %o1, 0x8, %o1 | 434 | add %o1, 0x8, %o1 |
435 | 1: andcc %o2, 0x4, %g0 | 435 | 1: andcc %o2, 0x4, %g0 |
436 | be,pt %XCC, 1f | 436 | be,pt %XCC, 1f |
437 | nop | 437 | nop |
438 | sub %o2, 0x4, %o2 | 438 | sub %o2, 0x4, %o2 |
439 | EX_LD(LOAD(lduw, %o1, %o5)) | 439 | EX_LD(LOAD(lduw, %o1, %o5)) |
440 | EX_ST(STORE(stw, %o5, %o1 + %o3)) | 440 | EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) |
441 | add %o1, 0x4, %o1 | 441 | add %o1, 0x4, %o1 |
442 | 1: cmp %o2, 0 | 442 | 1: cmp %o2, 0 |
443 | be,pt %XCC, 85f | 443 | be,pt %XCC, 85f |
@@ -454,11 +454,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
454 | 454 | ||
455 | 1: subcc %g1, 1, %g1 | 455 | 1: subcc %g1, 1, %g1 |
456 | EX_LD(LOAD(ldub, %o1, %o5)) | 456 | EX_LD(LOAD(ldub, %o1, %o5)) |
457 | EX_ST(STORE(stb, %o5, %o1 + %o3)) | 457 | EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) |
458 | bgu,pt %icc, 1b | 458 | bgu,pt %icc, 1b |
459 | add %o1, 1, %o1 | 459 | add %o1, 1, %o1 |
460 | 460 | ||
461 | 2: add %o1, %o3, %o0 | 461 | 2: add %o1, GLOBAL_SPARE, %o0 |
462 | andcc %o1, 0x7, %g1 | 462 | andcc %o1, 0x7, %g1 |
463 | bne,pt %icc, 8f | 463 | bne,pt %icc, 8f |
464 | sll %g1, 3, %g1 | 464 | sll %g1, 3, %g1 |
@@ -468,16 +468,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
468 | nop | 468 | nop |
469 | ba,a,pt %xcc, 73b | 469 | ba,a,pt %xcc, 73b |
470 | 470 | ||
471 | 8: mov 64, %o3 | 471 | 8: mov 64, GLOBAL_SPARE |
472 | andn %o1, 0x7, %o1 | 472 | andn %o1, 0x7, %o1 |
473 | EX_LD(LOAD(ldx, %o1, %g2)) | 473 | EX_LD(LOAD(ldx, %o1, %g2)) |
474 | sub %o3, %g1, %o3 | 474 | sub GLOBAL_SPARE, %g1, GLOBAL_SPARE |
475 | andn %o2, 0x7, %o4 | 475 | andn %o2, 0x7, %o4 |
476 | sllx %g2, %g1, %g2 | 476 | sllx %g2, %g1, %g2 |
477 | 1: add %o1, 0x8, %o1 | 477 | 1: add %o1, 0x8, %o1 |
478 | EX_LD(LOAD(ldx, %o1, %g3)) | 478 | EX_LD(LOAD(ldx, %o1, %g3)) |
479 | subcc %o4, 0x8, %o4 | 479 | subcc %o4, 0x8, %o4 |
480 | srlx %g3, %o3, %o5 | 480 | srlx %g3, GLOBAL_SPARE, %o5 |
481 | or %o5, %g2, %o5 | 481 | or %o5, %g2, %o5 |
482 | EX_ST(STORE(stx, %o5, %o0)) | 482 | EX_ST(STORE(stx, %o5, %o0)) |
483 | add %o0, 0x8, %o0 | 483 | add %o0, 0x8, %o0 |
@@ -489,32 +489,32 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | |||
489 | be,pn %icc, 85f | 489 | be,pn %icc, 85f |
490 | add %o1, %g1, %o1 | 490 | add %o1, %g1, %o1 |
491 | ba,pt %xcc, 90f | 491 | ba,pt %xcc, 90f |
492 | sub %o0, %o1, %o3 | 492 | sub %o0, %o1, GLOBAL_SPARE |
493 | 493 | ||
494 | .align 64 | 494 | .align 64 |
495 | 80: /* 0 < len <= 16 */ | 495 | 80: /* 0 < len <= 16 */ |
496 | andcc %o3, 0x3, %g0 | 496 | andcc GLOBAL_SPARE, 0x3, %g0 |
497 | bne,pn %XCC, 90f | 497 | bne,pn %XCC, 90f |
498 | sub %o0, %o1, %o3 | 498 | sub %o0, %o1, GLOBAL_SPARE |
499 | 499 | ||
500 | 1: | 500 | 1: |
501 | subcc %o2, 4, %o2 | 501 | subcc %o2, 4, %o2 |
502 | EX_LD(LOAD(lduw, %o1, %g1)) | 502 | EX_LD(LOAD(lduw, %o1, %g1)) |
503 | EX_ST(STORE(stw, %g1, %o1 + %o3)) | 503 | EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) |
504 | bgu,pt %XCC, 1b | 504 | bgu,pt %XCC, 1b |
505 | add %o1, 4, %o1 | 505 | add %o1, 4, %o1 |
506 | 506 | ||
507 | 85: retl | 507 | 85: retl |
508 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 508 | mov EX_RETVAL(%o3), %o0 |
509 | 509 | ||
510 | .align 32 | 510 | .align 32 |
511 | 90: | 511 | 90: |
512 | subcc %o2, 1, %o2 | 512 | subcc %o2, 1, %o2 |
513 | EX_LD(LOAD(ldub, %o1, %g1)) | 513 | EX_LD(LOAD(ldub, %o1, %g1)) |
514 | EX_ST(STORE(stb, %g1, %o1 + %o3)) | 514 | EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) |
515 | bgu,pt %XCC, 90b | 515 | bgu,pt %XCC, 90b |
516 | add %o1, 1, %o1 | 516 | add %o1, 1, %o1 |
517 | retl | 517 | retl |
518 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | 518 | mov EX_RETVAL(%o3), %o0 |
519 | 519 | ||
520 | .size FUNC_NAME, .-FUNC_NAME | 520 | .size FUNC_NAME, .-FUNC_NAME |
diff --git a/arch/sparc/lib/NG4clear_page.S b/arch/sparc/lib/NG4clear_page.S new file mode 100644 index 000000000000..e16c88204a42 --- /dev/null +++ b/arch/sparc/lib/NG4clear_page.S | |||
@@ -0,0 +1,29 @@ | |||
1 | /* NG4clear_page.S: Niagara-4 optimized clear page. | |
2 | * | ||
3 | * Copyright (C) 2012 (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | #include <asm/page.h> | ||
8 | |||
9 | .text | ||
10 | |||
11 | .register %g3, #scratch | ||
12 | |||
13 | .align 32 | ||
14 | .globl NG4clear_page | ||
15 | .globl NG4clear_user_page | ||
16 | NG4clear_page: /* %o0=dest */ | ||
17 | NG4clear_user_page: /* %o0=dest, %o1=vaddr */ | ||
18 | set PAGE_SIZE, %g7 | ||
19 | mov 0x20, %g3 | ||
20 | 1: stxa %g0, [%o0 + %g0] ASI_ST_BLKINIT_MRU_P | ||
21 | subcc %g7, 0x40, %g7 | ||
22 | stxa %g0, [%o0 + %g3] ASI_ST_BLKINIT_MRU_P | ||
23 | bne,pt %xcc, 1b | ||
24 | add %o0, 0x40, %o0 | ||
25 | membar #StoreLoad|#StoreStore | ||
26 | retl | ||
27 | nop | ||
28 | .size NG4clear_page,.-NG4clear_page | ||
29 | .size NG4clear_user_page,.-NG4clear_user_page \ No newline at end of file | ||
diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S new file mode 100644 index 000000000000..fd9f903ffa32 --- /dev/null +++ b/arch/sparc/lib/NG4copy_from_user.S | |||
@@ -0,0 +1,30 @@ | |||
1 | /* NG4copy_from_user.S: Niagara-4 optimized copy from userspace. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_LD(x) \ | ||
7 | 98: x; \ | ||
8 | .section __ex_table,"a";\ | ||
9 | .align 4; \ | ||
10 | .word 98b, __retl_one_asi;\ | ||
11 | .text; \ | ||
12 | .align 4; | ||
13 | |||
14 | #ifndef ASI_AIUS | ||
15 | #define ASI_AIUS 0x11 | ||
16 | #endif | ||
17 | |||
18 | #define FUNC_NAME NG4copy_from_user | ||
19 | #define LOAD(type,addr,dest) type##a [addr] %asi, dest | ||
20 | #define EX_RETVAL(x) 0 | ||
21 | |||
22 | #ifdef __KERNEL__ | ||
23 | #define PREAMBLE \ | ||
24 | rd %asi, %g1; \ | ||
25 | cmp %g1, ASI_AIUS; \ | ||
26 | bne,pn %icc, ___copy_in_user; \ | ||
27 | nop | ||
28 | #endif | ||
29 | |||
30 | #include "NG4memcpy.S" | ||
diff --git a/arch/sparc/lib/NG4copy_page.S b/arch/sparc/lib/NG4copy_page.S new file mode 100644 index 000000000000..28504e88c535 --- /dev/null +++ b/arch/sparc/lib/NG4copy_page.S | |||
@@ -0,0 +1,57 @@ | |||
1 | /* NG4copy_page.S: Niagara-4 optimized copy page. | ||
2 | * | ||
3 | * Copyright (C) 2012 (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | #include <asm/page.h> | ||
8 | |||
9 | .text | ||
10 | .align 32 | ||
11 | |||
12 | .register %g2, #scratch | ||
13 | .register %g3, #scratch | ||
14 | |||
15 | .globl NG4copy_user_page | ||
16 | NG4copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | ||
17 | prefetch [%o1 + 0x000], #n_reads_strong | ||
18 | prefetch [%o1 + 0x040], #n_reads_strong | ||
19 | prefetch [%o1 + 0x080], #n_reads_strong | ||
20 | prefetch [%o1 + 0x0c0], #n_reads_strong | ||
21 | set PAGE_SIZE, %g7 | ||
22 | prefetch [%o1 + 0x100], #n_reads_strong | ||
23 | prefetch [%o1 + 0x140], #n_reads_strong | ||
24 | prefetch [%o1 + 0x180], #n_reads_strong | ||
25 | prefetch [%o1 + 0x1c0], #n_reads_strong | ||
26 | 1: | ||
27 | ldx [%o1 + 0x00], %o2 | ||
28 | subcc %g7, 0x40, %g7 | ||
29 | ldx [%o1 + 0x08], %o3 | ||
30 | ldx [%o1 + 0x10], %o4 | ||
31 | ldx [%o1 + 0x18], %o5 | ||
32 | ldx [%o1 + 0x20], %g1 | ||
33 | stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P | ||
34 | add %o0, 0x08, %o0 | ||
35 | ldx [%o1 + 0x28], %g2 | ||
36 | stxa %o3, [%o0] ASI_ST_BLKINIT_MRU_P | ||
37 | add %o0, 0x08, %o0 | ||
38 | ldx [%o1 + 0x30], %g3 | ||
39 | stxa %o4, [%o0] ASI_ST_BLKINIT_MRU_P | ||
40 | add %o0, 0x08, %o0 | ||
41 | ldx [%o1 + 0x38], %o2 | ||
42 | add %o1, 0x40, %o1 | ||
43 | stxa %o5, [%o0] ASI_ST_BLKINIT_MRU_P | ||
44 | add %o0, 0x08, %o0 | ||
45 | stxa %g1, [%o0] ASI_ST_BLKINIT_MRU_P | ||
46 | add %o0, 0x08, %o0 | ||
47 | stxa %g2, [%o0] ASI_ST_BLKINIT_MRU_P | ||
48 | add %o0, 0x08, %o0 | ||
49 | stxa %g3, [%o0] ASI_ST_BLKINIT_MRU_P | ||
50 | add %o0, 0x08, %o0 | ||
51 | stxa %o2, [%o0] ASI_ST_BLKINIT_MRU_P | ||
52 | add %o0, 0x08, %o0 | ||
53 | bne,pt %icc, 1b | ||
54 | prefetch [%o1 + 0x200], #n_reads_strong | ||
55 | retl | ||
56 | membar #StoreLoad | #StoreStore | ||
57 | .size NG4copy_user_page,.-NG4copy_user_page | ||
diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S new file mode 100644 index 000000000000..9744c4540a8d --- /dev/null +++ b/arch/sparc/lib/NG4copy_to_user.S | |||
@@ -0,0 +1,39 @@ | |||
1 | /* NG4copy_to_user.S: Niagara-4 optimized copy to userspace. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_ST(x) \ | ||
7 | 98: x; \ | ||
8 | .section __ex_table,"a";\ | ||
9 | .align 4; \ | ||
10 | .word 98b, __retl_one_asi;\ | ||
11 | .text; \ | ||
12 | .align 4; | ||
13 | |||
14 | #ifndef ASI_AIUS | ||
15 | #define ASI_AIUS 0x11 | ||
16 | #endif | ||
17 | |||
18 | #ifndef ASI_BLK_INIT_QUAD_LDD_AIUS | ||
19 | #define ASI_BLK_INIT_QUAD_LDD_AIUS 0x23 | ||
20 | #endif | ||
21 | |||
22 | #define FUNC_NAME NG4copy_to_user | ||
23 | #define STORE(type,src,addr) type##a src, [addr] %asi | ||
24 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS | ||
25 | #define EX_RETVAL(x) 0 | ||
26 | |||
27 | #ifdef __KERNEL__ | ||
28 | /* Writing to %asi is _expensive_ so we hardcode it. | ||
29 | * Reading %asi to check for KERNEL_DS is comparatively | ||
30 | * cheap. | ||
31 | */ | ||
32 | #define PREAMBLE \ | ||
33 | rd %asi, %g1; \ | ||
34 | cmp %g1, ASI_AIUS; \ | ||
35 | bne,pn %icc, ___copy_in_user; \ | ||
36 | nop | ||
37 | #endif | ||
38 | |||
39 | #include "NG4memcpy.S" | ||
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S new file mode 100644 index 000000000000..9cf2ee01cee3 --- /dev/null +++ b/arch/sparc/lib/NG4memcpy.S | |||
@@ -0,0 +1,360 @@ | |||
1 | /* NG4memcpy.S: Niagara-4 optimized memcpy. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #ifdef __KERNEL__ | ||
7 | #include <asm/visasm.h> | ||
8 | #include <asm/asi.h> | ||
9 | #define GLOBAL_SPARE %g7 | ||
10 | #else | ||
11 | #define ASI_BLK_INIT_QUAD_LDD_P 0xe2 | ||
12 | #define FPRS_FEF 0x04 | ||
13 | |||
14 | /* On T4 it is very expensive to access ASRs like %fprs and | ||
15 | * %asi, avoiding a read or a write can save ~50 cycles. | ||
16 | */ | ||
17 | #define FPU_ENTER \ | ||
18 | rd %fprs, %o5; \ | ||
19 | andcc %o5, FPRS_FEF, %g0; \ | ||
20 | be,a,pn %icc, 999f; \ | ||
21 | wr %g0, FPRS_FEF, %fprs; \ | ||
22 | 999: | ||
23 | |||
24 | #ifdef MEMCPY_DEBUG | ||
25 | #define VISEntryHalf FPU_ENTER; \ | ||
26 | clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0; | ||
27 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | ||
28 | #else | ||
29 | #define VISEntryHalf FPU_ENTER | ||
30 | #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs | ||
31 | #endif | ||
32 | |||
33 | #define GLOBAL_SPARE %g5 | ||
34 | #endif | ||
35 | |||
36 | #ifndef STORE_ASI | ||
37 | #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA | ||
38 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P | ||
39 | #else | ||
40 | #define STORE_ASI 0x80 /* ASI_P */ | ||
41 | #endif | ||
42 | #endif | ||
43 | |||
44 | #ifndef EX_LD | ||
45 | #define EX_LD(x) x | ||
46 | #endif | ||
47 | |||
48 | #ifndef EX_ST | ||
49 | #define EX_ST(x) x | ||
50 | #endif | ||
51 | |||
52 | #ifndef EX_RETVAL | ||
53 | #define EX_RETVAL(x) x | ||
54 | #endif | ||
55 | |||
56 | #ifndef LOAD | ||
57 | #define LOAD(type,addr,dest) type [addr], dest | ||
58 | #endif | ||
59 | |||
60 | #ifndef STORE | ||
61 | #ifndef MEMCPY_DEBUG | ||
62 | #define STORE(type,src,addr) type src, [addr] | ||
63 | #else | ||
64 | #define STORE(type,src,addr) type##a src, [addr] %asi | ||
65 | #endif | ||
66 | #endif | ||
67 | |||
68 | #ifndef STORE_INIT | ||
69 | #define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI | ||
70 | #endif | ||
71 | |||
72 | #ifndef FUNC_NAME | ||
73 | #define FUNC_NAME NG4memcpy | ||
74 | #endif | ||
75 | #ifndef PREAMBLE | ||
76 | #define PREAMBLE | ||
77 | #endif | ||
78 | |||
79 | #ifndef XCC | ||
80 | #define XCC xcc | ||
81 | #endif | ||
82 | |||
83 | .register %g2,#scratch | ||
84 | .register %g3,#scratch | ||
85 | |||
86 | .text | ||
87 | .align 64 | ||
88 | |||
89 | .globl FUNC_NAME | ||
90 | .type FUNC_NAME,#function | ||
91 | FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ | ||
92 | #ifdef MEMCPY_DEBUG | ||
93 | wr %g0, 0x80, %asi | ||
94 | #endif | ||
95 | srlx %o2, 31, %g2 | ||
96 | cmp %g2, 0 | ||
97 | tne %XCC, 5 | ||
98 | PREAMBLE | ||
99 | mov %o0, %o3 | ||
100 | brz,pn %o2, .Lexit | ||
101 | cmp %o2, 3 | ||
102 | ble,pn %icc, .Ltiny | ||
103 | cmp %o2, 19 | ||
104 | ble,pn %icc, .Lsmall | ||
105 | or %o0, %o1, %g2 | ||
106 | cmp %o2, 128 | ||
107 | bl,pn %icc, .Lmedium | ||
108 | nop | ||
109 | |||
110 | .Llarge:/* len >= 0x80 */ | ||
111 | /* First get dest 8 byte aligned. */ | ||
112 | sub %g0, %o0, %g1 | ||
113 | and %g1, 0x7, %g1 | ||
114 | brz,pt %g1, 51f | ||
115 | sub %o2, %g1, %o2 | ||
116 | |||
117 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
118 | add %o1, 1, %o1 | ||
119 | subcc %g1, 1, %g1 | ||
120 | add %o0, 1, %o0 | ||
121 | bne,pt %icc, 1b | ||
122 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
123 | |||
124 | 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) | ||
125 | LOAD(prefetch, %o1 + 0x080, #n_reads_strong) | ||
126 | LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong) | ||
127 | LOAD(prefetch, %o1 + 0x100, #n_reads_strong) | ||
128 | LOAD(prefetch, %o1 + 0x140, #n_reads_strong) | ||
129 | LOAD(prefetch, %o1 + 0x180, #n_reads_strong) | ||
130 | LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong) | ||
131 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | ||
132 | |||
133 | /* Check if we can use the straight fully aligned | ||
134 | * loop, or we require the alignaddr/faligndata variant. | ||
135 | */ | ||
136 | andcc %o1, 0x7, %o5 | ||
137 | bne,pn %icc, .Llarge_src_unaligned | ||
138 | sub %g0, %o0, %g1 | ||
139 | |||
140 | /* Legitimize the use of initializing stores by getting dest | ||
141 | * to be 64-byte aligned. | ||
142 | */ | ||
143 | and %g1, 0x3f, %g1 | ||
144 | brz,pt %g1, .Llarge_aligned | ||
145 | sub %o2, %g1, %o2 | ||
146 | |||
147 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) | ||
148 | add %o1, 8, %o1 | ||
149 | subcc %g1, 8, %g1 | ||
150 | add %o0, 8, %o0 | ||
151 | bne,pt %icc, 1b | ||
152 | EX_ST(STORE(stx, %g2, %o0 - 0x08)) | ||
153 | |||
154 | .Llarge_aligned: | ||
155 | /* original len >= 0x80 && src 8-byte aligned && dest now 64-byte aligned */ | |
156 | andn %o2, 0x3f, %o4 | ||
157 | sub %o2, %o4, %o2 | ||
158 | |||
159 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
160 | add %o1, 0x40, %o1 | ||
161 | EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) | ||
162 | subcc %o4, 0x40, %o4 | ||
163 | EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) | ||
164 | EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) | ||
165 | EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) | ||
166 | EX_ST(STORE_INIT(%g1, %o0)) | ||
167 | add %o0, 0x08, %o0 | ||
168 | EX_ST(STORE_INIT(%g2, %o0)) | ||
169 | add %o0, 0x08, %o0 | ||
170 | EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) | ||
171 | EX_ST(STORE_INIT(%g3, %o0)) | ||
172 | add %o0, 0x08, %o0 | ||
173 | EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) | ||
174 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
175 | add %o0, 0x08, %o0 | ||
176 | EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) | ||
177 | EX_ST(STORE_INIT(%o5, %o0)) | ||
178 | add %o0, 0x08, %o0 | ||
179 | EX_ST(STORE_INIT(%g2, %o0)) | ||
180 | add %o0, 0x08, %o0 | ||
181 | EX_ST(STORE_INIT(%g3, %o0)) | ||
182 | add %o0, 0x08, %o0 | ||
183 | EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) | ||
184 | add %o0, 0x08, %o0 | ||
185 | bne,pt %icc, 1b | ||
186 | LOAD(prefetch, %o1 + 0x200, #n_reads_strong) | ||
187 | |||
188 | membar #StoreLoad | #StoreStore | ||
189 | |||
190 | brz,pn %o2, .Lexit | ||
191 | cmp %o2, 19 | ||
192 | ble,pn %icc, .Lsmall_unaligned | ||
193 | nop | ||
194 | ba,a,pt %icc, .Lmedium_noprefetch | ||
195 | |||
196 | .Lexit: retl | ||
197 | mov EX_RETVAL(%o3), %o0 | ||
198 | |||
199 | .Llarge_src_unaligned: | ||
200 | andn %o2, 0x3f, %o4 | ||
201 | sub %o2, %o4, %o2 | ||
202 | VISEntryHalf | ||
203 | alignaddr %o1, %g0, %g1 | ||
204 | add %o1, %o4, %o1 | ||
205 | EX_LD(LOAD(ldd, %g1 + 0x00, %f0)) | ||
206 | 1: EX_LD(LOAD(ldd, %g1 + 0x08, %f2)) | ||
207 | subcc %o4, 0x40, %o4 | ||
208 | EX_LD(LOAD(ldd, %g1 + 0x10, %f4)) | ||
209 | EX_LD(LOAD(ldd, %g1 + 0x18, %f6)) | ||
210 | EX_LD(LOAD(ldd, %g1 + 0x20, %f8)) | ||
211 | EX_LD(LOAD(ldd, %g1 + 0x28, %f10)) | ||
212 | EX_LD(LOAD(ldd, %g1 + 0x30, %f12)) | ||
213 | EX_LD(LOAD(ldd, %g1 + 0x38, %f14)) | ||
214 | faligndata %f0, %f2, %f16 | ||
215 | EX_LD(LOAD(ldd, %g1 + 0x40, %f0)) | ||
216 | faligndata %f2, %f4, %f18 | ||
217 | add %g1, 0x40, %g1 | ||
218 | faligndata %f4, %f6, %f20 | ||
219 | faligndata %f6, %f8, %f22 | ||
220 | faligndata %f8, %f10, %f24 | ||
221 | faligndata %f10, %f12, %f26 | ||
222 | faligndata %f12, %f14, %f28 | ||
223 | faligndata %f14, %f0, %f30 | ||
224 | EX_ST(STORE(std, %f16, %o0 + 0x00)) | ||
225 | EX_ST(STORE(std, %f18, %o0 + 0x08)) | ||
226 | EX_ST(STORE(std, %f20, %o0 + 0x10)) | ||
227 | EX_ST(STORE(std, %f22, %o0 + 0x18)) | ||
228 | EX_ST(STORE(std, %f24, %o0 + 0x20)) | ||
229 | EX_ST(STORE(std, %f26, %o0 + 0x28)) | ||
230 | EX_ST(STORE(std, %f28, %o0 + 0x30)) | ||
231 | EX_ST(STORE(std, %f30, %o0 + 0x38)) | ||
232 | add %o0, 0x40, %o0 | ||
233 | bne,pt %icc, 1b | ||
234 | LOAD(prefetch, %g1 + 0x200, #n_reads_strong) | ||
235 | VISExitHalf | ||
236 | |||
237 | brz,pn %o2, .Lexit | ||
238 | cmp %o2, 19 | ||
239 | ble,pn %icc, .Lsmall_unaligned | ||
240 | nop | ||
241 | ba,a,pt %icc, .Lmedium_unaligned | ||
242 | |||
243 | .Lmedium: | ||
244 | LOAD(prefetch, %o1 + 0x40, #n_reads_strong) | ||
245 | andcc %g2, 0x7, %g0 | ||
246 | bne,pn %icc, .Lmedium_unaligned | ||
247 | nop | ||
248 | .Lmedium_noprefetch: | ||
249 | andncc %o2, 0x20 - 1, %o5 | ||
250 | be,pn %icc, 2f | ||
251 | sub %o2, %o5, %o2 | ||
252 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
253 | EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) | ||
254 | EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) | ||
255 | EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) | ||
256 | add %o1, 0x20, %o1 | ||
257 | subcc %o5, 0x20, %o5 | ||
258 | EX_ST(STORE(stx, %g1, %o0 + 0x00)) | ||
259 | EX_ST(STORE(stx, %g2, %o0 + 0x08)) | ||
260 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) | ||
261 | EX_ST(STORE(stx, %o4, %o0 + 0x18)) | ||
262 | bne,pt %icc, 1b | ||
263 | add %o0, 0x20, %o0 | ||
264 | 2: andcc %o2, 0x18, %o5 | ||
265 | be,pt %icc, 3f | ||
266 | sub %o2, %o5, %o2 | ||
267 | 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) | ||
268 | add %o1, 0x08, %o1 | ||
269 | add %o0, 0x08, %o0 | ||
270 | subcc %o5, 0x08, %o5 | ||
271 | bne,pt %icc, 1b | ||
272 | EX_ST(STORE(stx, %g1, %o0 - 0x08)) | ||
273 | 3: brz,pt %o2, .Lexit | ||
274 | cmp %o2, 0x04 | ||
275 | bl,pn %icc, .Ltiny | ||
276 | nop | ||
277 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
278 | add %o1, 0x04, %o1 | ||
279 | add %o0, 0x04, %o0 | ||
280 | subcc %o2, 0x04, %o2 | ||
281 | bne,pn %icc, .Ltiny | ||
282 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
283 | ba,a,pt %icc, .Lexit | ||
284 | .Lmedium_unaligned: | ||
285 | /* First get dest 8 byte aligned. */ | ||
286 | sub %g0, %o0, %g1 | ||
287 | and %g1, 0x7, %g1 | ||
288 | brz,pt %g1, 2f | ||
289 | sub %o2, %g1, %o2 | ||
290 | |||
291 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) | ||
292 | add %o1, 1, %o1 | ||
293 | subcc %g1, 1, %g1 | ||
294 | add %o0, 1, %o0 | ||
295 | bne,pt %icc, 1b | ||
296 | EX_ST(STORE(stb, %g2, %o0 - 0x01)) | ||
297 | 2: | ||
298 | and %o1, 0x7, %g1 | ||
299 | brz,pn %g1, .Lmedium_noprefetch | ||
300 | sll %g1, 3, %g1 | ||
301 | mov 64, %g2 | ||
302 | sub %g2, %g1, %g2 | ||
303 | andn %o1, 0x7, %o1 | ||
304 | EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) | ||
305 | sllx %o4, %g1, %o4 | ||
306 | andn %o2, 0x08 - 1, %o5 | ||
307 | sub %o2, %o5, %o2 | ||
308 | 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) | ||
309 | add %o1, 0x08, %o1 | ||
310 | subcc %o5, 0x08, %o5 | ||
311 | srlx %g3, %g2, GLOBAL_SPARE | ||
312 | or GLOBAL_SPARE, %o4, GLOBAL_SPARE | ||
313 | EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) | ||
314 | add %o0, 0x08, %o0 | ||
315 | bne,pt %icc, 1b | ||
316 | sllx %g3, %g1, %o4 | ||
317 | srl %g1, 3, %g1 | ||
318 | add %o1, %g1, %o1 | ||
319 | brz,pn %o2, .Lexit | ||
320 | nop | ||
321 | ba,pt %icc, .Lsmall_unaligned | ||
322 | |||
323 | .Ltiny: | ||
324 | EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
325 | subcc %o2, 1, %o2 | ||
326 | be,pn %icc, .Lexit | ||
327 | EX_ST(STORE(stb, %g1, %o0 + 0x00)) | ||
328 | EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) | ||
329 | subcc %o2, 1, %o2 | ||
330 | be,pn %icc, .Lexit | ||
331 | EX_ST(STORE(stb, %g1, %o0 + 0x01)) | ||
332 | EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) | ||
333 | ba,pt %icc, .Lexit | ||
334 | EX_ST(STORE(stb, %g1, %o0 + 0x02)) | ||
335 | |||
336 | .Lsmall: | ||
337 | andcc %g2, 0x3, %g0 | ||
338 | bne,pn %icc, .Lsmall_unaligned | ||
339 | andn %o2, 0x4 - 1, %o5 | ||
340 | sub %o2, %o5, %o2 | ||
341 | 1: | ||
342 | EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) | ||
343 | add %o1, 0x04, %o1 | ||
344 | subcc %o5, 0x04, %o5 | ||
345 | add %o0, 0x04, %o0 | ||
346 | bne,pt %icc, 1b | ||
347 | EX_ST(STORE(stw, %g1, %o0 - 0x04)) | ||
348 | brz,pt %o2, .Lexit | ||
349 | nop | ||
350 | ba,a,pt %icc, .Ltiny | ||
351 | |||
352 | .Lsmall_unaligned: | ||
353 | 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) | ||
354 | add %o1, 1, %o1 | ||
355 | add %o0, 1, %o0 | ||
356 | subcc %o2, 1, %o2 | ||
357 | bne,pt %icc, 1b | ||
358 | EX_ST(STORE(stb, %g1, %o0 - 0x01)) | ||
359 | ba,a,pt %icc, .Lexit | ||
360 | .size FUNC_NAME, .-FUNC_NAME | ||
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S new file mode 100644 index 000000000000..41da4bdd95cb --- /dev/null +++ b/arch/sparc/lib/NG4memset.S | |||
@@ -0,0 +1,105 @@ | |||
1 | /* NG4memset.S: Niagara-4 optimized memset/bzero. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | |||
8 | .register %g2, #scratch | ||
9 | .register %g3, #scratch | ||
10 | |||
11 | .text | ||
12 | .align 32 | ||
13 | .globl NG4memset | ||
14 | NG4memset: | ||
15 | andcc %o1, 0xff, %o4 | ||
16 | be,pt %icc, 1f | ||
17 | mov %o2, %o1 | ||
18 | sllx %o4, 8, %g1 | ||
19 | or %g1, %o4, %o2 | ||
20 | sllx %o2, 16, %g1 | ||
21 | or %g1, %o2, %o2 | ||
22 | sllx %o2, 32, %g1 | ||
23 | ba,pt %icc, 1f | ||
24 | or %g1, %o2, %o4 | ||
25 | .size NG4memset,.-NG4memset | ||
26 | |||
27 | .align 32 | ||
28 | .globl NG4bzero | ||
29 | NG4bzero: | ||
30 | clr %o4 | ||
31 | 1: cmp %o1, 16 | ||
32 | ble %icc, .Ltiny | ||
33 | mov %o0, %o3 | ||
34 | sub %g0, %o0, %g1 | ||
35 | and %g1, 0x7, %g1 | ||
36 | brz,pt %g1, .Laligned8 | ||
37 | sub %o1, %g1, %o1 | ||
38 | 1: stb %o4, [%o0 + 0x00] | ||
39 | subcc %g1, 1, %g1 | ||
40 | bne,pt %icc, 1b | ||
41 | add %o0, 1, %o0 | ||
42 | .Laligned8: | ||
43 | cmp %o1, 64 + (64 - 8) | ||
44 | ble .Lmedium | ||
45 | sub %g0, %o0, %g1 | ||
46 | andcc %g1, (64 - 1), %g1 | ||
47 | brz,pn %g1, .Laligned64 | ||
48 | sub %o1, %g1, %o1 | ||
49 | 1: stx %o4, [%o0 + 0x00] | ||
50 | subcc %g1, 8, %g1 | ||
51 | bne,pt %icc, 1b | ||
52 | add %o0, 0x8, %o0 | ||
53 | .Laligned64: | ||
54 | andn %o1, 64 - 1, %g1 | ||
55 | sub %o1, %g1, %o1 | ||
56 | brnz,pn %o4, .Lnon_bzero_loop | ||
57 | mov 0x20, %g2 | ||
58 | 1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
59 | subcc %g1, 0x40, %g1 | ||
60 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
61 | bne,pt %icc, 1b | ||
62 | add %o0, 0x40, %o0 | ||
63 | .Lpostloop: | ||
64 | cmp %o1, 8 | ||
65 | bl,pn %icc, .Ltiny | ||
66 | membar #StoreStore|#StoreLoad | ||
67 | .Lmedium: | ||
68 | andn %o1, 0x7, %g1 | ||
69 | sub %o1, %g1, %o1 | ||
70 | 1: stx %o4, [%o0 + 0x00] | ||
71 | subcc %g1, 0x8, %g1 | ||
72 | bne,pt %icc, 1b | ||
73 | add %o0, 0x08, %o0 | ||
74 | andcc %o1, 0x4, %g1 | ||
75 | be,pt %icc, .Ltiny | ||
76 | sub %o1, %g1, %o1 | ||
77 | stw %o4, [%o0 + 0x00] | ||
78 | add %o0, 0x4, %o0 | ||
79 | .Ltiny: | ||
80 | cmp %o1, 0 | ||
81 | be,pn %icc, .Lexit | ||
82 | 1: subcc %o1, 1, %o1 | ||
83 | stb %o4, [%o0 + 0x00] | ||
84 | bne,pt %icc, 1b | ||
85 | add %o0, 1, %o0 | ||
86 | .Lexit: | ||
87 | retl | ||
88 | mov %o3, %o0 | ||
89 | .Lnon_bzero_loop: | ||
90 | mov 0x08, %g3 | ||
91 | mov 0x28, %o5 | ||
92 | 1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
93 | subcc %g1, 0x40, %g1 | ||
94 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
95 | stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
96 | stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P | ||
97 | add %o0, 0x10, %o0 | ||
98 | stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | ||
99 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | ||
100 | stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
101 | stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P | ||
102 | bne,pt %icc, 1b | ||
103 | add %o0, 0x30, %o0 | ||
104 | ba,a,pt %icc, .Lpostloop | ||
105 | .size NG4bzero,.-NG4bzero | ||
diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S new file mode 100644 index 000000000000..a114cbcf2a48 --- /dev/null +++ b/arch/sparc/lib/NG4patch.S | |||
@@ -0,0 +1,54 @@ | |||
1 | /* NG4patch.S: Patch Ultra-I routines with Niagara-4 variant. | ||
2 | * | ||
3 | * Copyright (C) 2012 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define BRANCH_ALWAYS 0x10680000 | ||
7 | #define NOP 0x01000000 | ||
8 | #define NG_DO_PATCH(OLD, NEW) \ | ||
9 | sethi %hi(NEW), %g1; \ | ||
10 | or %g1, %lo(NEW), %g1; \ | ||
11 | sethi %hi(OLD), %g2; \ | ||
12 | or %g2, %lo(OLD), %g2; \ | ||
13 | sub %g1, %g2, %g1; \ | ||
14 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
15 | sll %g1, 11, %g1; \ | ||
16 | srl %g1, 11 + 2, %g1; \ | ||
17 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
18 | or %g3, %g1, %g3; \ | ||
19 | stw %g3, [%g2]; \ | ||
20 | sethi %hi(NOP), %g3; \ | ||
21 | or %g3, %lo(NOP), %g3; \ | ||
22 | stw %g3, [%g2 + 0x4]; \ | ||
23 | flush %g2; | ||
24 | |||
25 | .globl niagara4_patch_copyops | ||
26 | .type niagara4_patch_copyops,#function | ||
27 | niagara4_patch_copyops: | ||
28 | NG_DO_PATCH(memcpy, NG4memcpy) | ||
29 | NG_DO_PATCH(___copy_from_user, NG4copy_from_user) | ||
30 | NG_DO_PATCH(___copy_to_user, NG4copy_to_user) | ||
31 | retl | ||
32 | nop | ||
33 | .size niagara4_patch_copyops,.-niagara4_patch_copyops | ||
34 | |||
35 | .globl niagara4_patch_bzero | ||
36 | .type niagara4_patch_bzero,#function | ||
37 | niagara4_patch_bzero: | ||
38 | NG_DO_PATCH(memset, NG4memset) | ||
39 | NG_DO_PATCH(__bzero, NG4bzero) | ||
40 | NG_DO_PATCH(__clear_user, NGclear_user) | ||
41 | NG_DO_PATCH(tsb_init, NGtsb_init) | ||
42 | retl | ||
43 | nop | ||
44 | .size niagara4_patch_bzero,.-niagara4_patch_bzero | ||
45 | |||
46 | .globl niagara4_patch_pageops | ||
47 | .type niagara4_patch_pageops,#function | ||
48 | niagara4_patch_pageops: | ||
49 | NG_DO_PATCH(copy_user_page, NG4copy_user_page) | ||
50 | NG_DO_PATCH(_clear_page, NG4clear_page) | ||
51 | NG_DO_PATCH(clear_user_page, NG4clear_user_page) | ||
52 | retl | ||
53 | nop | ||
54 | .size niagara4_patch_pageops,.-niagara4_patch_pageops | ||
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index b9e790b9c6b8..423d46e2258b 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S | |||
@@ -59,6 +59,8 @@ NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | |||
59 | restore | 59 | restore |
60 | 60 | ||
61 | .align 32 | 61 | .align 32 |
62 | .globl NGclear_page | ||
63 | .globl NGclear_user_page | ||
62 | NGclear_page: /* %o0=dest */ | 64 | NGclear_page: /* %o0=dest */ |
63 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ | 65 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ |
64 | rd %asi, %g3 | 66 | rd %asi, %g3 |
diff --git a/arch/sparc/lib/ksyms.c b/arch/sparc/lib/ksyms.c index 3b31218cafc6..ee31b884c61b 100644 --- a/arch/sparc/lib/ksyms.c +++ b/arch/sparc/lib/ksyms.c | |||
@@ -134,6 +134,10 @@ EXPORT_SYMBOL(copy_user_page); | |||
134 | void VISenter(void); | 134 | void VISenter(void); |
135 | EXPORT_SYMBOL(VISenter); | 135 | EXPORT_SYMBOL(VISenter); |
136 | 136 | ||
137 | /* CRYPTO code needs this */ | ||
138 | void VISenterhalf(void); | ||
139 | EXPORT_SYMBOL(VISenterhalf); | ||
140 | |||
137 | extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); | 141 | extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *); |
138 | extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, | 142 | extern void xor_vis_3(unsigned long, unsigned long *, unsigned long *, |
139 | unsigned long *); | 143 | unsigned long *); |