diff options
author | David S. Miller <davem@davemloft.net> | 2011-08-01 21:18:57 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-08-03 00:28:32 -0400 |
commit | e95ade083939dcb4b0c51c1a2c8504ea9ef3d6ef (patch) | |
tree | ad81930a3160a865a78869ece5066484a854cff0 /arch | |
parent | ac85fe8b21248054851e05bfaa352562e5b06dd3 (diff) |
sparc: Minor tweaks to Niagara page copy/clear.
Don't use floating point on Niagara2; use the traditional
plain Niagara code instead.
Unroll the Niagara loops to 128 bytes per iteration for copy,
and 256 bytes per iteration for clear.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/sparc/kernel/head_64.S | 2 | ||||
-rw-r--r-- | arch/sparc/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/sparc/lib/NG2page.S | 61 | ||||
-rw-r--r-- | arch/sparc/lib/NGpage.S | 114 |
4 files changed, 77 insertions, 102 deletions
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index c752603a7c0d..0eac1b2fc53d 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S | |||
@@ -559,7 +559,7 @@ niagara2_patch: | |||
559 | nop | 559 | nop |
560 | call niagara_patch_bzero | 560 | call niagara_patch_bzero |
561 | nop | 561 | nop |
562 | call niagara2_patch_pageops | 562 | call niagara_patch_pageops |
563 | nop | 563 | nop |
564 | 564 | ||
565 | ba,a,pt %xcc, 80f | 565 | ba,a,pt %xcc, 80f |
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 7f01b8fce8bc..c25f94d28df8 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile | |||
@@ -31,7 +31,7 @@ lib-$(CONFIG_SPARC64) += NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o | |||
31 | lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o | 31 | lib-$(CONFIG_SPARC64) += NGpatch.o NGpage.o NGbzero.o |
32 | 32 | ||
33 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o | 33 | lib-$(CONFIG_SPARC64) += NG2memcpy.o NG2copy_from_user.o NG2copy_to_user.o |
34 | lib-$(CONFIG_SPARC64) += NG2patch.o NG2page.o | 34 | lib-$(CONFIG_SPARC64) += NG2patch.o |
35 | 35 | ||
36 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o | 36 | lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o |
37 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o | 37 | lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o |
diff --git a/arch/sparc/lib/NG2page.S b/arch/sparc/lib/NG2page.S deleted file mode 100644 index 73b6b7c72cbf..000000000000 --- a/arch/sparc/lib/NG2page.S +++ /dev/null | |||
@@ -1,61 +0,0 @@ | |||
1 | /* NG2page.S: Niagara-2 optimized clear and copy page. | ||
2 | * | ||
3 | * Copyright (C) 2007 (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #include <asm/asi.h> | ||
7 | #include <asm/page.h> | ||
8 | #include <asm/visasm.h> | ||
9 | |||
10 | .text | ||
11 | .align 32 | ||
12 | |||
13 | /* This is heavily simplified from the sun4u variants | ||
14 | * because Niagara-2 does not have any D-cache aliasing issues. | ||
15 | */ | ||
16 | NG2copy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | ||
17 | prefetch [%o1 + 0x00], #one_read | ||
18 | prefetch [%o1 + 0x40], #one_read | ||
19 | VISEntryHalf | ||
20 | set PAGE_SIZE, %g7 | ||
21 | sub %o0, %o1, %g3 | ||
22 | 1: stxa %g0, [%o1 + %g3] ASI_BLK_INIT_QUAD_LDD_P | ||
23 | subcc %g7, 64, %g7 | ||
24 | ldda [%o1] ASI_BLK_P, %f0 | ||
25 | stda %f0, [%o1 + %g3] ASI_BLK_P | ||
26 | add %o1, 64, %o1 | ||
27 | bne,pt %xcc, 1b | ||
28 | prefetch [%o1 + 0x40], #one_read | ||
29 | membar #Sync | ||
30 | VISExitHalf | ||
31 | retl | ||
32 | nop | ||
33 | |||
34 | #define BRANCH_ALWAYS 0x10680000 | ||
35 | #define NOP 0x01000000 | ||
36 | #define NG_DO_PATCH(OLD, NEW) \ | ||
37 | sethi %hi(NEW), %g1; \ | ||
38 | or %g1, %lo(NEW), %g1; \ | ||
39 | sethi %hi(OLD), %g2; \ | ||
40 | or %g2, %lo(OLD), %g2; \ | ||
41 | sub %g1, %g2, %g1; \ | ||
42 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
43 | sll %g1, 11, %g1; \ | ||
44 | srl %g1, 11 + 2, %g1; \ | ||
45 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
46 | or %g3, %g1, %g3; \ | ||
47 | stw %g3, [%g2]; \ | ||
48 | sethi %hi(NOP), %g3; \ | ||
49 | or %g3, %lo(NOP), %g3; \ | ||
50 | stw %g3, [%g2 + 0x4]; \ | ||
51 | flush %g2; | ||
52 | |||
53 | .globl niagara2_patch_pageops | ||
54 | .type niagara2_patch_pageops,#function | ||
55 | niagara2_patch_pageops: | ||
56 | NG_DO_PATCH(copy_user_page, NG2copy_user_page) | ||
57 | NG_DO_PATCH(_clear_page, NGclear_page) | ||
58 | NG_DO_PATCH(clear_user_page, NGclear_user_page) | ||
59 | retl | ||
60 | nop | ||
61 | .size niagara2_patch_pageops,.-niagara2_patch_pageops | ||
diff --git a/arch/sparc/lib/NGpage.S b/arch/sparc/lib/NGpage.S index 428920de05ba..b9e790b9c6b8 100644 --- a/arch/sparc/lib/NGpage.S +++ b/arch/sparc/lib/NGpage.S | |||
@@ -16,55 +16,91 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ | 18 | NGcopy_user_page: /* %o0=dest, %o1=src, %o2=vaddr */ |
19 | prefetch [%o1 + 0x00], #one_read | 19 | save %sp, -192, %sp |
20 | mov 8, %g1 | 20 | rd %asi, %g3 |
21 | mov 16, %g2 | 21 | wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi |
22 | mov 24, %g3 | ||
23 | set PAGE_SIZE, %g7 | 22 | set PAGE_SIZE, %g7 |
23 | prefetch [%i1 + 0x00], #one_read | ||
24 | prefetch [%i1 + 0x40], #one_read | ||
24 | 25 | ||
25 | 1: ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 | 26 | 1: prefetch [%i1 + 0x80], #one_read |
26 | ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 | 27 | prefetch [%i1 + 0xc0], #one_read |
27 | prefetch [%o1 + 0x40], #one_read | 28 | ldda [%i1 + 0x00] %asi, %o2 |
28 | add %o1, 32, %o1 | 29 | ldda [%i1 + 0x10] %asi, %o4 |
29 | stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | 30 | ldda [%i1 + 0x20] %asi, %l2 |
30 | stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | 31 | ldda [%i1 + 0x30] %asi, %l4 |
31 | ldda [%o1 + %g0] ASI_BLK_INIT_QUAD_LDD_P, %o2 | 32 | stxa %o2, [%i0 + 0x00] %asi |
32 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | 33 | stxa %o3, [%i0 + 0x08] %asi |
33 | stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | 34 | stxa %o4, [%i0 + 0x10] %asi |
34 | ldda [%o1 + %g2] ASI_BLK_INIT_QUAD_LDD_P, %o4 | 35 | stxa %o5, [%i0 + 0x18] %asi |
35 | add %o1, 32, %o1 | 36 | stxa %l2, [%i0 + 0x20] %asi |
36 | add %o0, 32, %o0 | 37 | stxa %l3, [%i0 + 0x28] %asi |
37 | stxa %o2, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | 38 | stxa %l4, [%i0 + 0x30] %asi |
38 | stxa %o3, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | 39 | stxa %l5, [%i0 + 0x38] %asi |
39 | stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | 40 | ldda [%i1 + 0x40] %asi, %o2 |
40 | stxa %o5, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | 41 | ldda [%i1 + 0x50] %asi, %o4 |
41 | subcc %g7, 64, %g7 | 42 | ldda [%i1 + 0x60] %asi, %l2 |
43 | ldda [%i1 + 0x70] %asi, %l4 | ||
44 | stxa %o2, [%i0 + 0x40] %asi | ||
45 | stxa %o3, [%i0 + 0x48] %asi | ||
46 | stxa %o4, [%i0 + 0x50] %asi | ||
47 | stxa %o5, [%i0 + 0x58] %asi | ||
48 | stxa %l2, [%i0 + 0x60] %asi | ||
49 | stxa %l3, [%i0 + 0x68] %asi | ||
50 | stxa %l4, [%i0 + 0x70] %asi | ||
51 | stxa %l5, [%i0 + 0x78] %asi | ||
52 | add %i1, 128, %i1 | ||
53 | subcc %g7, 128, %g7 | ||
42 | bne,pt %xcc, 1b | 54 | bne,pt %xcc, 1b |
43 | add %o0, 32, %o0 | 55 | add %i0, 128, %i0 |
56 | wr %g3, 0x0, %asi | ||
44 | membar #Sync | 57 | membar #Sync |
45 | retl | 58 | ret |
46 | nop | 59 | restore |
47 | 60 | ||
48 | .globl NGclear_page, NGclear_user_page | 61 | .align 32 |
49 | NGclear_page: /* %o0=dest */ | 62 | NGclear_page: /* %o0=dest */ |
50 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ | 63 | NGclear_user_page: /* %o0=dest, %o1=vaddr */ |
51 | mov 8, %g1 | 64 | rd %asi, %g3 |
52 | mov 16, %g2 | 65 | wr %g0, ASI_BLK_INIT_QUAD_LDD_P, %asi |
53 | mov 24, %g3 | ||
54 | set PAGE_SIZE, %g7 | 66 | set PAGE_SIZE, %g7 |
55 | 67 | ||
56 | 1: stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | 68 | 1: stxa %g0, [%o0 + 0x00] %asi |
57 | stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | 69 | stxa %g0, [%o0 + 0x08] %asi |
58 | stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | 70 | stxa %g0, [%o0 + 0x10] %asi |
59 | stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | 71 | stxa %g0, [%o0 + 0x18] %asi |
60 | add %o0, 32, %o0 | 72 | stxa %g0, [%o0 + 0x20] %asi |
61 | stxa %g0, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P | 73 | stxa %g0, [%o0 + 0x28] %asi |
62 | stxa %g0, [%o0 + %g1] ASI_BLK_INIT_QUAD_LDD_P | 74 | stxa %g0, [%o0 + 0x30] %asi |
63 | stxa %g0, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P | 75 | stxa %g0, [%o0 + 0x38] %asi |
64 | stxa %g0, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P | 76 | stxa %g0, [%o0 + 0x40] %asi |
65 | subcc %g7, 64, %g7 | 77 | stxa %g0, [%o0 + 0x48] %asi |
78 | stxa %g0, [%o0 + 0x50] %asi | ||
79 | stxa %g0, [%o0 + 0x58] %asi | ||
80 | stxa %g0, [%o0 + 0x60] %asi | ||
81 | stxa %g0, [%o0 + 0x68] %asi | ||
82 | stxa %g0, [%o0 + 0x70] %asi | ||
83 | stxa %g0, [%o0 + 0x78] %asi | ||
84 | stxa %g0, [%o0 + 0x80] %asi | ||
85 | stxa %g0, [%o0 + 0x88] %asi | ||
86 | stxa %g0, [%o0 + 0x90] %asi | ||
87 | stxa %g0, [%o0 + 0x98] %asi | ||
88 | stxa %g0, [%o0 + 0xa0] %asi | ||
89 | stxa %g0, [%o0 + 0xa8] %asi | ||
90 | stxa %g0, [%o0 + 0xb0] %asi | ||
91 | stxa %g0, [%o0 + 0xb8] %asi | ||
92 | stxa %g0, [%o0 + 0xc0] %asi | ||
93 | stxa %g0, [%o0 + 0xc8] %asi | ||
94 | stxa %g0, [%o0 + 0xd0] %asi | ||
95 | stxa %g0, [%o0 + 0xd8] %asi | ||
96 | stxa %g0, [%o0 + 0xe0] %asi | ||
97 | stxa %g0, [%o0 + 0xe8] %asi | ||
98 | stxa %g0, [%o0 + 0xf0] %asi | ||
99 | stxa %g0, [%o0 + 0xf8] %asi | ||
100 | subcc %g7, 256, %g7 | ||
66 | bne,pt %xcc, 1b | 101 | bne,pt %xcc, 1b |
67 | add %o0, 32, %o0 | 102 | add %o0, 256, %o0 |
103 | wr %g3, 0x0, %asi | ||
68 | membar #Sync | 104 | membar #Sync |
69 | retl | 105 | retl |
70 | nop | 106 | nop |