diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2006-02-21 17:29:42 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-03-20 04:13:50 -0500 |
commit | 8ca2557c48000daa8183b07d83f582a597705ebe (patch) | |
tree | 2ce53d5b4d6f21509dc4bc5fdeead1f559e626a0 /arch/sparc64/lib | |
parent | d371c0c17466b7e7bb4d395f96aa885a23df1073 (diff) |
[SPARC64]: Niagara optimized memset/bzero/clear_user.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc64/lib')
-rw-r--r-- | arch/sparc64/lib/Makefile | 2 | ||||
-rw-r--r-- | arch/sparc64/lib/NGbzero.S | 162 |
2 files changed, 163 insertions, 1 deletions
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index 3d0e9a24d7a3..8812ded19f01 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile | |||
@@ -12,7 +12,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ | |||
12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ | 12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ |
13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ | 13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ |
14 | NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ | 14 | NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ |
15 | NGpage.o \ | 15 | NGpage.o NGbzero.o \ |
16 | copy_in_user.o user_fixup.o memmove.o \ | 16 | copy_in_user.o user_fixup.o memmove.o \ |
17 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o | 17 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o |
18 | 18 | ||
diff --git a/arch/sparc64/lib/NGbzero.S b/arch/sparc64/lib/NGbzero.S new file mode 100644 index 000000000000..fef584f745dc --- /dev/null +++ b/arch/sparc64/lib/NGbzero.S | |||
@@ -0,0 +1,162 @@ | |||
1 | /* NGbzero.S: Niagara optimized memset/bzero/clear_user. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | #include <asm/asi.h> | ||
6 | |||
/*
 * EX_ST(x,y): emit one store instruction with an exception-table entry.
 *
 * The instruction is passed in two pieces because its operand list
 * contains a comma, e.g. EX_ST(stba %o2, [%o0 + 0x00] %asi).
 *
 * For every use the macro emits:
 *   98:  the store itself, in .text
 *   99:  a stub in .fixup that restores %asi and returns %o1 in %o0
 *   an __ex_table record pairing the store (98b) with its stub (99b)
 * (presumably the kernel fault handler resumes at the stub when the
 * store traps -- confirm against the exception-table lookup code).
 *
 * The stub restores %asi from %o5.  Every routine in this file reads
 * the caller's %asi into %o5 before its first EX_ST, and may be running
 * with a different %asi when the store faults, so returning without the
 * restore would hand the caller a changed %asi.
 *
 * %o1 is the routine's remaining-length counter, so the value returned
 * on a fault is non-zero.
 */
#define EX_ST(x,y)		\
98:	x,y;			\
	.section .fixup;	\
	.align 4;		\
99:	wr	%o5, 0x0, %asi;	\
	retl;			\
	 mov	%o1, %o0;	\
	.section __ex_table;	\
	.align 4;		\
	.word 98b, 99b;		\
	.text;			\
	.align 4;
18 | |||
19 | .text | ||
20 | |||
21 | .globl NGmemset | ||
22 | .type NGmemset, #function | ||
23 | NGmemset: /* %o0=buf, %o1=pat, %o2=len */ | ||
24 | and %o1, 0xff, %o3 | ||
25 | mov %o2, %o1 | ||
26 | sllx %o3, 8, %g1 | ||
27 | or %g1, %o3, %o2 | ||
28 | sllx %o2, 16, %g1 | ||
29 | or %g1, %o2, %o2 | ||
30 | sllx %o2, 32, %g1 | ||
31 | ba,pt %xcc, 1f | ||
32 | or %g1, %o2, %o2 | ||
33 | |||
34 | .globl NGbzero | ||
35 | .type NGbzero, #function | ||
36 | NGbzero: | ||
37 | clr %o2 | ||
38 | 1: brz,pn %o1, NGbzero_return | ||
39 | mov %o0, %o3 | ||
40 | |||
41 | /* %o5: saved %asi, restored at NGbzero_done | ||
42 | * %g7: store-init %asi to use | ||
43 | * %o4: non-store-init %asi to use | ||
44 | */ | ||
45 | rd %asi, %o5 | ||
46 | mov ASI_BLK_INIT_QUAD_LDD_P, %g7 | ||
47 | mov ASI_P, %o4 | ||
48 | wr %o4, 0x0, %asi | ||
49 | |||
50 | NGbzero_from_clear_user: | ||
51 | cmp %o1, 15 | ||
52 | bl,pn %icc, NGbzero_tiny | ||
53 | andcc %o0, 0x7, %g1 | ||
54 | be,pt %xcc, 2f | ||
55 | mov 8, %g2 | ||
56 | sub %g2, %g1, %g1 | ||
57 | sub %o1, %g1, %o1 | ||
58 | 1: EX_ST(stba %o2, [%o0 + 0x00] %asi) | ||
59 | subcc %g1, 1, %g1 | ||
60 | bne,pt %xcc, 1b | ||
61 | add %o0, 1, %o0 | ||
62 | 2: cmp %o1, 128 | ||
63 | bl,pn %icc, NGbzero_medium | ||
64 | andcc %o0, (64 - 1), %g1 | ||
65 | be,pt %xcc, NGbzero_pre_loop | ||
66 | mov 64, %g2 | ||
67 | sub %g2, %g1, %g1 | ||
68 | sub %o1, %g1, %o1 | ||
69 | 1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) | ||
70 | subcc %g1, 8, %g1 | ||
71 | bne,pt %xcc, 1b | ||
72 | add %o0, 8, %o0 | ||
73 | |||
74 | NGbzero_pre_loop: | ||
75 | wr %g7, 0x0, %asi | ||
76 | andn %o1, (64 - 1), %g1 | ||
77 | sub %o1, %g1, %o1 | ||
78 | NGbzero_loop: | ||
79 | EX_ST(stxa %o2, [%o0 + 0x00] %asi) | ||
80 | EX_ST(stxa %o2, [%o0 + 0x08] %asi) | ||
81 | EX_ST(stxa %o2, [%o0 + 0x10] %asi) | ||
82 | EX_ST(stxa %o2, [%o0 + 0x18] %asi) | ||
83 | EX_ST(stxa %o2, [%o0 + 0x20] %asi) | ||
84 | EX_ST(stxa %o2, [%o0 + 0x28] %asi) | ||
85 | EX_ST(stxa %o2, [%o0 + 0x30] %asi) | ||
86 | EX_ST(stxa %o2, [%o0 + 0x38] %asi) | ||
87 | subcc %g1, 64, %g1 | ||
88 | bne,pt %xcc, NGbzero_loop | ||
89 | add %o0, 64, %o0 | ||
90 | |||
91 | wr %o4, 0x0, %asi | ||
92 | brz,pn %o1, NGbzero_done | ||
93 | NGbzero_medium: | ||
94 | andncc %o1, 0x7, %g1 | ||
95 | be,pn %xcc, 2f | ||
96 | sub %o1, %g1, %o1 | ||
97 | 1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) | ||
98 | subcc %g1, 8, %g1 | ||
99 | bne,pt %xcc, 1b | ||
100 | add %o0, 8, %o0 | ||
101 | 2: brz,pt %o1, NGbzero_done | ||
102 | nop | ||
103 | |||
104 | NGbzero_tiny: | ||
105 | 1: EX_ST(stba %o2, [%o0 + 0x00] %asi) | ||
106 | subcc %o1, 1, %o1 | ||
107 | bne,pt %icc, 1b | ||
108 | add %o0, 1, %o0 | ||
109 | |||
110 | /* fallthrough */ | ||
111 | |||
112 | NGbzero_done: | ||
113 | wr %o5, 0x0, %asi | ||
114 | |||
115 | NGbzero_return: | ||
116 | retl | ||
117 | mov %o3, %o0 | ||
118 | .size NGbzero, .-NGbzero | ||
119 | .size NGmemset, .-NGmemset | ||
120 | |||
/*
 * NGclear_user: zero a buffer through the user address-space ASI.
 *
 *   In:  %o0 = buf, %o1 = len
 *   Out: %o0 = 0 when every store succeeded on the ASI_AIUS path
 *
 * The caller's %asi is saved in %o5 first so that NGbzero_done can
 * restore it.  A zero length exits straight through NGbzero_done with
 * %o3 (the return value) cleared.
 *
 * When %asi already holds ASI_AIUS the routine selects the user
 * variants of the two ASIs (%g7 = store-init, %o4 = ordinary) and joins
 * the shared fill code at NGbzero_from_clear_user with a zero pattern
 * in %o2.  Otherwise it hands over to the plain NGbzero entry.
 *
 * NOTE(review): on the NGbzero fallback %o3 is overwritten with buf by
 * NGbzero itself, so the value returned in that case is the buffer
 * address rather than 0 -- confirm callers never take that path or do
 * not inspect the result.
 *
 * Delay-slot instructions are indented by one extra space.
 */
	.globl		NGclear_user
	.type		NGclear_user, #function
NGclear_user:		/* %o0=buf, %o1=len */
	rd		%asi, %o5			/* %o5 = caller's %asi */
	brz,pn		%o1, NGbzero_done		/* nothing to clear */
	 clr		%o3				/* return value = 0 */
	cmp		%o5, ASI_AIUS
	bne,pn		%icc, NGbzero			/* not ASI_AIUS: use the plain NGbzero entry */
	 clr		%o2				/* store pattern = 0 */
	mov		ASI_BLK_INIT_QUAD_LDD_AIUS, %g7	/* store-init ASI */
	ba,pt		%xcc, NGbzero_from_clear_user
	 mov		ASI_AIUS, %o4			/* ordinary ASI */
	.size		NGclear_user, .-NGclear_user
134 | |||
/*
 * Instruction templates used for run-time patching.
 *
 * BRANCH_ALWAYS is the encoding of "ba,pt %xcc, ." with a zero 19-bit
 * displacement field; NOP is the encoding of "nop".
 */
#define BRANCH_ALWAYS	0x10680000
#define NOP		0x01000000

/*
 * NG_DO_PATCH(OLD, NEW): overwrite the first two instructions of OLD
 * with "ba,pt %xcc, NEW; nop".
 *
 *   %g1 = NEW - OLD (byte displacement); the sll 11 / srl 13 pair
 *         keeps bits 20..2 of it, i.e. the displacement in words
 *         truncated to the 19-bit field of the branch
 *   %g2 = address of OLD
 *   %g3 = instruction word being stored
 *
 * The displacement is truncated, not range-checked, so NEW has to lie
 * within reach of a 19-bit word displacement from OLD.  A single
 * "flush %g2" is issued after both words have been stored.
 *
 * Clobbers %g1, %g2 and %g3.
 */
#define NG_DO_PATCH(OLD, NEW)			\
	sethi	%hi(NEW), %g1;			\
	or	%g1, %lo(NEW), %g1;		\
	sethi	%hi(OLD), %g2;			\
	or	%g2, %lo(OLD), %g2;		\
	sub	%g1, %g2, %g1;			\
	sethi	%hi(BRANCH_ALWAYS), %g3;	\
	sll	%g1, 11, %g1;			\
	srl	%g1, 11 + 2, %g1;		\
	or	%g3, %lo(BRANCH_ALWAYS), %g3;	\
	or	%g3, %g1, %g3;			\
	stw	%g3, [%g2];			\
	sethi	%hi(NOP), %g3;			\
	or	%g3, %lo(NOP), %g3;		\
	stw	%g3, [%g2 + 0x4];		\
	flush	%g2;
153 | |||
/*
 * niagara_patch_bzero: redirect the generic fill routines to the
 * Niagara versions defined in this file.
 *
 * Takes no arguments.  The first two instructions of memset, __bzero
 * and __clear_user are each replaced, via NG_DO_PATCH, with a branch to
 * NGmemset, NGbzero and NGclear_user respectively followed by a nop.
 *
 * Clobbers %g1, %g2 and %g3 (through NG_DO_PATCH).
 */
	.globl		niagara_patch_bzero
	.type		niagara_patch_bzero,#function
niagara_patch_bzero:
	NG_DO_PATCH(memset, NGmemset)
	NG_DO_PATCH(__bzero, NGbzero)
	NG_DO_PATCH(__clear_user, NGclear_user)
	retl
	 nop
	.size		niagara_patch_bzero,.-niagara_patch_bzero