diff options
author | David S. Miller <davem@sunset.davemloft.net> | 2006-03-05 19:41:56 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-03-20 04:11:42 -0500 |
commit | 398d10830843bda7798f71052b54a5341a8ddd53 (patch) | |
tree | 3c2f7da709f2f5e21fe3e5678276eae2d665735f | |
parent | 30ddbdb03339fc62480ddbff800a44066bb14455 (diff) |
[SPARC64]: Niagara optimized memcpy() and copy_{to,from}_user().
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc64/lib/Makefile | 1 | ||||
-rw-r--r-- | arch/sparc64/lib/NGcopy_from_user.S | 37 | ||||
-rw-r--r-- | arch/sparc64/lib/NGcopy_to_user.S | 40 | ||||
-rw-r--r-- | arch/sparc64/lib/NGmemcpy.S | 364 | ||||
-rw-r--r-- | arch/sparc64/lib/NGpatch.S | 32 |
5 files changed, 474 insertions, 0 deletions
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile index c295806500f7..813f622b5c4e 100644 --- a/arch/sparc64/lib/Makefile +++ b/arch/sparc64/lib/Makefile | |||
@@ -11,6 +11,7 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ | |||
11 | VISsave.o atomic.o bitops.o \ | 11 | VISsave.o atomic.o bitops.o \ |
12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ | 12 | U1memcpy.o U1copy_from_user.o U1copy_to_user.o \ |
13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ | 13 | U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ |
14 | NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ | ||
14 | copy_in_user.o user_fixup.o memmove.o \ | 15 | copy_in_user.o user_fixup.o memmove.o \ |
15 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o | 16 | mcount.o ipcsum.o rwsem.o xor.o find_bit.o delay.o |
16 | 17 | ||
diff --git a/arch/sparc64/lib/NGcopy_from_user.S b/arch/sparc64/lib/NGcopy_from_user.S new file mode 100644 index 000000000000..2d93456f76dd --- /dev/null +++ b/arch/sparc64/lib/NGcopy_from_user.S | |||
@@ -0,0 +1,37 @@ | |||
1 | /* NGcopy_from_user.S: Niagara optimized copy from userspace. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_LD(x) /* wrap one user-space load: emit it at local label 98 and register a fixup stub for it */ \ | ||
7 | 98: x; \ | ||
8 | .section .fixup; /* stub 99 is emitted out of line in .fixup */ \ | ||
9 | .align 4; \ | ||
10 | 99: wr %g0, ASI_AIUS, %asi;\ | ||
11 | retl; /* stub puts %asi back to ASI_AIUS and returns 1 in %o0 (mov sits in the retl delay slot) */ \ | ||
12 | mov 1, %o0; \ | ||
13 | .section __ex_table,"a";\ | ||
14 | .align 4; \ | ||
15 | .word 98b, 99b; /* table entry pairing the access at 98 with stub 99; presumably consumed by the kernel fault handler - confirm */ \ | ||
16 | .text; \ | ||
17 | .align 4; | ||
18 | |||
19 | #ifndef ASI_AIUS | ||
20 | #define ASI_AIUS 0x11 /* fallback value, used only when no header has defined ASI_AIUS yet */ | ||
21 | #endif | ||
22 | |||
23 | #define FUNC_NAME NGcopy_from_user /* symbol name given to the NGmemcpy.S template body */ | ||
24 | #define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest /* every load becomes its alternate-space form with an explicit ASI_AIUS */ | ||
25 | #define LOAD_TWIN(addr_reg,dest0,dest1) /* 16-byte twin load through the AIUS block-init ASI; only dest0 is named in the instruction, dest1 is unused by the expansion */ \ | ||
26 | ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_AIUS, dest0 | ||
27 | #define EX_RETVAL(x) 0 /* the template returns EX_RETVAL(saved dst): here that is 0 on the non-faulting path */ | ||
28 | |||
29 | #ifdef __KERNEL__ | ||
30 | #define PREAMBLE /* entry check: if %asi is not ASI_AIUS, branch to memcpy_user_stub (defined outside this file) */ \ | ||
31 | rd %asi, %g1; \ | ||
32 | cmp %g1, ASI_AIUS; \ | ||
33 | bne,pn %icc, memcpy_user_stub; \ | ||
34 | nop | ||
35 | #endif | ||
36 | |||
37 | #include "NGmemcpy.S" | ||
diff --git a/arch/sparc64/lib/NGcopy_to_user.S b/arch/sparc64/lib/NGcopy_to_user.S new file mode 100644 index 000000000000..4a12395b4502 --- /dev/null +++ b/arch/sparc64/lib/NGcopy_to_user.S | |||
@@ -0,0 +1,40 @@ | |||
1 | /* NGcopy_to_user.S: Niagara optimized copy to userspace. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #define EX_ST(x) /* wrap one user-space store: emit it at local label 98 and register a fixup stub for it */ \ | ||
7 | 98: x; \ | ||
8 | .section .fixup; /* stub 99 is emitted out of line in .fixup */ \ | ||
9 | .align 4; \ | ||
10 | 99: wr %g0, ASI_AIUS, %asi;\ | ||
11 | retl; /* stub puts %asi back to ASI_AIUS and returns 1 in %o0 (mov sits in the retl delay slot) */ \ | ||
12 | mov 1, %o0; \ | ||
13 | .section __ex_table,"a";\ | ||
14 | .align 4; \ | ||
15 | .word 98b, 99b; /* table entry pairing the access at 98 with stub 99; presumably consumed by the kernel fault handler - confirm */ \ | ||
16 | .text; \ | ||
17 | .align 4; | ||
18 | |||
19 | #ifndef ASI_AIUS | ||
20 | #define ASI_AIUS 0x11 /* fallback value, used only when no header has defined ASI_AIUS yet */ | ||
21 | #endif | ||
22 | |||
23 | #define FUNC_NAME NGcopy_to_user /* symbol name given to the NGmemcpy.S template body */ | ||
24 | #define STORE(type,src,addr) type##a src, [addr] ASI_AIUS /* every plain store becomes its alternate-space form with an explicit ASI_AIUS */ | ||
25 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_AIUS /* ASI the template writes into %asi before its STORE_INIT block stores */ | ||
26 | #define EX_RETVAL(x) 0 /* the template returns EX_RETVAL(saved dst): here that is 0 on the non-faulting path */ | ||
27 | |||
28 | #ifdef __KERNEL__ | ||
29 | /* Writing to %asi is _expensive_ so we hardcode it. | ||
30 | * Reading %asi to check for KERNEL_DS is comparatively | ||
31 | * cheap. | ||
32 | */ | ||
33 | #define PREAMBLE \ | ||
34 | rd %asi, %g1; \ | ||
35 | cmp %g1, ASI_AIUS; \ | ||
36 | bne,pn %icc, memcpy_user_stub; \ | ||
37 | nop | ||
38 | #endif | ||
39 | |||
40 | #include "NGmemcpy.S" /* must be the Niagara template: it is the one that consumes FUNC_NAME, STORE, STORE_ASI, EX_ST and PREAMBLE as configured above */ | ||
diff --git a/arch/sparc64/lib/NGmemcpy.S b/arch/sparc64/lib/NGmemcpy.S new file mode 100644 index 000000000000..a39aa3bd4345 --- /dev/null +++ b/arch/sparc64/lib/NGmemcpy.S | |||
@@ -0,0 +1,364 @@ | |||
1 | /* NGmemcpy.S: Niagara optimized memcpy. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller (davem@davemloft.net) | ||
4 | */ | ||
5 | |||
6 | #ifdef __KERNEL__ | ||
7 | #include <asm/asi.h> | ||
8 | #define GLOBAL_SPARE %g7 /* scratch global that holds the original dst for the return value */ | ||
9 | #define RESTORE_ASI wr %g0, ASI_AIUS, %asi /* run after the block loops, which overwrite %asi with STORE_ASI */ | ||
10 | #else | ||
11 | #define GLOBAL_SPARE %g5 | ||
12 | #define RESTORE_ASI /* expands to nothing outside the kernel build */ | ||
13 | #endif | ||
14 | |||
15 | #ifndef STORE_ASI | ||
16 | #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P /* default ASI loaded into %asi for STORE_INIT; an including file may pre-define its own */ | ||
17 | #endif | ||
18 | |||
19 | #ifndef EX_LD | ||
20 | #define EX_LD(x) x /* default: loads carry no exception-table entry */ | ||
21 | #endif | ||
22 | |||
23 | #ifndef EX_ST | ||
24 | #define EX_ST(x) x /* default: stores carry no exception-table entry */ | ||
25 | #endif | ||
26 | |||
27 | #ifndef EX_RETVAL | ||
28 | #define EX_RETVAL(x) x /* default: return the saved dst pointer unchanged */ | ||
29 | #endif | ||
30 | |||
31 | #ifndef LOAD | ||
32 | #ifndef MEMCPY_DEBUG | ||
33 | #define LOAD(type,addr,dest) type [addr], dest | ||
34 | #else | ||
35 | #define LOAD(type,addr,dest) type##a [addr] 0x80, dest /* MEMCPY_DEBUG: same load issued as an alternate-space access with explicit ASI 0x80 */ | ||
36 | #endif | ||
37 | #endif | ||
38 | |||
39 | #ifndef LOAD_TWIN | ||
40 | #define LOAD_TWIN(addr_reg,dest0,dest1) /* 16-byte twin load; only dest0 is named in the instruction, dest1 is unused by the expansion */ \ | ||
41 | ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0 | ||
42 | #endif | ||
43 | |||
44 | #ifndef STORE | ||
45 | #define STORE(type,src,addr) type src, [addr] | ||
46 | #endif | ||
47 | |||
48 | #ifndef STORE_INIT | ||
49 | #define STORE_INIT(src,addr) stxa src, [addr] %asi /* 8-byte store through whatever ASI is currently in %asi (STORE_ASI inside the block loops) */ | ||
50 | #endif | ||
51 | |||
52 | #ifndef FUNC_NAME | ||
53 | #define FUNC_NAME NGmemcpy /* symbol name used when this file is assembled directly rather than included */ | ||
54 | #endif | ||
55 | |||
56 | #ifndef PREAMBLE | ||
57 | #define PREAMBLE /* default: no extra entry check */ | ||
58 | #endif | ||
59 | |||
60 | #ifndef XCC | ||
61 | #define XCC xcc /* condition-code set named by the %XCC branches below */ | ||
62 | #endif | ||
63 | |||
64 | .register %g2,#scratch | ||
65 | .register %g3,#scratch | ||
66 | |||
67 | .text | ||
68 | .align 64 | ||
69 | |||
70 | .globl FUNC_NAME | ||
71 | .type FUNC_NAME,#function | ||
72 | FUNC_NAME: /* %o0=dst, %o1=src, %o2=len. Leaf routine; returns EX_RETVAL(original dst) in %o0. Uses %g1-%g3, GLOBAL_SPARE, %o3-%o5 as scratch and, on the block-copy path, rewrites %asi. */ | ||
73 | srlx %o2, 31, %g2 /* %g2 = len >> 31: nonzero only when len >= 2^31 */ | ||
74 | cmp %g2, 0 | ||
75 | tne %xcc, 5 /* such a length raises software trap 5 instead of being copied */ | ||
76 | PREAMBLE | ||
77 | mov %o0, GLOBAL_SPARE /* keep the original dst for the return value */ | ||
78 | cmp %o2, 0 | ||
79 | be,pn %XCC, 85f /* len == 0: nothing to copy */ | ||
80 | or %o0, %o1, %o3 /* delay slot: %o3 = dst OR src, used for the alignment tests below */ | ||
81 | cmp %o2, 16 | ||
82 | blu,a,pn %XCC, 80f /* len < 16: small-copy path */ | ||
83 | or %o3, %o2, %o3 /* annulled delay slot, runs only when the branch is taken: fold len into the alignment mask */ | ||
84 | |||
85 | /* 2 blocks (128 bytes) is the minimum we can do the block | ||
86 | * copy with. We need to ensure that we'll iterate at least | ||
87 | * once in the block copy loop. At worst we'll need to align | ||
88 | * the destination to a 64-byte boundary which can chew up | ||
89 | * to (64 - 1) bytes from the length before we perform the | ||
90 | * block copy loop. | ||
91 | */ | ||
92 | cmp %o2, (2 * 64) | ||
93 | blu,pt %XCC, 70f /* len < 128: medium-copy path */ | ||
94 | andcc %o3, 0x7, %g0 /* delay slot: condition codes say whether dst and src are both 8-byte aligned; label 70 tests them */ | ||
95 | |||
96 | /* %o0: dst | ||
97 | * %o1: src | ||
98 | * %o2: len (known to be >= 128) | ||
99 | * | ||
100 | * The block copy loops will use %o4/%o5,%g2/%g3 as | ||
101 | * temporaries while copying the data. | ||
102 | */ | ||
103 | |||
104 | LOAD(prefetch, %o1, #one_read) | ||
105 | wr %g0, STORE_ASI, %asi /* STORE_INIT stores go through %asi; label 60 puts it back with RESTORE_ASI */ | ||
106 | |||
107 | /* Align destination on 64-byte boundary. */ | ||
108 | andcc %o0, (64 - 1), %o4 | ||
109 | be,pt %XCC, 2f /* dst already 64-byte aligned */ | ||
110 | sub %o4, 64, %o4 /* delay slot; harmless when the branch is taken because label 2 recomputes %o4 */ | ||
111 | sub %g0, %o4, %o4 ! bytes to align dst | ||
112 | sub %o2, %o4, %o2 | ||
113 | 1: subcc %o4, 1, %o4 /* copy one byte per iteration until dst reaches the boundary */ | ||
114 | EX_LD(LOAD(ldub, %o1, %g1)) | ||
115 | EX_ST(STORE(stb, %g1, %o0)) | ||
116 | add %o1, 1, %o1 | ||
117 | bne,pt %XCC, 1b | ||
118 | add %o0, 1, %o0 | ||
119 | |||
120 | /* If the source is on a 16-byte boundary we can do | ||
121 | * the direct block copy loop. If it is 8-byte aligned | ||
122 | * we can do the 16-byte loads offset by -8 bytes and the | ||
123 | * init stores offset by one register. | ||
124 | * | ||
125 | * If the source is not even 8-byte aligned, we need to do | ||
126 | * shifting and masking (basically integer faligndata). | ||
127 | * | ||
128 | * The careful bit with init stores is that if we store | ||
129 | * to any part of the cache line we have to store the whole | ||
130 | * cacheline else we can end up with corrupt L2 cache line | ||
131 | * contents. Since the loop works on 64-bytes of 64-byte | ||
132 | * aligned store data at a time, this is easy to ensure. | ||
133 | */ | ||
134 | 2: | ||
135 | andcc %o1, (16 - 1), %o4 /* %o4 = src offset within 16 bytes; the be below tests this result */ | ||
136 | andn %o2, (64 - 1), %g1 ! block copy loop iterator | ||
137 | sub %o2, %g1, %o2 ! final sub-block copy bytes | ||
138 | be,pt %XCC, 50f /* src is 16-byte aligned */ | ||
139 | cmp %o4, 8 /* delay slot: prepare the 8-byte-aligned test */ | ||
140 | be,a,pt %XCC, 10f /* src is 8-byte but not 16-byte aligned */ | ||
141 | sub %o1, 0x8, %o1 /* annulled delay slot, runs only when taken: back src up to the 16-byte boundary */ | ||
142 | |||
143 | /* Neither 8-byte nor 16-byte aligned, shift and mask. Register roles set up here: %o4 = block bytes left, %g1 = left shift in bits (8 * (src AND 7)), %o3 = 64 - %g1 (right shift), %g2 = bits carried over from the previous source dword; src is rounded down to 8 bytes. */ | ||
144 | mov %g1, %o4 | ||
145 | and %o1, 0x7, %g1 | ||
146 | sll %g1, 3, %g1 | ||
147 | mov 64, %o3 | ||
148 | andn %o1, 0x7, %o1 | ||
149 | EX_LD(LOAD(ldx, %o1, %g2)) | ||
150 | sub %o3, %g1, %o3 | ||
151 | sllx %g2, %g1, %g2 | ||
152 | |||
153 | #define SWIVEL_ONE_DWORD(SRC, TMP1, TMP2, PRE_VAL, PRE_SHIFT, POST_SHIFT, DST)\ | ||
154 | EX_LD(LOAD(ldx, SRC, TMP1)); \ | ||
155 | srlx TMP1, PRE_SHIFT, TMP2; \ | ||
156 | or TMP2, PRE_VAL, TMP2; \ | ||
157 | EX_ST(STORE_INIT(TMP2, DST)); \ | ||
158 | sllx TMP1, POST_SHIFT, PRE_VAL; | ||
159 | |||
160 | 1: add %o1, 0x8, %o1 /* each iteration assembles eight output dwords (64 bytes) from nine consecutive source dwords */ | ||
161 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x00) | ||
162 | add %o1, 0x8, %o1 | ||
163 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x08) | ||
164 | add %o1, 0x8, %o1 | ||
165 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x10) | ||
166 | add %o1, 0x8, %o1 | ||
167 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x18) | ||
168 | add %o1, 32, %o1 /* bump src by 32 only for the prefetch address, then undo it (net +8) on the next sub */ | ||
169 | LOAD(prefetch, %o1, #one_read) | ||
170 | sub %o1, 32 - 8, %o1 | ||
171 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x20) | ||
172 | add %o1, 8, %o1 | ||
173 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x28) | ||
174 | add %o1, 8, %o1 | ||
175 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x30) | ||
176 | add %o1, 8, %o1 | ||
177 | SWIVEL_ONE_DWORD(%o1, %g3, %o5, %g2, %o3, %g1, %o0 + 0x38) | ||
178 | subcc %o4, 64, %o4 | ||
179 | bne,pt %XCC, 1b | ||
180 | add %o0, 64, %o0 | ||
181 | |||
182 | #undef SWIVEL_ONE_DWORD | ||
183 | |||
184 | srl %g1, 3, %g1 /* shift count back from bits to bytes */ | ||
185 | ba,pt %XCC, 60f | ||
186 | add %o1, %g1, %o1 /* delay slot: re-add the byte offset that was masked off src before the loop */ | ||
187 | |||
188 | 10: /* Destination is 64-byte aligned, source was only 8-byte | ||
189 | * aligned but it has been subtracted by 8 and we perform | ||
190 | * one twin load ahead, then add 8 back into source when | ||
191 | * we finish the loop. | ||
192 | */ | ||
193 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
194 | 1: add %o1, 16, %o1 | ||
195 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
196 | add %o1, 16 + 32, %o1 /* advance 16, plus 32 more only for the prefetch address */ | ||
197 | LOAD(prefetch, %o1, #one_read) | ||
198 | sub %o1, 32, %o1 | ||
199 | EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line | ||
200 | EX_ST(STORE_INIT(%g2, %o0 + 0x08)) | ||
201 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
202 | add %o1, 16, %o1 | ||
203 | EX_ST(STORE_INIT(%g3, %o0 + 0x10)) | ||
204 | EX_ST(STORE_INIT(%o4, %o0 + 0x18)) | ||
205 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
206 | add %o1, 16, %o1 | ||
207 | EX_ST(STORE_INIT(%o5, %o0 + 0x20)) | ||
208 | EX_ST(STORE_INIT(%g2, %o0 + 0x28)) | ||
209 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) /* this pair is also the look-ahead load for the next iteration */ | ||
210 | EX_ST(STORE_INIT(%g3, %o0 + 0x30)) | ||
211 | EX_ST(STORE_INIT(%o4, %o0 + 0x38)) | ||
212 | subcc %g1, 64, %g1 | ||
213 | bne,pt %XCC, 1b | ||
214 | add %o0, 64, %o0 | ||
215 | |||
216 | ba,pt %XCC, 60f | ||
217 | add %o1, 0x8, %o1 /* delay slot: add back the 8 bytes subtracted before entering label 10 */ | ||
218 | |||
219 | 50: /* Destination is 64-byte aligned, and source is 16-byte | ||
220 | * aligned. | ||
221 | */ | ||
222 | 1: EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
223 | add %o1, 16, %o1 | ||
224 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
225 | add %o1, 16 + 32, %o1 /* advance 16, plus 32 more only for the prefetch address */ | ||
226 | LOAD(prefetch, %o1, #one_read) | ||
227 | sub %o1, 32, %o1 | ||
228 | EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line | ||
229 | EX_ST(STORE_INIT(%o5, %o0 + 0x08)) | ||
230 | EX_LD(LOAD_TWIN(%o1, %o4, %o5)) | ||
231 | add %o1, 16, %o1 | ||
232 | EX_ST(STORE_INIT(%g2, %o0 + 0x10)) | ||
233 | EX_ST(STORE_INIT(%g3, %o0 + 0x18)) | ||
234 | EX_LD(LOAD_TWIN(%o1, %g2, %g3)) | ||
235 | add %o1, 16, %o1 | ||
236 | EX_ST(STORE_INIT(%o4, %o0 + 0x20)) | ||
237 | EX_ST(STORE_INIT(%o5, %o0 + 0x28)) | ||
238 | EX_ST(STORE_INIT(%g2, %o0 + 0x30)) | ||
239 | EX_ST(STORE_INIT(%g3, %o0 + 0x38)) | ||
240 | subcc %g1, 64, %g1 | ||
241 | bne,pt %XCC, 1b | ||
242 | add %o0, 64, %o0 | ||
243 | /* fall through */ | ||
244 | |||
245 | 60: | ||
246 | /* %o2 contains any final bytes still needed to be copied | ||
247 | * over. If anything is left, we copy it one byte at a time. | ||
248 | */ | ||
249 | RESTORE_ASI | ||
250 | brz,pt %o2, 85f /* no tail bytes: return */ | ||
251 | sub %o0, %o1, %o3 /* delay slot: %o3 = dst - src, the offset the byte loop at 90 stores through */ | ||
252 | ba,a,pt %XCC, 90f | ||
253 | |||
254 | .align 64 | ||
255 | 70: /* 16 <= len < 128; condition codes still hold the andcc of (dst OR src) with 0x7 from the delay slot of the branch that came here */ | ||
256 | bne,pn %XCC, 75f /* dst or src is not 8-byte aligned */ | ||
257 | sub %o0, %o1, %o3 /* delay slot: %o3 = dst - src, so [%o1 + %o3] addresses dst */ | ||
258 | |||
259 | 72: | ||
260 | andn %o2, 0xf, %o4 /* %o4 = bytes to move in 16-byte steps */ | ||
261 | and %o2, 0xf, %o2 /* %o2 = leftover (0..15) */ | ||
262 | 1: subcc %o4, 0x10, %o4 | ||
263 | EX_LD(LOAD(ldx, %o1, %o5)) | ||
264 | add %o1, 0x08, %o1 | ||
265 | EX_LD(LOAD(ldx, %o1, %g1)) | ||
266 | sub %o1, 0x08, %o1 | ||
267 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
268 | add %o1, 0x8, %o1 | ||
269 | EX_ST(STORE(stx, %g1, %o1 + %o3)) | ||
270 | bgu,pt %XCC, 1b | ||
271 | add %o1, 0x8, %o1 | ||
272 | 73: andcc %o2, 0x8, %g0 /* one 8-byte piece left? */ | ||
273 | be,pt %XCC, 1f | ||
274 | nop | ||
275 | sub %o2, 0x8, %o2 | ||
276 | EX_LD(LOAD(ldx, %o1, %o5)) | ||
277 | EX_ST(STORE(stx, %o5, %o1 + %o3)) | ||
278 | add %o1, 0x8, %o1 | ||
279 | 1: andcc %o2, 0x4, %g0 /* one 4-byte piece left? */ | ||
280 | be,pt %XCC, 1f | ||
281 | nop | ||
282 | sub %o2, 0x4, %o2 | ||
283 | EX_LD(LOAD(lduw, %o1, %o5)) | ||
284 | EX_ST(STORE(stw, %o5, %o1 + %o3)) | ||
285 | add %o1, 0x4, %o1 | ||
286 | 1: cmp %o2, 0 | ||
287 | be,pt %XCC, 85f /* nothing left: return */ | ||
288 | nop | ||
289 | ba,pt %xcc, 90f /* remaining bytes go through the byte loop */ | ||
290 | nop | ||
291 | |||
292 | 75: | ||
293 | andcc %o0, 0x7, %g1 /* %g1 = dst offset within 8 bytes */ | ||
294 | sub %g1, 0x8, %g1 /* plain sub leaves the condition codes from the andcc intact for the be below */ | ||
295 | be,pn %icc, 2f /* dst already 8-byte aligned */ | ||
296 | sub %g0, %g1, %g1 /* delay slot: %g1 = 8 - (dst AND 7), bytes needed to align dst */ | ||
297 | sub %o2, %g1, %o2 | ||
298 | |||
299 | 1: subcc %g1, 1, %g1 | ||
300 | EX_LD(LOAD(ldub, %o1, %o5)) | ||
301 | EX_ST(STORE(stb, %o5, %o1 + %o3)) | ||
302 | bgu,pt %icc, 1b | ||
303 | add %o1, 1, %o1 | ||
304 | |||
305 | 2: add %o1, %o3, %o0 /* recompute the current dst from src + (dst - src) */ | ||
306 | andcc %o1, 0x7, %g1 | ||
307 | bne,pt %icc, 8f /* src not 8-byte aligned: shift-and-merge loop */ | ||
308 | sll %g1, 3, %g1 /* delay slot: src byte offset converted to a bit count */ | ||
309 | |||
310 | cmp %o2, 16 | ||
311 | bgeu,pt %icc, 72b /* both aligned now: reuse the 16-byte loop when at least 16 bytes remain */ | ||
312 | nop | ||
313 | ba,a,pt %xcc, 73b | ||
314 | |||
315 | 8: mov 64, %o3 /* from here %o3 is the right-shift count (64 - %g1), no longer dst - src */ | ||
316 | andn %o1, 0x7, %o1 | ||
317 | EX_LD(LOAD(ldx, %o1, %g2)) | ||
318 | sub %o3, %g1, %o3 | ||
319 | andn %o2, 0x7, %o4 /* %o4 = bytes to move in 8-byte steps */ | ||
320 | sllx %g2, %g1, %g2 | ||
321 | 1: add %o1, 0x8, %o1 | ||
322 | EX_LD(LOAD(ldx, %o1, %g3)) | ||
323 | subcc %o4, 0x8, %o4 | ||
324 | srlx %g3, %o3, %o5 | ||
325 | or %o5, %g2, %o5 /* merge the carried high part with the low part of the new dword */ | ||
326 | EX_ST(STORE(stx, %o5, %o0)) | ||
327 | add %o0, 0x8, %o0 | ||
328 | bgu,pt %icc, 1b | ||
329 | sllx %g3, %g1, %g2 /* delay slot: carry the unused bits into the next iteration */ | ||
330 | |||
331 | srl %g1, 3, %g1 /* shift count back from bits to bytes */ | ||
332 | andcc %o2, 0x7, %o2 | ||
333 | be,pn %icc, 85f /* no tail bytes: return */ | ||
334 | add %o1, %g1, %o1 /* delay slot: re-add the byte offset that was masked off src */ | ||
335 | ba,pt %xcc, 90f | ||
336 | sub %o0, %o1, %o3 /* delay slot: %o3 = dst - src again for the byte loop */ | ||
337 | |||
338 | .align 64 | ||
339 | 80: /* 0 < len < 16; %o3 = dst OR src OR len */ | ||
340 | andcc %o3, 0x3, %g0 | ||
341 | bne,pn %XCC, 90f /* dst, src or len not a multiple of 4: byte loop */ | ||
342 | sub %o0, %o1, %o3 /* delay slot: %o3 = dst - src */ | ||
343 | |||
344 | 1: | ||
345 | subcc %o2, 4, %o2 /* word-at-a-time copy; len is a nonzero multiple of 4 here */ | ||
346 | EX_LD(LOAD(lduw, %o1, %g1)) | ||
347 | EX_ST(STORE(stw, %g1, %o1 + %o3)) | ||
348 | bgu,pt %XCC, 1b | ||
349 | add %o1, 4, %o1 | ||
350 | |||
351 | 85: retl | ||
352 | mov EX_RETVAL(GLOBAL_SPARE), %o0 /* delay slot: return value derived from the saved dst */ | ||
353 | |||
354 | .align 32 | ||
355 | 90: | ||
356 | subcc %o2, 1, %o2 /* byte loop; callers guarantee %o2 > 0 and %o3 = dst - src */ | ||
357 | EX_LD(LOAD(ldub, %o1, %g1)) | ||
358 | EX_ST(STORE(stb, %g1, %o1 + %o3)) | ||
359 | bgu,pt %XCC, 90b | ||
360 | add %o1, 1, %o1 | ||
361 | retl | ||
362 | mov EX_RETVAL(GLOBAL_SPARE), %o0 | ||
363 | |||
364 | .size FUNC_NAME, .-FUNC_NAME | ||
diff --git a/arch/sparc64/lib/NGpatch.S b/arch/sparc64/lib/NGpatch.S new file mode 100644 index 000000000000..f13ec9e4c8a3 --- /dev/null +++ b/arch/sparc64/lib/NGpatch.S | |||
@@ -0,0 +1,32 @@ | |||
1 | /* NGpatch.S: Patch Ultra-I routines with Niagara variant. | ||
2 | * | ||
3 | * Copyright (C) 2006 David S. Miller <davem@davemloft.net> | ||
4 | */ | ||
5 | |||
6 | #define BRANCH_ALWAYS 0x10680000 /* instruction word of a branch-always with an empty 19-bit word-displacement field (low bits) */ | ||
7 | #define NOP 0x01000000 /* instruction word written into the delay slot of the patched branch */ | ||
8 | #define NG_DO_PATCH(OLD, NEW) /* overwrite the first two instructions at OLD with "branch to NEW; nop"; clobbers %g1-%g3; OLD and NEW are addressed via %hi/%lo (32-bit) */ \ | ||
9 | sethi %hi(NEW), %g1; \ | ||
10 | or %g1, %lo(NEW), %g1; \ | ||
11 | sethi %hi(OLD), %g2; \ | ||
12 | or %g2, %lo(OLD), %g2; \ | ||
13 | sub %g1, %g2, %g1; /* %g1 = byte distance from OLD to NEW, negative when NEW is below OLD */ \ | ||
14 | sethi %hi(BRANCH_ALWAYS), %g3; \ | ||
15 | sll %g1, 11, %g1; srl %g1, 11 + 2, %g1; /* keep only bits 20:2 of the distance as the 19-bit word displacement, so a negative distance cannot spill into the opcode bits */ \ | ||
16 | or %g3, %lo(BRANCH_ALWAYS), %g3; \ | ||
17 | or %g3, %g1, %g3; \ | ||
18 | stw %g3, [%g2]; \ | ||
19 | sethi %hi(NOP), %g3; \ | ||
20 | or %g3, %lo(NOP), %g3; \ | ||
21 | stw %g3, [%g2 + 0x4]; \ | ||
22 | flush %g2; /* flush the instruction cache line holding the rewritten words */ | ||
23 | |||
24 | .globl niagara_patch_copyops | ||
25 | .type niagara_patch_copyops,#function | ||
26 | niagara_patch_copyops: /* leaf routine, no arguments: redirects memcpy, ___copy_from_user and ___copy_to_user to their NG* variants */ | ||
27 | NG_DO_PATCH(memcpy, NGmemcpy) | ||
28 | NG_DO_PATCH(___copy_from_user, NGcopy_from_user) | ||
29 | NG_DO_PATCH(___copy_to_user, NGcopy_to_user) | ||
30 | retl | ||
31 | nop | ||
32 | .size niagara_patch_copyops,.-niagara_patch_copyops | ||