Diffstat (limited to 'arch/cris/arch-v32/lib/usercopy.c')
-rw-r--r-- | arch/cris/arch-v32/lib/usercopy.c | 470
1 file changed, 470 insertions, 0 deletions
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
new file mode 100644
index 000000000000..f0b08460c1be
--- /dev/null
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -0,0 +1,470 @@ | |||
1 | /* | ||
2 | * User address space access functions. | ||
3 | * The non-inlined parts of asm-cris/uaccess.h are here. | ||
4 | * | ||
5 | * Copyright (C) 2000, 2003 Axis Communications AB. | ||
6 | * | ||
7 | * Written by Hans-Peter Nilsson. | ||
8 | * Pieces used from memcpy, originally by Kenny Ranerup long time ago. | ||
9 | */ | ||
10 | |||
11 | #include <asm/uaccess.h> | ||
12 | |||
13 | /* The asm statements have been tweaked (within the domain of correctness) | ||
14 | to give satisfactory results for "gcc version 3.2.1 Axis release R53/1.53-v32". | ||
15 | |||
16 | Check regularly... | ||
17 | |||
18 | Note that for CRISv32, the PC saved at a bus-fault is the address | ||
19 | *at* the faulting instruction, with a special case for instructions | ||
20 | in delay slots: then it's the address of the branch. Note also that | ||
21 | in contrast to v10, a postincrement in the instruction is *not* | ||
22 | performed at a bus-fault; the register is seen having the original | ||
23 | value in fault handlers. */ | ||
24 | |||
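/* Illustrative sketch, not part of this file: the "__ex_table" sections
   emitted by the asm blocks below are pairs of addresses that the fault
   handler searches when a user access faults.  The struct name and exact
   layout here are assumptions based on the classic kernel scheme; the
   real definition lives in the uaccess headers.  */
#if 0	/* sketch only, never compiled */
struct exception_table_entry {
	unsigned long insn;	/* address of the instruction that may fault */
	unsigned long fixup;	/* address to resume at if it does fault */
};
#endif
/* Each ".dword 1b,4b"-style directive below emits one such pair: if the
   access at local label 1 faults, execution continues at label 4, with the
   registers still holding their pre-fault values (see the note above).  */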
25 | |||
26 | /* Copy to userspace. This is based on the memcpy used for | ||
27 | kernel-to-kernel copying; see "string.c". */ | ||
28 | |||
29 | unsigned long | ||
30 | __copy_user (void __user *pdst, const void *psrc, unsigned long pn) | ||
31 | { | ||
32 | /* We want the parameters put in special registers. | ||
33 | Make sure the compiler is able to make something useful of this. | ||
34 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | ||
35 | |||
36 | FIXME: Comment for old gcc version. Check. | ||
37 | If gcc were all right, it really would need no temporaries, and no | ||
38 | stack space to save stuff on. */ | ||
39 | |||
40 | register char *dst __asm__ ("r13") = pdst; | ||
41 | register const char *src __asm__ ("r11") = psrc; | ||
42 | register int n __asm__ ("r12") = pn; | ||
43 | register int retn __asm__ ("r10") = 0; | ||
44 | |||
45 | |||
46 | /* When src is aligned but not dst, this costs a few needless extra | ||
47 | cycles. I believe it would take as many to check that the | ||
48 | re-alignment was unnecessary. */ | ||
49 | if (((unsigned long) dst & 3) != 0 | ||
50 | /* Don't align if we wouldn't copy more than a few bytes, so that we | ||
51 | don't have to check further for overflows. */ | ||
52 | && n >= 3) | ||
53 | { | ||
54 | if ((unsigned long) dst & 1) | ||
55 | { | ||
56 | __asm_copy_to_user_1 (dst, src, retn); | ||
57 | n--; | ||
58 | } | ||
59 | |||
60 | if ((unsigned long) dst & 2) | ||
61 | { | ||
62 | __asm_copy_to_user_2 (dst, src, retn); | ||
63 | n -= 2; | ||
64 | } | ||
65 | } | ||
66 | |||
67 | /* Movem is dirt cheap. The overhead is low enough to always use the | ||
68 | minimum possible block size (11 registers = 44 bytes) as the threshold. */ | ||
69 | if (n >= 44) | ||
70 | { | ||
71 | /* For large copies we use 'movem'. */ | ||
72 | |||
73 | /* It is not optimal to tell the compiler about clobbering any | ||
74 | registers; that will move the saving/restoring of those registers | ||
75 | to the function prologue/epilogue, and make non-movem sizes | ||
76 | suboptimal. */ | ||
77 | __asm__ volatile ("\ | ||
78 | ;; Check that the register asm declarations came out right.	 \n\ | ||
79 | ;; The GCC manual explicitly says TRT (the right thing) will happen. \n\ | ||
80 | .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ | ||
81 | .err \n\ | ||
82 | .endif \n\ | ||
83 | \n\ | ||
84 | ;; Save the registers we'll use in the movem process \n\ | ||
85 | ;; on the stack. \n\ | ||
86 | subq 11*4,$sp \n\ | ||
87 | movem $r10,[$sp] \n\ | ||
88 | \n\ | ||
89 | ;; Now we've got this: \n\ | ||
90 | ;; r11 - src \n\ | ||
91 | ;; r13 - dst \n\ | ||
92 | ;; r12 - n \n\ | ||
93 | \n\ | ||
94 | ;; Update n for the first loop \n\ | ||
95 | subq 44,$r12 \n\ | ||
96 | 0: \n\ | ||
97 | movem [$r11+],$r10 \n\ | ||
98 | subq 44,$r12 \n\ | ||
99 | 1: bge 0b \n\ | ||
100 | movem $r10,[$r13+] \n\ | ||
101 | 3: \n\ | ||
102 | addq 44,$r12 ;; compensate for last loop underflowing n \n\ | ||
103 | \n\ | ||
104 | ;; Restore registers from stack \n\ | ||
105 | movem [$sp+],$r10 \n\ | ||
106 | 2: \n\ | ||
107 | .section .fixup,\"ax\" \n\ | ||
108 | 4: \n\ | ||
109 | ; When failing on any of the 1..44 bytes in a chunk, we adjust back the \n\ | ||
110 | ; source pointer and just drop through to the by-16 and by-4 loops to \n\ | ||
111 | ; get the correct number of failing bytes. This necessarily means a \n\ | ||
112 | ; few extra exceptions, but invalid user pointers shouldn't happen in \n\ | ||
113 | ; time-critical code anyway. \n\ | ||
114 | jump 3b \n\ | ||
115 | subq 44,$r11 \n\ | ||
116 | \n\ | ||
117 | .previous \n\ | ||
118 | .section __ex_table,\"a\" \n\ | ||
119 | .dword 1b,4b \n\ | ||
120 | .previous" | ||
121 | |||
122 | /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) | ||
123 | /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); | ||
124 | |||
125 | } | ||
126 | |||
127 | while (n >= 16) | ||
128 | { | ||
129 | __asm_copy_to_user_16 (dst, src, retn); | ||
130 | n -= 16; | ||
131 | } | ||
132 | |||
133 | /* Having a separate by-four loop cuts down on cache footprint. | ||
134 | FIXME: Test with and without; increase the switch to cover 0..15. */ | ||
135 | while (n >= 4) | ||
136 | { | ||
137 | __asm_copy_to_user_4 (dst, src, retn); | ||
138 | n -= 4; | ||
139 | } | ||
140 | |||
141 | switch (n) | ||
142 | { | ||
143 | case 0: | ||
144 | break; | ||
145 | case 1: | ||
146 | __asm_copy_to_user_1 (dst, src, retn); | ||
147 | break; | ||
148 | case 2: | ||
149 | __asm_copy_to_user_2 (dst, src, retn); | ||
150 | break; | ||
151 | case 3: | ||
152 | __asm_copy_to_user_3 (dst, src, retn); | ||
153 | break; | ||
154 | } | ||
155 | |||
156 | return retn; | ||
157 | } | ||
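/* Usage sketch, not part of this file: callers do not invoke __copy_user
   directly.  The inline wrapper (assumed here; the real one lives in
   asm-cris/uaccess.h and may differ in detail) range-checks the user
   pointer first and only then falls through to this routine.  */
#if 0	/* sketch only, never compiled */
static inline unsigned long
copy_to_user (void __user *to, const void *from, unsigned long n)
{
	if (access_ok (VERIFY_WRITE, to, n))
		return __copy_user (to, from, n);
	return n;		/* Nothing copied; report every byte as failed.  */
}
#endif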
158 | |||
159 | /* Copy from user to kernel, zeroing the bytes that were inaccessible in | ||
160 | userland. The return-value is the number of bytes that were | ||
161 | inaccessible. */ | ||
162 | |||
163 | unsigned long | ||
164 | __copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn) | ||
165 | { | ||
166 | /* We want the parameters put in special registers. | ||
167 | Make sure the compiler is able to make something useful of this. | ||
168 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | ||
169 | |||
170 | FIXME: Comment for old gcc version. Check. | ||
171 | If gcc were all right, it really would need no temporaries, and no | ||
172 | stack space to save stuff on. */ | ||
173 | |||
174 | register char *dst __asm__ ("r13") = pdst; | ||
175 | register const char *src __asm__ ("r11") = psrc; | ||
176 | register int n __asm__ ("r12") = pn; | ||
177 | register int retn __asm__ ("r10") = 0; | ||
178 | |||
179 | /* The best reason to align src is that we then know that a read-fault | ||
180 | was for aligned bytes; there are no 1..3 remaining good bytes to | ||
181 | pickle. */ | ||
182 | if (((unsigned long) src & 3) != 0) | ||
183 | { | ||
184 | if (((unsigned long) src & 1) && n != 0) | ||
185 | { | ||
186 | __asm_copy_from_user_1 (dst, src, retn); | ||
187 | n--; | ||
188 | } | ||
189 | |||
190 | if (((unsigned long) src & 2) && n >= 2) | ||
191 | { | ||
192 | __asm_copy_from_user_2 (dst, src, retn); | ||
193 | n -= 2; | ||
194 | } | ||
195 | |||
196 | /* We only need one check after the unalignment-adjustments, because | ||
197 | if both adjustments were done, either both or neither reference | ||
198 | had an exception. */ | ||
199 | if (retn != 0) | ||
200 | goto copy_exception_bytes; | ||
201 | } | ||
202 | |||
203 | /* Movem is dirt cheap. The overhead is low enough to always use the | ||
204 | minimum possible block size as the threshold. */ | ||
205 | if (n >= 44) | ||
206 | { | ||
207 | /* It is not optimal to tell the compiler about clobbering any | ||
208 | registers; that will move the saving/restoring of those registers | ||
209 | to the function prologue/epilogue, and make non-movem sizes | ||
210 | suboptimal. */ | ||
211 | __asm__ volatile ("\ | ||
212 | .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ | ||
213 | .err \n\ | ||
214 | .endif \n\ | ||
215 | \n\ | ||
216 | ;; Save the registers we'll use in the movem process \n\ | ||
217 | ;; on the stack. \n\ | ||
218 | subq 11*4,$sp \n\ | ||
219 | movem $r10,[$sp] \n\ | ||
220 | \n\ | ||
221 | ;; Now we've got this: \n\ | ||
222 | ;; r11 - src \n\ | ||
223 | ;; r13 - dst \n\ | ||
224 | ;; r12 - n \n\ | ||
225 | \n\ | ||
226 | ;; Update n for the first loop \n\ | ||
227 | subq 44,$r12 \n\ | ||
228 | 0: \n\ | ||
229 | movem [$r11+],$r10 \n\ | ||
230 | \n\ | ||
231 | subq 44,$r12 \n\ | ||
232 | bge 0b \n\ | ||
233 | movem $r10,[$r13+] \n\ | ||
234 | \n\ | ||
235 | 4: \n\ | ||
236 | addq 44,$r12 ;; compensate for last loop underflowing n \n\ | ||
237 | \n\ | ||
238 | ;; Restore registers from stack \n\ | ||
239 | movem [$sp+],$r10 \n\ | ||
240 | .section .fixup,\"ax\" \n\ | ||
241 | \n\ | ||
242 | ;; Do not jump back into the loop if we fail. For some uses, we get a \n\ | ||
243 | ;; page fault somewhere on the line. Without checking for page limits, \n\ | ||
244 | ;; we don't know where, but we need to copy accurately and keep an \n\ | ||
245 | ;; accurate count; not just clear the whole line. To do that, we fall \n\ | ||
246 | ;; down in the code below, proceeding with smaller amounts. It should \n\ | ||
247 | ;; be kept in mind that we have to cater to code like what at one time \n\ | ||
248 | ;; was in fs/super.c: \n\ | ||
249 | ;; i = size - copy_from_user((void *)page, data, size); \n\ | ||
250 | ;; which would cause repeated faults while clearing the remainder of \n\ | ||
251 | ;; the SIZE bytes at PAGE after the first fault. \n\ | ||
252 | ;; A caveat here is that we must not fall through from a failing page \n\ | ||
253 | ;; to a valid page. \n\ | ||
254 | \n\ | ||
255 | 3: \n\ | ||
256 | jump 4b ;; Fall through, pretending the fault didn't happen. \n\ | ||
257 | nop \n\ | ||
258 | \n\ | ||
259 | .previous \n\ | ||
260 | .section __ex_table,\"a\" \n\ | ||
261 | .dword 0b,3b \n\ | ||
262 | .previous" | ||
263 | |||
264 | /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) | ||
265 | /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); | ||
266 | } | ||
267 | |||
268 | /* Either we directly start copying here, using dword copying in a loop, | ||
269 | or we copy as much as possible with 'movem' and then the last block | ||
270 | (<44 bytes) is copied here. This will work since 'movem' will have | ||
271 | updated src, dst and n. (Except with failing src.) | ||
272 | |||
273 | Since we want to keep src accurate, we can't use | ||
274 | __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and | ||
275 | retn, but not src (by design; its value is ignored elsewhere). */ | ||
276 | |||
277 | while (n >= 4) | ||
278 | { | ||
279 | __asm_copy_from_user_4 (dst, src, retn); | ||
280 | n -= 4; | ||
281 | |||
282 | if (retn) | ||
283 | goto copy_exception_bytes; | ||
284 | } | ||
285 | |||
286 | /* If we get here, there were no memory read faults. */ | ||
287 | switch (n) | ||
288 | { | ||
289 | /* These copies are at least "naturally aligned" (so we don't have | ||
290 | to check each byte), due to the src alignment code before the | ||
291 | movem loop. The *_3 case *will* get the correct count for retn. */ | ||
292 | case 0: | ||
293 | /* This case is deliberately left in (if you have doubts, check the | ||
294 | generated assembly code). */ | ||
295 | break; | ||
296 | case 1: | ||
297 | __asm_copy_from_user_1 (dst, src, retn); | ||
298 | break; | ||
299 | case 2: | ||
300 | __asm_copy_from_user_2 (dst, src, retn); | ||
301 | break; | ||
302 | case 3: | ||
303 | __asm_copy_from_user_3 (dst, src, retn); | ||
304 | break; | ||
305 | } | ||
306 | |||
307 | /* If we get here, retn correctly reflects the number of failing | ||
308 | bytes. */ | ||
309 | return retn; | ||
310 | |||
311 | copy_exception_bytes: | ||
312 | /* We already have "retn" bytes cleared, and need to clear the | ||
313 | remaining "n" bytes. A simple, non-optimized byte-for-byte in-line | ||
314 | memset is preferred here, since this isn't speed-critical code and | ||
315 | we'd rather keep this a leaf function than call memset. */ | ||
316 | { | ||
317 | char *endp; | ||
318 | for (endp = dst + n; dst < endp; dst++) | ||
319 | *dst = 0; | ||
320 | } | ||
321 | |||
322 | return retn + n; | ||
323 | } | ||
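/* Semantics sketch, not part of this file: the zeroed tail plus the exact
   return value are what make the idiom quoted in the fixup comment above
   work.  The variable names (page, data, size) are taken from that quote;
   copy_from_user() is assumed to land in __copy_user_zeroing() after its
   access_ok check.  */
#if 0	/* sketch only, never compiled */
	unsigned long got;

	/* If 'data' becomes inaccessible part-way through, the tail of
	   'page' is zeroed and the return value counts the missing bytes,
	   so 'got' is exactly the number of bytes that really arrived and
	   the whole buffer is still fully initialized.  */
	got = size - copy_from_user ((void *) page, data, size);
#endif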
324 | |||
325 | /* Zero userspace. */ | ||
326 | |||
327 | unsigned long | ||
328 | __do_clear_user (void __user *pto, unsigned long pn) | ||
329 | { | ||
330 | /* We want the parameters put in special registers. | ||
331 | Make sure the compiler is able to make something useful of this. | ||
332 | As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). | ||
333 | |||
334 | FIXME: Comment for old gcc version. Check. | ||
335 | If gcc were all right, it really would need no temporaries, and no | ||
336 | stack space to save stuff on. */ | ||
337 | |||
338 | register char *dst __asm__ ("r13") = pto; | ||
339 | register int n __asm__ ("r12") = pn; | ||
340 | register int retn __asm__ ("r10") = 0; | ||
341 | |||
342 | |||
343 | if (((unsigned long) dst & 3) != 0 | ||
344 | /* Don't align if we wouldn't copy more than a few bytes. */ | ||
345 | && n >= 3) | ||
346 | { | ||
347 | if ((unsigned long) dst & 1) | ||
348 | { | ||
349 | __asm_clear_1 (dst, retn); | ||
350 | n--; | ||
351 | } | ||
352 | |||
353 | if ((unsigned long) dst & 2) | ||
354 | { | ||
355 | __asm_clear_2 (dst, retn); | ||
356 | n -= 2; | ||
357 | } | ||
358 | } | ||
359 | |||
360 | /* Decide which copying method to use. | ||
361 | FIXME: This number is from the "ordinary" kernel memset. */ | ||
362 | if (n >= 48) | ||
363 | { | ||
364 | /* For large clears we use 'movem' */ | ||
365 | |||
366 | /* It is not optimal to tell the compiler about clobbering any | ||
367 | call-saved registers; that will move the saving/restoring of | ||
368 | those registers to the function prologue/epilogue, and make | ||
369 | non-movem sizes suboptimal. | ||
370 | |||
371 | This method is not foolproof; it assumes that the "asm reg" | ||
372 | declarations at the beginning of the function really are used | ||
373 | here (beware: they may be moved to temporary registers). | ||
374 | This way, we do not have to save/move the registers around into | ||
375 | temporaries; we can safely use them straight away. | ||
376 | |||
377 | If you want to check that the allocation was right, then | ||
378 | check the equalities in the first comment. It should say | ||
379 | something like "r13=r13, r11=r11, r12=r12". */ | ||
380 | __asm__ volatile ("\ | ||
381 | .ifnc %0%1%2,$r13$r12$r10 \n\ | ||
382 | .err \n\ | ||
383 | .endif \n\ | ||
384 | \n\ | ||
385 | ;; Save the registers we'll clobber in the movem process \n\ | ||
386 | ;; on the stack. Don't mention them to gcc, it will only be \n\ | ||
387 | ;; upset. \n\ | ||
388 | subq 11*4,$sp \n\ | ||
389 | movem $r10,[$sp] \n\ | ||
390 | \n\ | ||
391 | clear.d $r0 \n\ | ||
392 | clear.d $r1 \n\ | ||
393 | clear.d $r2 \n\ | ||
394 | clear.d $r3 \n\ | ||
395 | clear.d $r4 \n\ | ||
396 | clear.d $r5 \n\ | ||
397 | clear.d $r6 \n\ | ||
398 | clear.d $r7 \n\ | ||
399 | clear.d $r8 \n\ | ||
400 | clear.d $r9 \n\ | ||
401 | clear.d $r10 \n\ | ||
402 | clear.d $r11 \n\ | ||
403 | \n\ | ||
404 | ;; Now we've got this: \n\ | ||
405 | ;; r13 - dst \n\ | ||
406 | ;; r12 - n \n\ | ||
407 | \n\ | ||
408 | ;; Update n for the first loop \n\ | ||
409 | subq 12*4,$r12 \n\ | ||
410 | 0: \n\ | ||
411 | subq 12*4,$r12 \n\ | ||
412 | 1: \n\ | ||
413 | bge 0b \n\ | ||
414 | movem $r11,[$r13+] \n\ | ||
415 | \n\ | ||
416 | addq 12*4,$r12 ;; compensate for last loop underflowing n \n\ | ||
417 | \n\ | ||
418 | ;; Restore registers from stack \n\ | ||
419 | movem [$sp+],$r10 \n\ | ||
420 | 2: \n\ | ||
421 | .section .fixup,\"ax\" \n\ | ||
422 | 3: \n\ | ||
423 | movem [$sp],$r10 \n\ | ||
424 | addq 12*4,$r10 \n\ | ||
425 | addq 12*4,$r13 \n\ | ||
426 | movem $r10,[$sp] \n\ | ||
427 | jump 0b \n\ | ||
428 | clear.d $r10 \n\ | ||
429 | \n\ | ||
430 | .previous \n\ | ||
431 | .section __ex_table,\"a\" \n\ | ||
432 | .dword 1b,3b \n\ | ||
433 | .previous" | ||
434 | |||
435 | /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn) | ||
436 | /* Inputs */ : "0" (dst), "1" (n), "2" (retn) | ||
437 | /* Clobber */ : "r11"); | ||
438 | } | ||
439 | |||
440 | while (n >= 16) | ||
441 | { | ||
442 | __asm_clear_16 (dst, retn); | ||
443 | n -= 16; | ||
444 | } | ||
445 | |||
446 | /* Having a separate by-four loop cuts down on cache footprint. | ||
447 | FIXME: Test with and without; increase the switch to cover 0..15. */ | ||
448 | while (n >= 4) | ||
449 | { | ||
450 | __asm_clear_4 (dst, retn); | ||
451 | n -= 4; | ||
452 | } | ||
453 | |||
454 | switch (n) | ||
455 | { | ||
456 | case 0: | ||
457 | break; | ||
458 | case 1: | ||
459 | __asm_clear_1 (dst, retn); | ||
460 | break; | ||
461 | case 2: | ||
462 | __asm_clear_2 (dst, retn); | ||
463 | break; | ||
464 | case 3: | ||
465 | __asm_clear_3 (dst, retn); | ||
466 | break; | ||
467 | } | ||
468 | |||
469 | return retn; | ||
470 | } | ||
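/* Usage sketch, not part of this file: as with the copy routines, callers
   reach __do_clear_user() through a clear_user() wrapper (assumed here;
   the real one lives in asm-cris/uaccess.h) that range-checks the pointer
   first.  */
#if 0	/* sketch only, never compiled */
static inline unsigned long
clear_user (void __user *to, unsigned long n)
{
	if (access_ok (VERIFY_WRITE, to, n))
		return __do_clear_user (to, n);
	return n;		/* Nothing cleared; report every byte as failed.  */
}
#endif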