author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/lib
tag        v2.6.12-rc2 (Linux-2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/i386/lib')
-rw-r--r--   arch/i386/lib/Makefile          10
-rw-r--r--   arch/i386/lib/bitops.c          70
-rw-r--r--   arch/i386/lib/checksum.S       496
-rw-r--r--   arch/i386/lib/dec_and_lock.c    40
-rw-r--r--   arch/i386/lib/delay.c           49
-rw-r--r--   arch/i386/lib/getuser.S         70
-rw-r--r--   arch/i386/lib/memcpy.c          44
-rw-r--r--   arch/i386/lib/mmx.c            399
-rw-r--r--   arch/i386/lib/putuser.S         87
-rw-r--r--   arch/i386/lib/strstr.c          31
-rw-r--r--   arch/i386/lib/usercopy.c       636
11 files changed, 1932 insertions, 0 deletions
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
new file mode 100644
index 000000000000..7b1932d20f96
--- /dev/null
+++ b/arch/i386/lib/Makefile
@@ -0,0 +1,10 @@
1 | # | ||
2 | # Makefile for i386-specific library files.. | ||
3 | # | ||
4 | |||
5 | |||
6 | lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \ | ||
7 | bitops.o | ||
8 | |||
9 | lib-$(CONFIG_X86_USE_3DNOW) += mmx.o | ||
10 | lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o | ||
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c
new file mode 100644
index 000000000000..97db3853dc82
--- /dev/null
+++ b/arch/i386/lib/bitops.c
@@ -0,0 +1,70 @@
1 | #include <linux/bitops.h> | ||
2 | #include <linux/module.h> | ||
3 | |||
4 | /** | ||
5 | * find_next_bit - find the next set bit in a memory region | ||
6 | * @addr: The address to base the search on | ||
7 | * @offset: The bitnumber to start searching at | ||
8 | * @size: The maximum size to search | ||
9 | */ | ||
10 | int find_next_bit(const unsigned long *addr, int size, int offset) | ||
11 | { | ||
12 | const unsigned long *p = addr + (offset >> 5); | ||
13 | int set = 0, bit = offset & 31, res; | ||
14 | |||
15 | if (bit) { | ||
16 | /* | ||
17 | * Look for nonzero in the first 32 bits: | ||
18 | */ | ||
19 | __asm__("bsfl %1,%0\n\t" | ||
20 | "jne 1f\n\t" | ||
21 | "movl $32, %0\n" | ||
22 | "1:" | ||
23 | : "=r" (set) | ||
24 | : "r" (*p >> bit)); | ||
25 | if (set < (32 - bit)) | ||
26 | return set + offset; | ||
27 | set = 32 - bit; | ||
28 | p++; | ||
29 | } | ||
30 | /* | ||
31 | * No set bit yet, search remaining full words for a bit | ||
32 | */ | ||
33 | res = find_first_bit (p, size - 32 * (p - addr)); | ||
34 | return (offset + set + res); | ||
35 | } | ||
36 | EXPORT_SYMBOL(find_next_bit); | ||
37 | |||
38 | /** | ||
39 | * find_next_zero_bit - find the next zero bit in a memory region | ||
40 | * @addr: The address to base the search on | ||
41 | * @offset: The bitnumber to start searching at | ||
42 | * @size: The maximum size to search | ||
43 | */ | ||
44 | int find_next_zero_bit(const unsigned long *addr, int size, int offset) | ||
45 | { | ||
46 | unsigned long * p = ((unsigned long *) addr) + (offset >> 5); | ||
47 | int set = 0, bit = offset & 31, res; | ||
48 | |||
49 | if (bit) { | ||
50 | /* | ||
51 | * Look for zero in the first 32 bits. | ||
52 | */ | ||
53 | __asm__("bsfl %1,%0\n\t" | ||
54 | "jne 1f\n\t" | ||
55 | "movl $32, %0\n" | ||
56 | "1:" | ||
57 | : "=r" (set) | ||
58 | : "r" (~(*p >> bit))); | ||
59 | if (set < (32 - bit)) | ||
60 | return set + offset; | ||
61 | set = 32 - bit; | ||
62 | p++; | ||
63 | } | ||
64 | /* | ||
65 | * No zero yet, search remaining full bytes for a zero | ||
66 | */ | ||
67 | res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); | ||
68 | return (offset + set + res); | ||
69 | } | ||
70 | EXPORT_SYMBOL(find_next_zero_bit); | ||
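For reference, the search that find_next_bit() performs can be written in portable C. The sketch below is an editorial illustration only (not part of the patch); it assumes 32-bit longs, as on i386, whereas the assembly above scans the partial word with a single bsfl instruction.

static int find_next_bit_ref(const unsigned long *addr, int size, int offset)
{
	int i;

	for (i = offset; i < size; i++)		/* scan bit by bit from offset */
		if (addr[i >> 5] & (1UL << (i & 31)))
			return i;		/* bit number of the next set bit */
	return size;				/* no set bit at or after offset */
}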
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
new file mode 100644
index 000000000000..94c7867ddc33
--- /dev/null
+++ b/arch/i386/lib/checksum.S
@@ -0,0 +1,496 @@
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * IP/TCP/UDP checksumming routines | ||
7 | * | ||
8 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | ||
9 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | ||
10 | * Tom May, <ftom@netcom.com> | ||
11 | * Pentium Pro/II routines: | ||
12 | * Alexander Kjeldaas <astor@guardian.no> | ||
13 | * Finn Arne Gangstad <finnag@guardian.no> | ||
14 | * Lots of code moved from tcp.c and ip.c; see those files | ||
15 | * for more names. | ||
16 | * | ||
17 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | ||
18 | * handling. | ||
19 | * Andi Kleen, add zeroing on error | ||
20 | * converted to pure assembler | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or | ||
23 | * modify it under the terms of the GNU General Public License | ||
24 | * as published by the Free Software Foundation; either version | ||
25 | * 2 of the License, or (at your option) any later version. | ||
26 | */ | ||
27 | |||
28 | #include <linux/config.h> | ||
29 | #include <asm/errno.h> | ||
30 | |||
31 | /* | ||
32 | * computes a partial checksum, e.g. for TCP/UDP fragments | ||
33 | */ | ||
34 | |||
35 | /* | ||
36 | unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | ||
37 | */ | ||
38 | |||
39 | .text | ||
40 | .align 4 | ||
41 | .globl csum_partial | ||
42 | |||
43 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | ||
44 | |||
45 | /* | ||
46 | * Experiments with Ethernet and SLIP connections show that buff | ||
47 | * is aligned on either a 2-byte or 4-byte boundary. We get at | ||
48 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | ||
49 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | ||
50 | * alignment for the unrolled loop. | ||
51 | */ | ||
52 | csum_partial: | ||
53 | pushl %esi | ||
54 | pushl %ebx | ||
55 | movl 20(%esp),%eax # Function arg: unsigned int sum | ||
56 | movl 16(%esp),%ecx # Function arg: int len | ||
57 | movl 12(%esp),%esi # Function arg: unsigned char *buff | ||
58 | testl $3, %esi # Check alignment. | ||
59 | jz 2f # Jump if alignment is ok. | ||
60 | testl $1, %esi # Check alignment. | ||
61 | jz 10f # Jump if alignment is boundary of 2bytes. | ||
62 | |||
63 | # buf is odd | ||
64 | dec %ecx | ||
65 | jl 8f | ||
66 | movzbl (%esi), %ebx | ||
67 | adcl %ebx, %eax | ||
68 | roll $8, %eax | ||
69 | inc %esi | ||
70 | testl $2, %esi | ||
71 | jz 2f | ||
72 | 10: | ||
73 | subl $2, %ecx # Alignment uses up two bytes. | ||
74 | jae 1f # Jump if we had at least two bytes. | ||
75 | addl $2, %ecx # ecx was < 2. Deal with it. | ||
76 | jmp 4f | ||
77 | 1: movw (%esi), %bx | ||
78 | addl $2, %esi | ||
79 | addw %bx, %ax | ||
80 | adcl $0, %eax | ||
81 | 2: | ||
82 | movl %ecx, %edx | ||
83 | shrl $5, %ecx | ||
84 | jz 2f | ||
85 | testl %esi, %esi | ||
86 | 1: movl (%esi), %ebx | ||
87 | adcl %ebx, %eax | ||
88 | movl 4(%esi), %ebx | ||
89 | adcl %ebx, %eax | ||
90 | movl 8(%esi), %ebx | ||
91 | adcl %ebx, %eax | ||
92 | movl 12(%esi), %ebx | ||
93 | adcl %ebx, %eax | ||
94 | movl 16(%esi), %ebx | ||
95 | adcl %ebx, %eax | ||
96 | movl 20(%esi), %ebx | ||
97 | adcl %ebx, %eax | ||
98 | movl 24(%esi), %ebx | ||
99 | adcl %ebx, %eax | ||
100 | movl 28(%esi), %ebx | ||
101 | adcl %ebx, %eax | ||
102 | lea 32(%esi), %esi | ||
103 | dec %ecx | ||
104 | jne 1b | ||
105 | adcl $0, %eax | ||
106 | 2: movl %edx, %ecx | ||
107 | andl $0x1c, %edx | ||
108 | je 4f | ||
109 | shrl $2, %edx # This clears CF | ||
110 | 3: adcl (%esi), %eax | ||
111 | lea 4(%esi), %esi | ||
112 | dec %edx | ||
113 | jne 3b | ||
114 | adcl $0, %eax | ||
115 | 4: andl $3, %ecx | ||
116 | jz 7f | ||
117 | cmpl $2, %ecx | ||
118 | jb 5f | ||
119 | movw (%esi),%cx | ||
120 | leal 2(%esi),%esi | ||
121 | je 6f | ||
122 | shll $16,%ecx | ||
123 | 5: movb (%esi),%cl | ||
124 | 6: addl %ecx,%eax | ||
125 | adcl $0, %eax | ||
126 | 7: | ||
127 | testl $1, 12(%esp) | ||
128 | jz 8f | ||
129 | roll $8, %eax | ||
130 | 8: | ||
131 | popl %ebx | ||
132 | popl %esi | ||
133 | ret | ||
134 | |||
135 | #else | ||
136 | |||
137 | /* Version for PentiumII/PPro */ | ||
138 | |||
139 | csum_partial: | ||
140 | pushl %esi | ||
141 | pushl %ebx | ||
142 | movl 20(%esp),%eax # Function arg: unsigned int sum | ||
143 | movl 16(%esp),%ecx # Function arg: int len | ||
144 | movl 12(%esp),%esi # Function arg: const unsigned char *buf | ||
145 | |||
146 | testl $3, %esi | ||
147 | jnz 25f | ||
148 | 10: | ||
149 | movl %ecx, %edx | ||
150 | movl %ecx, %ebx | ||
151 | andl $0x7c, %ebx | ||
152 | shrl $7, %ecx | ||
153 | addl %ebx,%esi | ||
154 | shrl $2, %ebx | ||
155 | negl %ebx | ||
156 | lea 45f(%ebx,%ebx,2), %ebx | ||
157 | testl %esi, %esi | ||
158 | jmp *%ebx | ||
159 | |||
160 | # Handle 2-byte-aligned regions | ||
161 | 20: addw (%esi), %ax | ||
162 | lea 2(%esi), %esi | ||
163 | adcl $0, %eax | ||
164 | jmp 10b | ||
165 | 25: | ||
166 | testl $1, %esi | ||
167 | jz 30f | ||
168 | # buf is odd | ||
169 | dec %ecx | ||
170 | jl 90f | ||
171 | movzbl (%esi), %ebx | ||
172 | addl %ebx, %eax | ||
173 | adcl $0, %eax | ||
174 | roll $8, %eax | ||
175 | inc %esi | ||
176 | testl $2, %esi | ||
177 | jz 10b | ||
178 | |||
179 | 30: subl $2, %ecx | ||
180 | ja 20b | ||
181 | je 32f | ||
182 | addl $2, %ecx | ||
183 | jz 80f | ||
184 | movzbl (%esi),%ebx # csumming 1 byte, 2-aligned | ||
185 | addl %ebx, %eax | ||
186 | adcl $0, %eax | ||
187 | jmp 80f | ||
188 | 32: | ||
189 | addw (%esi), %ax # csumming 2 bytes, 2-aligned | ||
190 | adcl $0, %eax | ||
191 | jmp 80f | ||
192 | |||
193 | 40: | ||
194 | addl -128(%esi), %eax | ||
195 | adcl -124(%esi), %eax | ||
196 | adcl -120(%esi), %eax | ||
197 | adcl -116(%esi), %eax | ||
198 | adcl -112(%esi), %eax | ||
199 | adcl -108(%esi), %eax | ||
200 | adcl -104(%esi), %eax | ||
201 | adcl -100(%esi), %eax | ||
202 | adcl -96(%esi), %eax | ||
203 | adcl -92(%esi), %eax | ||
204 | adcl -88(%esi), %eax | ||
205 | adcl -84(%esi), %eax | ||
206 | adcl -80(%esi), %eax | ||
207 | adcl -76(%esi), %eax | ||
208 | adcl -72(%esi), %eax | ||
209 | adcl -68(%esi), %eax | ||
210 | adcl -64(%esi), %eax | ||
211 | adcl -60(%esi), %eax | ||
212 | adcl -56(%esi), %eax | ||
213 | adcl -52(%esi), %eax | ||
214 | adcl -48(%esi), %eax | ||
215 | adcl -44(%esi), %eax | ||
216 | adcl -40(%esi), %eax | ||
217 | adcl -36(%esi), %eax | ||
218 | adcl -32(%esi), %eax | ||
219 | adcl -28(%esi), %eax | ||
220 | adcl -24(%esi), %eax | ||
221 | adcl -20(%esi), %eax | ||
222 | adcl -16(%esi), %eax | ||
223 | adcl -12(%esi), %eax | ||
224 | adcl -8(%esi), %eax | ||
225 | adcl -4(%esi), %eax | ||
226 | 45: | ||
227 | lea 128(%esi), %esi | ||
228 | adcl $0, %eax | ||
229 | dec %ecx | ||
230 | jge 40b | ||
231 | movl %edx, %ecx | ||
232 | 50: andl $3, %ecx | ||
233 | jz 80f | ||
234 | |||
235 | # Handle the last 1-3 bytes without jumping | ||
236 | notl %ecx # 1->2, 2->1, 3->0, higher bits are masked | ||
237 | movl $0xffffff,%ebx # by the shll and shrl instructions | ||
238 | shll $3,%ecx | ||
239 | shrl %cl,%ebx | ||
240 | andl -128(%esi),%ebx # esi is 4-aligned so should be ok | ||
241 | addl %ebx,%eax | ||
242 | adcl $0,%eax | ||
243 | 80: | ||
244 | testl $1, 12(%esp) | ||
245 | jz 90f | ||
246 | roll $8, %eax | ||
247 | 90: | ||
248 | popl %ebx | ||
249 | popl %esi | ||
250 | ret | ||
251 | |||
252 | #endif | ||
253 | |||
254 | /* | ||
255 | unsigned int csum_partial_copy_generic (const char *src, char *dst, | ||
256 | int len, int sum, int *src_err_ptr, int *dst_err_ptr) | ||
257 | */ | ||
258 | |||
259 | /* | ||
260 | * Copy from ds while checksumming, otherwise like csum_partial | ||
261 | * | ||
262 | * The macros SRC and DST specify the type of access for the instruction. | ||
263 | * thus we can call a custom exception handler for all access types. | ||
264 | * | ||
265 | * FIXME: could someone double-check whether I haven't mixed up some SRC and | ||
266 | * DST definitions? It's damn hard to trigger all cases. I hope I got | ||
267 | * them all but there's no guarantee. | ||
268 | */ | ||
269 | |||
270 | #define SRC(y...) \ | ||
271 | 9999: y; \ | ||
272 | .section __ex_table, "a"; \ | ||
273 | .long 9999b, 6001f ; \ | ||
274 | .previous | ||
275 | |||
276 | #define DST(y...) \ | ||
277 | 9999: y; \ | ||
278 | .section __ex_table, "a"; \ | ||
279 | .long 9999b, 6002f ; \ | ||
280 | .previous | ||
281 | |||
282 | .align 4 | ||
283 | .globl csum_partial_copy_generic | ||
284 | |||
285 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | ||
286 | |||
287 | #define ARGBASE 16 | ||
288 | #define FP 12 | ||
289 | |||
290 | csum_partial_copy_generic: | ||
291 | subl $4,%esp | ||
292 | pushl %edi | ||
293 | pushl %esi | ||
294 | pushl %ebx | ||
295 | movl ARGBASE+16(%esp),%eax # sum | ||
296 | movl ARGBASE+12(%esp),%ecx # len | ||
297 | movl ARGBASE+4(%esp),%esi # src | ||
298 | movl ARGBASE+8(%esp),%edi # dst | ||
299 | |||
300 | testl $2, %edi # Check alignment. | ||
301 | jz 2f # Jump if alignment is ok. | ||
302 | subl $2, %ecx # Alignment uses up two bytes. | ||
303 | jae 1f # Jump if we had at least two bytes. | ||
304 | addl $2, %ecx # ecx was < 2. Deal with it. | ||
305 | jmp 4f | ||
306 | SRC(1: movw (%esi), %bx ) | ||
307 | addl $2, %esi | ||
308 | DST( movw %bx, (%edi) ) | ||
309 | addl $2, %edi | ||
310 | addw %bx, %ax | ||
311 | adcl $0, %eax | ||
312 | 2: | ||
313 | movl %ecx, FP(%esp) | ||
314 | shrl $5, %ecx | ||
315 | jz 2f | ||
316 | testl %esi, %esi | ||
317 | SRC(1: movl (%esi), %ebx ) | ||
318 | SRC( movl 4(%esi), %edx ) | ||
319 | adcl %ebx, %eax | ||
320 | DST( movl %ebx, (%edi) ) | ||
321 | adcl %edx, %eax | ||
322 | DST( movl %edx, 4(%edi) ) | ||
323 | |||
324 | SRC( movl 8(%esi), %ebx ) | ||
325 | SRC( movl 12(%esi), %edx ) | ||
326 | adcl %ebx, %eax | ||
327 | DST( movl %ebx, 8(%edi) ) | ||
328 | adcl %edx, %eax | ||
329 | DST( movl %edx, 12(%edi) ) | ||
330 | |||
331 | SRC( movl 16(%esi), %ebx ) | ||
332 | SRC( movl 20(%esi), %edx ) | ||
333 | adcl %ebx, %eax | ||
334 | DST( movl %ebx, 16(%edi) ) | ||
335 | adcl %edx, %eax | ||
336 | DST( movl %edx, 20(%edi) ) | ||
337 | |||
338 | SRC( movl 24(%esi), %ebx ) | ||
339 | SRC( movl 28(%esi), %edx ) | ||
340 | adcl %ebx, %eax | ||
341 | DST( movl %ebx, 24(%edi) ) | ||
342 | adcl %edx, %eax | ||
343 | DST( movl %edx, 28(%edi) ) | ||
344 | |||
345 | lea 32(%esi), %esi | ||
346 | lea 32(%edi), %edi | ||
347 | dec %ecx | ||
348 | jne 1b | ||
349 | adcl $0, %eax | ||
350 | 2: movl FP(%esp), %edx | ||
351 | movl %edx, %ecx | ||
352 | andl $0x1c, %edx | ||
353 | je 4f | ||
354 | shrl $2, %edx # This clears CF | ||
355 | SRC(3: movl (%esi), %ebx ) | ||
356 | adcl %ebx, %eax | ||
357 | DST( movl %ebx, (%edi) ) | ||
358 | lea 4(%esi), %esi | ||
359 | lea 4(%edi), %edi | ||
360 | dec %edx | ||
361 | jne 3b | ||
362 | adcl $0, %eax | ||
363 | 4: andl $3, %ecx | ||
364 | jz 7f | ||
365 | cmpl $2, %ecx | ||
366 | jb 5f | ||
367 | SRC( movw (%esi), %cx ) | ||
368 | leal 2(%esi), %esi | ||
369 | DST( movw %cx, (%edi) ) | ||
370 | leal 2(%edi), %edi | ||
371 | je 6f | ||
372 | shll $16,%ecx | ||
373 | SRC(5: movb (%esi), %cl ) | ||
374 | DST( movb %cl, (%edi) ) | ||
375 | 6: addl %ecx, %eax | ||
376 | adcl $0, %eax | ||
377 | 7: | ||
378 | 5000: | ||
379 | |||
380 | # Exception handler: | ||
381 | .section .fixup, "ax" | ||
382 | |||
383 | 6001: | ||
384 | movl ARGBASE+20(%esp), %ebx # src_err_ptr | ||
385 | movl $-EFAULT, (%ebx) | ||
386 | |||
387 | # zero the complete destination - computing the rest | ||
388 | # is too much work | ||
389 | movl ARGBASE+8(%esp), %edi # dst | ||
390 | movl ARGBASE+12(%esp), %ecx # len | ||
391 | xorl %eax,%eax | ||
392 | rep ; stosb | ||
393 | |||
394 | jmp 5000b | ||
395 | |||
396 | 6002: | ||
397 | movl ARGBASE+24(%esp), %ebx # dst_err_ptr | ||
398 | movl $-EFAULT,(%ebx) | ||
399 | jmp 5000b | ||
400 | |||
401 | .previous | ||
402 | |||
403 | popl %ebx | ||
404 | popl %esi | ||
405 | popl %edi | ||
406 | popl %ecx # equivalent to addl $4,%esp | ||
407 | ret | ||
408 | |||
409 | #else | ||
410 | |||
411 | /* Version for PentiumII/PPro */ | ||
412 | |||
413 | #define ROUND1(x) \ | ||
414 | SRC(movl x(%esi), %ebx ) ; \ | ||
415 | addl %ebx, %eax ; \ | ||
416 | DST(movl %ebx, x(%edi) ) ; | ||
417 | |||
418 | #define ROUND(x) \ | ||
419 | SRC(movl x(%esi), %ebx ) ; \ | ||
420 | adcl %ebx, %eax ; \ | ||
421 | DST(movl %ebx, x(%edi) ) ; | ||
422 | |||
423 | #define ARGBASE 12 | ||
424 | |||
425 | csum_partial_copy_generic: | ||
426 | pushl %ebx | ||
427 | pushl %edi | ||
428 | pushl %esi | ||
429 | movl ARGBASE+4(%esp),%esi #src | ||
430 | movl ARGBASE+8(%esp),%edi #dst | ||
431 | movl ARGBASE+12(%esp),%ecx #len | ||
432 | movl ARGBASE+16(%esp),%eax #sum | ||
433 | # movl %ecx, %edx | ||
434 | movl %ecx, %ebx | ||
435 | movl %esi, %edx | ||
436 | shrl $6, %ecx | ||
437 | andl $0x3c, %ebx | ||
438 | negl %ebx | ||
439 | subl %ebx, %esi | ||
440 | subl %ebx, %edi | ||
441 | lea -1(%esi),%edx | ||
442 | andl $-32,%edx | ||
443 | lea 3f(%ebx,%ebx), %ebx | ||
444 | testl %esi, %esi | ||
445 | jmp *%ebx | ||
446 | 1: addl $64,%esi | ||
447 | addl $64,%edi | ||
448 | SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) | ||
449 | ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) | ||
450 | ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) | ||
451 | ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) | ||
452 | ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) | ||
453 | 3: adcl $0,%eax | ||
454 | addl $64, %edx | ||
455 | dec %ecx | ||
456 | jge 1b | ||
457 | 4: movl ARGBASE+12(%esp),%edx #len | ||
458 | andl $3, %edx | ||
459 | jz 7f | ||
460 | cmpl $2, %edx | ||
461 | jb 5f | ||
462 | SRC( movw (%esi), %dx ) | ||
463 | leal 2(%esi), %esi | ||
464 | DST( movw %dx, (%edi) ) | ||
465 | leal 2(%edi), %edi | ||
466 | je 6f | ||
467 | shll $16,%edx | ||
468 | 5: | ||
469 | SRC( movb (%esi), %dl ) | ||
470 | DST( movb %dl, (%edi) ) | ||
471 | 6: addl %edx, %eax | ||
472 | adcl $0, %eax | ||
473 | 7: | ||
474 | .section .fixup, "ax" | ||
475 | 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr | ||
476 | movl $-EFAULT, (%ebx) | ||
477 | # zero the complete destination (computing the rest is too much work) | ||
478 | movl ARGBASE+8(%esp),%edi # dst | ||
479 | movl ARGBASE+12(%esp),%ecx # len | ||
480 | xorl %eax,%eax | ||
481 | rep; stosb | ||
482 | jmp 7b | ||
483 | 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr | ||
484 | movl $-EFAULT, (%ebx) | ||
485 | jmp 7b | ||
486 | .previous | ||
487 | |||
488 | popl %esi | ||
489 | popl %edi | ||
490 | popl %ebx | ||
491 | ret | ||
492 | |||
493 | #undef ROUND | ||
494 | #undef ROUND1 | ||
495 | |||
496 | #endif | ||
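What csum_partial() accumulates is the Internet (RFC 1071) ones'-complement sum. As a reading aid only, and not part of the patch, here is a simple C reference; alignment tricks aside (the roll $8 used for odd start addresses), it agrees with the assembly up to the final 16-bit fold performed later by csum_fold().

static unsigned int csum_partial_ref(const unsigned char *buff, int len,
				     unsigned int sum)
{
	unsigned long long acc = sum;		/* wide accumulator for carries */
	int i;

	for (i = 0; i + 1 < len; i += 2)	/* little-endian 16-bit words */
		acc += (unsigned int)(buff[i] | (buff[i + 1] << 8));
	if (len & 1)
		acc += buff[len - 1];		/* trailing odd byte */
	while (acc >> 32)			/* fold carries back into 32 bits */
		acc = (acc & 0xffffffffULL) + (acc >> 32);
	return (unsigned int)acc;
}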
diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c
new file mode 100644
index 000000000000..ab43394dc775
--- /dev/null
+++ b/arch/i386/lib/dec_and_lock.c
@@ -0,0 +1,40 @@
1 | /* | ||
2 | * x86 version of "atomic_dec_and_lock()" using | ||
3 | * the atomic "cmpxchg" instruction. | ||
4 | * | ||
5 | * (For CPU's lacking cmpxchg, we use the slow | ||
6 | * generic version, and this one never even gets | ||
7 | * compiled). | ||
8 | */ | ||
9 | |||
10 | #include <linux/spinlock.h> | ||
11 | #include <asm/atomic.h> | ||
12 | |||
13 | int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) | ||
14 | { | ||
15 | int counter; | ||
16 | int newcount; | ||
17 | |||
18 | repeat: | ||
19 | counter = atomic_read(atomic); | ||
20 | newcount = counter-1; | ||
21 | |||
22 | if (!newcount) | ||
23 | goto slow_path; | ||
24 | |||
25 | asm volatile("lock; cmpxchgl %1,%2" | ||
26 | :"=a" (newcount) | ||
27 | :"r" (newcount), "m" (atomic->counter), "0" (counter)); | ||
28 | |||
29 | /* If the above failed, "eax" will have changed */ | ||
30 | if (newcount != counter) | ||
31 | goto repeat; | ||
32 | return 0; | ||
33 | |||
34 | slow_path: | ||
35 | spin_lock(lock); | ||
36 | if (atomic_dec_and_test(atomic)) | ||
37 | return 1; | ||
38 | spin_unlock(lock); | ||
39 | return 0; | ||
40 | } | ||
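The comment above describes the cmpxchg-based fast path. Purely as an illustration (the helper name is made up and this is not kernel API), the same "decrement unless the result would hit zero" loop can be written with the GCC __sync compare-and-swap builtin; the slow path that takes the spinlock is left to the caller, as in the code above.

static int dec_unless_last(int *counter)
{
	int old, new;

	do {
		old = *counter;			/* like atomic_read() */
		new = old - 1;
		if (new == 0)
			return 0;		/* would hit zero: caller must lock and re-check */
	} while (!__sync_bool_compare_and_swap(counter, old, new));
	return 1;				/* decremented without reaching zero */
}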
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
new file mode 100644
index 000000000000..080639f262b1
--- /dev/null
+++ b/arch/i386/lib/delay.c
@@ -0,0 +1,49 @@
1 | /* | ||
2 | * Precise Delay Loops for i386 | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
6 | * | ||
7 | * The __delay function must _NOT_ be inlined as its execution time | ||
8 | * depends wildly on alignment on many x86 processors. The additional | ||
9 | * jump magic is needed to get the timing stable on all the CPU's | ||
10 | * we have to worry about. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <asm/processor.h> | ||
17 | #include <asm/delay.h> | ||
18 | #include <asm/timer.h> | ||
19 | |||
20 | #ifdef CONFIG_SMP | ||
21 | #include <asm/smp.h> | ||
22 | #endif | ||
23 | |||
24 | extern struct timer_opts* timer; | ||
25 | |||
26 | void __delay(unsigned long loops) | ||
27 | { | ||
28 | cur_timer->delay(loops); | ||
29 | } | ||
30 | |||
31 | inline void __const_udelay(unsigned long xloops) | ||
32 | { | ||
33 | int d0; | ||
34 | xloops *= 4; | ||
35 | __asm__("mull %0" | ||
36 | :"=d" (xloops), "=&a" (d0) | ||
37 | :"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4))); | ||
38 | __delay(++xloops); | ||
39 | } | ||
40 | |||
41 | void __udelay(unsigned long usecs) | ||
42 | { | ||
43 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | ||
44 | } | ||
45 | |||
46 | void __ndelay(unsigned long nsecs) | ||
47 | { | ||
48 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | ||
49 | } | ||
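The magic numbers are fixed-point scale factors: 0x10c7 is 2^32/1,000,000 rounded up and 0x5 is 2^32/1,000,000,000 rounded up, so usecs * 0x10c7 expresses microseconds as a 32.32-bit fraction of a second. The mull against loops_per_jiffy * (HZ/4) (xloops is pre-multiplied by 4 to compensate for the /4) leaves the wanted loop count in the high half of the product, %edx. A quick host-side check of the two constants, for illustration only:

#include <stdio.h>

int main(void)
{
	unsigned long long us = (1ULL << 32) / 1000000 + 1;	/* 2^32/1e6, rounded up */
	unsigned long long ns = (1ULL << 32) / 1000000000 + 1;	/* 2^32/1e9, rounded up */

	printf("%#llx %#llx\n", us, ns);	/* prints 0x10c7 0x5 */
	return 0;
}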
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S
new file mode 100644
index 000000000000..62d7f178a326
--- /dev/null
+++ b/arch/i386/lib/getuser.S
@@ -0,0 +1,70 @@
1 | /* | ||
2 | * __get_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * | ||
6 | * These functions have a non-standard call interface | ||
7 | * to make them more efficient, especially as they | ||
8 | * return an error value in addition to the "real" | ||
9 | * return value. | ||
10 | */ | ||
11 | #include <asm/thread_info.h> | ||
12 | |||
13 | |||
14 | /* | ||
15 | * __get_user_X | ||
16 | * | ||
17 | * Inputs: %eax contains the address | ||
18 | * | ||
19 | * Outputs: %eax is error code (0 or -EFAULT) | ||
20 | * %edx contains zero-extended value | ||
21 | * | ||
22 | * These functions should not modify any other registers, | ||
23 | * as they get called from within inline assembly. | ||
24 | */ | ||
25 | |||
26 | .text | ||
27 | .align 4 | ||
28 | .globl __get_user_1 | ||
29 | __get_user_1: | ||
30 | GET_THREAD_INFO(%edx) | ||
31 | cmpl TI_addr_limit(%edx),%eax | ||
32 | jae bad_get_user | ||
33 | 1: movzbl (%eax),%edx | ||
34 | xorl %eax,%eax | ||
35 | ret | ||
36 | |||
37 | .align 4 | ||
38 | .globl __get_user_2 | ||
39 | __get_user_2: | ||
40 | addl $1,%eax | ||
41 | jc bad_get_user | ||
42 | GET_THREAD_INFO(%edx) | ||
43 | cmpl TI_addr_limit(%edx),%eax | ||
44 | jae bad_get_user | ||
45 | 2: movzwl -1(%eax),%edx | ||
46 | xorl %eax,%eax | ||
47 | ret | ||
48 | |||
49 | .align 4 | ||
50 | .globl __get_user_4 | ||
51 | __get_user_4: | ||
52 | addl $3,%eax | ||
53 | jc bad_get_user | ||
54 | GET_THREAD_INFO(%edx) | ||
55 | cmpl TI_addr_limit(%edx),%eax | ||
56 | jae bad_get_user | ||
57 | 3: movl -3(%eax),%edx | ||
58 | xorl %eax,%eax | ||
59 | ret | ||
60 | |||
61 | bad_get_user: | ||
62 | xorl %edx,%edx | ||
63 | movl $-14,%eax | ||
64 | ret | ||
65 | |||
66 | .section __ex_table,"a" | ||
67 | .long 1b,bad_get_user | ||
68 | .long 2b,bad_get_user | ||
69 | .long 3b,bad_get_user | ||
70 | .previous | ||
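To illustrate the register interface documented at the top of this file (address passed in %eax, error code returned in %eax, zero-extended value in %edx), a caller-side wrapper might look like the hypothetical macro below; this is a sketch only, not the kernel's actual get_user() implementation.

#define demo_get_user_4(val, uaddr)				\
({								\
	long __err;						\
	unsigned long __v;					\
	asm volatile("call __get_user_4"			\
		     : "=a" (__err), "=d" (__v)			\
		     : "0" (uaddr)				\
		     : "cc");					\
	(val) = (__typeof__(val))__v;				\
	__err;							\
})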
diff --git a/arch/i386/lib/memcpy.c b/arch/i386/lib/memcpy.c
new file mode 100644
index 000000000000..891b2359d18a
--- /dev/null
+++ b/arch/i386/lib/memcpy.c
@@ -0,0 +1,44 @@
1 | #include <linux/config.h> | ||
2 | #include <linux/string.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | #undef memcpy | ||
6 | #undef memset | ||
7 | |||
8 | void *memcpy(void *to, const void *from, size_t n) | ||
9 | { | ||
10 | #ifdef CONFIG_X86_USE_3DNOW | ||
11 | return __memcpy3d(to, from, n); | ||
12 | #else | ||
13 | return __memcpy(to, from, n); | ||
14 | #endif | ||
15 | } | ||
16 | EXPORT_SYMBOL(memcpy); | ||
17 | |||
18 | void *memset(void *s, int c, size_t count) | ||
19 | { | ||
20 | return __memset(s, c, count); | ||
21 | } | ||
22 | EXPORT_SYMBOL(memset); | ||
23 | |||
24 | void *memmove(void *dest, const void *src, size_t n) | ||
25 | { | ||
26 | int d0, d1, d2; | ||
27 | |||
28 | if (dest < src) { | ||
29 | memcpy(dest,src,n); | ||
30 | } else { | ||
31 | __asm__ __volatile__( | ||
32 | "std\n\t" | ||
33 | "rep\n\t" | ||
34 | "movsb\n\t" | ||
35 | "cld" | ||
36 | : "=&c" (d0), "=&S" (d1), "=&D" (d2) | ||
37 | :"0" (n), | ||
38 | "1" (n-1+(const char *)src), | ||
39 | "2" (n-1+(char *)dest) | ||
40 | :"memory"); | ||
41 | } | ||
42 | return dest; | ||
43 | } | ||
44 | EXPORT_SYMBOL(memmove); | ||
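memmove() above relies on the copy direction: when the destination starts below the source a forward copy is safe, otherwise the asm copies backwards with std followed by rep movsb so that overlapping bytes are not clobbered before they are read. A portable C sketch of the same logic, for reference only:

#include <stddef.h>

static void *memmove_ref(void *dest, const void *src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

	if (d < s) {
		while (n--)
			*d++ = *s++;		/* forward copy is safe */
	} else {
		while (n--)
			d[n] = s[n];		/* copy from the end backwards */
	}
	return dest;
}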
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
new file mode 100644
index 000000000000..01f8b1a2cc84
--- /dev/null
+++ b/arch/i386/lib/mmx.c
@@ -0,0 +1,399 @@
1 | #include <linux/config.h> | ||
2 | #include <linux/types.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/hardirq.h> | ||
6 | |||
7 | #include <asm/i387.h> | ||
8 | |||
9 | |||
10 | /* | ||
11 | * MMX 3DNow! library helper functions | ||
12 | * | ||
13 | * To do: | ||
14 | * We can use MMX just for prefetch in IRQ's. This may be a win. | ||
15 | * (reported so on K6-III) | ||
16 | * We should use a better code neutral filler for the short jump | ||
17 | * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? | ||
18 | * We also want to clobber the filler register so we don't get any | ||
19 | * register forwarding stalls on the filler. | ||
20 | * | ||
21 | * Add *user handling. Checksums are not a win with MMX on any CPU | ||
22 | * tested so far for any MMX solution figured. | ||
23 | * | ||
24 | * 22/09/2000 - Arjan van de Ven | ||
25 | * Improved for non-engineering-sample Athlons | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | void *_mmx_memcpy(void *to, const void *from, size_t len) | ||
30 | { | ||
31 | void *p; | ||
32 | int i; | ||
33 | |||
34 | if (unlikely(in_interrupt())) | ||
35 | return __memcpy(to, from, len); | ||
36 | |||
37 | p = to; | ||
38 | i = len >> 6; /* len/64 */ | ||
39 | |||
40 | kernel_fpu_begin(); | ||
41 | |||
42 | __asm__ __volatile__ ( | ||
43 | "1: prefetch (%0)\n" /* This set is 28 bytes */ | ||
44 | " prefetch 64(%0)\n" | ||
45 | " prefetch 128(%0)\n" | ||
46 | " prefetch 192(%0)\n" | ||
47 | " prefetch 256(%0)\n" | ||
48 | "2: \n" | ||
49 | ".section .fixup, \"ax\"\n" | ||
50 | "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ | ||
51 | " jmp 2b\n" | ||
52 | ".previous\n" | ||
53 | ".section __ex_table,\"a\"\n" | ||
54 | " .align 4\n" | ||
55 | " .long 1b, 3b\n" | ||
56 | ".previous" | ||
57 | : : "r" (from) ); | ||
58 | |||
59 | |||
60 | for(; i>5; i--) | ||
61 | { | ||
62 | __asm__ __volatile__ ( | ||
63 | "1: prefetch 320(%0)\n" | ||
64 | "2: movq (%0), %%mm0\n" | ||
65 | " movq 8(%0), %%mm1\n" | ||
66 | " movq 16(%0), %%mm2\n" | ||
67 | " movq 24(%0), %%mm3\n" | ||
68 | " movq %%mm0, (%1)\n" | ||
69 | " movq %%mm1, 8(%1)\n" | ||
70 | " movq %%mm2, 16(%1)\n" | ||
71 | " movq %%mm3, 24(%1)\n" | ||
72 | " movq 32(%0), %%mm0\n" | ||
73 | " movq 40(%0), %%mm1\n" | ||
74 | " movq 48(%0), %%mm2\n" | ||
75 | " movq 56(%0), %%mm3\n" | ||
76 | " movq %%mm0, 32(%1)\n" | ||
77 | " movq %%mm1, 40(%1)\n" | ||
78 | " movq %%mm2, 48(%1)\n" | ||
79 | " movq %%mm3, 56(%1)\n" | ||
80 | ".section .fixup, \"ax\"\n" | ||
81 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
82 | " jmp 2b\n" | ||
83 | ".previous\n" | ||
84 | ".section __ex_table,\"a\"\n" | ||
85 | " .align 4\n" | ||
86 | " .long 1b, 3b\n" | ||
87 | ".previous" | ||
88 | : : "r" (from), "r" (to) : "memory"); | ||
89 | from+=64; | ||
90 | to+=64; | ||
91 | } | ||
92 | |||
93 | for(; i>0; i--) | ||
94 | { | ||
95 | __asm__ __volatile__ ( | ||
96 | " movq (%0), %%mm0\n" | ||
97 | " movq 8(%0), %%mm1\n" | ||
98 | " movq 16(%0), %%mm2\n" | ||
99 | " movq 24(%0), %%mm3\n" | ||
100 | " movq %%mm0, (%1)\n" | ||
101 | " movq %%mm1, 8(%1)\n" | ||
102 | " movq %%mm2, 16(%1)\n" | ||
103 | " movq %%mm3, 24(%1)\n" | ||
104 | " movq 32(%0), %%mm0\n" | ||
105 | " movq 40(%0), %%mm1\n" | ||
106 | " movq 48(%0), %%mm2\n" | ||
107 | " movq 56(%0), %%mm3\n" | ||
108 | " movq %%mm0, 32(%1)\n" | ||
109 | " movq %%mm1, 40(%1)\n" | ||
110 | " movq %%mm2, 48(%1)\n" | ||
111 | " movq %%mm3, 56(%1)\n" | ||
112 | : : "r" (from), "r" (to) : "memory"); | ||
113 | from+=64; | ||
114 | to+=64; | ||
115 | } | ||
116 | /* | ||
117 | * Now do the tail of the block | ||
118 | */ | ||
119 | __memcpy(to, from, len&63); | ||
120 | kernel_fpu_end(); | ||
121 | return p; | ||
122 | } | ||
123 | |||
124 | #ifdef CONFIG_MK7 | ||
125 | |||
126 | /* | ||
127 | * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and | ||
128 | * other MMX using processors do not. | ||
129 | */ | ||
130 | |||
131 | static void fast_clear_page(void *page) | ||
132 | { | ||
133 | int i; | ||
134 | |||
135 | kernel_fpu_begin(); | ||
136 | |||
137 | __asm__ __volatile__ ( | ||
138 | " pxor %%mm0, %%mm0\n" : : | ||
139 | ); | ||
140 | |||
141 | for(i=0;i<4096/64;i++) | ||
142 | { | ||
143 | __asm__ __volatile__ ( | ||
144 | " movntq %%mm0, (%0)\n" | ||
145 | " movntq %%mm0, 8(%0)\n" | ||
146 | " movntq %%mm0, 16(%0)\n" | ||
147 | " movntq %%mm0, 24(%0)\n" | ||
148 | " movntq %%mm0, 32(%0)\n" | ||
149 | " movntq %%mm0, 40(%0)\n" | ||
150 | " movntq %%mm0, 48(%0)\n" | ||
151 | " movntq %%mm0, 56(%0)\n" | ||
152 | : : "r" (page) : "memory"); | ||
153 | page+=64; | ||
154 | } | ||
155 | /* since movntq is weakly-ordered, a "sfence" is needed to become | ||
156 | * ordered again. | ||
157 | */ | ||
158 | __asm__ __volatile__ ( | ||
159 | " sfence \n" : : | ||
160 | ); | ||
161 | kernel_fpu_end(); | ||
162 | } | ||
163 | |||
164 | static void fast_copy_page(void *to, void *from) | ||
165 | { | ||
166 | int i; | ||
167 | |||
168 | kernel_fpu_begin(); | ||
169 | |||
170 | /* maybe the prefetch stuff can go before the expensive fnsave... | ||
171 | * but that is for later. -AV | ||
172 | */ | ||
173 | __asm__ __volatile__ ( | ||
174 | "1: prefetch (%0)\n" | ||
175 | " prefetch 64(%0)\n" | ||
176 | " prefetch 128(%0)\n" | ||
177 | " prefetch 192(%0)\n" | ||
178 | " prefetch 256(%0)\n" | ||
179 | "2: \n" | ||
180 | ".section .fixup, \"ax\"\n" | ||
181 | "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ | ||
182 | " jmp 2b\n" | ||
183 | ".previous\n" | ||
184 | ".section __ex_table,\"a\"\n" | ||
185 | " .align 4\n" | ||
186 | " .long 1b, 3b\n" | ||
187 | ".previous" | ||
188 | : : "r" (from) ); | ||
189 | |||
190 | for(i=0; i<(4096-320)/64; i++) | ||
191 | { | ||
192 | __asm__ __volatile__ ( | ||
193 | "1: prefetch 320(%0)\n" | ||
194 | "2: movq (%0), %%mm0\n" | ||
195 | " movntq %%mm0, (%1)\n" | ||
196 | " movq 8(%0), %%mm1\n" | ||
197 | " movntq %%mm1, 8(%1)\n" | ||
198 | " movq 16(%0), %%mm2\n" | ||
199 | " movntq %%mm2, 16(%1)\n" | ||
200 | " movq 24(%0), %%mm3\n" | ||
201 | " movntq %%mm3, 24(%1)\n" | ||
202 | " movq 32(%0), %%mm4\n" | ||
203 | " movntq %%mm4, 32(%1)\n" | ||
204 | " movq 40(%0), %%mm5\n" | ||
205 | " movntq %%mm5, 40(%1)\n" | ||
206 | " movq 48(%0), %%mm6\n" | ||
207 | " movntq %%mm6, 48(%1)\n" | ||
208 | " movq 56(%0), %%mm7\n" | ||
209 | " movntq %%mm7, 56(%1)\n" | ||
210 | ".section .fixup, \"ax\"\n" | ||
211 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
212 | " jmp 2b\n" | ||
213 | ".previous\n" | ||
214 | ".section __ex_table,\"a\"\n" | ||
215 | " .align 4\n" | ||
216 | " .long 1b, 3b\n" | ||
217 | ".previous" | ||
218 | : : "r" (from), "r" (to) : "memory"); | ||
219 | from+=64; | ||
220 | to+=64; | ||
221 | } | ||
222 | for(i=(4096-320)/64; i<4096/64; i++) | ||
223 | { | ||
224 | __asm__ __volatile__ ( | ||
225 | "2: movq (%0), %%mm0\n" | ||
226 | " movntq %%mm0, (%1)\n" | ||
227 | " movq 8(%0), %%mm1\n" | ||
228 | " movntq %%mm1, 8(%1)\n" | ||
229 | " movq 16(%0), %%mm2\n" | ||
230 | " movntq %%mm2, 16(%1)\n" | ||
231 | " movq 24(%0), %%mm3\n" | ||
232 | " movntq %%mm3, 24(%1)\n" | ||
233 | " movq 32(%0), %%mm4\n" | ||
234 | " movntq %%mm4, 32(%1)\n" | ||
235 | " movq 40(%0), %%mm5\n" | ||
236 | " movntq %%mm5, 40(%1)\n" | ||
237 | " movq 48(%0), %%mm6\n" | ||
238 | " movntq %%mm6, 48(%1)\n" | ||
239 | " movq 56(%0), %%mm7\n" | ||
240 | " movntq %%mm7, 56(%1)\n" | ||
241 | : : "r" (from), "r" (to) : "memory"); | ||
242 | from+=64; | ||
243 | to+=64; | ||
244 | } | ||
245 | /* since movntq is weakly-ordered, a "sfence" is needed to become | ||
246 | * ordered again. | ||
247 | */ | ||
248 | __asm__ __volatile__ ( | ||
249 | " sfence \n" : : | ||
250 | ); | ||
251 | kernel_fpu_end(); | ||
252 | } | ||
253 | |||
254 | #else | ||
255 | |||
256 | /* | ||
257 | * Generic MMX implementation without K7 specific streaming | ||
258 | */ | ||
259 | |||
260 | static void fast_clear_page(void *page) | ||
261 | { | ||
262 | int i; | ||
263 | |||
264 | kernel_fpu_begin(); | ||
265 | |||
266 | __asm__ __volatile__ ( | ||
267 | " pxor %%mm0, %%mm0\n" : : | ||
268 | ); | ||
269 | |||
270 | for(i=0;i<4096/128;i++) | ||
271 | { | ||
272 | __asm__ __volatile__ ( | ||
273 | " movq %%mm0, (%0)\n" | ||
274 | " movq %%mm0, 8(%0)\n" | ||
275 | " movq %%mm0, 16(%0)\n" | ||
276 | " movq %%mm0, 24(%0)\n" | ||
277 | " movq %%mm0, 32(%0)\n" | ||
278 | " movq %%mm0, 40(%0)\n" | ||
279 | " movq %%mm0, 48(%0)\n" | ||
280 | " movq %%mm0, 56(%0)\n" | ||
281 | " movq %%mm0, 64(%0)\n" | ||
282 | " movq %%mm0, 72(%0)\n" | ||
283 | " movq %%mm0, 80(%0)\n" | ||
284 | " movq %%mm0, 88(%0)\n" | ||
285 | " movq %%mm0, 96(%0)\n" | ||
286 | " movq %%mm0, 104(%0)\n" | ||
287 | " movq %%mm0, 112(%0)\n" | ||
288 | " movq %%mm0, 120(%0)\n" | ||
289 | : : "r" (page) : "memory"); | ||
290 | page+=128; | ||
291 | } | ||
292 | |||
293 | kernel_fpu_end(); | ||
294 | } | ||
295 | |||
296 | static void fast_copy_page(void *to, void *from) | ||
297 | { | ||
298 | int i; | ||
299 | |||
300 | |||
301 | kernel_fpu_begin(); | ||
302 | |||
303 | __asm__ __volatile__ ( | ||
304 | "1: prefetch (%0)\n" | ||
305 | " prefetch 64(%0)\n" | ||
306 | " prefetch 128(%0)\n" | ||
307 | " prefetch 192(%0)\n" | ||
308 | " prefetch 256(%0)\n" | ||
309 | "2: \n" | ||
310 | ".section .fixup, \"ax\"\n" | ||
311 | "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ | ||
312 | " jmp 2b\n" | ||
313 | ".previous\n" | ||
314 | ".section __ex_table,\"a\"\n" | ||
315 | " .align 4\n" | ||
316 | " .long 1b, 3b\n" | ||
317 | ".previous" | ||
318 | : : "r" (from) ); | ||
319 | |||
320 | for(i=0; i<4096/64; i++) | ||
321 | { | ||
322 | __asm__ __volatile__ ( | ||
323 | "1: prefetch 320(%0)\n" | ||
324 | "2: movq (%0), %%mm0\n" | ||
325 | " movq 8(%0), %%mm1\n" | ||
326 | " movq 16(%0), %%mm2\n" | ||
327 | " movq 24(%0), %%mm3\n" | ||
328 | " movq %%mm0, (%1)\n" | ||
329 | " movq %%mm1, 8(%1)\n" | ||
330 | " movq %%mm2, 16(%1)\n" | ||
331 | " movq %%mm3, 24(%1)\n" | ||
332 | " movq 32(%0), %%mm0\n" | ||
333 | " movq 40(%0), %%mm1\n" | ||
334 | " movq 48(%0), %%mm2\n" | ||
335 | " movq 56(%0), %%mm3\n" | ||
336 | " movq %%mm0, 32(%1)\n" | ||
337 | " movq %%mm1, 40(%1)\n" | ||
338 | " movq %%mm2, 48(%1)\n" | ||
339 | " movq %%mm3, 56(%1)\n" | ||
340 | ".section .fixup, \"ax\"\n" | ||
341 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
342 | " jmp 2b\n" | ||
343 | ".previous\n" | ||
344 | ".section __ex_table,\"a\"\n" | ||
345 | " .align 4\n" | ||
346 | " .long 1b, 3b\n" | ||
347 | ".previous" | ||
348 | : : "r" (from), "r" (to) : "memory"); | ||
349 | from+=64; | ||
350 | to+=64; | ||
351 | } | ||
352 | kernel_fpu_end(); | ||
353 | } | ||
354 | |||
355 | |||
356 | #endif | ||
357 | |||
358 | /* | ||
359 | * Favour MMX for page clear and copy. | ||
360 | */ | ||
361 | |||
362 | static void slow_zero_page(void * page) | ||
363 | { | ||
364 | int d0, d1; | ||
365 | __asm__ __volatile__( \ | ||
366 | "cld\n\t" \ | ||
367 | "rep ; stosl" \ | ||
368 | : "=&c" (d0), "=&D" (d1) | ||
369 | :"a" (0),"1" (page),"0" (1024) | ||
370 | :"memory"); | ||
371 | } | ||
372 | |||
373 | void mmx_clear_page(void * page) | ||
374 | { | ||
375 | if(unlikely(in_interrupt())) | ||
376 | slow_zero_page(page); | ||
377 | else | ||
378 | fast_clear_page(page); | ||
379 | } | ||
380 | |||
381 | static void slow_copy_page(void *to, void *from) | ||
382 | { | ||
383 | int d0, d1, d2; | ||
384 | __asm__ __volatile__( \ | ||
385 | "cld\n\t" \ | ||
386 | "rep ; movsl" \ | ||
387 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ | ||
388 | : "0" (1024),"1" ((long) to),"2" ((long) from) \ | ||
389 | : "memory"); | ||
390 | } | ||
391 | |||
392 | |||
393 | void mmx_copy_page(void *to, void *from) | ||
394 | { | ||
395 | if(unlikely(in_interrupt())) | ||
396 | slow_copy_page(to, from); | ||
397 | else | ||
398 | fast_copy_page(to, from); | ||
399 | } | ||
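A note on the loop structure in _mmx_memcpy() and fast_copy_page(): the data moves in 64-byte blocks (i = len >> 6) while prefetching 320 bytes ahead, the prefetch is dropped for the last five blocks (5 * 64 = 320 bytes) so it never reads past the source buffer, and the sub-64-byte tail is handed to plain __memcpy(). A plain-C sketch of that block/tail split, for illustration only:

static void copy_in_64byte_blocks(unsigned char *to, const unsigned char *from,
				  unsigned long len)
{
	unsigned long i, blocks = len >> 6;	/* number of full 64-byte blocks */

	for (i = 0; i < blocks; i++)		/* the MMX movq loop in the real code */
		__builtin_memcpy(to + i * 64, from + i * 64, 64);

	__builtin_memcpy(to + blocks * 64, from + blocks * 64, len & 63);	/* tail */
}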
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
new file mode 100644
index 000000000000..a32d9f570f48
--- /dev/null
+++ b/arch/i386/lib/putuser.S
@@ -0,0 +1,87 @@
1 | /* | ||
2 | * __put_user functions. | ||
3 | * | ||
4 | * (C) Copyright 2005 Linus Torvalds | ||
5 | * | ||
6 | * These functions have a non-standard call interface | ||
7 | * to make them more efficient, especially as they | ||
8 | * return an error value in addition to the "real" | ||
9 | * return value. | ||
10 | */ | ||
11 | #include <asm/thread_info.h> | ||
12 | |||
13 | |||
14 | /* | ||
15 | * __put_user_X | ||
16 | * | ||
17 | * Inputs: %eax[:%edx] contains the data | ||
18 | * %ecx contains the address | ||
19 | * | ||
20 | * Outputs: %eax is error code (0 or -EFAULT) | ||
21 | * | ||
22 | * These functions should not modify any other registers, | ||
23 | * as they get called from within inline assembly. | ||
24 | */ | ||
25 | |||
26 | #define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx) | ||
27 | #define EXIT popl %ebx ; ret | ||
28 | |||
29 | .text | ||
30 | .align 4 | ||
31 | .globl __put_user_1 | ||
32 | __put_user_1: | ||
33 | ENTER | ||
34 | cmpl TI_addr_limit(%ebx),%ecx | ||
35 | jae bad_put_user | ||
36 | 1: movb %al,(%ecx) | ||
37 | xorl %eax,%eax | ||
38 | EXIT | ||
39 | |||
40 | .align 4 | ||
41 | .globl __put_user_2 | ||
42 | __put_user_2: | ||
43 | ENTER | ||
44 | movl TI_addr_limit(%ebx),%ebx | ||
45 | subl $1,%ebx | ||
46 | cmpl %ebx,%ecx | ||
47 | jae bad_put_user | ||
48 | 2: movw %ax,(%ecx) | ||
49 | xorl %eax,%eax | ||
50 | EXIT | ||
51 | |||
52 | .align 4 | ||
53 | .globl __put_user_4 | ||
54 | __put_user_4: | ||
55 | ENTER | ||
56 | movl TI_addr_limit(%ebx),%ebx | ||
57 | subl $3,%ebx | ||
58 | cmpl %ebx,%ecx | ||
59 | jae bad_put_user | ||
60 | 3: movl %eax,(%ecx) | ||
61 | xorl %eax,%eax | ||
62 | EXIT | ||
63 | |||
64 | .align 4 | ||
65 | .globl __put_user_8 | ||
66 | __put_user_8: | ||
67 | ENTER | ||
68 | movl TI_addr_limit(%ebx),%ebx | ||
69 | subl $7,%ebx | ||
70 | cmpl %ebx,%ecx | ||
71 | jae bad_put_user | ||
72 | 4: movl %eax,(%ecx) | ||
73 | 5: movl %edx,4(%ecx) | ||
74 | xorl %eax,%eax | ||
75 | EXIT | ||
76 | |||
77 | bad_put_user: | ||
78 | movl $-14,%eax | ||
79 | EXIT | ||
80 | |||
81 | .section __ex_table,"a" | ||
82 | .long 1b,bad_put_user | ||
83 | .long 2b,bad_put_user | ||
84 | .long 3b,bad_put_user | ||
85 | .long 4b,bad_put_user | ||
86 | .long 5b,bad_put_user | ||
87 | .previous | ||
diff --git a/arch/i386/lib/strstr.c b/arch/i386/lib/strstr.c
new file mode 100644
index 000000000000..a3dafbf59dae
--- /dev/null
+++ b/arch/i386/lib/strstr.c
@@ -0,0 +1,31 @@
1 | #include <linux/string.h> | ||
2 | |||
3 | char * strstr(const char * cs,const char * ct) | ||
4 | { | ||
5 | int d0, d1; | ||
6 | register char * __res; | ||
7 | __asm__ __volatile__( | ||
8 | "movl %6,%%edi\n\t" | ||
9 | "repne\n\t" | ||
10 | "scasb\n\t" | ||
11 | "notl %%ecx\n\t" | ||
12 | "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ | ||
13 | "movl %%ecx,%%edx\n" | ||
14 | "1:\tmovl %6,%%edi\n\t" | ||
15 | "movl %%esi,%%eax\n\t" | ||
16 | "movl %%edx,%%ecx\n\t" | ||
17 | "repe\n\t" | ||
18 | "cmpsb\n\t" | ||
19 | "je 2f\n\t" /* also works for empty string, see above */ | ||
20 | "xchgl %%eax,%%esi\n\t" | ||
21 | "incl %%esi\n\t" | ||
22 | "cmpb $0,-1(%%eax)\n\t" | ||
23 | "jne 1b\n\t" | ||
24 | "xorl %%eax,%%eax\n\t" | ||
25 | "2:" | ||
26 | :"=a" (__res), "=&c" (d0), "=&S" (d1) | ||
27 | :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) | ||
28 | :"dx", "di"); | ||
29 | return __res; | ||
30 | } | ||
31 | |||
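The assembly first measures strlen(ct) with repne scasb and then runs a repe cmpsb comparison of ct against each successive starting position in cs. The equivalent portable C, given here for reference only:

static char *strstr_ref(const char *cs, const char *ct)
{
	unsigned long n = 0, i;

	while (ct[n])				/* strlen(ct), as repne scasb computes it */
		n++;
	for (;; cs++) {
		for (i = 0; i < n; i++)		/* repe cmpsb over n bytes */
			if (cs[i] != ct[i])
				break;
		if (i == n)
			return (char *)cs;	/* match; also handles an empty ct */
		if (*cs == '\0')
			return (char *)0;	/* ran off the end of cs: no match */
	}
}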
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
new file mode 100644
index 000000000000..51aa2bbb0269
--- /dev/null
+++ b/arch/i386/lib/usercopy.c
@@ -0,0 +1,636 @@
1 | /* | ||
2 | * User address space access functions. | ||
3 | * The non inlined parts of asm-i386/uaccess.h are here. | ||
4 | * | ||
5 | * Copyright 1997 Andi Kleen <ak@muc.de> | ||
6 | * Copyright 1997 Linus Torvalds | ||
7 | */ | ||
8 | #include <linux/config.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/highmem.h> | ||
11 | #include <linux/blkdev.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <asm/uaccess.h> | ||
14 | #include <asm/mmx.h> | ||
15 | |||
16 | static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) | ||
17 | { | ||
18 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
19 | if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) | ||
20 | return 0; | ||
21 | #endif | ||
22 | return 1; | ||
23 | } | ||
24 | #define movsl_is_ok(a1,a2,n) \ | ||
25 | __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n)) | ||
26 | |||
27 | /* | ||
28 | * Copy a null terminated string from userspace. | ||
29 | */ | ||
30 | |||
31 | #define __do_strncpy_from_user(dst,src,count,res) \ | ||
32 | do { \ | ||
33 | int __d0, __d1, __d2; \ | ||
34 | might_sleep(); \ | ||
35 | __asm__ __volatile__( \ | ||
36 | " testl %1,%1\n" \ | ||
37 | " jz 2f\n" \ | ||
38 | "0: lodsb\n" \ | ||
39 | " stosb\n" \ | ||
40 | " testb %%al,%%al\n" \ | ||
41 | " jz 1f\n" \ | ||
42 | " decl %1\n" \ | ||
43 | " jnz 0b\n" \ | ||
44 | "1: subl %1,%0\n" \ | ||
45 | "2:\n" \ | ||
46 | ".section .fixup,\"ax\"\n" \ | ||
47 | "3: movl %5,%0\n" \ | ||
48 | " jmp 2b\n" \ | ||
49 | ".previous\n" \ | ||
50 | ".section __ex_table,\"a\"\n" \ | ||
51 | " .align 4\n" \ | ||
52 | " .long 0b,3b\n" \ | ||
53 | ".previous" \ | ||
54 | : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ | ||
55 | "=&D" (__d2) \ | ||
56 | : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ | ||
57 | : "memory"); \ | ||
58 | } while (0) | ||
59 | |||
60 | /** | ||
61 | * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. | ||
62 | * @dst: Destination address, in kernel space. This buffer must be at | ||
63 | * least @count bytes long. | ||
64 | * @src: Source address, in user space. | ||
65 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
66 | * | ||
67 | * Copies a NUL-terminated string from userspace to kernel space. | ||
68 | * Caller must check the specified block with access_ok() before calling | ||
69 | * this function. | ||
70 | * | ||
71 | * On success, returns the length of the string (not including the trailing | ||
72 | * NUL). | ||
73 | * | ||
74 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
75 | * copied). | ||
76 | * | ||
77 | * If @count is smaller than the length of the string, copies @count bytes | ||
78 | * and returns @count. | ||
79 | */ | ||
80 | long | ||
81 | __strncpy_from_user(char *dst, const char __user *src, long count) | ||
82 | { | ||
83 | long res; | ||
84 | __do_strncpy_from_user(dst, src, count, res); | ||
85 | return res; | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * strncpy_from_user: - Copy a NUL terminated string from userspace. | ||
90 | * @dst: Destination address, in kernel space. This buffer must be at | ||
91 | * least @count bytes long. | ||
92 | * @src: Source address, in user space. | ||
93 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
94 | * | ||
95 | * Copies a NUL-terminated string from userspace to kernel space. | ||
96 | * | ||
97 | * On success, returns the length of the string (not including the trailing | ||
98 | * NUL). | ||
99 | * | ||
100 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
101 | * copied). | ||
102 | * | ||
103 | * If @count is smaller than the length of the string, copies @count bytes | ||
104 | * and returns @count. | ||
105 | */ | ||
106 | long | ||
107 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
108 | { | ||
109 | long res = -EFAULT; | ||
110 | if (access_ok(VERIFY_READ, src, 1)) | ||
111 | __do_strncpy_from_user(dst, src, count, res); | ||
112 | return res; | ||
113 | } | ||
114 | |||
115 | |||
116 | /* | ||
117 | * Zero Userspace | ||
118 | */ | ||
119 | |||
120 | #define __do_clear_user(addr,size) \ | ||
121 | do { \ | ||
122 | int __d0; \ | ||
123 | might_sleep(); \ | ||
124 | __asm__ __volatile__( \ | ||
125 | "0: rep; stosl\n" \ | ||
126 | " movl %2,%0\n" \ | ||
127 | "1: rep; stosb\n" \ | ||
128 | "2:\n" \ | ||
129 | ".section .fixup,\"ax\"\n" \ | ||
130 | "3: lea 0(%2,%0,4),%0\n" \ | ||
131 | " jmp 2b\n" \ | ||
132 | ".previous\n" \ | ||
133 | ".section __ex_table,\"a\"\n" \ | ||
134 | " .align 4\n" \ | ||
135 | " .long 0b,3b\n" \ | ||
136 | " .long 1b,2b\n" \ | ||
137 | ".previous" \ | ||
138 | : "=&c"(size), "=&D" (__d0) \ | ||
139 | : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ | ||
140 | } while (0) | ||
141 | |||
142 | /** | ||
143 | * clear_user: - Zero a block of memory in user space. | ||
144 | * @to: Destination address, in user space. | ||
145 | * @n: Number of bytes to zero. | ||
146 | * | ||
147 | * Zero a block of memory in user space. | ||
148 | * | ||
149 | * Returns number of bytes that could not be cleared. | ||
150 | * On success, this will be zero. | ||
151 | */ | ||
152 | unsigned long | ||
153 | clear_user(void __user *to, unsigned long n) | ||
154 | { | ||
155 | might_sleep(); | ||
156 | if (access_ok(VERIFY_WRITE, to, n)) | ||
157 | __do_clear_user(to, n); | ||
158 | return n; | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * __clear_user: - Zero a block of memory in user space, with less checking. | ||
163 | * @to: Destination address, in user space. | ||
164 | * @n: Number of bytes to zero. | ||
165 | * | ||
166 | * Zero a block of memory in user space. Caller must check | ||
167 | * the specified block with access_ok() before calling this function. | ||
168 | * | ||
169 | * Returns number of bytes that could not be cleared. | ||
170 | * On success, this will be zero. | ||
171 | */ | ||
172 | unsigned long | ||
173 | __clear_user(void __user *to, unsigned long n) | ||
174 | { | ||
175 | __do_clear_user(to, n); | ||
176 | return n; | ||
177 | } | ||
178 | |||
179 | /** | ||
180 | * strlen_user: - Get the size of a string in user space. | ||
181 | * @s: The string to measure. | ||
182 | * @n: The maximum valid length | ||
183 | * | ||
184 | * Get the size of a NUL-terminated string in user space. | ||
185 | * | ||
186 | * Returns the size of the string INCLUDING the terminating NUL. | ||
187 | * On exception, returns 0. | ||
188 | * If the string is too long, returns a value greater than @n. | ||
189 | */ | ||
190 | long strnlen_user(const char __user *s, long n) | ||
191 | { | ||
192 | unsigned long mask = -__addr_ok(s); | ||
193 | unsigned long res, tmp; | ||
194 | |||
195 | might_sleep(); | ||
196 | |||
197 | __asm__ __volatile__( | ||
198 | " testl %0, %0\n" | ||
199 | " jz 3f\n" | ||
200 | " andl %0,%%ecx\n" | ||
201 | "0: repne; scasb\n" | ||
202 | " setne %%al\n" | ||
203 | " subl %%ecx,%0\n" | ||
204 | " addl %0,%%eax\n" | ||
205 | "1:\n" | ||
206 | ".section .fixup,\"ax\"\n" | ||
207 | "2: xorl %%eax,%%eax\n" | ||
208 | " jmp 1b\n" | ||
209 | "3: movb $1,%%al\n" | ||
210 | " jmp 1b\n" | ||
211 | ".previous\n" | ||
212 | ".section __ex_table,\"a\"\n" | ||
213 | " .align 4\n" | ||
214 | " .long 0b,2b\n" | ||
215 | ".previous" | ||
216 | :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) | ||
217 | :"0" (n), "1" (s), "2" (0), "3" (mask) | ||
218 | :"cc"); | ||
219 | return res & mask; | ||
220 | } | ||
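The mask trick above avoids a branch for invalid pointers: __addr_ok(s) is 1 for a plausible user address, so mask = -__addr_ok(s) is either all ones or all zeros, and "res & mask" collapses the result to 0 (the error value) without a conditional. A C sketch of the same idea, with an addr_ok parameter standing in for __addr_ok(s), for illustration only:

static long strnlen_user_ref(const char *s, long n, int addr_ok)
{
	unsigned long mask = -(unsigned long)addr_ok;	/* ~0UL if ok, 0UL if not */
	long res = 0;

	while (res < n && s[res])		/* repne scasb over at most n bytes */
		res++;
	if (res < n)
		res++;				/* count the terminating NUL */
	else
		res = n + 1;			/* no NUL within n bytes: return > n */
	return res & mask;			/* forced to 0 for a bad address */
}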
221 | |||
222 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
223 | static unsigned long | ||
224 | __copy_user_intel(void __user *to, const void *from, unsigned long size) | ||
225 | { | ||
226 | int d0, d1; | ||
227 | __asm__ __volatile__( | ||
228 | " .align 2,0x90\n" | ||
229 | "1: movl 32(%4), %%eax\n" | ||
230 | " cmpl $67, %0\n" | ||
231 | " jbe 3f\n" | ||
232 | "2: movl 64(%4), %%eax\n" | ||
233 | " .align 2,0x90\n" | ||
234 | "3: movl 0(%4), %%eax\n" | ||
235 | "4: movl 4(%4), %%edx\n" | ||
236 | "5: movl %%eax, 0(%3)\n" | ||
237 | "6: movl %%edx, 4(%3)\n" | ||
238 | "7: movl 8(%4), %%eax\n" | ||
239 | "8: movl 12(%4),%%edx\n" | ||
240 | "9: movl %%eax, 8(%3)\n" | ||
241 | "10: movl %%edx, 12(%3)\n" | ||
242 | "11: movl 16(%4), %%eax\n" | ||
243 | "12: movl 20(%4), %%edx\n" | ||
244 | "13: movl %%eax, 16(%3)\n" | ||
245 | "14: movl %%edx, 20(%3)\n" | ||
246 | "15: movl 24(%4), %%eax\n" | ||
247 | "16: movl 28(%4), %%edx\n" | ||
248 | "17: movl %%eax, 24(%3)\n" | ||
249 | "18: movl %%edx, 28(%3)\n" | ||
250 | "19: movl 32(%4), %%eax\n" | ||
251 | "20: movl 36(%4), %%edx\n" | ||
252 | "21: movl %%eax, 32(%3)\n" | ||
253 | "22: movl %%edx, 36(%3)\n" | ||
254 | "23: movl 40(%4), %%eax\n" | ||
255 | "24: movl 44(%4), %%edx\n" | ||
256 | "25: movl %%eax, 40(%3)\n" | ||
257 | "26: movl %%edx, 44(%3)\n" | ||
258 | "27: movl 48(%4), %%eax\n" | ||
259 | "28: movl 52(%4), %%edx\n" | ||
260 | "29: movl %%eax, 48(%3)\n" | ||
261 | "30: movl %%edx, 52(%3)\n" | ||
262 | "31: movl 56(%4), %%eax\n" | ||
263 | "32: movl 60(%4), %%edx\n" | ||
264 | "33: movl %%eax, 56(%3)\n" | ||
265 | "34: movl %%edx, 60(%3)\n" | ||
266 | " addl $-64, %0\n" | ||
267 | " addl $64, %4\n" | ||
268 | " addl $64, %3\n" | ||
269 | " cmpl $63, %0\n" | ||
270 | " ja 1b\n" | ||
271 | "35: movl %0, %%eax\n" | ||
272 | " shrl $2, %0\n" | ||
273 | " andl $3, %%eax\n" | ||
274 | " cld\n" | ||
275 | "99: rep; movsl\n" | ||
276 | "36: movl %%eax, %0\n" | ||
277 | "37: rep; movsb\n" | ||
278 | "100:\n" | ||
279 | ".section .fixup,\"ax\"\n" | ||
280 | "101: lea 0(%%eax,%0,4),%0\n" | ||
281 | " jmp 100b\n" | ||
282 | ".previous\n" | ||
283 | ".section __ex_table,\"a\"\n" | ||
284 | " .align 4\n" | ||
285 | " .long 1b,100b\n" | ||
286 | " .long 2b,100b\n" | ||
287 | " .long 3b,100b\n" | ||
288 | " .long 4b,100b\n" | ||
289 | " .long 5b,100b\n" | ||
290 | " .long 6b,100b\n" | ||
291 | " .long 7b,100b\n" | ||
292 | " .long 8b,100b\n" | ||
293 | " .long 9b,100b\n" | ||
294 | " .long 10b,100b\n" | ||
295 | " .long 11b,100b\n" | ||
296 | " .long 12b,100b\n" | ||
297 | " .long 13b,100b\n" | ||
298 | " .long 14b,100b\n" | ||
299 | " .long 15b,100b\n" | ||
300 | " .long 16b,100b\n" | ||
301 | " .long 17b,100b\n" | ||
302 | " .long 18b,100b\n" | ||
303 | " .long 19b,100b\n" | ||
304 | " .long 20b,100b\n" | ||
305 | " .long 21b,100b\n" | ||
306 | " .long 22b,100b\n" | ||
307 | " .long 23b,100b\n" | ||
308 | " .long 24b,100b\n" | ||
309 | " .long 25b,100b\n" | ||
310 | " .long 26b,100b\n" | ||
311 | " .long 27b,100b\n" | ||
312 | " .long 28b,100b\n" | ||
313 | " .long 29b,100b\n" | ||
314 | " .long 30b,100b\n" | ||
315 | " .long 31b,100b\n" | ||
316 | " .long 32b,100b\n" | ||
317 | " .long 33b,100b\n" | ||
318 | " .long 34b,100b\n" | ||
319 | " .long 35b,100b\n" | ||
320 | " .long 36b,100b\n" | ||
321 | " .long 37b,100b\n" | ||
322 | " .long 99b,101b\n" | ||
323 | ".previous" | ||
324 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
325 | : "1"(to), "2"(from), "0"(size) | ||
326 | : "eax", "edx", "memory"); | ||
327 | return size; | ||
328 | } | ||
329 | |||
330 | static unsigned long | ||
331 | __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) | ||
332 | { | ||
333 | int d0, d1; | ||
334 | __asm__ __volatile__( | ||
335 | " .align 2,0x90\n" | ||
336 | "0: movl 32(%4), %%eax\n" | ||
337 | " cmpl $67, %0\n" | ||
338 | " jbe 2f\n" | ||
339 | "1: movl 64(%4), %%eax\n" | ||
340 | " .align 2,0x90\n" | ||
341 | "2: movl 0(%4), %%eax\n" | ||
342 | "21: movl 4(%4), %%edx\n" | ||
343 | " movl %%eax, 0(%3)\n" | ||
344 | " movl %%edx, 4(%3)\n" | ||
345 | "3: movl 8(%4), %%eax\n" | ||
346 | "31: movl 12(%4),%%edx\n" | ||
347 | " movl %%eax, 8(%3)\n" | ||
348 | " movl %%edx, 12(%3)\n" | ||
349 | "4: movl 16(%4), %%eax\n" | ||
350 | "41: movl 20(%4), %%edx\n" | ||
351 | " movl %%eax, 16(%3)\n" | ||
352 | " movl %%edx, 20(%3)\n" | ||
353 | "10: movl 24(%4), %%eax\n" | ||
354 | "51: movl 28(%4), %%edx\n" | ||
355 | " movl %%eax, 24(%3)\n" | ||
356 | " movl %%edx, 28(%3)\n" | ||
357 | "11: movl 32(%4), %%eax\n" | ||
358 | "61: movl 36(%4), %%edx\n" | ||
359 | " movl %%eax, 32(%3)\n" | ||
360 | " movl %%edx, 36(%3)\n" | ||
361 | "12: movl 40(%4), %%eax\n" | ||
362 | "71: movl 44(%4), %%edx\n" | ||
363 | " movl %%eax, 40(%3)\n" | ||
364 | " movl %%edx, 44(%3)\n" | ||
365 | "13: movl 48(%4), %%eax\n" | ||
366 | "81: movl 52(%4), %%edx\n" | ||
367 | " movl %%eax, 48(%3)\n" | ||
368 | " movl %%edx, 52(%3)\n" | ||
369 | "14: movl 56(%4), %%eax\n" | ||
370 | "91: movl 60(%4), %%edx\n" | ||
371 | " movl %%eax, 56(%3)\n" | ||
372 | " movl %%edx, 60(%3)\n" | ||
373 | " addl $-64, %0\n" | ||
374 | " addl $64, %4\n" | ||
375 | " addl $64, %3\n" | ||
376 | " cmpl $63, %0\n" | ||
377 | " ja 0b\n" | ||
378 | "5: movl %0, %%eax\n" | ||
379 | " shrl $2, %0\n" | ||
380 | " andl $3, %%eax\n" | ||
381 | " cld\n" | ||
382 | "6: rep; movsl\n" | ||
383 | " movl %%eax,%0\n" | ||
384 | "7: rep; movsb\n" | ||
385 | "8:\n" | ||
386 | ".section .fixup,\"ax\"\n" | ||
387 | "9: lea 0(%%eax,%0,4),%0\n" | ||
388 | "16: pushl %0\n" | ||
389 | " pushl %%eax\n" | ||
390 | " xorl %%eax,%%eax\n" | ||
391 | " rep; stosb\n" | ||
392 | " popl %%eax\n" | ||
393 | " popl %0\n" | ||
394 | " jmp 8b\n" | ||
395 | ".previous\n" | ||
396 | ".section __ex_table,\"a\"\n" | ||
397 | " .align 4\n" | ||
398 | " .long 0b,16b\n" | ||
399 | " .long 1b,16b\n" | ||
400 | " .long 2b,16b\n" | ||
401 | " .long 21b,16b\n" | ||
402 | " .long 3b,16b\n" | ||
403 | " .long 31b,16b\n" | ||
404 | " .long 4b,16b\n" | ||
405 | " .long 41b,16b\n" | ||
406 | " .long 10b,16b\n" | ||
407 | " .long 51b,16b\n" | ||
408 | " .long 11b,16b\n" | ||
409 | " .long 61b,16b\n" | ||
410 | " .long 12b,16b\n" | ||
411 | " .long 71b,16b\n" | ||
412 | " .long 13b,16b\n" | ||
413 | " .long 81b,16b\n" | ||
414 | " .long 14b,16b\n" | ||
415 | " .long 91b,16b\n" | ||
416 | " .long 6b,9b\n" | ||
417 | " .long 7b,16b\n" | ||
418 | ".previous" | ||
419 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
420 | : "1"(to), "2"(from), "0"(size) | ||
421 | : "eax", "edx", "memory"); | ||
422 | return size; | ||
423 | } | ||
424 | #else | ||
425 | /* | ||
426 | * Leave these declared but undefined; there should not be any references | ||
427 | * to them. | ||
428 | */ | ||
429 | unsigned long | ||
430 | __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size); | ||
431 | unsigned long | ||
432 | __copy_user_intel(void __user *to, const void *from, unsigned long size); | ||
433 | #endif /* CONFIG_X86_INTEL_USERCOPY */ | ||
434 | |||
435 | /* Generic arbitrary sized copy. */ | ||
436 | #define __copy_user(to,from,size) \ | ||
437 | do { \ | ||
438 | int __d0, __d1, __d2; \ | ||
439 | __asm__ __volatile__( \ | ||
440 | " cmp $7,%0\n" \ | ||
441 | " jbe 1f\n" \ | ||
442 | " movl %1,%0\n" \ | ||
443 | " negl %0\n" \ | ||
444 | " andl $7,%0\n" \ | ||
445 | " subl %0,%3\n" \ | ||
446 | "4: rep; movsb\n" \ | ||
447 | " movl %3,%0\n" \ | ||
448 | " shrl $2,%0\n" \ | ||
449 | " andl $3,%3\n" \ | ||
450 | " .align 2,0x90\n" \ | ||
451 | "0: rep; movsl\n" \ | ||
452 | " movl %3,%0\n" \ | ||
453 | "1: rep; movsb\n" \ | ||
454 | "2:\n" \ | ||
455 | ".section .fixup,\"ax\"\n" \ | ||
456 | "5: addl %3,%0\n" \ | ||
457 | " jmp 2b\n" \ | ||
458 | "3: lea 0(%3,%0,4),%0\n" \ | ||
459 | " jmp 2b\n" \ | ||
460 | ".previous\n" \ | ||
461 | ".section __ex_table,\"a\"\n" \ | ||
462 | " .align 4\n" \ | ||
463 | " .long 4b,5b\n" \ | ||
464 | " .long 0b,3b\n" \ | ||
465 | " .long 1b,2b\n" \ | ||
466 | ".previous" \ | ||
467 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | ||
468 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | ||
469 | : "memory"); \ | ||
470 | } while (0) | ||
471 | |||
472 | #define __copy_user_zeroing(to,from,size) \ | ||
473 | do { \ | ||
474 | int __d0, __d1, __d2; \ | ||
475 | __asm__ __volatile__( \ | ||
476 | " cmp $7,%0\n" \ | ||
477 | " jbe 1f\n" \ | ||
478 | " movl %1,%0\n" \ | ||
479 | " negl %0\n" \ | ||
480 | " andl $7,%0\n" \ | ||
481 | " subl %0,%3\n" \ | ||
482 | "4: rep; movsb\n" \ | ||
483 | " movl %3,%0\n" \ | ||
484 | " shrl $2,%0\n" \ | ||
485 | " andl $3,%3\n" \ | ||
486 | " .align 2,0x90\n" \ | ||
487 | "0: rep; movsl\n" \ | ||
488 | " movl %3,%0\n" \ | ||
489 | "1: rep; movsb\n" \ | ||
490 | "2:\n" \ | ||
491 | ".section .fixup,\"ax\"\n" \ | ||
492 | "5: addl %3,%0\n" \ | ||
493 | " jmp 6f\n" \ | ||
494 | "3: lea 0(%3,%0,4),%0\n" \ | ||
495 | "6: pushl %0\n" \ | ||
496 | " pushl %%eax\n" \ | ||
497 | " xorl %%eax,%%eax\n" \ | ||
498 | " rep; stosb\n" \ | ||
499 | " popl %%eax\n" \ | ||
500 | " popl %0\n" \ | ||
501 | " jmp 2b\n" \ | ||
502 | ".previous\n" \ | ||
503 | ".section __ex_table,\"a\"\n" \ | ||
504 | " .align 4\n" \ | ||
505 | " .long 4b,5b\n" \ | ||
506 | " .long 0b,3b\n" \ | ||
507 | " .long 1b,6b\n" \ | ||
508 | ".previous" \ | ||
509 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | ||
510 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | ||
511 | : "memory"); \ | ||
512 | } while (0) | ||
513 | |||
514 | |||
515 | unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) | ||
516 | { | ||
517 | BUG_ON((long) n < 0); | ||
518 | #ifndef CONFIG_X86_WP_WORKS_OK | ||
519 | if (unlikely(boot_cpu_data.wp_works_ok == 0) && | ||
520 | ((unsigned long )to) < TASK_SIZE) { | ||
521 | /* | ||
522 | * CPU does not honor the WP bit when writing | ||
523 | * from supervisory mode, and due to preemption or SMP, | ||
524 | * the page tables can change at any time. | ||
525 | * Do it manually. Manfred <manfred@colorfullife.com> | ||
526 | */ | ||
527 | while (n) { | ||
528 | unsigned long offset = ((unsigned long)to)%PAGE_SIZE; | ||
529 | unsigned long len = PAGE_SIZE - offset; | ||
530 | int retval; | ||
531 | struct page *pg; | ||
532 | void *maddr; | ||
533 | |||
534 | if (len > n) | ||
535 | len = n; | ||
536 | |||
537 | survive: | ||
538 | down_read(¤t->mm->mmap_sem); | ||
539 | retval = get_user_pages(current, current->mm, | ||
540 | (unsigned long )to, 1, 1, 0, &pg, NULL); | ||
541 | |||
542 | if (retval == -ENOMEM && current->pid == 1) { | ||
543 | up_read(¤t->mm->mmap_sem); | ||
544 | blk_congestion_wait(WRITE, HZ/50); | ||
545 | goto survive; | ||
546 | } | ||
547 | |||
548 | if (retval != 1) { | ||
549 | up_read(¤t->mm->mmap_sem); | ||
550 | break; | ||
551 | } | ||
552 | |||
553 | maddr = kmap_atomic(pg, KM_USER0); | ||
554 | memcpy(maddr + offset, from, len); | ||
555 | kunmap_atomic(maddr, KM_USER0); | ||
556 | set_page_dirty_lock(pg); | ||
557 | put_page(pg); | ||
558 | up_read(¤t->mm->mmap_sem); | ||
559 | |||
560 | from += len; | ||
561 | to += len; | ||
562 | n -= len; | ||
563 | } | ||
564 | return n; | ||
565 | } | ||
566 | #endif | ||
567 | if (movsl_is_ok(to, from, n)) | ||
568 | __copy_user(to, from, n); | ||
569 | else | ||
570 | n = __copy_user_intel(to, from, n); | ||
571 | return n; | ||
572 | } | ||
573 | |||
574 | unsigned long | ||
575 | __copy_from_user_ll(void *to, const void __user *from, unsigned long n) | ||
576 | { | ||
577 | BUG_ON((long)n < 0); | ||
578 | if (movsl_is_ok(to, from, n)) | ||
579 | __copy_user_zeroing(to, from, n); | ||
580 | else | ||
581 | n = __copy_user_zeroing_intel(to, from, n); | ||
582 | return n; | ||
583 | } | ||
584 | |||
585 | /** | ||
586 | * copy_to_user: - Copy a block of data into user space. | ||
587 | * @to: Destination address, in user space. | ||
588 | * @from: Source address, in kernel space. | ||
589 | * @n: Number of bytes to copy. | ||
590 | * | ||
591 | * Context: User context only. This function may sleep. | ||
592 | * | ||
593 | * Copy data from kernel space to user space. | ||
594 | * | ||
595 | * Returns number of bytes that could not be copied. | ||
596 | * On success, this will be zero. | ||
597 | */ | ||
598 | unsigned long | ||
599 | copy_to_user(void __user *to, const void *from, unsigned long n) | ||
600 | { | ||
601 | might_sleep(); | ||
602 | BUG_ON((long) n < 0); | ||
603 | if (access_ok(VERIFY_WRITE, to, n)) | ||
604 | n = __copy_to_user(to, from, n); | ||
605 | return n; | ||
606 | } | ||
607 | EXPORT_SYMBOL(copy_to_user); | ||
608 | |||
609 | /** | ||
610 | * copy_from_user: - Copy a block of data from user space. | ||
611 | * @to: Destination address, in kernel space. | ||
612 | * @from: Source address, in user space. | ||
613 | * @n: Number of bytes to copy. | ||
614 | * | ||
615 | * Context: User context only. This function may sleep. | ||
616 | * | ||
617 | * Copy data from user space to kernel space. | ||
618 | * | ||
619 | * Returns number of bytes that could not be copied. | ||
620 | * On success, this will be zero. | ||
621 | * | ||
622 | * If some data could not be copied, this function will pad the copied | ||
623 | * data to the requested size using zero bytes. | ||
624 | */ | ||
625 | unsigned long | ||
626 | copy_from_user(void *to, const void __user *from, unsigned long n) | ||
627 | { | ||
628 | might_sleep(); | ||
629 | BUG_ON((long) n < 0); | ||
630 | if (access_ok(VERIFY_READ, from, n)) | ||
631 | n = __copy_from_user(to, from, n); | ||
632 | else | ||
633 | memset(to, 0, n); | ||
634 | return n; | ||
635 | } | ||
636 | EXPORT_SYMBOL(copy_from_user); | ||
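Typical use of the entry points above, sketched as a hypothetical driver helper; the struct and function names are made up, and the error code assumes the usual <asm/errno.h> definitions.

struct demo_config {
	int threshold;
	int flags;
};

static int demo_set_config(struct demo_config *cfg, const void __user *ubuf)
{
	/* copy_from_user() returns the number of bytes it could NOT copy */
	if (copy_from_user(cfg, ubuf, sizeof(*cfg)))
		return -EFAULT;			/* partial or failed copy */
	return 0;
}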