diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/lib/Makefile | 5 | ||||
-rw-r--r-- | arch/x86/lib/Makefile_32 | 11 | ||||
-rw-r--r-- | arch/x86/lib/bitops_32.c | 70 | ||||
-rw-r--r-- | arch/x86/lib/checksum_32.S | 546 | ||||
-rw-r--r-- | arch/x86/lib/delay_32.c | 103 | ||||
-rw-r--r-- | arch/x86/lib/getuser_32.S | 78 | ||||
-rw-r--r-- | arch/x86/lib/memcpy_32.c | 43 | ||||
-rw-r--r-- | arch/x86/lib/mmx_32.c | 403 | ||||
-rw-r--r-- | arch/x86/lib/msr-on-cpu.c | 119 | ||||
-rw-r--r-- | arch/x86/lib/putuser_32.S | 98 | ||||
-rw-r--r-- | arch/x86/lib/semaphore_32.S | 219 | ||||
-rw-r--r-- | arch/x86/lib/string_32.c | 257 | ||||
-rw-r--r-- | arch/x86/lib/strstr_32.c | 31 | ||||
-rw-r--r-- | arch/x86/lib/usercopy_32.c | 882 |
14 files changed, 2865 insertions, 0 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile new file mode 100644 index 000000000000..2d7d724a2a6a --- /dev/null +++ b/arch/x86/lib/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | ifeq ($(CONFIG_X86_32),y) # 32-bit build: pull in the i386 library object list | ||
2 | include ${srctree}/arch/x86/lib/Makefile_32 | ||
3 | else # otherwise use the object list that still lives under arch/x86_64 | ||
4 | include ${srctree}/arch/x86_64/lib/Makefile_64 | ||
5 | endif | ||
diff --git a/arch/x86/lib/Makefile_32 b/arch/x86/lib/Makefile_32 new file mode 100644 index 000000000000..98d1f1e2e2ef --- /dev/null +++ b/arch/x86/lib/Makefile_32 | |||
@@ -0,0 +1,11 @@ | |||
1 | # | ||
2 | # Makefile for the i386-specific library files (included by arch/x86/lib/Makefile when CONFIG_X86_32=y). | ||
3 | # | ||
4 | |||
5 | |||
6 | lib-y = checksum_32.o delay_32.o usercopy_32.o getuser_32.o putuser_32.o memcpy_32.o strstr_32.o \ | ||
7 | bitops_32.o semaphore_32.o string_32.o | ||
8 | |||
9 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | ||
10 | |||
11 | obj-$(CONFIG_SMP) += msr-on-cpu.o | ||
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c new file mode 100644 index 000000000000..afd0045595d4 --- /dev/null +++ b/arch/x86/lib/bitops_32.c | |||
@@ -0,0 +1,70 @@ | |||
1 | #include <linux/bitops.h> | ||
2 | #include <linux/module.h> | ||
3 | |||
4 | /** | ||
5 | * find_next_bit - find the next set bit in a memory region, starting at @offset | ||
6 | * @addr: The address to base the search on | ||
7 | * @size: The maximum size to search (passed on, minus the bits already skipped, to find_first_bit()) | ||
8 | * @offset: The bitnumber to start searching at | ||
9 | */ | ||
10 | int find_next_bit(const unsigned long *addr, int size, int offset) | ||
11 | { | ||
12 | const unsigned long *p = addr + (offset >> 5); /* word holding bit @offset; 32-bit words are hard-coded */ | ||
13 | int set = 0, bit = offset & 31, res; /* bit: position of @offset inside that word */ | ||
14 | |||
15 | if (bit) { | ||
16 | /* | ||
17 | * Look for nonzero in the rest of the first 32 bits: bsfl gives the index of the lowest set bit of the shifted word, and 32 is substituted when that word is zero. | ||
18 | */ | ||
19 | __asm__("bsfl %1,%0\n\t" | ||
20 | "jne 1f\n\t" /* ZF clear: the source was nonzero, keep the bsfl result */ | ||
21 | "movl $32, %0\n" | ||
22 | "1:" | ||
23 | : "=r" (set) | ||
24 | : "r" (*p >> bit)); | ||
25 | if (set < (32 - bit)) /* hit lies inside the partial first word */ | ||
26 | return set + offset; | ||
27 | set = 32 - bit; /* account for the bits of the first word that were skipped */ | ||
28 | p++; | ||
29 | } | ||
30 | /* | ||
31 | * No set bit yet, search the remaining full words for a bit | ||
32 | */ | ||
33 | res = find_first_bit (p, size - 32 * (p - addr)); /* res is relative to p; its not-found value is defined by find_first_bit(), not visible here */ | ||
34 | return (offset + set + res); /* offset + set is the bit number of the first bit of *p */ | ||
35 | } | ||
36 | EXPORT_SYMBOL(find_next_bit); | ||
37 | |||
38 | /** | ||
39 | * find_next_zero_bit - find the next zero bit in a memory region, starting at @offset | ||
40 | * @addr: The address to base the search on | ||
41 | * @size: The maximum size to search (passed on, minus the bits already skipped, to find_first_zero_bit()) | ||
42 | * @offset: The bitnumber to start searching at | ||
43 | */ | ||
44 | int find_next_zero_bit(const unsigned long *addr, int size, int offset) | ||
45 | { | ||
46 | const unsigned long *p = addr + (offset >> 5); /* word holding bit @offset; 32-bit words are hard-coded */ | ||
47 | int set = 0, bit = offset & 31, res; /* bit: position of @offset inside that word */ | ||
48 | |||
49 | if (bit) { | ||
50 | /* | ||
51 | * Look for zero in the rest of the first 32 bits, by searching the inverted word for a set bit. | ||
52 | */ | ||
53 | __asm__("bsfl %1,%0\n\t" | ||
54 | "jne 1f\n\t" /* ZF clear: the source was nonzero, keep the bsfl result */ | ||
55 | "movl $32, %0\n" | ||
56 | "1:" | ||
57 | : "=r" (set) | ||
58 | : "r" (~(*p >> bit))); /* zeros shifted in at the top invert to ones; the range check below rejects hits there */ | ||
59 | if (set < (32 - bit)) /* hit lies inside the partial first word */ | ||
60 | return set + offset; | ||
61 | set = 32 - bit; /* account for the bits of the first word that were skipped */ | ||
62 | p++; | ||
63 | } | ||
64 | /* | ||
65 | * No zero yet, search the remaining full words for a zero | ||
66 | */ | ||
67 | res = find_first_zero_bit(p, size - 32 * (p - addr)); /* res is relative to p; its not-found value is defined by find_first_zero_bit(), not visible here */ | ||
68 | return (offset + set + res); /* offset + set is the bit number of the first bit of *p */ | ||
69 | } | ||
70 | EXPORT_SYMBOL(find_next_zero_bit); | ||
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S new file mode 100644 index 000000000000..adbccd0bbb78 --- /dev/null +++ b/arch/x86/lib/checksum_32.S | |||
@@ -0,0 +1,546 @@ | |||
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * IP/TCP/UDP checksumming routines | ||
7 | * | ||
8 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | ||
9 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | ||
10 | * Tom May, <ftom@netcom.com> | ||
11 | * Pentium Pro/II routines: | ||
12 | * Alexander Kjeldaas <astor@guardian.no> | ||
13 | * Finn Arne Gangstad <finnag@guardian.no> | ||
14 | * Lots of code moved from tcp.c and ip.c; see those files | ||
15 | * for more names. | ||
16 | * | ||
17 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | ||
18 | * handling. | ||
19 | * Andi Kleen, add zeroing on error | ||
20 | * converted to pure assembler | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or | ||
23 | * modify it under the terms of the GNU General Public License | ||
24 | * as published by the Free Software Foundation; either version | ||
25 | * 2 of the License, or (at your option) any later version. | ||
26 | */ | ||
27 | |||
28 | #include <linux/linkage.h> | ||
29 | #include <asm/dwarf2.h> | ||
30 | #include <asm/errno.h> | ||
31 | |||
32 | /* | ||
33 | * computes a partial checksum, e.g. for TCP/UDP fragments | ||
34 | */ | ||
35 | |||
36 | /* | ||
37 | unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | ||
38 | */ | ||
39 | |||
40 | .text | ||
41 | |||
42 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | ||
43 | |||
44 | /* | ||
45 | * Experiments with Ethernet and SLIP connections show that buff | ||
46 | * is aligned on either a 2-byte or 4-byte boundary. We get at | ||
47 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | ||
48 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | ||
49 | * alignment for the unrolled loop. | ||
50 | */ | ||
51 | ENTRY(csum_partial) /* generic version: assembled when CONFIG_X86_USE_PPRO_CHECKSUM is not defined; result is returned in %eax */ | ||
52 | CFI_STARTPROC | ||
53 | pushl %esi | ||
54 | CFI_ADJUST_CFA_OFFSET 4 | ||
55 | CFI_REL_OFFSET esi, 0 | ||
56 | pushl %ebx | ||
57 | CFI_ADJUST_CFA_OFFSET 4 | ||
58 | CFI_REL_OFFSET ebx, 0 | ||
59 | movl 20(%esp),%eax # Function arg: unsigned int sum | ||
60 | movl 16(%esp),%ecx # Function arg: int len | ||
61 | movl 12(%esp),%esi # Function arg: unsigned char *buff | ||
62 | testl $3, %esi # Check alignment. | ||
63 | jz 2f # Jump if alignment is ok. | ||
64 | testl $1, %esi # Check alignment. | ||
65 | jz 10f # Jump if alignment is boundary of 2bytes. | ||
66 | |||
67 | # buf is odd | ||
68 | dec %ecx /* consume the odd leading byte */ | ||
69 | jl 8f /* len was 0: return sum unchanged */ | ||
70 | movzbl (%esi), %ebx | ||
71 | adcl %ebx, %eax | ||
72 | roll $8, %eax /* rotate the sum by one byte for the odd start; rotated by 8 again after label 7 */ | ||
73 | inc %esi | ||
74 | testl $2, %esi | ||
75 | jz 2f | ||
76 | 10: | ||
77 | subl $2, %ecx # Alignment uses up two bytes. | ||
78 | jae 1f # Jump if we had at least two bytes. | ||
79 | addl $2, %ecx # ecx was < 2. Deal with it. | ||
80 | jmp 4f | ||
81 | 1: movw (%esi), %bx | ||
82 | addl $2, %esi | ||
83 | addw %bx, %ax | ||
84 | adcl $0, %eax | ||
85 | 2: | ||
86 | movl %ecx, %edx /* keep the byte count in %edx; %ecx becomes the number of 32-byte blocks */ | ||
87 | shrl $5, %ecx | ||
88 | jz 2f | ||
89 | testl %esi, %esi /* clears CF before the adcl chain */ | ||
90 | 1: movl (%esi), %ebx | ||
91 | adcl %ebx, %eax | ||
92 | movl 4(%esi), %ebx | ||
93 | adcl %ebx, %eax | ||
94 | movl 8(%esi), %ebx | ||
95 | adcl %ebx, %eax | ||
96 | movl 12(%esi), %ebx | ||
97 | adcl %ebx, %eax | ||
98 | movl 16(%esi), %ebx | ||
99 | adcl %ebx, %eax | ||
100 | movl 20(%esi), %ebx | ||
101 | adcl %ebx, %eax | ||
102 | movl 24(%esi), %ebx | ||
103 | adcl %ebx, %eax | ||
104 | movl 28(%esi), %ebx | ||
105 | adcl %ebx, %eax | ||
106 | lea 32(%esi), %esi /* lea and dec leave CF alone, so the carry survives into the next iteration */ | ||
107 | dec %ecx | ||
108 | jne 1b | ||
109 | adcl $0, %eax /* fold in the final carry of the block loop */ | ||
110 | 2: movl %edx, %ecx | ||
111 | andl $0x1c, %edx /* bytes left in whole dwords (0..28) */ | ||
112 | je 4f | ||
113 | shrl $2, %edx # This clears CF | ||
114 | 3: adcl (%esi), %eax | ||
115 | lea 4(%esi), %esi | ||
116 | dec %edx | ||
117 | jne 3b | ||
118 | adcl $0, %eax | ||
119 | 4: andl $3, %ecx /* 0..3 trailing bytes */ | ||
120 | jz 7f | ||
121 | cmpl $2, %ecx | ||
122 | jb 5f /* exactly one byte left */ | ||
123 | movw (%esi),%cx | ||
124 | leal 2(%esi),%esi | ||
125 | je 6f /* flags still come from the cmpl: exactly two bytes left */ | ||
126 | shll $16,%ecx | ||
127 | 5: movb (%esi),%cl | ||
128 | 6: addl %ecx,%eax | ||
129 | adcl $0, %eax | ||
130 | 7: | ||
131 | testl $1, 12(%esp) /* was the original buff pointer odd? */ | ||
132 | jz 8f | ||
133 | roll $8, %eax | ||
134 | 8: | ||
135 | popl %ebx | ||
136 | CFI_ADJUST_CFA_OFFSET -4 | ||
137 | CFI_RESTORE ebx | ||
138 | popl %esi | ||
139 | CFI_ADJUST_CFA_OFFSET -4 | ||
140 | CFI_RESTORE esi | ||
141 | ret | ||
142 | CFI_ENDPROC | ||
143 | ENDPROC(csum_partial) | ||
144 | |||
145 | #else | ||
146 | |||
147 | /* Version for PentiumII/PPro: assembled when CONFIG_X86_USE_PPRO_CHECKSUM is defined. Same arguments and return register as the generic version. */ | ||
148 | |||
149 | ENTRY(csum_partial) | ||
150 | CFI_STARTPROC | ||
151 | pushl %esi | ||
152 | CFI_ADJUST_CFA_OFFSET 4 | ||
153 | CFI_REL_OFFSET esi, 0 | ||
154 | pushl %ebx | ||
155 | CFI_ADJUST_CFA_OFFSET 4 | ||
156 | CFI_REL_OFFSET ebx, 0 | ||
157 | movl 20(%esp),%eax # Function arg: unsigned int sum | ||
158 | movl 16(%esp),%ecx # Function arg: int len | ||
159 | movl 12(%esp),%esi # Function arg: const unsigned char *buf | ||
160 | |||
161 | testl $3, %esi | ||
162 | jnz 25f /* not 4-byte aligned: fix up alignment first */ | ||
163 | 10: | ||
164 | movl %ecx, %edx /* keep len for the tail handling at label 50 */ | ||
165 | movl %ecx, %ebx | ||
166 | andl $0x7c, %ebx /* bytes of whole dwords in the partial 128-byte block */ | ||
167 | shrl $7, %ecx /* number of full 128-byte blocks */ | ||
168 | addl %ebx,%esi | ||
169 | shrl $2, %ebx | ||
170 | negl %ebx | ||
171 | lea 45f(%ebx,%ebx,2), %ebx /* entry point = label 45 minus 3 code bytes per remaining dword; assumes every add below assembles to 3 bytes */ | ||
172 | testl %esi, %esi /* clears CF before jumping into the adcl chain */ | ||
173 | jmp *%ebx | ||
174 | |||
175 | # Handle 2-byte-aligned regions | ||
176 | 20: addw (%esi), %ax | ||
177 | lea 2(%esi), %esi | ||
178 | adcl $0, %eax | ||
179 | jmp 10b | ||
180 | 25: | ||
181 | testl $1, %esi | ||
182 | jz 30f | ||
183 | # buf is odd | ||
184 | dec %ecx | ||
185 | jl 90f /* len was 0: return sum unchanged */ | ||
186 | movzbl (%esi), %ebx | ||
187 | addl %ebx, %eax | ||
188 | adcl $0, %eax | ||
189 | roll $8, %eax /* rotate the sum by one byte for the odd start; rotated by 8 again after label 80 */ | ||
190 | inc %esi | ||
191 | testl $2, %esi | ||
192 | jz 10b | ||
193 | |||
194 | 30: subl $2, %ecx | ||
195 | ja 20b /* more than two bytes left: sum one word and continue aligned */ | ||
196 | je 32f /* exactly two bytes left */ | ||
197 | addl $2, %ecx | ||
198 | jz 80f | ||
199 | movzbl (%esi),%ebx # csumming 1 byte, 2-aligned | ||
200 | addl %ebx, %eax | ||
201 | adcl $0, %eax | ||
202 | jmp 80f | ||
203 | 32: | ||
204 | addw (%esi), %ax # csumming 2 bytes, 2-aligned | ||
205 | adcl $0, %eax | ||
206 | jmp 80f | ||
207 | |||
208 | 40: | ||
209 | addl -128(%esi), %eax | ||
210 | adcl -124(%esi), %eax | ||
211 | adcl -120(%esi), %eax | ||
212 | adcl -116(%esi), %eax | ||
213 | adcl -112(%esi), %eax | ||
214 | adcl -108(%esi), %eax | ||
215 | adcl -104(%esi), %eax | ||
216 | adcl -100(%esi), %eax | ||
217 | adcl -96(%esi), %eax | ||
218 | adcl -92(%esi), %eax | ||
219 | adcl -88(%esi), %eax | ||
220 | adcl -84(%esi), %eax | ||
221 | adcl -80(%esi), %eax | ||
222 | adcl -76(%esi), %eax | ||
223 | adcl -72(%esi), %eax | ||
224 | adcl -68(%esi), %eax | ||
225 | adcl -64(%esi), %eax | ||
226 | adcl -60(%esi), %eax | ||
227 | adcl -56(%esi), %eax | ||
228 | adcl -52(%esi), %eax | ||
229 | adcl -48(%esi), %eax | ||
230 | adcl -44(%esi), %eax | ||
231 | adcl -40(%esi), %eax | ||
232 | adcl -36(%esi), %eax | ||
233 | adcl -32(%esi), %eax | ||
234 | adcl -28(%esi), %eax | ||
235 | adcl -24(%esi), %eax | ||
236 | adcl -20(%esi), %eax | ||
237 | adcl -16(%esi), %eax | ||
238 | adcl -12(%esi), %eax | ||
239 | adcl -8(%esi), %eax | ||
240 | adcl -4(%esi), %eax | ||
241 | 45: | ||
242 | lea 128(%esi), %esi /* lea leaves CF alone, so the adcl below still sees the carry of the chain */ | ||
243 | adcl $0, %eax | ||
244 | dec %ecx | ||
245 | jge 40b | ||
246 | movl %edx, %ecx | ||
247 | 50: andl $3, %ecx | ||
248 | jz 80f | ||
249 | |||
250 | # Handle the last 1-3 bytes without jumping | ||
251 | notl %ecx # 1->2, 2->1, 3->0, higher bits are masked | ||
252 | movl $0xffffff,%ebx # by the shll and shrl instructions | ||
253 | shll $3,%ecx | ||
254 | shrl %cl,%ebx | ||
255 | andl -128(%esi),%ebx # esi is 4-aligned so should be ok | ||
256 | addl %ebx,%eax | ||
257 | adcl $0,%eax | ||
258 | 80: | ||
259 | testl $1, 12(%esp) /* was the original buf pointer odd? */ | ||
260 | jz 90f | ||
261 | roll $8, %eax | ||
262 | 90: | ||
263 | popl %ebx | ||
264 | CFI_ADJUST_CFA_OFFSET -4 | ||
265 | CFI_RESTORE ebx | ||
266 | popl %esi | ||
267 | CFI_ADJUST_CFA_OFFSET -4 | ||
268 | CFI_RESTORE esi | ||
269 | ret | ||
270 | CFI_ENDPROC | ||
271 | ENDPROC(csum_partial) | ||
272 | |||
273 | #endif | ||
274 | |||
275 | /* | ||
276 | unsigned int csum_partial_copy_generic (const char *src, char *dst, | ||
277 | int len, int sum, int *src_err_ptr, int *dst_err_ptr) | ||
278 | */ | ||
279 | |||
280 | /* | ||
281 | * Copy from ds while checksumming, otherwise like csum_partial | ||
282 | * | ||
283 | * The macros SRC and DST specify the type of access for the instruction. | ||
284 | * thus we can call a custom exception handler for all access types: each wrapped instruction gets an __ex_table entry pointing at the fixup label 6001 (SRC, source access) or 6002 (DST, destination access). | ||
285 | * | ||
286 | * FIXME: could someone double-check whether I haven't mixed up some SRC and | ||
287 | * DST definitions? It's damn hard to trigger all cases. I hope I got | ||
288 | * them all but there's no guarantee. | ||
289 | */ | ||
290 | |||
291 | #define SRC(y...) \ | ||
292 | 9999: y; \ | ||
293 | .section __ex_table, "a"; \ | ||
294 | .long 9999b, 6001f ; \ | ||
295 | .previous | ||
296 | |||
297 | #define DST(y...) \ | ||
298 | 9999: y; \ | ||
299 | .section __ex_table, "a"; \ | ||
300 | .long 9999b, 6002f ; \ | ||
301 | .previous | ||
302 | |||
303 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | ||
304 | |||
305 | #define ARGBASE 16 /* bytes placed on the stack by the prologue (one reserved slot + three pushes); ARGBASE+4 is the first argument */ | ||
306 | #define FP 12 /* offset of the scratch slot reserved by the subl $4 below */ | ||
307 | |||
308 | ENTRY(csum_partial_copy_generic) /* generic version: assembled when CONFIG_X86_USE_PPRO_CHECKSUM is not defined; checksum is returned in %eax */ | ||
309 | CFI_STARTPROC | ||
310 | subl $4,%esp | ||
311 | CFI_ADJUST_CFA_OFFSET 4 | ||
312 | pushl %edi | ||
313 | CFI_ADJUST_CFA_OFFSET 4 | ||
314 | CFI_REL_OFFSET edi, 0 | ||
315 | pushl %esi | ||
316 | CFI_ADJUST_CFA_OFFSET 4 | ||
317 | CFI_REL_OFFSET esi, 0 | ||
318 | pushl %ebx | ||
319 | CFI_ADJUST_CFA_OFFSET 4 | ||
320 | CFI_REL_OFFSET ebx, 0 | ||
321 | movl ARGBASE+16(%esp),%eax # sum | ||
322 | movl ARGBASE+12(%esp),%ecx # len | ||
323 | movl ARGBASE+4(%esp),%esi # src | ||
324 | movl ARGBASE+8(%esp),%edi # dst | ||
325 | |||
326 | testl $2, %edi # Check alignment. | ||
327 | jz 2f # Jump if alignment is ok. | ||
328 | subl $2, %ecx # Alignment uses up two bytes. | ||
329 | jae 1f # Jump if we had at least two bytes. | ||
330 | addl $2, %ecx # ecx was < 2. Deal with it. | ||
331 | jmp 4f | ||
332 | SRC(1: movw (%esi), %bx ) | ||
333 | addl $2, %esi | ||
334 | DST( movw %bx, (%edi) ) | ||
335 | addl $2, %edi | ||
336 | addw %bx, %ax | ||
337 | adcl $0, %eax | ||
338 | 2: | ||
339 | movl %ecx, FP(%esp) /* save the byte count; %ecx becomes the number of 32-byte blocks */ | ||
340 | shrl $5, %ecx | ||
341 | jz 2f | ||
342 | testl %esi, %esi /* clears CF before the adcl chain */ | ||
343 | SRC(1: movl (%esi), %ebx ) | ||
344 | SRC( movl 4(%esi), %edx ) | ||
345 | adcl %ebx, %eax | ||
346 | DST( movl %ebx, (%edi) ) | ||
347 | adcl %edx, %eax | ||
348 | DST( movl %edx, 4(%edi) ) | ||
349 | |||
350 | SRC( movl 8(%esi), %ebx ) | ||
351 | SRC( movl 12(%esi), %edx ) | ||
352 | adcl %ebx, %eax | ||
353 | DST( movl %ebx, 8(%edi) ) | ||
354 | adcl %edx, %eax | ||
355 | DST( movl %edx, 12(%edi) ) | ||
356 | |||
357 | SRC( movl 16(%esi), %ebx ) | ||
358 | SRC( movl 20(%esi), %edx ) | ||
359 | adcl %ebx, %eax | ||
360 | DST( movl %ebx, 16(%edi) ) | ||
361 | adcl %edx, %eax | ||
362 | DST( movl %edx, 20(%edi) ) | ||
363 | |||
364 | SRC( movl 24(%esi), %ebx ) | ||
365 | SRC( movl 28(%esi), %edx ) | ||
366 | adcl %ebx, %eax | ||
367 | DST( movl %ebx, 24(%edi) ) | ||
368 | adcl %edx, %eax | ||
369 | DST( movl %edx, 28(%edi) ) | ||
370 | |||
371 | lea 32(%esi), %esi /* lea and dec leave CF alone, so the carry survives into the next iteration */ | ||
372 | lea 32(%edi), %edi | ||
373 | dec %ecx | ||
374 | jne 1b | ||
375 | adcl $0, %eax /* fold in the final carry of the block loop */ | ||
376 | 2: movl FP(%esp), %edx | ||
377 | movl %edx, %ecx | ||
378 | andl $0x1c, %edx /* bytes left in whole dwords (0..28) */ | ||
379 | je 4f | ||
380 | shrl $2, %edx # This clears CF | ||
381 | SRC(3: movl (%esi), %ebx ) | ||
382 | adcl %ebx, %eax | ||
383 | DST( movl %ebx, (%edi) ) | ||
384 | lea 4(%esi), %esi | ||
385 | lea 4(%edi), %edi | ||
386 | dec %edx | ||
387 | jne 3b | ||
388 | adcl $0, %eax | ||
389 | 4: andl $3, %ecx /* 0..3 trailing bytes */ | ||
390 | jz 7f | ||
391 | cmpl $2, %ecx | ||
392 | jb 5f /* exactly one byte left */ | ||
393 | SRC( movw (%esi), %cx ) | ||
394 | leal 2(%esi), %esi | ||
395 | DST( movw %cx, (%edi) ) | ||
396 | leal 2(%edi), %edi | ||
397 | je 6f /* flags still come from the cmpl: exactly two bytes left */ | ||
398 | shll $16,%ecx | ||
399 | SRC(5: movb (%esi), %cl ) | ||
400 | DST( movb %cl, (%edi) ) | ||
401 | 6: addl %ecx, %eax | ||
402 | adcl $0, %eax | ||
403 | 7: | ||
404 | 5000: /* the fixup handlers jump back here; the code after .previous (the epilogue) follows directly in .text */ | ||
405 | |||
406 | # Exception handler: | ||
407 | .section .fixup, "ax" | ||
408 | |||
409 | 6001: | ||
410 | movl ARGBASE+20(%esp), %ebx # src_err_ptr | ||
411 | movl $-EFAULT, (%ebx) | ||
412 | |||
413 | # zero the complete destination - computing the rest | ||
414 | # is too much work | ||
415 | movl ARGBASE+8(%esp), %edi # dst | ||
416 | movl ARGBASE+12(%esp), %ecx # len | ||
417 | xorl %eax,%eax /* fill value for stosb; also leaves 0 in the return register */ | ||
418 | rep ; stosb | ||
419 | |||
420 | jmp 5000b | ||
421 | |||
422 | 6002: | ||
423 | movl ARGBASE+24(%esp), %ebx # dst_err_ptr | ||
424 | movl $-EFAULT,(%ebx) | ||
425 | jmp 5000b | ||
426 | |||
427 | .previous | ||
428 | |||
429 | popl %ebx | ||
430 | CFI_ADJUST_CFA_OFFSET -4 | ||
431 | CFI_RESTORE ebx | ||
432 | popl %esi | ||
433 | CFI_ADJUST_CFA_OFFSET -4 | ||
434 | CFI_RESTORE esi | ||
435 | popl %edi | ||
436 | CFI_ADJUST_CFA_OFFSET -4 | ||
437 | CFI_RESTORE edi | ||
438 | popl %ecx # equivalent to addl $4,%esp | ||
439 | CFI_ADJUST_CFA_OFFSET -4 | ||
440 | ret | ||
441 | CFI_ENDPROC | ||
442 | ENDPROC(csum_partial_copy_generic) | ||
443 | |||
444 | #else | ||
445 | |||
446 | /* Version for PentiumII/PPro. ROUND1 copies one dword and starts a carry chain with addl; ROUND copies one dword and continues the chain with adcl. */ | ||
447 | |||
448 | #define ROUND1(x) \ | ||
449 | SRC(movl x(%esi), %ebx ) ; \ | ||
450 | addl %ebx, %eax ; \ | ||
451 | DST(movl %ebx, x(%edi) ) ; | ||
452 | |||
453 | #define ROUND(x) \ | ||
454 | SRC(movl x(%esi), %ebx ) ; \ | ||
455 | adcl %ebx, %eax ; \ | ||
456 | DST(movl %ebx, x(%edi) ) ; | ||
457 | |||
458 | #define ARGBASE 12 /* bytes pushed by the prologue (three registers); ARGBASE+4 is the first argument */ | ||
459 | |||
460 | ENTRY(csum_partial_copy_generic) /* checksum is returned in %eax */ | ||
461 | CFI_STARTPROC | ||
462 | pushl %ebx | ||
463 | CFI_ADJUST_CFA_OFFSET 4 | ||
464 | CFI_REL_OFFSET ebx, 0 | ||
465 | pushl %edi | ||
466 | CFI_ADJUST_CFA_OFFSET 4 | ||
467 | CFI_REL_OFFSET edi, 0 | ||
468 | pushl %esi | ||
469 | CFI_ADJUST_CFA_OFFSET 4 | ||
470 | CFI_REL_OFFSET esi, 0 | ||
471 | movl ARGBASE+4(%esp),%esi #src | ||
472 | movl ARGBASE+8(%esp),%edi #dst | ||
473 | movl ARGBASE+12(%esp),%ecx #len | ||
474 | movl ARGBASE+16(%esp),%eax #sum | ||
475 | # movl %ecx, %edx | ||
476 | movl %ecx, %ebx | ||
477 | movl %esi, %edx | ||
478 | shrl $6, %ecx /* number of full 64-byte blocks */ | ||
479 | andl $0x3c, %ebx /* bytes of whole dwords in the partial 64-byte block */ | ||
480 | negl %ebx | ||
481 | subl %ebx, %esi /* advance src and dst past the partial block; it is addressed with negative offsets */ | ||
482 | subl %ebx, %edi | ||
483 | lea -1(%esi),%edx | ||
484 | andl $-32,%edx | ||
485 | lea 3f(%ebx,%ebx), %ebx /* entry point = label 3 minus 2 code bytes per data byte; assumes every ROUND assembles to 8 bytes */ | ||
486 | testl %esi, %esi /* clears CF before jumping into the adcl chain */ | ||
487 | jmp *%ebx | ||
488 | 1: addl $64,%esi | ||
489 | addl $64,%edi | ||
490 | SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) | ||
491 | ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) | ||
492 | ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) | ||
493 | ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) | ||
494 | ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) | ||
495 | 3: adcl $0,%eax /* fold in the carry of the chain */ | ||
496 | addl $64, %edx | ||
497 | dec %ecx | ||
498 | jge 1b | ||
499 | 4: movl ARGBASE+12(%esp),%edx #len | ||
500 | andl $3, %edx | ||
501 | jz 7f | ||
502 | cmpl $2, %edx | ||
503 | jb 5f /* exactly one byte left */ | ||
504 | SRC( movw (%esi), %dx ) | ||
505 | leal 2(%esi), %esi | ||
506 | DST( movw %dx, (%edi) ) | ||
507 | leal 2(%edi), %edi | ||
508 | je 6f /* flags still come from the cmpl: exactly two bytes left */ | ||
509 | shll $16,%edx | ||
510 | 5: | ||
511 | SRC( movb (%esi), %dl ) | ||
512 | DST( movb %dl, (%edi) ) | ||
513 | 6: addl %edx, %eax | ||
514 | adcl $0, %eax | ||
515 | 7: | ||
516 | .section .fixup, "ax" | ||
517 | 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr | ||
518 | movl $-EFAULT, (%ebx) | ||
519 | # zero the complete destination (computing the rest is too much work) | ||
520 | movl ARGBASE+8(%esp),%edi # dst | ||
521 | movl ARGBASE+12(%esp),%ecx # len | ||
522 | xorl %eax,%eax /* fill value for stosb; also leaves 0 in the return register */ | ||
523 | rep; stosb | ||
524 | jmp 7b /* resume at the epilogue that follows label 7 in .text */ | ||
525 | 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr | ||
526 | movl $-EFAULT, (%ebx) | ||
527 | jmp 7b | ||
528 | .previous | ||
529 | |||
530 | popl %esi | ||
531 | CFI_ADJUST_CFA_OFFSET -4 | ||
532 | CFI_RESTORE esi | ||
533 | popl %edi | ||
534 | CFI_ADJUST_CFA_OFFSET -4 | ||
535 | CFI_RESTORE edi | ||
536 | popl %ebx | ||
537 | CFI_ADJUST_CFA_OFFSET -4 | ||
538 | CFI_RESTORE ebx | ||
539 | ret | ||
540 | CFI_ENDPROC | ||
541 | ENDPROC(csum_partial_copy_generic) | ||
542 | |||
543 | #undef ROUND | ||
544 | #undef ROUND1 | ||
545 | |||
546 | #endif | ||
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c new file mode 100644 index 000000000000..f6edb11364df --- /dev/null +++ b/arch/x86/lib/delay_32.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | * Precise Delay Loops for i386 | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
6 | * | ||
7 | * The __delay function must _NOT_ be inlined as its execution time | ||
8 | * depends wildly on alignment on many x86 processors. The additional | ||
9 | * jump magic is needed to get the timing stable on all the CPU's | ||
10 | * we have to worry about. | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/delay.h> | ||
16 | |||
17 | #include <asm/processor.h> | ||
18 | #include <asm/delay.h> | ||
19 | #include <asm/timer.h> | ||
20 | |||
21 | #ifdef CONFIG_SMP | ||
22 | # include <asm/smp.h> | ||
23 | #endif | ||
24 | |||
25 | /* simple loop based delay: counts @loops down in %eax until the value goes negative */ | ||
26 | static void delay_loop(unsigned long loops) | ||
27 | { | ||
28 | int d0; /* dummy output: tells the compiler that %eax is modified */ | ||
29 | |||
30 | __asm__ __volatile__( | ||
31 | "\tjmp 1f\n" | ||
32 | ".align 16\n" /* jumps plus alignment: see the timing note in the header comment of this file */ | ||
33 | "1:\tjmp 2f\n" | ||
34 | ".align 16\n" | ||
35 | "2:\tdecl %0\n\tjns 2b" /* keep looping while the decremented value is non-negative */ | ||
36 | :"=&a" (d0) | ||
37 | :"0" (loops)); | ||
38 | } | ||
39 | |||
40 | /* TSC based delay: busy-waits until the value read by rdtscl() has advanced by at least @loops */ | ||
41 | static void delay_tsc(unsigned long loops) | ||
42 | { | ||
43 | unsigned long bclock, now; | ||
44 | |||
45 | rdtscl(bclock); /* starting timestamp */ | ||
46 | do { | ||
47 | rep_nop(); | ||
48 | rdtscl(now); | ||
49 | } while ((now-bclock) < loops); /* unsigned difference, so a wrap of the counter value is harmless */ | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Backend used by __delay(); defaults to the counting loop. Since we calibrate only once at boot, this | ||
54 | * function should be set once at boot and not changed | ||
55 | */ | ||
56 | static void (*delay_fn)(unsigned long) = delay_loop; | ||
57 | |||
58 | void use_tsc_delay(void) /* switch __delay() over to the TSC based backend */ | ||
59 | { | ||
60 | delay_fn = delay_tsc; | ||
61 | } | ||
62 | |||
63 | int read_current_timer(unsigned long *timer_val) /* returns 0 with *timer_val filled in when the TSC backend is active, -1 otherwise */ | ||
64 | { | ||
65 | if (delay_fn == delay_tsc) { /* only meaningful once use_tsc_delay() has been called */ | ||
66 | rdtscl(*timer_val); | ||
67 | return 0; | ||
68 | } | ||
69 | return -1; /* *timer_val is left untouched */ | ||
70 | } | ||
71 | |||
72 | void __delay(unsigned long loops) /* delay for @loops units of whichever backend delay_fn currently points at */ | ||
73 | { | ||
74 | delay_fn(loops); | ||
75 | } | ||
76 | |||
77 | inline void __const_udelay(unsigned long xloops) /* @xloops: delay pre-scaled by 2**32, as computed by __udelay() and __ndelay() */ | ||
78 | { | ||
79 | int d0; /* dummy output: tells the compiler that %eax is modified */ | ||
80 | |||
81 | xloops *= 4; /* pre-scale by 4; the multiplier below uses HZ/4 to compensate */ | ||
82 | __asm__("mull %0" /* edx:eax = eax * edx; only the high 32 bits (edx) are kept, i.e. the product divided by 2**32 */ | ||
83 | :"=d" (xloops), "=&a" (d0) | ||
84 | :"1" (xloops), "0" | ||
85 | (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4))); | ||
86 | |||
87 | __delay(++xloops); /* one is added to the truncated quotient before delaying */ | ||
88 | } | ||
89 | |||
90 | void __udelay(unsigned long usecs) /* NOTE(review): usecs * 0x10c7, and the later * 4 in __const_udelay(), wrap in unsigned long for large @usecs; callers presumably bound the argument - confirm */ | ||
91 | { | ||
92 | __const_udelay(usecs * 0x000010c7); /* 0x10c7 = 4295 = 2**32 / 1000000 (rounded up) */ | ||
93 | } | ||
94 | |||
95 | void __ndelay(unsigned long nsecs) /* nanosecond counterpart of __udelay(); same scaling scheme with a coarser multiplier */ | ||
96 | { | ||
97 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | ||
98 | } | ||
99 | |||
100 | EXPORT_SYMBOL(__delay); | ||
101 | EXPORT_SYMBOL(__const_udelay); | ||
102 | EXPORT_SYMBOL(__udelay); | ||
103 | EXPORT_SYMBOL(__ndelay); | ||
diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S new file mode 100644 index 000000000000..6d84b53f12a2 --- /dev/null +++ b/arch/x86/lib/getuser_32.S | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * __get_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * | ||
6 | * These functions have a non-standard call interface | ||
7 | * to make them more efficient, especially as they | ||
8 | * return an error value in addition to the "real" | ||
9 | * return value. | ||
10 | */ | ||
11 | #include <linux/linkage.h> | ||
12 | #include <asm/dwarf2.h> | ||
13 | #include <asm/thread_info.h> | ||
14 | |||
15 | |||
16 | /* | ||
17 | * __get_user_X | ||
18 | * | ||
19 | * Inputs: %eax contains the address | ||
20 | * | ||
21 | * Outputs: %eax is error code (0 or -EFAULT) | ||
22 | * %edx contains zero-extended value | ||
23 | * | ||
24 | * These functions should not modify any other registers, | ||
25 | * as they get called from within inline assembly. | ||
26 | */ | ||
27 | |||
28 | .text | ||
29 | ENTRY(__get_user_1) /* load one byte from the address in %eax; register contract is in the __get_user_X comment above */ | ||
30 | CFI_STARTPROC | ||
31 | GET_THREAD_INFO(%edx) | ||
32 | cmpl TI_addr_limit(%edx),%eax | ||
33 | jae bad_get_user /* address at or above the addr_limit field: fail */ | ||
34 | 1: movzbl (%eax),%edx /* label 1 has an __ex_table entry: a fault here continues at bad_get_user */ | ||
35 | xorl %eax,%eax /* success: error code 0 */ | ||
36 | ret | ||
37 | CFI_ENDPROC | ||
38 | ENDPROC(__get_user_1) | ||
39 | |||
40 | ENTRY(__get_user_2) /* load a 16-bit value from the address in %eax; register contract is in the __get_user_X comment above */ | ||
41 | CFI_STARTPROC | ||
42 | addl $1,%eax /* %eax = address of the last byte accessed */ | ||
43 | jc bad_get_user /* the access would wrap around the top of the address space */ | ||
44 | GET_THREAD_INFO(%edx) | ||
45 | cmpl TI_addr_limit(%edx),%eax | ||
46 | jae bad_get_user /* last byte at or above the addr_limit field: fail */ | ||
47 | 2: movzwl -1(%eax),%edx /* label 2 has an __ex_table entry: a fault here continues at bad_get_user */ | ||
48 | xorl %eax,%eax /* success: error code 0 */ | ||
49 | ret | ||
50 | CFI_ENDPROC | ||
51 | ENDPROC(__get_user_2) | ||
52 | |||
53 | ENTRY(__get_user_4) /* load a 32-bit value from the address in %eax; register contract is in the __get_user_X comment above */ | ||
54 | CFI_STARTPROC | ||
55 | addl $3,%eax /* %eax = address of the last byte accessed */ | ||
56 | jc bad_get_user /* the access would wrap around the top of the address space */ | ||
57 | GET_THREAD_INFO(%edx) | ||
58 | cmpl TI_addr_limit(%edx),%eax | ||
59 | jae bad_get_user /* last byte at or above the addr_limit field: fail */ | ||
60 | 3: movl -3(%eax),%edx /* label 3 has an __ex_table entry: a fault here continues at bad_get_user */ | ||
61 | xorl %eax,%eax /* success: error code 0 */ | ||
62 | ret | ||
63 | CFI_ENDPROC | ||
64 | ENDPROC(__get_user_4) | ||
65 | |||
66 | bad_get_user: /* common failure exit for the __get_user_X routines: value 0, error code in %eax */ | ||
67 | CFI_STARTPROC | ||
68 | xorl %edx,%edx | ||
69 | movl $-14,%eax /* presumably -EFAULT, as promised by the __get_user_X comment (assumes EFAULT == 14 - confirm) */ | ||
70 | ret | ||
71 | CFI_ENDPROC | ||
72 | END(bad_get_user) | ||
73 | |||
74 | .section __ex_table,"a" | ||
75 | .long 1b,bad_get_user /* faulting user load in __get_user_1 */ | ||
76 | .long 2b,bad_get_user /* faulting user load in __get_user_2 */ | ||
77 | .long 3b,bad_get_user /* faulting user load in __get_user_4 */ | ||
78 | .previous | ||
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c new file mode 100644 index 000000000000..8ac51b82a632 --- /dev/null +++ b/arch/x86/lib/memcpy_32.c | |||
@@ -0,0 +1,43 @@ | |||
1 | #include <linux/string.h> | ||
2 | #include <linux/module.h> | ||
3 | |||
4 | #undef memcpy | ||
5 | #undef memset | ||
6 | |||
7 | void *memcpy(void *to, const void *from, size_t n) /* out-of-line memcpy; forwards to the helper selected at build time and returns its result */ | ||
8 | { | ||
9 | #ifdef CONFIG_X86_USE_3DNOW | ||
10 | return __memcpy3d(to, from, n); /* 3DNow! build */ | ||
11 | #else | ||
12 | return __memcpy(to, from, n); | ||
13 | #endif | ||
14 | } | ||
15 | EXPORT_SYMBOL(memcpy); | ||
16 | |||
17 | void *memset(void *s, int c, size_t count) /* out-of-line memset; forwards to __memset() and returns its result */ | ||
18 | { | ||
19 | return __memset(s, c, count); | ||
20 | } | ||
21 | EXPORT_SYMBOL(memset); | ||
22 | |||
23 | void *memmove(void *dest, const void *src, size_t n) /* copy @n bytes between buffers that may overlap; returns @dest */ | ||
24 | { | ||
25 | int d0, d1, d2; /* dummy outputs: ecx, esi and edi are modified by the asm below */ | ||
26 | |||
27 | if (dest < src) { /* destination below source: delegate to memcpy (assumes it copies in ascending order - TODO confirm) */ | ||
28 | memcpy(dest,src,n); | ||
29 | } else { | ||
30 | __asm__ __volatile__( | ||
31 | "std\n\t" /* set the direction flag: copy downwards, starting at the last byte */ | ||
32 | "rep\n\t" | ||
33 | "movsb\n\t" | ||
34 | "cld" /* clear the direction flag again */ | ||
35 | : "=&c" (d0), "=&S" (d1), "=&D" (d2) | ||
36 | :"0" (n), | ||
37 | "1" (n-1+(const char *)src), /* address of the last source byte */ | ||
38 | "2" (n-1+(char *)dest) /* address of the last destination byte */ | ||
39 | :"memory"); | ||
40 | } | ||
41 | return dest; | ||
42 | } | ||
43 | EXPORT_SYMBOL(memmove); | ||
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c new file mode 100644 index 000000000000..28084d2e8dd4 --- /dev/null +++ b/arch/x86/lib/mmx_32.c | |||
@@ -0,0 +1,403 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/string.h> | ||
3 | #include <linux/sched.h> | ||
4 | #include <linux/hardirq.h> | ||
5 | #include <linux/module.h> | ||
6 | |||
7 | #include <asm/i387.h> | ||
8 | |||
9 | |||
10 | /* | ||
11 | * MMX 3DNow! library helper functions | ||
12 | * | ||
13 | * To do: | ||
14 | * We can use MMX just for prefetch in IRQ's. This may be a win. | ||
15 | * (reported so on K6-III) | ||
16 | * We should use a better code neutral filler for the short jump | ||
17 | * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? | ||
18 | * We also want to clobber the filler register so we don't get any | ||
19 | * register forwarding stalls on the filler. | ||
20 | * | ||
21 | * Add *user handling. Checksums are not a win with MMX on any CPU | ||
22 | * tested so far for any MMX solution figured. | ||
23 | * | ||
24 | * 22/09/2000 - Arjan van de Ven | ||
25 | * Improved for non-engineering-sample Athlons | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | void *_mmx_memcpy(void *to, const void *from, size_t len) /* memcpy that moves 64-byte chunks through MMX registers; returns the original 'to' */ | ||
30 | { | ||
31 | void *p; /* keeps the original destination for the return value, since 'to' is advanced below */ | ||
32 | int i; /* number of whole 64-byte chunks still to copy */ | ||
33 | |||
34 | if (unlikely(in_interrupt())) /* in interrupt context: do not touch MMX state, fall back to __memcpy */ | ||
35 | return __memcpy(to, from, len); | ||
36 | |||
37 | p = to; | ||
38 | i = len >> 6; /* len/64 */ | ||
39 | |||
40 | kernel_fpu_begin(); /* paired with kernel_fpu_end() below; brackets all use of the mm registers */ | ||
41 | |||
42 | __asm__ __volatile__ ( | ||
43 | "1: prefetch (%0)\n" /* This set is 28 bytes */ | ||
44 | " prefetch 64(%0)\n" | ||
45 | " prefetch 128(%0)\n" | ||
46 | " prefetch 192(%0)\n" | ||
47 | " prefetch 256(%0)\n" | ||
48 | "2: \n" | ||
49 | ".section .fixup, \"ax\"\n" | ||
50 | "3: movw $0x1AEB, 1b\n" /* fixup: overwrite the code at 1: with a short jump (bytes EB 1A) over the remaining 26 bytes of prefetches */ | ||
51 | " jmp 2b\n" | ||
52 | ".previous\n" | ||
53 | ".section __ex_table,\"a\"\n" | ||
54 | " .align 4\n" | ||
55 | " .long 1b, 3b\n" /* pairs the instruction at 1: with the fixup at 3: (presumably taken when the prefetch faults) */ | ||
56 | ".previous" | ||
57 | : : "r" (from) ); | ||
58 | |||
59 | |||
60 | for(; i>5; i--) /* main loop: runs while more than 5 chunks remain, prefetching 320 bytes (5 chunks) ahead */ | ||
61 | { | ||
62 | __asm__ __volatile__ ( | ||
63 | "1: prefetch 320(%0)\n" | ||
64 | "2: movq (%0), %%mm0\n" /* load the first 32 bytes into mm0-mm3 ... */ | ||
65 | " movq 8(%0), %%mm1\n" | ||
66 | " movq 16(%0), %%mm2\n" | ||
67 | " movq 24(%0), %%mm3\n" | ||
68 | " movq %%mm0, (%1)\n" /* ... and store them to the destination */ | ||
69 | " movq %%mm1, 8(%1)\n" | ||
70 | " movq %%mm2, 16(%1)\n" | ||
71 | " movq %%mm3, 24(%1)\n" | ||
72 | " movq 32(%0), %%mm0\n" /* same again for the second 32 bytes of the chunk */ | ||
73 | " movq 40(%0), %%mm1\n" | ||
74 | " movq 48(%0), %%mm2\n" | ||
75 | " movq 56(%0), %%mm3\n" | ||
76 | " movq %%mm0, 32(%1)\n" | ||
77 | " movq %%mm1, 40(%1)\n" | ||
78 | " movq %%mm2, 48(%1)\n" | ||
79 | " movq %%mm3, 56(%1)\n" | ||
80 | ".section .fixup, \"ax\"\n" | ||
81 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
82 | " jmp 2b\n" | ||
83 | ".previous\n" | ||
84 | ".section __ex_table,\"a\"\n" | ||
85 | " .align 4\n" | ||
86 | " .long 1b, 3b\n" | ||
87 | ".previous" | ||
88 | : : "r" (from), "r" (to) : "memory"); | ||
89 | from+=64; /* arithmetic on void * (GNU C extension): advance by one chunk */ | ||
90 | to+=64; | ||
91 | } | ||
92 | |||
93 | for(; i>0; i--) /* remaining chunks (at most 5): same copy, but without the prefetch */ | ||
94 | { | ||
95 | __asm__ __volatile__ ( | ||
96 | " movq (%0), %%mm0\n" | ||
97 | " movq 8(%0), %%mm1\n" | ||
98 | " movq 16(%0), %%mm2\n" | ||
99 | " movq 24(%0), %%mm3\n" | ||
100 | " movq %%mm0, (%1)\n" | ||
101 | " movq %%mm1, 8(%1)\n" | ||
102 | " movq %%mm2, 16(%1)\n" | ||
103 | " movq %%mm3, 24(%1)\n" | ||
104 | " movq 32(%0), %%mm0\n" | ||
105 | " movq 40(%0), %%mm1\n" | ||
106 | " movq 48(%0), %%mm2\n" | ||
107 | " movq 56(%0), %%mm3\n" | ||
108 | " movq %%mm0, 32(%1)\n" | ||
109 | " movq %%mm1, 40(%1)\n" | ||
110 | " movq %%mm2, 48(%1)\n" | ||
111 | " movq %%mm3, 56(%1)\n" | ||
112 | : : "r" (from), "r" (to) : "memory"); | ||
113 | from+=64; | ||
114 | to+=64; | ||
115 | } | ||
116 | /* | ||
117 | * Now do the tail of the block | ||
118 | */ | ||
119 | __memcpy(to, from, len&63); /* the 0..63 bytes left after the whole chunks */ | ||
120 | kernel_fpu_end(); | ||
121 | return p; | ||
122 | } | ||
123 | |||
124 | #ifdef CONFIG_MK7 | ||
125 | |||
126 | /* | ||
127 | * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and | ||
128 | * other MMX using processors do not. | ||
129 | */ | ||
130 | |||
131 | static void fast_clear_page(void *page) /* CONFIG_MK7 variant: zero 4096 bytes at 'page' using movntq stores */ | ||
132 | { | ||
133 | int i; | ||
134 | |||
135 | kernel_fpu_begin(); /* paired with kernel_fpu_end() at the end of the function */ | ||
136 | |||
137 | __asm__ __volatile__ ( | ||
138 | " pxor %%mm0, %%mm0\n" : : /* mm0 = 0, the value stored by every movntq below */ | ||
139 | ); | ||
140 | |||
141 | for(i=0;i<4096/64;i++) /* 64 iterations, 64 bytes (eight 8-byte stores) each */ | ||
142 | { | ||
143 | __asm__ __volatile__ ( | ||
144 | " movntq %%mm0, (%0)\n" | ||
145 | " movntq %%mm0, 8(%0)\n" | ||
146 | " movntq %%mm0, 16(%0)\n" | ||
147 | " movntq %%mm0, 24(%0)\n" | ||
148 | " movntq %%mm0, 32(%0)\n" | ||
149 | " movntq %%mm0, 40(%0)\n" | ||
150 | " movntq %%mm0, 48(%0)\n" | ||
151 | " movntq %%mm0, 56(%0)\n" | ||
152 | : : "r" (page) : "memory"); | ||
153 | page+=64; /* arithmetic on void * (GNU C extension): next 64-byte chunk */ | ||
154 | } | ||
155 | /* since movntq is weakly-ordered, a "sfence" is needed to become | ||
156 | * ordered again. | ||
157 | */ | ||
158 | __asm__ __volatile__ ( | ||
159 | " sfence \n" : : | ||
160 | ); | ||
161 | kernel_fpu_end(); | ||
162 | } | ||
163 | |||
164 | static void fast_copy_page(void *to, void *from) /* CONFIG_MK7 variant: copy 4096 bytes from 'from' to 'to' with movq loads and movntq stores */ | ||
165 | { | ||
166 | int i; | ||
167 | |||
168 | kernel_fpu_begin(); /* paired with kernel_fpu_end() at the end of the function */ | ||
169 | |||
170 | /* maybe the prefetch stuff can go before the expensive fnsave... | ||
171 | * but that is for later. -AV | ||
172 | */ | ||
173 | __asm__ __volatile__ ( | ||
174 | "1: prefetch (%0)\n" /* prefetch the first 320 bytes of the source up front */ | ||
175 | " prefetch 64(%0)\n" | ||
176 | " prefetch 128(%0)\n" | ||
177 | " prefetch 192(%0)\n" | ||
178 | " prefetch 256(%0)\n" | ||
179 | "2: \n" | ||
180 | ".section .fixup, \"ax\"\n" | ||
181 | "3: movw $0x1AEB, 1b\n" /* fixup: overwrite the code at 1: with a short jump (bytes EB 1A), i.e. jmp on 26 bytes */ | ||
182 | " jmp 2b\n" | ||
183 | ".previous\n" | ||
184 | ".section __ex_table,\"a\"\n" | ||
185 | " .align 4\n" | ||
186 | " .long 1b, 3b\n" /* pairs the instruction at 1: with the fixup at 3: (presumably taken when the prefetch faults) */ | ||
187 | ".previous" | ||
188 | : : "r" (from) ); | ||
189 | |||
190 | for(i=0; i<(4096-320)/64; i++) /* all chunks except the last five: copy 64 bytes and prefetch 320 bytes ahead */ | ||
191 | { | ||
192 | __asm__ __volatile__ ( | ||
193 | "1: prefetch 320(%0)\n" | ||
194 | "2: movq (%0), %%mm0\n" | ||
195 | " movntq %%mm0, (%1)\n" | ||
196 | " movq 8(%0), %%mm1\n" | ||
197 | " movntq %%mm1, 8(%1)\n" | ||
198 | " movq 16(%0), %%mm2\n" | ||
199 | " movntq %%mm2, 16(%1)\n" | ||
200 | " movq 24(%0), %%mm3\n" | ||
201 | " movntq %%mm3, 24(%1)\n" | ||
202 | " movq 32(%0), %%mm4\n" | ||
203 | " movntq %%mm4, 32(%1)\n" | ||
204 | " movq 40(%0), %%mm5\n" | ||
205 | " movntq %%mm5, 40(%1)\n" | ||
206 | " movq 48(%0), %%mm6\n" | ||
207 | " movntq %%mm6, 48(%1)\n" | ||
208 | " movq 56(%0), %%mm7\n" | ||
209 | " movntq %%mm7, 56(%1)\n" | ||
210 | ".section .fixup, \"ax\"\n" | ||
211 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
212 | " jmp 2b\n" | ||
213 | ".previous\n" | ||
214 | ".section __ex_table,\"a\"\n" | ||
215 | " .align 4\n" | ||
216 | " .long 1b, 3b\n" | ||
217 | ".previous" | ||
218 | : : "r" (from), "r" (to) : "memory"); | ||
219 | from+=64; /* arithmetic on void * (GNU C extension): next 64-byte chunk */ | ||
220 | to+=64; | ||
221 | } | ||
222 | for(i=(4096-320)/64; i<4096/64; i++) /* last five chunks (320 bytes): no prefetch, since 320 bytes ahead would lie beyond the source page */ | ||
223 | { | ||
224 | __asm__ __volatile__ ( | ||
225 | "2: movq (%0), %%mm0\n" | ||
226 | " movntq %%mm0, (%1)\n" | ||
227 | " movq 8(%0), %%mm1\n" | ||
228 | " movntq %%mm1, 8(%1)\n" | ||
229 | " movq 16(%0), %%mm2\n" | ||
230 | " movntq %%mm2, 16(%1)\n" | ||
231 | " movq 24(%0), %%mm3\n" | ||
232 | " movntq %%mm3, 24(%1)\n" | ||
233 | " movq 32(%0), %%mm4\n" | ||
234 | " movntq %%mm4, 32(%1)\n" | ||
235 | " movq 40(%0), %%mm5\n" | ||
236 | " movntq %%mm5, 40(%1)\n" | ||
237 | " movq 48(%0), %%mm6\n" | ||
238 | " movntq %%mm6, 48(%1)\n" | ||
239 | " movq 56(%0), %%mm7\n" | ||
240 | " movntq %%mm7, 56(%1)\n" | ||
241 | : : "r" (from), "r" (to) : "memory"); | ||
242 | from+=64; | ||
243 | to+=64; | ||
244 | } | ||
245 | /* since movntq is weakly-ordered, a "sfence" is needed to become | ||
246 | * ordered again. | ||
247 | */ | ||
248 | __asm__ __volatile__ ( | ||
249 | " sfence \n" : : | ||
250 | ); | ||
251 | kernel_fpu_end(); | ||
252 | } | ||
253 | |||
254 | #else | ||
255 | |||
256 | /* | ||
257 | * Generic MMX implementation without K7 specific streaming | ||
258 | */ | ||
259 | |||
260 | static void fast_clear_page(void *page) /* non-MK7 variant: zero 4096 bytes at 'page' using ordinary movq stores */ | ||
261 | { | ||
262 | int i; | ||
263 | |||
264 | kernel_fpu_begin(); /* paired with kernel_fpu_end() at the end of the function */ | ||
265 | |||
266 | __asm__ __volatile__ ( | ||
267 | " pxor %%mm0, %%mm0\n" : : /* mm0 = 0, the value stored by every movq below */ | ||
268 | ); | ||
269 | |||
270 | for(i=0;i<4096/128;i++) /* 32 iterations, 128 bytes (sixteen 8-byte stores) each */ | ||
271 | { | ||
272 | __asm__ __volatile__ ( | ||
273 | " movq %%mm0, (%0)\n" | ||
274 | " movq %%mm0, 8(%0)\n" | ||
275 | " movq %%mm0, 16(%0)\n" | ||
276 | " movq %%mm0, 24(%0)\n" | ||
277 | " movq %%mm0, 32(%0)\n" | ||
278 | " movq %%mm0, 40(%0)\n" | ||
279 | " movq %%mm0, 48(%0)\n" | ||
280 | " movq %%mm0, 56(%0)\n" | ||
281 | " movq %%mm0, 64(%0)\n" | ||
282 | " movq %%mm0, 72(%0)\n" | ||
283 | " movq %%mm0, 80(%0)\n" | ||
284 | " movq %%mm0, 88(%0)\n" | ||
285 | " movq %%mm0, 96(%0)\n" | ||
286 | " movq %%mm0, 104(%0)\n" | ||
287 | " movq %%mm0, 112(%0)\n" | ||
288 | " movq %%mm0, 120(%0)\n" | ||
289 | : : "r" (page) : "memory"); | ||
290 | page+=128; /* arithmetic on void * (GNU C extension): next 128-byte chunk */ | ||
291 | } | ||
292 | |||
293 | kernel_fpu_end(); | ||
294 | } | ||
295 | |||
296 | static void fast_copy_page(void *to, void *from) /* non-MK7 variant: copy 4096 bytes from 'from' to 'to' through mm0-mm3 */ | ||
297 | { | ||
298 | int i; | ||
299 | |||
300 | |||
301 | kernel_fpu_begin(); /* paired with kernel_fpu_end() at the end of the function */ | ||
302 | |||
303 | __asm__ __volatile__ ( | ||
304 | "1: prefetch (%0)\n" /* prefetch the first 320 bytes of the source up front */ | ||
305 | " prefetch 64(%0)\n" | ||
306 | " prefetch 128(%0)\n" | ||
307 | " prefetch 192(%0)\n" | ||
308 | " prefetch 256(%0)\n" | ||
309 | "2: \n" | ||
310 | ".section .fixup, \"ax\"\n" | ||
311 | "3: movw $0x1AEB, 1b\n" /* fixup: overwrite the code at 1: with a short jump (bytes EB 1A), i.e. jmp on 26 bytes */ | ||
312 | " jmp 2b\n" | ||
313 | ".previous\n" | ||
314 | ".section __ex_table,\"a\"\n" | ||
315 | " .align 4\n" | ||
316 | " .long 1b, 3b\n" /* pairs the instruction at 1: with the fixup at 3: (presumably taken when the prefetch faults) */ | ||
317 | ".previous" | ||
318 | : : "r" (from) ); | ||
319 | |||
320 | for(i=0; i<4096/64; i++) /* 64 chunks of 64 bytes; NOTE(review): the prefetch runs on every chunk, so for the last five it addresses memory past the source page */ | ||
321 | { | ||
322 | __asm__ __volatile__ ( | ||
323 | "1: prefetch 320(%0)\n" | ||
324 | "2: movq (%0), %%mm0\n" /* load the first 32 bytes into mm0-mm3 ... */ | ||
325 | " movq 8(%0), %%mm1\n" | ||
326 | " movq 16(%0), %%mm2\n" | ||
327 | " movq 24(%0), %%mm3\n" | ||
328 | " movq %%mm0, (%1)\n" /* ... and store them to the destination */ | ||
329 | " movq %%mm1, 8(%1)\n" | ||
330 | " movq %%mm2, 16(%1)\n" | ||
331 | " movq %%mm3, 24(%1)\n" | ||
332 | " movq 32(%0), %%mm0\n" /* same again for the second 32 bytes of the chunk */ | ||
333 | " movq 40(%0), %%mm1\n" | ||
334 | " movq 48(%0), %%mm2\n" | ||
335 | " movq 56(%0), %%mm3\n" | ||
336 | " movq %%mm0, 32(%1)\n" | ||
337 | " movq %%mm1, 40(%1)\n" | ||
338 | " movq %%mm2, 48(%1)\n" | ||
339 | " movq %%mm3, 56(%1)\n" | ||
340 | ".section .fixup, \"ax\"\n" | ||
341 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
342 | " jmp 2b\n" | ||
343 | ".previous\n" | ||
344 | ".section __ex_table,\"a\"\n" | ||
345 | " .align 4\n" | ||
346 | " .long 1b, 3b\n" | ||
347 | ".previous" | ||
348 | : : "r" (from), "r" (to) : "memory"); | ||
349 | from+=64; /* arithmetic on void * (GNU C extension): next 64-byte chunk */ | ||
350 | to+=64; | ||
351 | } | ||
352 | kernel_fpu_end(); | ||
353 | } | ||
354 | |||
355 | |||
356 | #endif | ||
357 | |||
358 | /* | ||
359 | * Favour MMX for page clear and copy. | ||
360 | */ | ||
361 | |||
362 | static void slow_zero_page(void * page) /* zero 4096 bytes at 'page' with rep stosl; uses no MMX registers */ | ||
363 | { | ||
364 | int d0, d1; /* dummy outputs: tell the compiler that ecx and edi are modified by the asm */ | ||
365 | __asm__ __volatile__( \ | ||
366 | "cld\n\t" \ | ||
367 | "rep ; stosl" \ | ||
368 | : "=&c" (d0), "=&D" (d1) | ||
369 | :"a" (0),"1" (page),"0" (1024) /* eax = 0 (fill value), edi = page, ecx = 1024 dwords = 4096 bytes */ | ||
370 | :"memory"); | ||
371 | } | ||
372 | |||
373 | void mmx_clear_page(void * page) /* zero one page, choosing the MMX or the plain implementation by context */ | ||
374 | { | ||
375 | if(unlikely(in_interrupt())) /* in interrupt context: use the rep stosl version, which touches no MMX state */ | ||
376 | slow_zero_page(page); | ||
377 | else | ||
378 | fast_clear_page(page); /* otherwise use the MMX version selected by CONFIG_MK7 */ | ||
379 | } | ||
380 | |||
381 | static void slow_copy_page(void *to, void *from) /* copy 4096 bytes from 'from' to 'to' with rep movsl; uses no MMX registers */ | ||
382 | { | ||
383 | int d0, d1, d2; /* dummy outputs: tell the compiler that ecx, edi and esi are modified by the asm */ | ||
384 | __asm__ __volatile__( \ | ||
385 | "cld\n\t" \ | ||
386 | "rep ; movsl" \ | ||
387 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ | ||
388 | : "0" (1024),"1" ((long) to),"2" ((long) from) \ | ||
389 | : "memory"); /* inputs above: ecx = 1024 dwords (4096 bytes), edi = to, esi = from */ | ||
390 | } | ||
391 | |||
392 | |||
393 | void mmx_copy_page(void *to, void *from) /* copy one page, choosing the MMX or the plain implementation by context */ | ||
394 | { | ||
395 | if(unlikely(in_interrupt())) /* in interrupt context: use the rep movsl version, which touches no MMX state */ | ||
396 | slow_copy_page(to, from); | ||
397 | else | ||
398 | fast_copy_page(to, from); /* otherwise use the MMX version selected by CONFIG_MK7 */ | ||
399 | } | ||
400 | |||
401 | EXPORT_SYMBOL(_mmx_memcpy); | ||
402 | EXPORT_SYMBOL(mmx_clear_page); | ||
403 | EXPORT_SYMBOL(mmx_copy_page); | ||
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c new file mode 100644 index 000000000000..7767962f25d3 --- /dev/null +++ b/arch/x86/lib/msr-on-cpu.c | |||
@@ -0,0 +1,119 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/preempt.h> | ||
3 | #include <linux/smp.h> | ||
4 | #include <asm/msr.h> | ||
5 | |||
6 | struct msr_info { /* argument/result block handed to the per-CPU callbacks below through their void *info parameter */ | ||
7 | u32 msr_no; /* which MSR to access */ | ||
8 | u32 l, h; /* the two 32-bit values passed to rdmsr/wrmsr (presumably low and high half of the MSR) */ | ||
9 | int err; /* result of rdmsr_safe()/wrmsr_safe(); only written by the *_safe callbacks */ | ||
10 | }; | ||
11 | |||
12 | static void __rdmsr_on_cpu(void *info) /* callback for smp_call_function_single(): read the MSR named in *info on the CPU it runs on */ | ||
13 | { | ||
14 | struct msr_info *rv = info; | ||
15 | |||
16 | rdmsr(rv->msr_no, rv->l, rv->h); /* result is left in rv->l / rv->h for the caller */ | ||
17 | } | ||
18 | |||
19 | static void __rdmsr_safe_on_cpu(void *info) /* like __rdmsr_on_cpu, but uses rdmsr_safe() and records its return value */ | ||
20 | { | ||
21 | struct msr_info *rv = info; | ||
22 | |||
23 | rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); /* err is handed back to the caller through the struct */ | ||
24 | } | ||
25 | |||
26 | static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) /* read MSR msr_no on 'cpu' into *l / *h; returns rdmsr_safe()'s result when 'safe' is set, else 0 */ | ||
27 | { | ||
28 | int err = 0; | ||
29 | preempt_disable(); /* paired with preempt_enable() below; covers the smp_processor_id() test and the access */ | ||
30 | if (smp_processor_id() == cpu) /* already running on the target CPU: access the MSR directly */ | ||
31 | if (safe) | ||
32 | err = rdmsr_safe(msr_no, l, h); | ||
33 | else | ||
34 | rdmsr(msr_no, *l, *h); | ||
35 | else { /* this else pairs with the outer if (the inner if/else above is complete): run the read on 'cpu' */ | ||
36 | struct msr_info rv; /* only msr_no is set here; l, h (and err in the safe case) are filled in by the callback */ | ||
37 | |||
38 | rv.msr_no = msr_no; | ||
39 | if (safe) { | ||
40 | smp_call_function_single(cpu, __rdmsr_safe_on_cpu, | ||
41 | &rv, 0, 1); /* NOTE(review): return value is ignored; rv is read below regardless */ | ||
42 | err = rv.err; | ||
43 | } else { | ||
44 | smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); /* NOTE(review): return value is ignored here as well */ | ||
45 | } | ||
46 | *l = rv.l; | ||
47 | *h = rv.h; | ||
48 | } | ||
49 | preempt_enable(); | ||
50 | return err; | ||
51 | } | ||
52 | |||
53 | static void __wrmsr_on_cpu(void *info) /* callback for smp_call_function_single(): write rv->l / rv->h to the MSR named in *info */ | ||
54 | { | ||
55 | struct msr_info *rv = info; | ||
56 | |||
57 | wrmsr(rv->msr_no, rv->l, rv->h); /* no result is recorded */ | ||
58 | } | ||
59 | |||
60 | static void __wrmsr_safe_on_cpu(void *info) /* like __wrmsr_on_cpu, but uses wrmsr_safe() and records its return value */ | ||
61 | { | ||
62 | struct msr_info *rv = info; | ||
63 | |||
64 | rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h); /* err is handed back to the caller through the struct */ | ||
65 | } | ||
66 | |||
67 | static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) /* write l / h to MSR msr_no on 'cpu'; returns wrmsr_safe()'s result when 'safe' is set, else 0 */ | ||
68 | { | ||
69 | int err = 0; | ||
70 | preempt_disable(); /* paired with preempt_enable() below; covers the smp_processor_id() test and the access */ | ||
71 | if (smp_processor_id() == cpu) /* already running on the target CPU: access the MSR directly */ | ||
72 | if (safe) | ||
73 | err = wrmsr_safe(msr_no, l, h); | ||
74 | else | ||
75 | wrmsr(msr_no, l, h); | ||
76 | else { /* this else pairs with the outer if (the inner if/else above is complete): run the write on 'cpu' */ | ||
77 | struct msr_info rv; | ||
78 | |||
79 | rv.msr_no = msr_no; | ||
80 | rv.l = l; | ||
81 | rv.h = h; | ||
82 | if (safe) { | ||
83 | smp_call_function_single(cpu, __wrmsr_safe_on_cpu, | ||
84 | &rv, 0, 1); /* NOTE(review): return value is ignored; rv.err is read below regardless */ | ||
85 | err = rv.err; | ||
86 | } else { | ||
87 | smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); /* NOTE(review): return value is ignored here as well */ | ||
88 | } | ||
89 | } | ||
90 | preempt_enable(); | ||
91 | return err; | ||
92 | } | ||
93 | |||
94 | void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) /* write an MSR on the given CPU using the non-safe accessor */ | ||
95 | { | ||
96 | _wrmsr_on_cpu(cpu, msr_no, l, h, 0); /* safe = 0; the (always 0) result is dropped */ | ||
97 | } | ||
98 | |||
99 | void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) /* read an MSR on the given CPU into *l / *h using the non-safe accessor */ | ||
100 | { | ||
101 | _rdmsr_on_cpu(cpu, msr_no, l, h, 0); /* safe = 0; the (always 0) result is dropped */ | ||
102 | } | ||
103 | |||
104 | /* These "safe" variants are slower and should be used when the target MSR | ||
105 | may not actually exist. */ | ||
106 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) /* write an MSR on the given CPU via wrmsr_safe(); returns its result */ | ||
107 | { | ||
108 | return _wrmsr_on_cpu(cpu, msr_no, l, h, 1); /* safe = 1 */ | ||
109 | } | ||
110 | |||
111 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) /* read an MSR on the given CPU via rdmsr_safe(); returns its result */ | ||
112 | { | ||
113 | return _rdmsr_on_cpu(cpu, msr_no, l, h, 1); /* safe = 1 */ | ||
114 | } | ||
115 | |||
116 | EXPORT_SYMBOL(rdmsr_on_cpu); | ||
117 | EXPORT_SYMBOL(wrmsr_on_cpu); | ||
118 | EXPORT_SYMBOL(rdmsr_safe_on_cpu); | ||
119 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); | ||
diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser_32.S new file mode 100644 index 000000000000..f58fba109d18 --- /dev/null +++ b/arch/x86/lib/putuser_32.S | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * __put_user functions. | ||
3 | * | ||
4 | * (C) Copyright 2005 Linus Torvalds | ||
5 | * | ||
6 | * These functions have a non-standard call interface | ||
7 | * to make them more efficient, especially as they | ||
8 | * return an error value in addition to the "real" | ||
9 | * return value. | ||
10 | */ | ||
11 | #include <linux/linkage.h> | ||
12 | #include <asm/dwarf2.h> | ||
13 | #include <asm/thread_info.h> | ||
14 | |||
15 | |||
16 | /* | ||
17 | * __put_user_X | ||
18 | * | ||
19 | * Inputs: %eax[:%edx] contains the data | ||
20 | * %ecx contains the address | ||
21 | * | ||
22 | * Outputs: %eax is error code (0 or -EFAULT) | ||
23 | * | ||
24 | * These functions should not modify any other registers, | ||
25 | * as they get called from within inline assembly. | ||
26 | */ | ||
27 | |||
28 | #define ENTER CFI_STARTPROC ; \ | ||
29 | pushl %ebx ; \ | ||
30 | CFI_ADJUST_CFA_OFFSET 4 ; \ | ||
31 | CFI_REL_OFFSET ebx, 0 ; \ | ||
32 | GET_THREAD_INFO(%ebx) /* ENTER: save %ebx on the stack, then use it for the thread_info that the TI_addr_limit checks read */ | ||
33 | #define EXIT popl %ebx ; \ | ||
34 | CFI_ADJUST_CFA_OFFSET -4 ; \ | ||
35 | CFI_RESTORE ebx ; \ | ||
36 | ret ; \ | ||
37 | CFI_ENDPROC /* EXIT: restore the %ebx saved by ENTER and return to the caller */ | ||
38 | |||
39 | .text | ||
40 | ENTRY(__put_user_1) /* store the byte in %al at the address in %ecx; %eax = 0 on success */ | ||
41 | ENTER | ||
42 | cmpl TI_addr_limit(%ebx),%ecx | ||
43 | jae bad_put_user /* address >= addr_limit (unsigned): reject */ | ||
44 | 1: movb %al,(%ecx) /* the store itself; label 1 has an entry in the __ex_table at the end of the file */ | ||
45 | xorl %eax,%eax /* success: error code 0 */ | ||
46 | EXIT | ||
47 | ENDPROC(__put_user_1) | ||
48 | |||
49 | ENTRY(__put_user_2) /* store the 16-bit value in %ax at the address in %ecx; %eax = 0 on success */ | ||
50 | ENTER | ||
51 | movl TI_addr_limit(%ebx),%ebx | ||
52 | subl $1,%ebx /* limit - 1: the second byte of the store must be below the limit too */ | ||
53 | cmpl %ebx,%ecx | ||
54 | jae bad_put_user /* address >= limit - 1 (unsigned): reject */ | ||
55 | 2: movw %ax,(%ecx) /* the store itself; label 2 has an entry in the __ex_table at the end of the file */ | ||
56 | xorl %eax,%eax /* success: error code 0 */ | ||
57 | EXIT | ||
58 | ENDPROC(__put_user_2) | ||
59 | |||
60 | ENTRY(__put_user_4) /* store the 32-bit value in %eax at the address in %ecx; %eax = 0 on success */ | ||
61 | ENTER | ||
62 | movl TI_addr_limit(%ebx),%ebx | ||
63 | subl $3,%ebx /* limit - 3: all four bytes of the store must be below the limit */ | ||
64 | cmpl %ebx,%ecx | ||
65 | jae bad_put_user /* address >= limit - 3 (unsigned): reject */ | ||
66 | 3: movl %eax,(%ecx) /* the store itself; label 3 has an entry in the __ex_table at the end of the file */ | ||
67 | xorl %eax,%eax /* success: error code 0 */ | ||
68 | EXIT | ||
69 | ENDPROC(__put_user_4) | ||
70 | |||
71 | ENTRY(__put_user_8) /* store the 64-bit value in %eax (first dword) and %edx (second dword) at the address in %ecx */ | ||
72 | ENTER | ||
73 | movl TI_addr_limit(%ebx),%ebx | ||
74 | subl $7,%ebx /* limit - 7: all eight bytes of the store must be below the limit */ | ||
75 | cmpl %ebx,%ecx | ||
76 | jae bad_put_user /* address >= limit - 7 (unsigned): reject */ | ||
77 | 4: movl %eax,(%ecx) /* first dword; label 4 has an __ex_table entry */ | ||
78 | 5: movl %edx,4(%ecx) /* second dword; label 5 has its own __ex_table entry */ | ||
79 | xorl %eax,%eax /* success: error code 0 */ | ||
80 | EXIT | ||
81 | ENDPROC(__put_user_8) | ||
82 | |||
83 | bad_put_user: /* common failure exit; reached after ENTER has run, so the saved %ebx is still on the stack */ | ||
84 | CFI_STARTPROC simple | ||
85 | CFI_DEF_CFA esp, 2*4 /* unwind info for two words on the stack: return address and saved %ebx */ | ||
86 | CFI_OFFSET eip, -1*4 | ||
87 | CFI_OFFSET ebx, -2*4 | ||
88 | movl $-14,%eax /* error code in %eax; -14 matches the -EFAULT named in the header comment (EFAULT == 14) */ | ||
89 | EXIT | ||
90 | END(bad_put_user) | ||
91 | |||
92 | .section __ex_table,"a" /* pairs of (address of a user-memory store, handler address); presumably consulted when that store faults */ | ||
93 | .long 1b,bad_put_user | ||
94 | .long 2b,bad_put_user | ||
95 | .long 3b,bad_put_user | ||
96 | .long 4b,bad_put_user /* both halves of the 8-byte store in __put_user_8 are covered */ | ||
97 | .long 5b,bad_put_user | ||
98 | .previous /* return to the section that was active before .section */ | ||
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S new file mode 100644 index 000000000000..c01eb39c0b43 --- /dev/null +++ b/arch/x86/lib/semaphore_32.S | |||
@@ -0,0 +1,219 @@ | |||
1 | /* | ||
2 | * i386 semaphore implementation. | ||
3 | * | ||
4 | * (C) Copyright 1999 Linus Torvalds | ||
5 | * | ||
6 | * Portions Copyright 1999 Red Hat, Inc. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> | ||
14 | */ | ||
15 | |||
16 | #include <linux/linkage.h> | ||
17 | #include <asm/rwlock.h> | ||
18 | #include <asm/alternative-asm.i> | ||
19 | #include <asm/frame.i> | ||
20 | #include <asm/dwarf2.h> | ||
21 | |||
22 | /* | ||
23 | * The semaphore operations have a special calling sequence that | ||
24 | * allow us to do a simpler in-line version of them. These routines | ||
25 | * need to convert that sequence back into the C sequence when | ||
26 | * there is contention on the semaphore. | ||
27 | * | ||
28 | * %eax contains the semaphore pointer on entry. Save the C-clobbered | ||
29 | * registers (%eax, %edx and %ecx) except %eax which is either a return | ||
30 | * value or just clobbered.. | ||
31 | */ | ||
32 | .section .sched.text | ||
33 | ENTRY(__down_failed) /* out-of-line helper that wraps a call to __down, preserving %edx and %ecx */ | ||
34 | CFI_STARTPROC | ||
35 | FRAME | ||
36 | pushl %edx /* save the C-clobbered registers named in the comment above (all but %eax) */ | ||
37 | CFI_ADJUST_CFA_OFFSET 4 | ||
38 | CFI_REL_OFFSET edx,0 | ||
39 | pushl %ecx | ||
40 | CFI_ADJUST_CFA_OFFSET 4 | ||
41 | CFI_REL_OFFSET ecx,0 | ||
42 | call __down /* %eax still holds the semaphore pointer it had on entry */ | ||
43 | popl %ecx /* restore in reverse order of the pushes */ | ||
44 | CFI_ADJUST_CFA_OFFSET -4 | ||
45 | CFI_RESTORE ecx | ||
46 | popl %edx | ||
47 | CFI_ADJUST_CFA_OFFSET -4 | ||
48 | CFI_RESTORE edx | ||
49 | ENDFRAME | ||
50 | ret | ||
51 | CFI_ENDPROC | ||
52 | END(__down_failed) | ||
53 | |||
54 | ENTRY(__down_failed_interruptible) /* out-of-line helper that wraps a call to __down_interruptible, preserving %edx and %ecx */ | ||
55 | CFI_STARTPROC | ||
56 | FRAME | ||
57 | pushl %edx /* save the C-clobbered registers (all but %eax) */ | ||
58 | CFI_ADJUST_CFA_OFFSET 4 | ||
59 | CFI_REL_OFFSET edx,0 | ||
60 | pushl %ecx | ||
61 | CFI_ADJUST_CFA_OFFSET 4 | ||
62 | CFI_REL_OFFSET ecx,0 | ||
63 | call __down_interruptible /* %eax is not saved or restored here, so the callee's %eax reaches our caller */ | ||
64 | popl %ecx /* restore in reverse order of the pushes */ | ||
65 | CFI_ADJUST_CFA_OFFSET -4 | ||
66 | CFI_RESTORE ecx | ||
67 | popl %edx | ||
68 | CFI_ADJUST_CFA_OFFSET -4 | ||
69 | CFI_RESTORE edx | ||
70 | ENDFRAME | ||
71 | ret | ||
72 | CFI_ENDPROC | ||
73 | END(__down_failed_interruptible) | ||
74 | |||
75 | ENTRY(__down_failed_trylock) /* out-of-line helper that wraps a call to __down_trylock, preserving %edx and %ecx */ | ||
76 | CFI_STARTPROC | ||
77 | FRAME | ||
78 | pushl %edx /* save the C-clobbered registers (all but %eax) */ | ||
79 | CFI_ADJUST_CFA_OFFSET 4 | ||
80 | CFI_REL_OFFSET edx,0 | ||
81 | pushl %ecx | ||
82 | CFI_ADJUST_CFA_OFFSET 4 | ||
83 | CFI_REL_OFFSET ecx,0 | ||
84 | call __down_trylock /* %eax is not saved or restored here, so the callee's %eax reaches our caller */ | ||
85 | popl %ecx /* restore in reverse order of the pushes */ | ||
86 | CFI_ADJUST_CFA_OFFSET -4 | ||
87 | CFI_RESTORE ecx | ||
88 | popl %edx | ||
89 | CFI_ADJUST_CFA_OFFSET -4 | ||
90 | CFI_RESTORE edx | ||
91 | ENDFRAME | ||
92 | ret | ||
93 | CFI_ENDPROC | ||
94 | END(__down_failed_trylock) | ||
95 | |||
96 | ENTRY(__up_wakeup) /* out-of-line helper that wraps a call to __up, preserving %edx and %ecx */ | ||
97 | CFI_STARTPROC | ||
98 | FRAME | ||
99 | pushl %edx /* save the C-clobbered registers (all but %eax) */ | ||
100 | CFI_ADJUST_CFA_OFFSET 4 | ||
101 | CFI_REL_OFFSET edx,0 | ||
102 | pushl %ecx | ||
103 | CFI_ADJUST_CFA_OFFSET 4 | ||
104 | CFI_REL_OFFSET ecx,0 | ||
105 | call __up /* %eax still holds the semaphore pointer it had on entry */ | ||
106 | popl %ecx /* restore in reverse order of the pushes */ | ||
107 | CFI_ADJUST_CFA_OFFSET -4 | ||
108 | CFI_RESTORE ecx | ||
109 | popl %edx | ||
110 | CFI_ADJUST_CFA_OFFSET -4 | ||
111 | CFI_RESTORE edx | ||
112 | ENDFRAME | ||
113 | ret | ||
114 | CFI_ENDPROC | ||
115 | END(__up_wakeup) | ||
116 | |||
117 | /* | ||
118 | * rw spinlock fallbacks | ||
119 | */ | ||
120 | #ifdef CONFIG_SMP | ||
121 | ENTRY(__write_lock_failed) /* spin loop on the lock word at (%eax); returns once the subl below leaves it at zero */ | ||
122 | CFI_STARTPROC simple | ||
123 | FRAME | ||
124 | 2: LOCK_PREFIX | ||
125 | addl $ RW_LOCK_BIAS,(%eax) /* add RW_LOCK_BIAS to the lock word */ | ||
126 | 1: rep; nop /* spin-wait hint (rep;nop is the encoding of pause) */ | ||
127 | cmpl $ RW_LOCK_BIAS,(%eax) | ||
128 | jne 1b /* keep spinning until the lock word equals RW_LOCK_BIAS */ | ||
129 | LOCK_PREFIX | ||
130 | subl $ RW_LOCK_BIAS,(%eax) /* subtract the bias again */ | ||
131 | jnz 2b /* result not zero: undo at 2: and start over */ | ||
132 | ENDFRAME | ||
133 | ret | ||
134 | CFI_ENDPROC | ||
135 | END(__write_lock_failed) | ||
136 | |||
137 | ENTRY(__read_lock_failed) /* spin loop on the lock word at (%eax); returns once the decl below leaves it non-negative */ | ||
138 | CFI_STARTPROC | ||
139 | FRAME | ||
140 | 2: LOCK_PREFIX | ||
141 | incl (%eax) /* increment the lock word */ | ||
142 | 1: rep; nop /* spin-wait hint (rep;nop is the encoding of pause) */ | ||
143 | cmpl $1,(%eax) | ||
144 | js 1b /* keep spinning while (lock word - 1) is negative */ | ||
145 | LOCK_PREFIX | ||
146 | decl (%eax) /* decrement the lock word again */ | ||
147 | js 2b /* result negative: undo at 2: and start over */ | ||
148 | ENDFRAME | ||
149 | ret | ||
150 | CFI_ENDPROC | ||
151 | END(__read_lock_failed) | ||
152 | |||
153 | #endif | ||
154 | |||
155 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | ||
156 | |||
157 | /* Fix up special calling conventions */ | ||
158 | ENTRY(call_rwsem_down_read_failed) /* wrapper that calls rwsem_down_read_failed with %ecx and %edx preserved */ | ||
159 | CFI_STARTPROC | ||
160 | push %ecx /* save %ecx and %edx around the call */ | ||
161 | CFI_ADJUST_CFA_OFFSET 4 | ||
162 | CFI_REL_OFFSET ecx,0 | ||
163 | push %edx | ||
164 | CFI_ADJUST_CFA_OFFSET 4 | ||
165 | CFI_REL_OFFSET edx,0 | ||
166 | call rwsem_down_read_failed | ||
167 | pop %edx /* restore in reverse order of the pushes */ | ||
168 | CFI_ADJUST_CFA_OFFSET -4 | ||
169 | pop %ecx | ||
170 | CFI_ADJUST_CFA_OFFSET -4 | ||
171 | ret | ||
172 | CFI_ENDPROC | ||
173 | END(call_rwsem_down_read_failed) | ||
174 | |||
175 | ENTRY(call_rwsem_down_write_failed) /* wrapper that calls rwsem_down_write_failed with %ecx preserved */ | ||
176 | CFI_STARTPROC | ||
177 | push %ecx /* only %ecx is saved in this wrapper */ | ||
178 | CFI_ADJUST_CFA_OFFSET 4 | ||
179 | CFI_REL_OFFSET ecx,0 | ||
180 | calll rwsem_down_write_failed /* call with an explicit 32-bit operand-size suffix */ | ||
181 | pop %ecx | ||
182 | CFI_ADJUST_CFA_OFFSET -4 | ||
183 | ret | ||
184 | CFI_ENDPROC | ||
185 | END(call_rwsem_down_write_failed) | ||
186 | |||
187 | ENTRY(call_rwsem_wake) /* wrapper that calls rwsem_wake (with %ecx preserved) only when decrementing %dx yields zero */ | ||
188 | CFI_STARTPROC | ||
189 | decw %dx /* do nothing if still outstanding active readers */ | ||
190 | jnz 1f /* non-zero after the decrement: skip the call and just return */ | ||
191 | push %ecx /* save %ecx around the call */ | ||
192 | CFI_ADJUST_CFA_OFFSET 4 | ||
193 | CFI_REL_OFFSET ecx,0 | ||
194 | call rwsem_wake | ||
195 | pop %ecx | ||
196 | CFI_ADJUST_CFA_OFFSET -4 | ||
197 | 1: ret | ||
198 | CFI_ENDPROC | ||
199 | END(call_rwsem_wake) | ||
200 | |||
201 | /* Fix up special calling conventions */ | ||
202 | ENTRY(call_rwsem_downgrade_wake) /* wrapper that calls rwsem_downgrade_wake with %ecx and %edx preserved */ | ||
203 | CFI_STARTPROC | ||
204 | push %ecx /* save %ecx and %edx around the call */ | ||
205 | CFI_ADJUST_CFA_OFFSET 4 | ||
206 | CFI_REL_OFFSET ecx,0 | ||
207 | push %edx | ||
208 | CFI_ADJUST_CFA_OFFSET 4 | ||
209 | CFI_REL_OFFSET edx,0 | ||
210 | call rwsem_downgrade_wake | ||
211 | pop %edx /* restore in reverse order of the pushes */ | ||
212 | CFI_ADJUST_CFA_OFFSET -4 | ||
213 | pop %ecx | ||
214 | CFI_ADJUST_CFA_OFFSET -4 | ||
215 | ret | ||
216 | CFI_ENDPROC | ||
217 | END(call_rwsem_downgrade_wake) | ||
218 | |||
219 | #endif | ||
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c new file mode 100644 index 000000000000..2c773fefa3dd --- /dev/null +++ b/arch/x86/lib/string_32.c | |||
@@ -0,0 +1,257 @@ | |||
1 | /* | ||
2 | * Most of the string-functions are rather heavily hand-optimized, | ||
3 | * see especially strsep,strstr,str[c]spn. They should work, but are not | ||
4 | * very easy to understand. Everything is done entirely within the register | ||
5 | * set, making the functions fast and clean. String instructions have been | ||
6 | * used through-out, making for "slightly" unclear code :-) | ||
7 | * | ||
8 | * AK: On P4 and K7 using non string instruction implementations might be faster | ||
9 | * for large memory blocks. But most of them are unlikely to be used on large | ||
10 | * strings. | ||
11 | */ | ||
12 | |||
13 | #include <linux/string.h> | ||
14 | #include <linux/module.h> | ||
15 | |||
16 | #ifdef __HAVE_ARCH_STRCPY | ||
17 | char *strcpy(char * dest,const char *src) /* copy the NUL-terminated string at src to dest, including the NUL; returns dest */ | ||
18 | { | ||
19 | int d0, d1, d2; /* dummy outputs: tell the compiler that esi, edi and eax are modified by the asm */ | ||
20 | asm volatile( "1:\tlodsb\n\t" /* al = *src++ (esi) */ | ||
21 | "stosb\n\t" /* *dest++ = al (edi) */ | ||
22 | "testb %%al,%%al\n\t" | ||
23 | "jne 1b" /* repeat until the byte just copied was the NUL */ | ||
24 | : "=&S" (d0), "=&D" (d1), "=&a" (d2) | ||
25 | :"0" (src),"1" (dest) : "memory"); | ||
26 | return dest; | ||
27 | } | ||
28 | EXPORT_SYMBOL(strcpy); | ||
29 | #endif | ||
30 | |||
31 | #ifdef __HAVE_ARCH_STRNCPY | ||
32 | char *strncpy(char * dest,const char *src,size_t count) /* copy at most count bytes from src to dest, zero-filling the remainder once the NUL is copied; returns dest */ | ||
33 | { | ||
34 | int d0, d1, d2, d3; /* dummy outputs: tell the compiler that esi, edi, ecx and eax are modified by the asm */ | ||
35 | asm volatile( "1:\tdecl %2\n\t" /* one byte less to go (ecx) */ | ||
36 | "js 2f\n\t" /* count used up: stop; nothing more is written, so no NUL is added in that case */ | ||
37 | "lodsb\n\t" /* al = *src++ */ | ||
38 | "stosb\n\t" /* *dest++ = al */ | ||
39 | "testb %%al,%%al\n\t" | ||
40 | "jne 1b\n\t" /* loop until the NUL has been copied */ | ||
41 | "rep\n\t" | ||
42 | "stosb\n" /* al is 0 here: fill the remaining ecx bytes of dest with zeros */ | ||
43 | "2:" | ||
44 | : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) | ||
45 | :"0" (src),"1" (dest),"2" (count) : "memory"); | ||
46 | return dest; | ||
47 | } | ||
48 | EXPORT_SYMBOL(strncpy); | ||
49 | #endif | ||
50 | |||
51 | #ifdef __HAVE_ARCH_STRCAT | ||
52 | char *strcat(char * dest,const char * src) /* append the string at src to the end of the string at dest; returns dest */ | ||
53 | { | ||
54 | int d0, d1, d2, d3; /* dummy outputs: tell the compiler that esi, edi, eax and ecx are modified by the asm */ | ||
55 | asm volatile( "repne\n\t" | ||
56 | "scasb\n\t" /* scan dest for the NUL (eax = 0, ecx = 0xffffffff) */ | ||
57 | "decl %1\n" /* edi stopped one past the NUL: step back onto it */ | ||
58 | "1:\tlodsb\n\t" /* al = *src++ */ | ||
59 | "stosb\n\t" /* *dest++ = al */ | ||
60 | "testb %%al,%%al\n\t" | ||
61 | "jne 1b" /* repeat until the NUL has been copied */ | ||
62 | : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) | ||
63 | : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory"); | ||
64 | return dest; | ||
65 | } | ||
66 | EXPORT_SYMBOL(strcat); | ||
67 | #endif | ||
68 | |||
69 | #ifdef __HAVE_ARCH_STRNCAT | ||
70 | char *strncat(char * dest,const char * src,size_t count) /* append at most count bytes of src to the string at dest and NUL-terminate; returns dest */ | ||
71 | { | ||
72 | int d0, d1, d2, d3; /* dummy outputs: tell the compiler that esi, edi, eax and ecx are modified by the asm */ | ||
73 | asm volatile( "repne\n\t" | ||
74 | "scasb\n\t" /* scan dest for the NUL (eax = 0, ecx = 0xffffffff) */ | ||
75 | "decl %1\n\t" /* edi stopped one past the NUL: step back onto it */ | ||
76 | "movl %8,%3\n" /* ecx = count (operand 8 is the "g" input) */ | ||
77 | "1:\tdecl %3\n\t" | ||
78 | "js 2f\n\t" /* count used up: go terminate */ | ||
79 | "lodsb\n\t" /* al = *src++ */ | ||
80 | "stosb\n\t" /* *dest++ = al */ | ||
81 | "testb %%al,%%al\n\t" | ||
82 | "jne 1b\n" /* loop until the NUL has been copied */ | ||
83 | "2:\txorl %2,%2\n\t" | ||
84 | "stosb" /* store a terminating NUL (a second one if the loop already copied src's NUL) */ | ||
85 | : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) | ||
86 | : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count) | ||
87 | : "memory"); | ||
88 | return dest; | ||
89 | } | ||
90 | EXPORT_SYMBOL(strncat); | ||
91 | #endif | ||
92 | |||
93 | #ifdef __HAVE_ARCH_STRCMP | ||
94 | int strcmp(const char * cs,const char * ct) /* compare two strings; returns 0 if equal, otherwise -1 or 1 */ | ||
95 | { | ||
96 | int d0, d1; /* dummy outputs: tell the compiler that esi and edi are modified by the asm */ | ||
97 | int res; | ||
98 | asm volatile( "1:\tlodsb\n\t" /* al = *cs++ (esi) */ | ||
99 | "scasb\n\t" /* compare al with *ct++ (edi) */ | ||
100 | "jne 2f\n\t" /* bytes differ: compute the sign at 2: */ | ||
101 | "testb %%al,%%al\n\t" | ||
102 | "jne 1b\n\t" /* equal and not NUL: next byte */ | ||
103 | "xorl %%eax,%%eax\n\t" /* both strings ended together: result 0 */ | ||
104 | "jmp 3f\n" | ||
105 | "2:\tsbbl %%eax,%%eax\n\t" /* eax = -1 if the cs byte was below the ct byte (unsigned), else 0 */ | ||
106 | "orb $1,%%al\n" /* turn that into -1 or 1 */ | ||
107 | "3:" | ||
108 | :"=a" (res), "=&S" (d0), "=&D" (d1) | ||
109 | :"1" (cs),"2" (ct) | ||
110 | :"memory"); | ||
111 | return res; | ||
112 | } | ||
113 | EXPORT_SYMBOL(strcmp); | ||
114 | #endif | ||
115 | |||
116 | #ifdef __HAVE_ARCH_STRNCMP | ||
117 | int strncmp(const char * cs,const char * ct,size_t count) /* compare at most count bytes of two strings; returns 0 if equal, otherwise -1 or 1 */ | ||
118 | { | ||
119 | int res; | ||
120 | int d0, d1, d2; /* dummy outputs: tell the compiler that esi, edi and ecx are modified by the asm */ | ||
121 | asm volatile( "1:\tdecl %3\n\t" /* one byte less to compare (ecx) */ | ||
122 | "js 2f\n\t" /* count used up without a difference: equal */ | ||
123 | "lodsb\n\t" /* al = *cs++ (esi) */ | ||
124 | "scasb\n\t" /* compare al with *ct++ (edi) */ | ||
125 | "jne 3f\n\t" /* bytes differ: compute the sign at 3: */ | ||
126 | "testb %%al,%%al\n\t" | ||
127 | "jne 1b\n" /* equal and not NUL: next byte */ | ||
128 | "2:\txorl %%eax,%%eax\n\t" /* result 0 */ | ||
129 | "jmp 4f\n" | ||
130 | "3:\tsbbl %%eax,%%eax\n\t" /* eax = -1 if the cs byte was below the ct byte (unsigned), else 0 */ | ||
131 | "orb $1,%%al\n" /* turn that into -1 or 1 */ | ||
132 | "4:" | ||
133 | :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) | ||
134 | :"1" (cs),"2" (ct),"3" (count) | ||
135 | :"memory"); | ||
136 | return res; | ||
137 | } | ||
138 | EXPORT_SYMBOL(strncmp); | ||
139 | #endif | ||
140 | |||
141 | #ifdef __HAVE_ARCH_STRCHR | ||
142 | char *strchr(const char * s, int c) /* return a pointer to the first byte equal to (char)c in s (the NUL counts), or NULL if there is none */ | ||
143 | { | ||
144 | int d0; /* dummy output: tells the compiler that esi is modified by the asm */ | ||
145 | char * res; | ||
146 | asm volatile( "movb %%al,%%ah\n" /* keep c in ah, since lodsb overwrites al */ | ||
147 | "1:\tlodsb\n\t" /* al = *s++ (esi) */ | ||
148 | "cmpb %%ah,%%al\n\t" | ||
149 | "je 2f\n\t" /* match found */ | ||
150 | "testb %%al,%%al\n\t" | ||
151 | "jne 1b\n\t" /* not at the NUL yet: next byte */ | ||
152 | "movl $1,%1\n" /* no match: esi = 1 so that the decl below yields 0 (NULL) */ | ||
153 | "2:\tmovl %1,%0\n\t" | ||
154 | "decl %0" /* esi is one past the byte just examined: step back onto it */ | ||
155 | :"=a" (res), "=&S" (d0) | ||
156 | :"1" (s),"0" (c) | ||
157 | :"memory"); | ||
158 | return res; | ||
159 | } | ||
160 | EXPORT_SYMBOL(strchr); | ||
161 | #endif | ||
162 | |||
163 | #ifdef __HAVE_ARCH_STRRCHR | ||
164 | char *strrchr(const char * s, int c) /* return a pointer to the last byte equal to (char)c in s (the NUL counts), or NULL if there is none */ | ||
165 | { | ||
166 | int d0, d1; /* dummy outputs: tell the compiler that esi and eax are modified by the asm */ | ||
167 | char * res; | ||
168 | asm volatile( "movb %%al,%%ah\n" /* keep c in ah, since lodsb overwrites al */ | ||
169 | "1:\tlodsb\n\t" /* al = *s++ (esi) */ | ||
170 | "cmpb %%ah,%%al\n\t" | ||
171 | "jne 2f\n\t" | ||
172 | "leal -1(%%esi),%0\n" /* match: remember its address (esi is already one past it) */ | ||
173 | "2:\ttestb %%al,%%al\n\t" | ||
174 | "jne 1b" /* keep going to the end of the string so the last match wins */ | ||
175 | :"=g" (res), "=&S" (d0), "=&a" (d1) | ||
176 | :"0" (0),"1" (s),"2" (c) | ||
177 | :"memory"); | ||
178 | return res; /* still the initial 0 (NULL) if nothing matched */ | ||
179 | } | ||
180 | EXPORT_SYMBOL(strrchr); | ||
181 | #endif | ||
182 | |||
183 | #ifdef __HAVE_ARCH_STRLEN | ||
184 | size_t strlen(const char * s) /* return the number of bytes in s before the terminating NUL */ | ||
185 | { | ||
186 | int d0; /* dummy output: tells the compiler that edi is modified by the asm */ | ||
187 | int res; /* computed in ecx as an int, converted to size_t on return */ | ||
188 | asm volatile( "repne\n\t" | ||
189 | "scasb\n\t" /* scan for the NUL (eax = 0), decrementing ecx from 0xffffffff once per byte examined */ | ||
190 | "notl %0\n\t" /* ~ecx = number of bytes examined, including the NUL */ | ||
191 | "decl %0" /* minus one for the NUL itself */ | ||
192 | :"=c" (res), "=&D" (d0) | ||
193 | :"1" (s),"a" (0), "0" (0xffffffffu) | ||
194 | :"memory"); | ||
195 | return res; | ||
196 | } | ||
197 | EXPORT_SYMBOL(strlen); | ||
198 | #endif | ||
199 | |||
200 | #ifdef __HAVE_ARCH_MEMCHR | ||
201 | void *memchr(const void *cs,int c,size_t count) /* return a pointer to the first byte equal to (char)c within the first count bytes of cs, or NULL */ | ||
202 | { | ||
203 | int d0; /* dummy output: tells the compiler that ecx is modified by the asm */ | ||
204 | void *res; | ||
205 | if (!count) /* nothing to search: the asm below is not entered with ecx == 0 */ | ||
206 | return NULL; | ||
207 | asm volatile( "repne\n\t" | ||
208 | "scasb\n\t" /* scan up to ecx bytes at edi for al */ | ||
209 | "je 1f\n\t" /* found: edi is one past the match */ | ||
210 | "movl $1,%0\n" /* not found: edi = 1 so that the decl below yields 0 (NULL) */ | ||
211 | "1:\tdecl %0" | ||
212 | :"=D" (res), "=&c" (d0) | ||
213 | :"a" (c),"0" (cs),"1" (count) | ||
214 | :"memory"); | ||
215 | return res; | ||
216 | } | ||
217 | EXPORT_SYMBOL(memchr); | ||
218 | #endif | ||
219 | |||
220 | #ifdef __HAVE_ARCH_MEMSCAN | ||
221 | void *memscan(void * addr, int c, size_t size) /* return the address of the first byte equal to (char)c in [addr, addr+size), or addr+size if there is none */ | ||
222 | { | ||
223 | if (!size) /* empty area: return addr unchanged */ | ||
224 | return addr; | ||
225 | asm volatile("repnz; scasb\n\t" /* scan up to ecx bytes at edi for al */ | ||
226 | "jnz 1f\n\t" /* not found: edi already points one past the area */ | ||
227 | "dec %%edi\n" /* found: edi is one past the match, step back onto it */ | ||
228 | "1:" | ||
229 | : "=D" (addr), "=c" (size) | ||
230 | : "0" (addr), "1" (size), "a" (c) | ||
231 | : "memory"); | ||
232 | return addr; | ||
233 | } | ||
234 | EXPORT_SYMBOL(memscan); | ||
235 | #endif | ||
236 | |||
237 | #ifdef __HAVE_ARCH_STRNLEN | ||
238 | size_t strnlen(const char *s, size_t count) /* return the length of s, but never more than count */ | ||
239 | { | ||
240 | int d0; /* dummy output: tells the compiler that edx (the remaining count) is modified by the asm */ | ||
241 | int res; /* computed in eax as an int, converted to size_t on return */ | ||
242 | asm volatile( "movl %2,%0\n\t" /* eax = s: the moving pointer (operand 2 is s in ecx) */ | ||
243 | "jmp 2f\n" /* test the count before looking at the first byte */ | ||
244 | "1:\tcmpb $0,(%0)\n\t" | ||
245 | "je 3f\n\t" /* NUL reached */ | ||
246 | "incl %0\n" | ||
247 | "2:\tdecl %1\n\t" | ||
248 | "cmpl $-1,%1\n\t" | ||
249 | "jne 1b\n" /* loop while the count has not run out (decremented past zero) */ | ||
250 | "3:\tsubl %2,%0" /* length = moving pointer - s */ | ||
251 | :"=a" (res), "=&d" (d0) | ||
252 | :"c" (s),"1" (count) | ||
253 | :"memory"); | ||
254 | return res; | ||
255 | } | ||
256 | EXPORT_SYMBOL(strnlen); | ||
257 | #endif | ||
diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c new file mode 100644 index 000000000000..a3dafbf59dae --- /dev/null +++ b/arch/x86/lib/strstr_32.c | |||
@@ -0,0 +1,31 @@ | |||
1 | #include <linux/string.h> | ||
2 | |||
/*
 * strstr - find the first occurrence of the string @ct inside @cs.
 * @cs: string to search in
 * @ct: string to search for (asm operand %6)
 *
 * Returns a pointer to the start of the first match, or NULL (%eax
 * cleared) when the end of @cs is reached without a match.
 *
 * Outline of the asm:
 *  - "repne scasb" over @ct with %al == 0 and %ecx == 0xffffffff, followed
 *    by "notl"/"decl", leaves the length of @ct in %ecx; it is kept in
 *    %edx for the rest of the routine.
 *  - Label 1 is the per-candidate loop: %esi is the current position in
 *    @cs (saved in %eax), and "repe cmpsb" compares %edx bytes against @ct.
 *  - On a mismatch %esi is rewound to the candidate start and advanced by
 *    one; the last byte of @cs that cmpsb consumed (-1(%eax) after the
 *    xchg) is tested for NUL to detect the end of @cs.
 */
3 | char * strstr(const char * cs,const char * ct) | ||
4 | { | ||
5 | int d0, d1; | ||
6 | register char * __res; | ||
7 | __asm__ __volatile__( | ||
8 | "movl %6,%%edi\n\t" | ||
9 | "repne\n\t" | ||
10 | "scasb\n\t" | ||
11 | "notl %%ecx\n\t" | ||
12 | "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ | ||
13 | "movl %%ecx,%%edx\n" | ||
14 | "1:\tmovl %6,%%edi\n\t" | ||
15 | "movl %%esi,%%eax\n\t" | ||
16 | "movl %%edx,%%ecx\n\t" | ||
17 | "repe\n\t" | ||
18 | "cmpsb\n\t" | ||
19 | "je 2f\n\t" /* also works for empty string, see above */ | ||
20 | "xchgl %%eax,%%esi\n\t" | ||
21 | "incl %%esi\n\t" | ||
22 | "cmpb $0,-1(%%eax)\n\t" | ||
23 | "jne 1b\n\t" | ||
24 | "xorl %%eax,%%eax\n\t" | ||
25 | "2:" | ||
26 | :"=a" (__res), "=&c" (d0), "=&S" (d1) | ||
27 | :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) | ||
28 | :"dx", "di"); | ||
29 | return __res; | ||
30 | } | ||
31 | |||
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c new file mode 100644 index 000000000000..9f38b12b4af1 --- /dev/null +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -0,0 +1,882 @@ | |||
1 | /* | ||
2 | * User address space access functions. | ||
3 | * The non inlined parts of asm-i386/uaccess.h are here. | ||
4 | * | ||
5 | * Copyright 1997 Andi Kleen <ak@muc.de> | ||
6 | * Copyright 1997 Linus Torvalds | ||
7 | */ | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/highmem.h> | ||
10 | #include <linux/blkdev.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/backing-dev.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <asm/uaccess.h> | ||
15 | #include <asm/mmx.h> | ||
16 | |||
/*
 * Tell the copy routines whether the "rep movsl" based copy may be used.
 *
 * With CONFIG_X86_INTEL_USERCOPY the answer is "no" (0) only for copies of
 * at least 64 bytes whose two addresses differ in a bit selected by
 * movsl_mask.mask.  In every other case, and always when the option is
 * off, the answer is "yes" (1).
 */
static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
{
#ifdef CONFIG_X86_INTEL_USERCOPY
	if (n < 64)
		return 1;
	return ((a1 ^ a2) & movsl_mask.mask) ? 0 : 1;
#else
	return 1;
#endif
}
/* Convenience wrapper: accepts pointers (or integers) for both addresses. */
#define movsl_is_ok(a1,a2,n) \
	__movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n))
27 | |||
28 | /* | ||
29 | * Copy a null terminated string from userspace. | ||
30 | */ | ||
31 | |||
/*
 * __do_strncpy_from_user(dst, src, count, res)
 *
 * Byte-by-byte copy from @src to @dst that stops after the terminating NUL
 * has been stored or after @count bytes.  Both @count and @res are written
 * by the macro (they are asm outputs).
 *
 * Operands: %0 = res (%edx, starts as count), %1 = count (%ecx),
 * %3 = src (%esi), %4 = dst (%edi), %5 = the immediate -EFAULT.
 *
 *  - A zero @count skips the loop entirely; res is left at 0.
 *  - Label 0 loads one byte (lodsb) and stores it (stosb).  The counter is
 *    only decremented for non-NUL bytes, so at label 1
 *    "res = initial count - remaining count" is the string length without
 *    the NUL, or the full @count when no NUL was seen.
 *  - The only exception-table entry covers the lodsb at label 0: a fault
 *    there lands at label 3, which sets res to -EFAULT.
 *
 * might_sleep() is invoked before the asm.
 */
32 | #define __do_strncpy_from_user(dst,src,count,res) \ | ||
33 | do { \ | ||
34 | int __d0, __d1, __d2; \ | ||
35 | might_sleep(); \ | ||
36 | __asm__ __volatile__( \ | ||
37 | " testl %1,%1\n" \ | ||
38 | " jz 2f\n" \ | ||
39 | "0: lodsb\n" \ | ||
40 | " stosb\n" \ | ||
41 | " testb %%al,%%al\n" \ | ||
42 | " jz 1f\n" \ | ||
43 | " decl %1\n" \ | ||
44 | " jnz 0b\n" \ | ||
45 | "1: subl %1,%0\n" \ | ||
46 | "2:\n" \ | ||
47 | ".section .fixup,\"ax\"\n" \ | ||
48 | "3: movl %5,%0\n" \ | ||
49 | " jmp 2b\n" \ | ||
50 | ".previous\n" \ | ||
51 | ".section __ex_table,\"a\"\n" \ | ||
52 | " .align 4\n" \ | ||
53 | " .long 0b,3b\n" \ | ||
54 | ".previous" \ | ||
55 | : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ | ||
56 | "=&D" (__d2) \ | ||
57 | : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ | ||
58 | : "memory"); \ | ||
59 | } while (0) | ||
60 | |||
61 | /** | ||
62 | * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. | ||
63 | * @dst: Destination address, in kernel space. This buffer must be at | ||
64 | * least @count bytes long. | ||
65 | * @src: Source address, in user space. | ||
66 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
67 | * | ||
68 | * Copies a NUL-terminated string from userspace to kernel space. | ||
69 | * Caller must check the specified block with access_ok() before calling | ||
70 | * this function. | ||
71 | * | ||
72 | * On success, returns the length of the string (not including the trailing | ||
73 | * NUL). | ||
74 | * | ||
75 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
76 | * copied). | ||
77 | * | ||
78 | * If @count is smaller than the length of the string, copies @count bytes | ||
79 | * and returns @count. | ||
80 | */ | ||
81 | long | ||
82 | __strncpy_from_user(char *dst, const char __user *src, long count) | ||
83 | { | ||
84 | long res; | ||
85 | __do_strncpy_from_user(dst, src, count, res); | ||
86 | return res; | ||
87 | } | ||
88 | EXPORT_SYMBOL(__strncpy_from_user); | ||
89 | |||
90 | /** | ||
91 | * strncpy_from_user: - Copy a NUL terminated string from userspace. | ||
92 | * @dst: Destination address, in kernel space. This buffer must be at | ||
93 | * least @count bytes long. | ||
94 | * @src: Source address, in user space. | ||
95 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
96 | * | ||
97 | * Copies a NUL-terminated string from userspace to kernel space. | ||
98 | * | ||
99 | * On success, returns the length of the string (not including the trailing | ||
100 | * NUL). | ||
101 | * | ||
102 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
103 | * copied). | ||
104 | * | ||
105 | * If @count is smaller than the length of the string, copies @count bytes | ||
106 | * and returns @count. | ||
107 | */ | ||
108 | long | ||
109 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
110 | { | ||
111 | long res = -EFAULT; | ||
112 | if (access_ok(VERIFY_READ, src, 1)) | ||
113 | __do_strncpy_from_user(dst, src, count, res); | ||
114 | return res; | ||
115 | } | ||
116 | EXPORT_SYMBOL(strncpy_from_user); | ||
117 | |||
118 | /* | ||
119 | * Zero Userspace | ||
120 | */ | ||
121 | |||
/*
 * __do_clear_user(addr, size)
 *
 * Stores zeroes to @size bytes starting at @addr and leaves in @size (an
 * asm output) the number of bytes that were NOT cleared; 0 means success.
 *
 * Operands: %0 = size (%ecx, starts as size / 4), %1 = addr (%edi),
 * %2 = size & 3, %eax = 0.
 *
 *  - Label 0 clears size / 4 dwords with "rep stosl", then label 1 clears
 *    the remaining size & 3 bytes with "rep stosb".
 *  - A fault in the dword loop goes to label 3, which converts the
 *    outstanding dword count back into bytes: (size & 3) + 4 * %ecx.
 *  - A fault in the byte loop goes straight to label 2; %ecx already holds
 *    the number of bytes left.
 *
 * might_sleep() is invoked before the asm.
 */
122 | #define __do_clear_user(addr,size) \ | ||
123 | do { \ | ||
124 | int __d0; \ | ||
125 | might_sleep(); \ | ||
126 | __asm__ __volatile__( \ | ||
127 | "0: rep; stosl\n" \ | ||
128 | " movl %2,%0\n" \ | ||
129 | "1: rep; stosb\n" \ | ||
130 | "2:\n" \ | ||
131 | ".section .fixup,\"ax\"\n" \ | ||
132 | "3: lea 0(%2,%0,4),%0\n" \ | ||
133 | " jmp 2b\n" \ | ||
134 | ".previous\n" \ | ||
135 | ".section __ex_table,\"a\"\n" \ | ||
136 | " .align 4\n" \ | ||
137 | " .long 0b,3b\n" \ | ||
138 | " .long 1b,2b\n" \ | ||
139 | ".previous" \ | ||
140 | : "=&c"(size), "=&D" (__d0) \ | ||
141 | : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ | ||
142 | } while (0) | ||
143 | |||
144 | /** | ||
145 | * clear_user: - Zero a block of memory in user space. | ||
146 | * @to: Destination address, in user space. | ||
147 | * @n: Number of bytes to zero. | ||
148 | * | ||
149 | * Zero a block of memory in user space. | ||
150 | * | ||
151 | * Returns number of bytes that could not be cleared. | ||
152 | * On success, this will be zero. | ||
153 | */ | ||
154 | unsigned long | ||
155 | clear_user(void __user *to, unsigned long n) | ||
156 | { | ||
157 | might_sleep(); | ||
158 | if (access_ok(VERIFY_WRITE, to, n)) | ||
159 | __do_clear_user(to, n); | ||
160 | return n; | ||
161 | } | ||
162 | EXPORT_SYMBOL(clear_user); | ||
163 | |||
164 | /** | ||
165 | * __clear_user: - Zero a block of memory in user space, with less checking. | ||
166 | * @to: Destination address, in user space. | ||
167 | * @n: Number of bytes to zero. | ||
168 | * | ||
169 | * Zero a block of memory in user space. Caller must check | ||
170 | * the specified block with access_ok() before calling this function. | ||
171 | * | ||
172 | * Returns number of bytes that could not be cleared. | ||
173 | * On success, this will be zero. | ||
174 | */ | ||
175 | unsigned long | ||
176 | __clear_user(void __user *to, unsigned long n) | ||
177 | { | ||
178 | __do_clear_user(to, n); | ||
179 | return n; | ||
180 | } | ||
181 | EXPORT_SYMBOL(__clear_user); | ||
182 | |||
183 | /** | ||
184 | * strnlen_user: - Get the size of a string in user space. | ||
185 | * @s: The string to measure. | ||
186 | * @n: The maximum valid length | ||
187 | * | ||
188 | * Get the size of a NUL-terminated string in user space. | ||
189 | * | ||
190 | * Returns the size of the string INCLUDING the terminating NUL. | ||
191 | * On exception, returns 0. | ||
192 | * If the string is too long, returns a value greater than @n. | ||
193 | */ | ||
194 | long strnlen_user(const char __user *s, long n) | ||
195 | { | ||
/*
 * NOTE(review): mask is presumably all ones when __addr_ok(s) yields 1 and
 * zero when it yields 0 -- confirm against the definition of __addr_ok().
 * It is applied twice: to the scan limit inside the asm and to the result.
 */
196 | unsigned long mask = -__addr_ok(s); | ||
197 | unsigned long res, tmp; | ||
198 | |||
199 | might_sleep(); | ||
200 | |||
/*
 * Operands: %0 = n, %edi = s, %eax = res (starts at 0), %ecx = tmp
 * (starts as mask).
 *
 *  - n == 0 jumps to label 3, which sets %al to 1.
 *  - Otherwise %ecx becomes n & mask and "repne scasb" searches for the
 *    NUL (%al is 0).  "setne" records in %al whether the scan ended
 *    without a match; n - %ecx is the number of bytes examined, so the
 *    sum is the size including the NUL, or one more than the number of
 *    bytes examined when no NUL was found.
 *  - A fault in the scan (label 0) is routed to label 2, which zeroes
 *    the result.
 */
201 | __asm__ __volatile__( | ||
202 | " testl %0, %0\n" | ||
203 | " jz 3f\n" | ||
204 | " andl %0,%%ecx\n" | ||
205 | "0: repne; scasb\n" | ||
206 | " setne %%al\n" | ||
207 | " subl %%ecx,%0\n" | ||
208 | " addl %0,%%eax\n" | ||
209 | "1:\n" | ||
210 | ".section .fixup,\"ax\"\n" | ||
211 | "2: xorl %%eax,%%eax\n" | ||
212 | " jmp 1b\n" | ||
213 | "3: movb $1,%%al\n" | ||
214 | " jmp 1b\n" | ||
215 | ".previous\n" | ||
216 | ".section __ex_table,\"a\"\n" | ||
217 | " .align 4\n" | ||
218 | " .long 0b,2b\n" | ||
219 | ".previous" | ||
220 | :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) | ||
221 | :"0" (n), "1" (s), "2" (0), "3" (mask) | ||
222 | :"cc"); | ||
223 | return res & mask; | ||
224 | } | ||
225 | EXPORT_SYMBOL(strnlen_user); | ||
226 | |||
227 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
/*
 * __copy_user_intel - unrolled copy to user space, 64 bytes per iteration.
 * @to:   destination, in user space
 * @from: source
 * @size: number of bytes to copy
 *
 * Returns the value left in @size by the asm: 0 when everything was
 * copied, otherwise the outstanding byte count as seen by the fixup code.
 *
 * Operands: %0 = size (%ecx); %3 names the same register as %1 (%edi, the
 * destination) and %4 the same register as %2 (%esi, the source).
 *
 *  - Labels 1 and 2 load from 32 and 64 bytes ahead of the source into
 *    %eax; the value is overwritten at label 3 without being used, so
 *    these presumably only serve to touch the upcoming data early.
 *  - Labels 3-34 move one 64-byte chunk through %eax/%edx.  The chunk is
 *    copied before the size is compared, so the routine expects
 *    @size >= 64 on entry; the caller in this file only takes this path
 *    when movsl_is_ok() returned 0.
 *  - Label 35 onwards handles the tail (< 64 bytes) with "rep movsl"
 *    followed by "rep movsb"; %eax carries size & 3 across the dword loop.
 *  - Every labelled instruction has an exception-table entry.  All of them
 *    resume at label 100 with %ecx as it stands, except the "rep movsl" at
 *    label 99, whose fixup (label 101) first converts the outstanding
 *    dwords plus the saved remainder into a byte count.
 */
228 | static unsigned long | ||
229 | __copy_user_intel(void __user *to, const void *from, unsigned long size) | ||
230 | { | ||
231 | int d0, d1; | ||
232 | __asm__ __volatile__( | ||
233 | " .align 2,0x90\n" | ||
234 | "1: movl 32(%4), %%eax\n" | ||
235 | " cmpl $67, %0\n" | ||
236 | " jbe 3f\n" | ||
237 | "2: movl 64(%4), %%eax\n" | ||
238 | " .align 2,0x90\n" | ||
239 | "3: movl 0(%4), %%eax\n" | ||
240 | "4: movl 4(%4), %%edx\n" | ||
241 | "5: movl %%eax, 0(%3)\n" | ||
242 | "6: movl %%edx, 4(%3)\n" | ||
243 | "7: movl 8(%4), %%eax\n" | ||
244 | "8: movl 12(%4),%%edx\n" | ||
245 | "9: movl %%eax, 8(%3)\n" | ||
246 | "10: movl %%edx, 12(%3)\n" | ||
247 | "11: movl 16(%4), %%eax\n" | ||
248 | "12: movl 20(%4), %%edx\n" | ||
249 | "13: movl %%eax, 16(%3)\n" | ||
250 | "14: movl %%edx, 20(%3)\n" | ||
251 | "15: movl 24(%4), %%eax\n" | ||
252 | "16: movl 28(%4), %%edx\n" | ||
253 | "17: movl %%eax, 24(%3)\n" | ||
254 | "18: movl %%edx, 28(%3)\n" | ||
255 | "19: movl 32(%4), %%eax\n" | ||
256 | "20: movl 36(%4), %%edx\n" | ||
257 | "21: movl %%eax, 32(%3)\n" | ||
258 | "22: movl %%edx, 36(%3)\n" | ||
259 | "23: movl 40(%4), %%eax\n" | ||
260 | "24: movl 44(%4), %%edx\n" | ||
261 | "25: movl %%eax, 40(%3)\n" | ||
262 | "26: movl %%edx, 44(%3)\n" | ||
263 | "27: movl 48(%4), %%eax\n" | ||
264 | "28: movl 52(%4), %%edx\n" | ||
265 | "29: movl %%eax, 48(%3)\n" | ||
266 | "30: movl %%edx, 52(%3)\n" | ||
267 | "31: movl 56(%4), %%eax\n" | ||
268 | "32: movl 60(%4), %%edx\n" | ||
269 | "33: movl %%eax, 56(%3)\n" | ||
270 | "34: movl %%edx, 60(%3)\n" | ||
271 | " addl $-64, %0\n" | ||
272 | " addl $64, %4\n" | ||
273 | " addl $64, %3\n" | ||
274 | " cmpl $63, %0\n" | ||
275 | " ja 1b\n" | ||
276 | "35: movl %0, %%eax\n" | ||
277 | " shrl $2, %0\n" | ||
278 | " andl $3, %%eax\n" | ||
279 | " cld\n" | ||
280 | "99: rep; movsl\n" | ||
281 | "36: movl %%eax, %0\n" | ||
282 | "37: rep; movsb\n" | ||
283 | "100:\n" | ||
284 | ".section .fixup,\"ax\"\n" | ||
285 | "101: lea 0(%%eax,%0,4),%0\n" | ||
286 | " jmp 100b\n" | ||
287 | ".previous\n" | ||
288 | ".section __ex_table,\"a\"\n" | ||
289 | " .align 4\n" | ||
290 | " .long 1b,100b\n" | ||
291 | " .long 2b,100b\n" | ||
292 | " .long 3b,100b\n" | ||
293 | " .long 4b,100b\n" | ||
294 | " .long 5b,100b\n" | ||
295 | " .long 6b,100b\n" | ||
296 | " .long 7b,100b\n" | ||
297 | " .long 8b,100b\n" | ||
298 | " .long 9b,100b\n" | ||
299 | " .long 10b,100b\n" | ||
300 | " .long 11b,100b\n" | ||
301 | " .long 12b,100b\n" | ||
302 | " .long 13b,100b\n" | ||
303 | " .long 14b,100b\n" | ||
304 | " .long 15b,100b\n" | ||
305 | " .long 16b,100b\n" | ||
306 | " .long 17b,100b\n" | ||
307 | " .long 18b,100b\n" | ||
308 | " .long 19b,100b\n" | ||
309 | " .long 20b,100b\n" | ||
310 | " .long 21b,100b\n" | ||
311 | " .long 22b,100b\n" | ||
312 | " .long 23b,100b\n" | ||
313 | " .long 24b,100b\n" | ||
314 | " .long 25b,100b\n" | ||
315 | " .long 26b,100b\n" | ||
316 | " .long 27b,100b\n" | ||
317 | " .long 28b,100b\n" | ||
318 | " .long 29b,100b\n" | ||
319 | " .long 30b,100b\n" | ||
320 | " .long 31b,100b\n" | ||
321 | " .long 32b,100b\n" | ||
322 | " .long 33b,100b\n" | ||
323 | " .long 34b,100b\n" | ||
324 | " .long 35b,100b\n" | ||
325 | " .long 36b,100b\n" | ||
326 | " .long 37b,100b\n" | ||
327 | " .long 99b,101b\n" | ||
328 | ".previous" | ||
329 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
330 | : "1"(to), "2"(from), "0"(size) | ||
331 | : "eax", "edx", "memory"); | ||
332 | return size; | ||
333 | } | ||
334 | |||
/*
 * __copy_user_zeroing_intel - unrolled copy from user space that zero-fills
 * the destination when the copy faults.
 * @to:   destination, in kernel space
 * @from: source, in user space
 * @size: number of bytes to copy
 *
 * Returns the value left in @size by the asm: 0 when everything was
 * copied, otherwise the outstanding byte count as seen by the fixup code.
 *
 * Operands: %0 = size (%ecx); %3 names the same register as %1 (%edi, the
 * destination) and %4 the same register as %2 (%esi, the source).
 *
 *  - Labels 0 and 1 load from 32 and 64 bytes ahead of the source; the
 *    value is overwritten at label 2 without being used, so these
 *    presumably only serve to touch the upcoming data early.
 *  - Each iteration moves 64 bytes through %eax/%edx.  Only the loads
 *    carry labels and exception-table entries; the stores to @to do not.
 *  - Label 5 onwards handles the tail with "rep movsl" / "rep movsb".
 *  - Fixup: label 9 (fault in "rep movsl") rebuilds the byte count from the
 *    outstanding dwords plus the saved remainder and falls into label 16.
 *    Label 16 writes %ecx zero bytes starting at the current %edi, with
 *    %ecx and %eax preserved around the "rep stosb", and then resumes at
 *    label 8.
 */
335 | static unsigned long | ||
336 | __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) | ||
337 | { | ||
338 | int d0, d1; | ||
339 | __asm__ __volatile__( | ||
340 | " .align 2,0x90\n" | ||
341 | "0: movl 32(%4), %%eax\n" | ||
342 | " cmpl $67, %0\n" | ||
343 | " jbe 2f\n" | ||
344 | "1: movl 64(%4), %%eax\n" | ||
345 | " .align 2,0x90\n" | ||
346 | "2: movl 0(%4), %%eax\n" | ||
347 | "21: movl 4(%4), %%edx\n" | ||
348 | " movl %%eax, 0(%3)\n" | ||
349 | " movl %%edx, 4(%3)\n" | ||
350 | "3: movl 8(%4), %%eax\n" | ||
351 | "31: movl 12(%4),%%edx\n" | ||
352 | " movl %%eax, 8(%3)\n" | ||
353 | " movl %%edx, 12(%3)\n" | ||
354 | "4: movl 16(%4), %%eax\n" | ||
355 | "41: movl 20(%4), %%edx\n" | ||
356 | " movl %%eax, 16(%3)\n" | ||
357 | " movl %%edx, 20(%3)\n" | ||
358 | "10: movl 24(%4), %%eax\n" | ||
359 | "51: movl 28(%4), %%edx\n" | ||
360 | " movl %%eax, 24(%3)\n" | ||
361 | " movl %%edx, 28(%3)\n" | ||
362 | "11: movl 32(%4), %%eax\n" | ||
363 | "61: movl 36(%4), %%edx\n" | ||
364 | " movl %%eax, 32(%3)\n" | ||
365 | " movl %%edx, 36(%3)\n" | ||
366 | "12: movl 40(%4), %%eax\n" | ||
367 | "71: movl 44(%4), %%edx\n" | ||
368 | " movl %%eax, 40(%3)\n" | ||
369 | " movl %%edx, 44(%3)\n" | ||
370 | "13: movl 48(%4), %%eax\n" | ||
371 | "81: movl 52(%4), %%edx\n" | ||
372 | " movl %%eax, 48(%3)\n" | ||
373 | " movl %%edx, 52(%3)\n" | ||
374 | "14: movl 56(%4), %%eax\n" | ||
375 | "91: movl 60(%4), %%edx\n" | ||
376 | " movl %%eax, 56(%3)\n" | ||
377 | " movl %%edx, 60(%3)\n" | ||
378 | " addl $-64, %0\n" | ||
379 | " addl $64, %4\n" | ||
380 | " addl $64, %3\n" | ||
381 | " cmpl $63, %0\n" | ||
382 | " ja 0b\n" | ||
383 | "5: movl %0, %%eax\n" | ||
384 | " shrl $2, %0\n" | ||
385 | " andl $3, %%eax\n" | ||
386 | " cld\n" | ||
387 | "6: rep; movsl\n" | ||
388 | " movl %%eax,%0\n" | ||
389 | "7: rep; movsb\n" | ||
390 | "8:\n" | ||
391 | ".section .fixup,\"ax\"\n" | ||
392 | "9: lea 0(%%eax,%0,4),%0\n" | ||
393 | "16: pushl %0\n" | ||
394 | " pushl %%eax\n" | ||
395 | " xorl %%eax,%%eax\n" | ||
396 | " rep; stosb\n" | ||
397 | " popl %%eax\n" | ||
398 | " popl %0\n" | ||
399 | " jmp 8b\n" | ||
400 | ".previous\n" | ||
401 | ".section __ex_table,\"a\"\n" | ||
402 | " .align 4\n" | ||
403 | " .long 0b,16b\n" | ||
404 | " .long 1b,16b\n" | ||
405 | " .long 2b,16b\n" | ||
406 | " .long 21b,16b\n" | ||
407 | " .long 3b,16b\n" | ||
408 | " .long 31b,16b\n" | ||
409 | " .long 4b,16b\n" | ||
410 | " .long 41b,16b\n" | ||
411 | " .long 10b,16b\n" | ||
412 | " .long 51b,16b\n" | ||
413 | " .long 11b,16b\n" | ||
414 | " .long 61b,16b\n" | ||
415 | " .long 12b,16b\n" | ||
416 | " .long 71b,16b\n" | ||
417 | " .long 13b,16b\n" | ||
418 | " .long 81b,16b\n" | ||
419 | " .long 14b,16b\n" | ||
420 | " .long 91b,16b\n" | ||
421 | " .long 6b,9b\n" | ||
422 | " .long 7b,16b\n" | ||
423 | ".previous" | ||
424 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
425 | : "1"(to), "2"(from), "0"(size) | ||
426 | : "eax", "edx", "memory"); | ||
427 | return size; | ||
428 | } | ||
429 | |||
430 | /* | ||
431 | * Non Temporal Hint version of __copy_user_zeroing_intel. It is cache aware. | ||
432 | * hyoshiok@miraclelinux.com | ||
433 | */ | ||
434 | |||
/*
 * __copy_user_zeroing_intel_nocache - unrolled copy from user space using
 * movnti stores, zero-filling the destination when the copy faults.
 * @to:   destination, in kernel space
 * @from: source, in user space
 * @size: number of bytes to copy
 *
 * Returns the value left in @size by the asm: 0 when everything was
 * copied, otherwise the outstanding byte count as seen by the fixup code.
 *
 * The structure mirrors __copy_user_zeroing_intel with two differences in
 * the 64-byte loop: the stores are "movnti" instead of "movl", and an
 * "sfence" is issued once the loop has finished, before the tail copy.
 *
 * Operands: %0 = size (%ecx); %3 is the destination register (%edi) and
 * %4 the source register (%esi).  Only the loads carry exception-table
 * entries.  The fixup at labels 9/16 rebuilds the byte count if needed,
 * writes %ecx zero bytes starting at the current %edi and resumes at
 * label 8.
 */
435 | static unsigned long __copy_user_zeroing_intel_nocache(void *to, | ||
436 | const void __user *from, unsigned long size) | ||
437 | { | ||
438 | int d0, d1; | ||
439 | |||
440 | __asm__ __volatile__( | ||
441 | " .align 2,0x90\n" | ||
442 | "0: movl 32(%4), %%eax\n" | ||
443 | " cmpl $67, %0\n" | ||
444 | " jbe 2f\n" | ||
445 | "1: movl 64(%4), %%eax\n" | ||
446 | " .align 2,0x90\n" | ||
447 | "2: movl 0(%4), %%eax\n" | ||
448 | "21: movl 4(%4), %%edx\n" | ||
449 | " movnti %%eax, 0(%3)\n" | ||
450 | " movnti %%edx, 4(%3)\n" | ||
451 | "3: movl 8(%4), %%eax\n" | ||
452 | "31: movl 12(%4),%%edx\n" | ||
453 | " movnti %%eax, 8(%3)\n" | ||
454 | " movnti %%edx, 12(%3)\n" | ||
455 | "4: movl 16(%4), %%eax\n" | ||
456 | "41: movl 20(%4), %%edx\n" | ||
457 | " movnti %%eax, 16(%3)\n" | ||
458 | " movnti %%edx, 20(%3)\n" | ||
459 | "10: movl 24(%4), %%eax\n" | ||
460 | "51: movl 28(%4), %%edx\n" | ||
461 | " movnti %%eax, 24(%3)\n" | ||
462 | " movnti %%edx, 28(%3)\n" | ||
463 | "11: movl 32(%4), %%eax\n" | ||
464 | "61: movl 36(%4), %%edx\n" | ||
465 | " movnti %%eax, 32(%3)\n" | ||
466 | " movnti %%edx, 36(%3)\n" | ||
467 | "12: movl 40(%4), %%eax\n" | ||
468 | "71: movl 44(%4), %%edx\n" | ||
469 | " movnti %%eax, 40(%3)\n" | ||
470 | " movnti %%edx, 44(%3)\n" | ||
471 | "13: movl 48(%4), %%eax\n" | ||
472 | "81: movl 52(%4), %%edx\n" | ||
473 | " movnti %%eax, 48(%3)\n" | ||
474 | " movnti %%edx, 52(%3)\n" | ||
475 | "14: movl 56(%4), %%eax\n" | ||
476 | "91: movl 60(%4), %%edx\n" | ||
477 | " movnti %%eax, 56(%3)\n" | ||
478 | " movnti %%edx, 60(%3)\n" | ||
479 | " addl $-64, %0\n" | ||
480 | " addl $64, %4\n" | ||
481 | " addl $64, %3\n" | ||
482 | " cmpl $63, %0\n" | ||
483 | " ja 0b\n" | ||
484 | " sfence \n" | ||
485 | "5: movl %0, %%eax\n" | ||
486 | " shrl $2, %0\n" | ||
487 | " andl $3, %%eax\n" | ||
488 | " cld\n" | ||
489 | "6: rep; movsl\n" | ||
490 | " movl %%eax,%0\n" | ||
491 | "7: rep; movsb\n" | ||
492 | "8:\n" | ||
493 | ".section .fixup,\"ax\"\n" | ||
494 | "9: lea 0(%%eax,%0,4),%0\n" | ||
495 | "16: pushl %0\n" | ||
496 | " pushl %%eax\n" | ||
497 | " xorl %%eax,%%eax\n" | ||
498 | " rep; stosb\n" | ||
499 | " popl %%eax\n" | ||
500 | " popl %0\n" | ||
501 | " jmp 8b\n" | ||
502 | ".previous\n" | ||
503 | ".section __ex_table,\"a\"\n" | ||
504 | " .align 4\n" | ||
505 | " .long 0b,16b\n" | ||
506 | " .long 1b,16b\n" | ||
507 | " .long 2b,16b\n" | ||
508 | " .long 21b,16b\n" | ||
509 | " .long 3b,16b\n" | ||
510 | " .long 31b,16b\n" | ||
511 | " .long 4b,16b\n" | ||
512 | " .long 41b,16b\n" | ||
513 | " .long 10b,16b\n" | ||
514 | " .long 51b,16b\n" | ||
515 | " .long 11b,16b\n" | ||
516 | " .long 61b,16b\n" | ||
517 | " .long 12b,16b\n" | ||
518 | " .long 71b,16b\n" | ||
519 | " .long 13b,16b\n" | ||
520 | " .long 81b,16b\n" | ||
521 | " .long 14b,16b\n" | ||
522 | " .long 91b,16b\n" | ||
523 | " .long 6b,9b\n" | ||
524 | " .long 7b,16b\n" | ||
525 | ".previous" | ||
526 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
527 | : "1"(to), "2"(from), "0"(size) | ||
528 | : "eax", "edx", "memory"); | ||
529 | return size; | ||
530 | } | ||
531 | |||
/*
 * __copy_user_intel_nocache - unrolled copy from user space using movnti
 * stores, without zero-filling on a fault.
 * @to:   destination, in kernel space
 * @from: source, in user space
 * @size: number of bytes to copy
 *
 * Returns the value left in @size by the asm: 0 when everything was
 * copied, otherwise the outstanding byte count as seen by the fixup code.
 *
 * The copy loop is the same as in __copy_user_zeroing_intel_nocache
 * (64 bytes per iteration through %eax/%edx with "movnti" stores, then an
 * "sfence" and a "rep movsl" / "rep movsb" tail).  The difference is the
 * fixup: label 16 simply resumes at label 8, so the destination is left
 * as it is.  Label 9 (fault in "rep movsl") still rebuilds the byte count
 * from the outstanding dwords plus the saved remainder first.
 */
532 | static unsigned long __copy_user_intel_nocache(void *to, | ||
533 | const void __user *from, unsigned long size) | ||
534 | { | ||
535 | int d0, d1; | ||
536 | |||
537 | __asm__ __volatile__( | ||
538 | " .align 2,0x90\n" | ||
539 | "0: movl 32(%4), %%eax\n" | ||
540 | " cmpl $67, %0\n" | ||
541 | " jbe 2f\n" | ||
542 | "1: movl 64(%4), %%eax\n" | ||
543 | " .align 2,0x90\n" | ||
544 | "2: movl 0(%4), %%eax\n" | ||
545 | "21: movl 4(%4), %%edx\n" | ||
546 | " movnti %%eax, 0(%3)\n" | ||
547 | " movnti %%edx, 4(%3)\n" | ||
548 | "3: movl 8(%4), %%eax\n" | ||
549 | "31: movl 12(%4),%%edx\n" | ||
550 | " movnti %%eax, 8(%3)\n" | ||
551 | " movnti %%edx, 12(%3)\n" | ||
552 | "4: movl 16(%4), %%eax\n" | ||
553 | "41: movl 20(%4), %%edx\n" | ||
554 | " movnti %%eax, 16(%3)\n" | ||
555 | " movnti %%edx, 20(%3)\n" | ||
556 | "10: movl 24(%4), %%eax\n" | ||
557 | "51: movl 28(%4), %%edx\n" | ||
558 | " movnti %%eax, 24(%3)\n" | ||
559 | " movnti %%edx, 28(%3)\n" | ||
560 | "11: movl 32(%4), %%eax\n" | ||
561 | "61: movl 36(%4), %%edx\n" | ||
562 | " movnti %%eax, 32(%3)\n" | ||
563 | " movnti %%edx, 36(%3)\n" | ||
564 | "12: movl 40(%4), %%eax\n" | ||
565 | "71: movl 44(%4), %%edx\n" | ||
566 | " movnti %%eax, 40(%3)\n" | ||
567 | " movnti %%edx, 44(%3)\n" | ||
568 | "13: movl 48(%4), %%eax\n" | ||
569 | "81: movl 52(%4), %%edx\n" | ||
570 | " movnti %%eax, 48(%3)\n" | ||
571 | " movnti %%edx, 52(%3)\n" | ||
572 | "14: movl 56(%4), %%eax\n" | ||
573 | "91: movl 60(%4), %%edx\n" | ||
574 | " movnti %%eax, 56(%3)\n" | ||
575 | " movnti %%edx, 60(%3)\n" | ||
576 | " addl $-64, %0\n" | ||
577 | " addl $64, %4\n" | ||
578 | " addl $64, %3\n" | ||
579 | " cmpl $63, %0\n" | ||
580 | " ja 0b\n" | ||
581 | " sfence \n" | ||
582 | "5: movl %0, %%eax\n" | ||
583 | " shrl $2, %0\n" | ||
584 | " andl $3, %%eax\n" | ||
585 | " cld\n" | ||
586 | "6: rep; movsl\n" | ||
587 | " movl %%eax,%0\n" | ||
588 | "7: rep; movsb\n" | ||
589 | "8:\n" | ||
590 | ".section .fixup,\"ax\"\n" | ||
591 | "9: lea 0(%%eax,%0,4),%0\n" | ||
592 | "16: jmp 8b\n" | ||
593 | ".previous\n" | ||
594 | ".section __ex_table,\"a\"\n" | ||
595 | " .align 4\n" | ||
596 | " .long 0b,16b\n" | ||
597 | " .long 1b,16b\n" | ||
598 | " .long 2b,16b\n" | ||
599 | " .long 21b,16b\n" | ||
600 | " .long 3b,16b\n" | ||
601 | " .long 31b,16b\n" | ||
602 | " .long 4b,16b\n" | ||
603 | " .long 41b,16b\n" | ||
604 | " .long 10b,16b\n" | ||
605 | " .long 51b,16b\n" | ||
606 | " .long 11b,16b\n" | ||
607 | " .long 61b,16b\n" | ||
608 | " .long 12b,16b\n" | ||
609 | " .long 71b,16b\n" | ||
610 | " .long 13b,16b\n" | ||
611 | " .long 81b,16b\n" | ||
612 | " .long 14b,16b\n" | ||
613 | " .long 91b,16b\n" | ||
614 | " .long 6b,9b\n" | ||
615 | " .long 7b,16b\n" | ||
616 | ".previous" | ||
617 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
618 | : "1"(to), "2"(from), "0"(size) | ||
619 | : "eax", "edx", "memory"); | ||
620 | return size; | ||
621 | } | ||
622 | |||
623 | #else | ||
624 | |||
625 | /* | ||
626 | * Leave these declared but undefined. There should not be any references to | ||
627 | * them when CONFIG_X86_INTEL_USERCOPY is not set. | ||
628 | */ | ||
/*
 * NOTE(review): __copy_user_intel_nocache has no prototype here; its only
 * caller in this file sits inside #ifdef CONFIG_X86_INTEL_USERCOPY, so none
 * appears to be needed -- confirm if new callers are added.
 */
629 | unsigned long __copy_user_zeroing_intel(void *to, const void __user *from, | ||
630 | unsigned long size); | ||
631 | unsigned long __copy_user_intel(void __user *to, const void *from, | ||
632 | unsigned long size); | ||
633 | unsigned long __copy_user_zeroing_intel_nocache(void *to, | ||
634 | const void __user *from, unsigned long size); | ||
635 | #endif /* CONFIG_X86_INTEL_USERCOPY */ | ||
636 | |||
637 | /* Generic arbitrary sized copy. */ | ||
/*
 * __copy_user(to, from, size)
 *
 * Copies @size bytes from @from to @to and leaves in @size (an asm output)
 * the number of bytes that were NOT copied; 0 means success.
 *
 * Operands: %0 = size (%ecx), %1 = to (%edi), %2 = from (%esi),
 * %3 = __d2, a scratch register that also starts out as size.
 *
 *  - Copies of 7 bytes or fewer jump straight to the byte loop at label 1.
 *  - Otherwise (-to) & 7 bytes are copied first (label 4) so that the
 *    destination reaches an 8-byte boundary; %3 holds what is left.
 *  - Label 0 copies %3 / 4 dwords, then label 1 copies the last %3 & 3
 *    bytes.
 *  - Fixups: a fault in the alignment loop (label 4 -> 5) adds the
 *    untouched remainder in %3 to the outstanding bytes; a fault in the
 *    dword loop (label 0 -> 3) converts outstanding dwords plus the
 *    trailing bytes back into a byte count; a fault in the final byte loop
 *    (label 1) resumes at label 2 with %ecx already correct.
 */
638 | #define __copy_user(to,from,size) \ | ||
639 | do { \ | ||
640 | int __d0, __d1, __d2; \ | ||
641 | __asm__ __volatile__( \ | ||
642 | " cmp $7,%0\n" \ | ||
643 | " jbe 1f\n" \ | ||
644 | " movl %1,%0\n" \ | ||
645 | " negl %0\n" \ | ||
646 | " andl $7,%0\n" \ | ||
647 | " subl %0,%3\n" \ | ||
648 | "4: rep; movsb\n" \ | ||
649 | " movl %3,%0\n" \ | ||
650 | " shrl $2,%0\n" \ | ||
651 | " andl $3,%3\n" \ | ||
652 | " .align 2,0x90\n" \ | ||
653 | "0: rep; movsl\n" \ | ||
654 | " movl %3,%0\n" \ | ||
655 | "1: rep; movsb\n" \ | ||
656 | "2:\n" \ | ||
657 | ".section .fixup,\"ax\"\n" \ | ||
658 | "5: addl %3,%0\n" \ | ||
659 | " jmp 2b\n" \ | ||
660 | "3: lea 0(%3,%0,4),%0\n" \ | ||
661 | " jmp 2b\n" \ | ||
662 | ".previous\n" \ | ||
663 | ".section __ex_table,\"a\"\n" \ | ||
664 | " .align 4\n" \ | ||
665 | " .long 4b,5b\n" \ | ||
666 | " .long 0b,3b\n" \ | ||
667 | " .long 1b,2b\n" \ | ||
668 | ".previous" \ | ||
669 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | ||
670 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | ||
671 | : "memory"); \ | ||
672 | } while (0) | ||
673 | |||
/*
 * __copy_user_zeroing(to, from, size)
 *
 * Same copy sequence as __copy_user: bytes up to an 8-byte destination
 * boundary, then dwords, then trailing bytes, with copies of 7 bytes or
 * fewer going straight to the byte loop.  The fixup path differs: it also
 * zero-fills the part of the destination that was not copied.
 *
 * @size (an asm output) ends up holding the number of bytes that were NOT
 * copied; 0 means success.
 *
 * Fixups: label 5 (fault in the alignment loop) and label 3 (fault in the
 * dword loop) first rebuild the outstanding byte count in %ecx; all three
 * fault paths then reach label 6, which writes %ecx zero bytes starting at
 * the current %edi.  %ecx and %eax are saved on the stack around the
 * "rep stosb" so the byte count survives for the caller.
 */
674 | #define __copy_user_zeroing(to,from,size) \ | ||
675 | do { \ | ||
676 | int __d0, __d1, __d2; \ | ||
677 | __asm__ __volatile__( \ | ||
678 | " cmp $7,%0\n" \ | ||
679 | " jbe 1f\n" \ | ||
680 | " movl %1,%0\n" \ | ||
681 | " negl %0\n" \ | ||
682 | " andl $7,%0\n" \ | ||
683 | " subl %0,%3\n" \ | ||
684 | "4: rep; movsb\n" \ | ||
685 | " movl %3,%0\n" \ | ||
686 | " shrl $2,%0\n" \ | ||
687 | " andl $3,%3\n" \ | ||
688 | " .align 2,0x90\n" \ | ||
689 | "0: rep; movsl\n" \ | ||
690 | " movl %3,%0\n" \ | ||
691 | "1: rep; movsb\n" \ | ||
692 | "2:\n" \ | ||
693 | ".section .fixup,\"ax\"\n" \ | ||
694 | "5: addl %3,%0\n" \ | ||
695 | " jmp 6f\n" \ | ||
696 | "3: lea 0(%3,%0,4),%0\n" \ | ||
697 | "6: pushl %0\n" \ | ||
698 | " pushl %%eax\n" \ | ||
699 | " xorl %%eax,%%eax\n" \ | ||
700 | " rep; stosb\n" \ | ||
701 | " popl %%eax\n" \ | ||
702 | " popl %0\n" \ | ||
703 | " jmp 2b\n" \ | ||
704 | ".previous\n" \ | ||
705 | ".section __ex_table,\"a\"\n" \ | ||
706 | " .align 4\n" \ | ||
707 | " .long 4b,5b\n" \ | ||
708 | " .long 0b,3b\n" \ | ||
709 | " .long 1b,6b\n" \ | ||
710 | ".previous" \ | ||
711 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | ||
712 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | ||
713 | : "memory"); \ | ||
714 | } while (0) | ||
715 | |||
716 | unsigned long __copy_to_user_ll(void __user *to, const void *from, | ||
717 | unsigned long n) | ||
718 | { | ||
719 | #ifndef CONFIG_X86_WP_WORKS_OK | ||
720 | if (unlikely(boot_cpu_data.wp_works_ok == 0) && | ||
721 | ((unsigned long )to) < TASK_SIZE) { | ||
722 | /* | ||
723 | * When we are in an atomic section (see | ||
724 | * mm/filemap.c:file_read_actor), return the full | ||
725 | * length to take the slow path. | ||
726 | */ | ||
727 | if (in_atomic()) | ||
728 | return n; | ||
729 | |||
730 | /* | ||
731 | * CPU does not honor the WP bit when writing | ||
732 | * from supervisory mode, and due to preemption or SMP, | ||
733 | * the page tables can change at any time. | ||
734 | * Do it manually. Manfred <manfred@colorfullife.com> | ||
735 | */ | ||
736 | while (n) { | ||
737 | unsigned long offset = ((unsigned long)to)%PAGE_SIZE; | ||
738 | unsigned long len = PAGE_SIZE - offset; | ||
739 | int retval; | ||
740 | struct page *pg; | ||
741 | void *maddr; | ||
742 | |||
743 | if (len > n) | ||
744 | len = n; | ||
745 | |||
746 | survive: | ||
747 | down_read(¤t->mm->mmap_sem); | ||
748 | retval = get_user_pages(current, current->mm, | ||
749 | (unsigned long )to, 1, 1, 0, &pg, NULL); | ||
750 | |||
751 | if (retval == -ENOMEM && is_init(current)) { | ||
752 | up_read(¤t->mm->mmap_sem); | ||
753 | congestion_wait(WRITE, HZ/50); | ||
754 | goto survive; | ||
755 | } | ||
756 | |||
757 | if (retval != 1) { | ||
758 | up_read(¤t->mm->mmap_sem); | ||
759 | break; | ||
760 | } | ||
761 | |||
762 | maddr = kmap_atomic(pg, KM_USER0); | ||
763 | memcpy(maddr + offset, from, len); | ||
764 | kunmap_atomic(maddr, KM_USER0); | ||
765 | set_page_dirty_lock(pg); | ||
766 | put_page(pg); | ||
767 | up_read(¤t->mm->mmap_sem); | ||
768 | |||
769 | from += len; | ||
770 | to += len; | ||
771 | n -= len; | ||
772 | } | ||
773 | return n; | ||
774 | } | ||
775 | #endif | ||
776 | if (movsl_is_ok(to, from, n)) | ||
777 | __copy_user(to, from, n); | ||
778 | else | ||
779 | n = __copy_user_intel(to, from, n); | ||
780 | return n; | ||
781 | } | ||
782 | EXPORT_SYMBOL(__copy_to_user_ll); | ||
783 | |||
784 | unsigned long __copy_from_user_ll(void *to, const void __user *from, | ||
785 | unsigned long n) | ||
786 | { | ||
787 | if (movsl_is_ok(to, from, n)) | ||
788 | __copy_user_zeroing(to, from, n); | ||
789 | else | ||
790 | n = __copy_user_zeroing_intel(to, from, n); | ||
791 | return n; | ||
792 | } | ||
793 | EXPORT_SYMBOL(__copy_from_user_ll); | ||
794 | |||
795 | unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, | ||
796 | unsigned long n) | ||
797 | { | ||
798 | if (movsl_is_ok(to, from, n)) | ||
799 | __copy_user(to, from, n); | ||
800 | else | ||
801 | n = __copy_user_intel((void __user *)to, | ||
802 | (const void *)from, n); | ||
803 | return n; | ||
804 | } | ||
805 | EXPORT_SYMBOL(__copy_from_user_ll_nozero); | ||
806 | |||
807 | unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, | ||
808 | unsigned long n) | ||
809 | { | ||
810 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
811 | if ( n > 64 && cpu_has_xmm2) | ||
812 | n = __copy_user_zeroing_intel_nocache(to, from, n); | ||
813 | else | ||
814 | __copy_user_zeroing(to, from, n); | ||
815 | #else | ||
816 | __copy_user_zeroing(to, from, n); | ||
817 | #endif | ||
818 | return n; | ||
819 | } | ||
820 | |||
821 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, | ||
822 | unsigned long n) | ||
823 | { | ||
824 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
825 | if ( n > 64 && cpu_has_xmm2) | ||
826 | n = __copy_user_intel_nocache(to, from, n); | ||
827 | else | ||
828 | __copy_user(to, from, n); | ||
829 | #else | ||
830 | __copy_user(to, from, n); | ||
831 | #endif | ||
832 | return n; | ||
833 | } | ||
834 | |||
835 | /** | ||
836 | * copy_to_user: - Copy a block of data into user space. | ||
837 | * @to: Destination address, in user space. | ||
838 | * @from: Source address, in kernel space. | ||
839 | * @n: Number of bytes to copy. | ||
840 | * | ||
841 | * Context: User context only. This function may sleep. | ||
842 | * | ||
843 | * Copy data from kernel space to user space. | ||
844 | * | ||
845 | * Returns number of bytes that could not be copied. | ||
846 | * On success, this will be zero. | ||
847 | */ | ||
848 | unsigned long | ||
849 | copy_to_user(void __user *to, const void *from, unsigned long n) | ||
850 | { | ||
851 | if (access_ok(VERIFY_WRITE, to, n)) | ||
852 | n = __copy_to_user(to, from, n); | ||
853 | return n; | ||
854 | } | ||
855 | EXPORT_SYMBOL(copy_to_user); | ||
856 | |||
857 | /** | ||
858 | * copy_from_user: - Copy a block of data from user space. | ||
859 | * @to: Destination address, in kernel space. | ||
860 | * @from: Source address, in user space. | ||
861 | * @n: Number of bytes to copy. | ||
862 | * | ||
863 | * Context: User context only. This function may sleep. | ||
864 | * | ||
865 | * Copy data from user space to kernel space. | ||
866 | * | ||
867 | * Returns number of bytes that could not be copied. | ||
868 | * On success, this will be zero. | ||
869 | * | ||
870 | * If some data could not be copied, this function will pad the copied | ||
871 | * data to the requested size using zero bytes. | ||
872 | */ | ||
873 | unsigned long | ||
874 | copy_from_user(void *to, const void __user *from, unsigned long n) | ||
875 | { | ||
876 | if (access_ok(VERIFY_READ, from, n)) | ||
877 | n = __copy_from_user(to, from, n); | ||
878 | else | ||
879 | memset(to, 0, n); | ||
880 | return n; | ||
881 | } | ||
882 | EXPORT_SYMBOL(copy_from_user); | ||