about summary refs log tree commit diff stats
path: root/arch/i386/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 18:20:36 -0400
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/lib
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/i386/lib')
-rw-r--r--arch/i386/lib/Makefile10
-rw-r--r--arch/i386/lib/bitops.c70
-rw-r--r--arch/i386/lib/checksum.S496
-rw-r--r--arch/i386/lib/dec_and_lock.c40
-rw-r--r--arch/i386/lib/delay.c49
-rw-r--r--arch/i386/lib/getuser.S70
-rw-r--r--arch/i386/lib/memcpy.c44
-rw-r--r--arch/i386/lib/mmx.c399
-rw-r--r--arch/i386/lib/putuser.S87
-rw-r--r--arch/i386/lib/strstr.c31
-rw-r--r--arch/i386/lib/usercopy.c636
11 files changed, 1932 insertions, 0 deletions
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
new file mode 100644
index 000000000000..7b1932d20f96
--- /dev/null
+++ b/arch/i386/lib/Makefile
@@ -0,0 +1,10 @@
1#
2# Makefile for i386-specific library files..
3#
4
5
6lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
7 bitops.o
8
9lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
10lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c
new file mode 100644
index 000000000000..97db3853dc82
--- /dev/null
+++ b/arch/i386/lib/bitops.c
@@ -0,0 +1,70 @@
1#include <linux/bitops.h>
2#include <linux/module.h>
3
4/**
5 * find_next_bit - find the next set bit in a memory region
6 * @addr: The address to base the search on
7 * @offset: The bitnumber to start searching at
8 * @size: The maximum size to search
9 */
10int find_next_bit(const unsigned long *addr, int size, int offset)
11{
	/* 32-bit word (i386) that contains bit "offset". */
12	const unsigned long *p = addr + (offset >> 5);
13	int set = 0, bit = offset & 31, res;
14
15	if (bit) {
16		/*
17		 * Look for nonzero in the first 32 bits:
18		 */
		/*
		 * bsfl yields the index of the lowest set bit of
		 * (*p >> bit); when the operand is zero ZF is set and
		 * the fall-through movl forces the result to 32.
		 */
19		__asm__("bsfl %1,%0\n\t"
20			"jne 1f\n\t"
21			"movl $32, %0\n"
22			"1:"
23			: "=r" (set)
24			: "r" (*p >> bit));
25		if (set < (32 - bit))
26			return set + offset;
		/* No hit in the partial first word: skip past it. */
27		set = 32 - bit;
28		p++;
29	}
30	/*
31	 * No set bit yet, search remaining full words for a bit
32	 */
33	res = find_first_bit (p, size - 32 * (p - addr));
34	return (offset + set + res);
35}
36EXPORT_SYMBOL(find_next_bit);
37
38/**
39 * find_next_zero_bit - find the next zero bit in a memory region
40 * @addr: The address to base the search on
41 * @offset: The bitnumber to start searching at
42 * @size: The maximum size to search
43 */
44int find_next_zero_bit(const unsigned long *addr, int size, int offset)
45{
	/* 32-bit word (i386) that contains bit "offset". */
46	unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
47	int set = 0, bit = offset & 31, res;
48
49	if (bit) {
50		/*
51		 * Look for zero in the first 32 bits.
52		 */
		/*
		 * Invert the partial word so bsfl's "lowest set bit"
		 * is the lowest ZERO bit; operand zero (no zero bit in
		 * range) sets ZF and the movl forces the result to 32.
		 */
53		__asm__("bsfl %1,%0\n\t"
54			"jne 1f\n\t"
55			"movl $32, %0\n"
56		        "1:"
57			: "=r" (set)
58			: "r" (~(*p >> bit)));
59		if (set < (32 - bit))
60			return set + offset;
		/* No zero bit in the partial first word: skip past it. */
61		set = 32 - bit;
62		p++;
63	}
64	/*
65	 * No zero yet, search remaining full words for a zero
66	 */
67	res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
68	return (offset + set + res);
69}
70EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
new file mode 100644
index 000000000000..94c7867ddc33
--- /dev/null
+++ b/arch/i386/lib/checksum.S
@@ -0,0 +1,496 @@
1/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IP/TCP/UDP checksumming routines
7 *
8 * Authors: Jorge Cwik, <jorge@laser.satlink.net>
9 * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
10 * Tom May, <ftom@netcom.com>
11 * Pentium Pro/II routines:
12 * Alexander Kjeldaas <astor@guardian.no>
13 * Finn Arne Gangstad <finnag@guardian.no>
14 * Lots of code moved from tcp.c and ip.c; see those files
15 * for more names.
16 *
17 * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
18 * handling.
19 * Andi Kleen, add zeroing on error
20 * converted to pure assembler
21 *
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
26 */
27
28#include <linux/config.h>
29#include <asm/errno.h>
30
31/*
32 * computes a partial checksum, e.g. for TCP/UDP fragments
33 */
34
35/*
36unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
37 */
38
39.text
40.align 4
41.globl csum_partial
42
43#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
44
45 /*
46 * Experiments with Ethernet and SLIP connections show that buff
47 * is aligned on either a 2-byte or 4-byte boundary. We get at
48 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
49 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
50 * alignment for the unrolled loop.
51 */
# csum_partial(buff, len, sum): 32-bit one's-complement partial sum of
# "len" bytes at "buff", folded into "sum".  386/486 version (selected
# when CONFIG_X86_USE_PPRO_CHECKSUM is unset); eight unrolled adcl's
# per 32-byte chunk.  If buff starts on an odd address the partial sum
# is byte-rotated at entry and rotated back at exit.
52csum_partial:
53	pushl %esi
54	pushl %ebx
55	movl 20(%esp),%eax	# Function arg: unsigned int sum
56	movl 16(%esp),%ecx	# Function arg: int len
57	movl 12(%esp),%esi	# Function arg: unsigned char *buff
58	testl $3, %esi		# Check alignment.
59	jz 2f			# Jump if alignment is ok.
60	testl $1, %esi		# Check alignment.
61	jz 10f			# Jump if alignment is boundary of 2bytes.
62
63	# buf is odd
64	dec %ecx
65	jl 8f			# len was 0: nothing to do
66	movzbl (%esi), %ebx
67	adcl %ebx, %eax
68	roll $8, %eax		# keep byte lanes consistent; undone at 7:
69	inc %esi
70	testl $2, %esi
71	jz 2f
7210:
73	subl $2, %ecx		# Alignment uses up two bytes.
74	jae 1f			# Jump if we had at least two bytes.
75	addl $2, %ecx		# ecx was < 2. Deal with it.
76	jmp 4f
771:	movw (%esi), %bx
78	addl $2, %esi
79	addw %bx, %ax
80	adcl $0, %eax
812:
82	movl %ecx, %edx
83	shrl $5, %ecx		# ecx = number of 32-byte chunks
84	jz 2f
85	testl %esi, %esi	# clears CF before entering the adcl chain
	# Main loop: sum 8 dwords per iteration, carrying between adds.
861:	movl (%esi), %ebx
87	adcl %ebx, %eax
88	movl 4(%esi), %ebx
89	adcl %ebx, %eax
90	movl 8(%esi), %ebx
91	adcl %ebx, %eax
92	movl 12(%esi), %ebx
93	adcl %ebx, %eax
94	movl 16(%esi), %ebx
95	adcl %ebx, %eax
96	movl 20(%esi), %ebx
97	adcl %ebx, %eax
98	movl 24(%esi), %ebx
99	adcl %ebx, %eax
100	movl 28(%esi), %ebx
101	adcl %ebx, %eax
102	lea 32(%esi), %esi
103	dec %ecx
104	jne 1b
105	adcl $0, %eax		# fold the final carry
	# Remaining 4..28 bytes, one dword at a time.
1062:	movl %edx, %ecx
107	andl $0x1c, %edx
108	je 4f
109	shrl $2, %edx		# This clears CF
1103:	adcl (%esi), %eax
111	lea 4(%esi), %esi
112	dec %edx
113	jne 3b
114	adcl $0, %eax
	# Trailing 0..3 bytes.
1154:	andl $3, %ecx
116	jz 7f
117	cmpl $2, %ecx
118	jb 5f
119	movw (%esi),%cx
120	leal 2(%esi),%esi
121	je 6f
122	shll $16,%ecx
1235:	movb (%esi),%cl
1246:	addl %ecx,%eax
125	adcl $0, %eax
1267:
	# If buff (12(%esp)) was odd-aligned, rotate back to undo the
	# roll $8 done at entry; the one's-complement sum is invariant
	# under byte rotation, so this restores the proper byte order.
127	testl $1, 12(%esp)
128	jz 8f
129	roll $8, %eax
1308:
131	popl %ebx
132	popl %esi
133	ret
134
135#else
136
137/* Version for PentiumII/PPro */
138
139csum_partial:
140 pushl %esi
141 pushl %ebx
142 movl 20(%esp),%eax # Function arg: unsigned int sum
143 movl 16(%esp),%ecx # Function arg: int len
144 movl 12(%esp),%esi # Function arg: const unsigned char *buf
145
146 testl $3, %esi
147 jnz 25f
14810:
149 movl %ecx, %edx
150 movl %ecx, %ebx
151 andl $0x7c, %ebx
152 shrl $7, %ecx
153 addl %ebx,%esi
154 shrl $2, %ebx
155 negl %ebx
156 lea 45f(%ebx,%ebx,2), %ebx
157 testl %esi, %esi
158 jmp *%ebx
159
160 # Handle 2-byte-aligned regions
16120: addw (%esi), %ax
162 lea 2(%esi), %esi
163 adcl $0, %eax
164 jmp 10b
16525:
166 testl $1, %esi
167 jz 30f
168 # buf is odd
169 dec %ecx
170 jl 90f
171 movzbl (%esi), %ebx
172 addl %ebx, %eax
173 adcl $0, %eax
174 roll $8, %eax
175 inc %esi
176 testl $2, %esi
177 jz 10b
178
17930: subl $2, %ecx
180 ja 20b
181 je 32f
182 addl $2, %ecx
183 jz 80f
184 movzbl (%esi),%ebx # csumming 1 byte, 2-aligned
185 addl %ebx, %eax
186 adcl $0, %eax
187 jmp 80f
18832:
189 addw (%esi), %ax # csumming 2 bytes, 2-aligned
190 adcl $0, %eax
191 jmp 80f
192
19340:
194 addl -128(%esi), %eax
195 adcl -124(%esi), %eax
196 adcl -120(%esi), %eax
197 adcl -116(%esi), %eax
198 adcl -112(%esi), %eax
199 adcl -108(%esi), %eax
200 adcl -104(%esi), %eax
201 adcl -100(%esi), %eax
202 adcl -96(%esi), %eax
203 adcl -92(%esi), %eax
204 adcl -88(%esi), %eax
205 adcl -84(%esi), %eax
206 adcl -80(%esi), %eax
207 adcl -76(%esi), %eax
208 adcl -72(%esi), %eax
209 adcl -68(%esi), %eax
210 adcl -64(%esi), %eax
211 adcl -60(%esi), %eax
212 adcl -56(%esi), %eax
213 adcl -52(%esi), %eax
214 adcl -48(%esi), %eax
215 adcl -44(%esi), %eax
216 adcl -40(%esi), %eax
217 adcl -36(%esi), %eax
218 adcl -32(%esi), %eax
219 adcl -28(%esi), %eax
220 adcl -24(%esi), %eax
221 adcl -20(%esi), %eax
222 adcl -16(%esi), %eax
223 adcl -12(%esi), %eax
224 adcl -8(%esi), %eax
225 adcl -4(%esi), %eax
22645:
227 lea 128(%esi), %esi
228 adcl $0, %eax
229 dec %ecx
230 jge 40b
231 movl %edx, %ecx
23250: andl $3, %ecx
233 jz 80f
234
235 # Handle the last 1-3 bytes without jumping
236 notl %ecx # 1->2, 2->1, 3->0, higher bits are masked
237 movl $0xffffff,%ebx # by the shll and shrl instructions
238 shll $3,%ecx
239 shrl %cl,%ebx
240 andl -128(%esi),%ebx # esi is 4-aligned so should be ok
241 addl %ebx,%eax
242 adcl $0,%eax
24380:
244 testl $1, 12(%esp)
245 jz 90f
246 roll $8, %eax
24790:
248 popl %ebx
249 popl %esi
250 ret
251
252#endif
253
254/*
255unsigned int csum_partial_copy_generic (const char *src, char *dst,
256 int len, int sum, int *src_err_ptr, int *dst_err_ptr)
257 */
258
259/*
260 * Copy from ds while checksumming, otherwise like csum_partial
261 *
262 * The macros SRC and DST specify the type of access for the instruction.
263 * thus we can call a custom exception handler for all access types.
264 *
265 * FIXME: could someone double-check whether I haven't mixed up some SRC and
266 * DST definitions? It's damn hard to trigger all cases. I hope I got
267 * them all but there's no guarantee.
268 */
269
270#define SRC(y...) \
271 9999: y; \
272 .section __ex_table, "a"; \
273 .long 9999b, 6001f ; \
274 .previous
275
276#define DST(y...) \
277 9999: y; \
278 .section __ex_table, "a"; \
279 .long 9999b, 6002f ; \
280 .previous
281
282.align 4
283.globl csum_partial_copy_generic
284
285#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
286
287#define ARGBASE 16
288#define FP 12
289
290csum_partial_copy_generic:
291 subl $4,%esp
292 pushl %edi
293 pushl %esi
294 pushl %ebx
295 movl ARGBASE+16(%esp),%eax # sum
296 movl ARGBASE+12(%esp),%ecx # len
297 movl ARGBASE+4(%esp),%esi # src
298 movl ARGBASE+8(%esp),%edi # dst
299
300 testl $2, %edi # Check alignment.
301 jz 2f # Jump if alignment is ok.
302 subl $2, %ecx # Alignment uses up two bytes.
303 jae 1f # Jump if we had at least two bytes.
304 addl $2, %ecx # ecx was < 2. Deal with it.
305 jmp 4f
306SRC(1: movw (%esi), %bx )
307 addl $2, %esi
308DST( movw %bx, (%edi) )
309 addl $2, %edi
310 addw %bx, %ax
311 adcl $0, %eax
3122:
313 movl %ecx, FP(%esp)
314 shrl $5, %ecx
315 jz 2f
316 testl %esi, %esi
317SRC(1: movl (%esi), %ebx )
318SRC( movl 4(%esi), %edx )
319 adcl %ebx, %eax
320DST( movl %ebx, (%edi) )
321 adcl %edx, %eax
322DST( movl %edx, 4(%edi) )
323
324SRC( movl 8(%esi), %ebx )
325SRC( movl 12(%esi), %edx )
326 adcl %ebx, %eax
327DST( movl %ebx, 8(%edi) )
328 adcl %edx, %eax
329DST( movl %edx, 12(%edi) )
330
331SRC( movl 16(%esi), %ebx )
332SRC( movl 20(%esi), %edx )
333 adcl %ebx, %eax
334DST( movl %ebx, 16(%edi) )
335 adcl %edx, %eax
336DST( movl %edx, 20(%edi) )
337
338SRC( movl 24(%esi), %ebx )
339SRC( movl 28(%esi), %edx )
340 adcl %ebx, %eax
341DST( movl %ebx, 24(%edi) )
342 adcl %edx, %eax
343DST( movl %edx, 28(%edi) )
344
345 lea 32(%esi), %esi
346 lea 32(%edi), %edi
347 dec %ecx
348 jne 1b
349 adcl $0, %eax
3502: movl FP(%esp), %edx
351 movl %edx, %ecx
352 andl $0x1c, %edx
353 je 4f
354 shrl $2, %edx # This clears CF
355SRC(3: movl (%esi), %ebx )
356 adcl %ebx, %eax
357DST( movl %ebx, (%edi) )
358 lea 4(%esi), %esi
359 lea 4(%edi), %edi
360 dec %edx
361 jne 3b
362 adcl $0, %eax
3634: andl $3, %ecx
364 jz 7f
365 cmpl $2, %ecx
366 jb 5f
367SRC( movw (%esi), %cx )
368 leal 2(%esi), %esi
369DST( movw %cx, (%edi) )
370 leal 2(%edi), %edi
371 je 6f
372 shll $16,%ecx
373SRC(5: movb (%esi), %cl )
374DST( movb %cl, (%edi) )
3756: addl %ecx, %eax
376 adcl $0, %eax
3777:
3785000:
379
380# Exception handler:
381.section .fixup, "ax"
382
3836001:
384 movl ARGBASE+20(%esp), %ebx # src_err_ptr
385 movl $-EFAULT, (%ebx)
386
387 # zero the complete destination - computing the rest
388 # is too much work
389 movl ARGBASE+8(%esp), %edi # dst
390 movl ARGBASE+12(%esp), %ecx # len
391 xorl %eax,%eax
392 rep ; stosb
393
394 jmp 5000b
395
3966002:
397 movl ARGBASE+24(%esp), %ebx # dst_err_ptr
398 movl $-EFAULT,(%ebx)
399 jmp 5000b
400
401.previous
402
403 popl %ebx
404 popl %esi
405 popl %edi
406 popl %ecx # equivalent to addl $4,%esp
407 ret
408
409#else
410
411/* Version for PentiumII/PPro */
412
413#define ROUND1(x) \
414 SRC(movl x(%esi), %ebx ) ; \
415 addl %ebx, %eax ; \
416 DST(movl %ebx, x(%edi) ) ;
417
418#define ROUND(x) \
419 SRC(movl x(%esi), %ebx ) ; \
420 adcl %ebx, %eax ; \
421 DST(movl %ebx, x(%edi) ) ;
422
423#define ARGBASE 12
424
425csum_partial_copy_generic:
426 pushl %ebx
427 pushl %edi
428 pushl %esi
429 movl ARGBASE+4(%esp),%esi #src
430 movl ARGBASE+8(%esp),%edi #dst
431 movl ARGBASE+12(%esp),%ecx #len
432 movl ARGBASE+16(%esp),%eax #sum
433# movl %ecx, %edx
434 movl %ecx, %ebx
435 movl %esi, %edx
436 shrl $6, %ecx
437 andl $0x3c, %ebx
438 negl %ebx
439 subl %ebx, %esi
440 subl %ebx, %edi
441 lea -1(%esi),%edx
442 andl $-32,%edx
443 lea 3f(%ebx,%ebx), %ebx
444 testl %esi, %esi
445 jmp *%ebx
4461: addl $64,%esi
447 addl $64,%edi
448 SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl)
449 ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)
450 ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)
451 ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)
452 ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4)
4533: adcl $0,%eax
454 addl $64, %edx
455 dec %ecx
456 jge 1b
4574: movl ARGBASE+12(%esp),%edx #len
458 andl $3, %edx
459 jz 7f
460 cmpl $2, %edx
461 jb 5f
462SRC( movw (%esi), %dx )
463 leal 2(%esi), %esi
464DST( movw %dx, (%edi) )
465 leal 2(%edi), %edi
466 je 6f
467 shll $16,%edx
4685:
469SRC( movb (%esi), %dl )
470DST( movb %dl, (%edi) )
4716: addl %edx, %eax
472 adcl $0, %eax
4737:
474.section .fixup, "ax"
4756001: movl ARGBASE+20(%esp), %ebx # src_err_ptr
476 movl $-EFAULT, (%ebx)
477 # zero the complete destination (computing the rest is too much work)
478 movl ARGBASE+8(%esp),%edi # dst
479 movl ARGBASE+12(%esp),%ecx # len
480 xorl %eax,%eax
481 rep; stosb
482 jmp 7b
4836002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr
484 movl $-EFAULT, (%ebx)
485 jmp 7b
486.previous
487
488 popl %esi
489 popl %edi
490 popl %ebx
491 ret
492
493#undef ROUND
494#undef ROUND1
495
496#endif
diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c
new file mode 100644
index 000000000000..ab43394dc775
--- /dev/null
+++ b/arch/i386/lib/dec_and_lock.c
@@ -0,0 +1,40 @@
1/*
2 * x86 version of "atomic_dec_and_lock()" using
3 * the atomic "cmpxchg" instruction.
4 *
5 * (For CPU's lacking cmpxchg, we use the slow
6 * generic version, and this one never even gets
7 * compiled).
8 */
9
10#include <linux/spinlock.h>
11#include <asm/atomic.h>
12
13int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
14{
15 int counter;
16 int newcount;
17
18repeat:
19 counter = atomic_read(atomic);
20 newcount = counter-1;
21
22 if (!newcount)
23 goto slow_path;
24
25 asm volatile("lock; cmpxchgl %1,%2"
26 :"=a" (newcount)
27 :"r" (newcount), "m" (atomic->counter), "0" (counter));
28
29 /* If the above failed, "eax" will have changed */
30 if (newcount != counter)
31 goto repeat;
32 return 0;
33
34slow_path:
35 spin_lock(lock);
36 if (atomic_dec_and_test(atomic))
37 return 1;
38 spin_unlock(lock);
39 return 0;
40}
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
new file mode 100644
index 000000000000..080639f262b1
--- /dev/null
+++ b/arch/i386/lib/delay.c
@@ -0,0 +1,49 @@
1/*
2 * Precise Delay Loops for i386
3 *
4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 *
7 * The __delay function must _NOT_ be inlined as its execution time
8 * depends wildly on alignment on many x86 processors. The additional
9 * jump magic is needed to get the timing stable on all the CPU's
10 * we have to worry about.
11 */
12
13#include <linux/config.h>
14#include <linux/sched.h>
15#include <linux/delay.h>
16#include <asm/processor.h>
17#include <asm/delay.h>
18#include <asm/timer.h>
19
20#ifdef CONFIG_SMP
21#include <asm/smp.h>
22#endif
23
24extern struct timer_opts* timer;
25
26void __delay(unsigned long loops)
27{
	/* Dispatch to the delay routine of the timer backend selected
	 * at boot; see the header note on why this must not be inlined. */
28	cur_timer->delay(loops);
29}
30
31inline void __const_udelay(unsigned long xloops)
32{
33	int d0;
	/* Scale xloops by 4 here and loops_per_jiffy down by 4 below so
	 * the product fits the 32x32->64 multiply. */
34	xloops *= 4;
	/* mull: %edx:%eax = xloops * (loops_per_jiffy * HZ/4); the high
	 * half (%edx, written back into xloops) is the loop count. */
35	__asm__("mull %0"
36		:"=d" (xloops), "=&a" (d0)
37		:"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4)));
	/* ++xloops rounds the truncated high half up by one loop. */
38	__delay(++xloops);
39}
40
/*
 * __udelay - busy-wait for @usecs microseconds.
 * 0x000010c7 is 2**32 / 10**6 rounded up, so the multiply converts
 * microseconds into the fixed-point value __const_udelay expects.
 */
void __udelay(unsigned long usecs)
{
	__const_udelay(0x000010c7 * usecs);
}
45
/*
 * __ndelay - busy-wait for @nsecs nanoseconds.
 * 0x00005 is 2**32 / 10**9 rounded up (~4.295 -> 5).
 */
void __ndelay(unsigned long nsecs)
{
	__const_udelay(0x00005 * nsecs);
}
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S
new file mode 100644
index 000000000000..62d7f178a326
--- /dev/null
+++ b/arch/i386/lib/getuser.S
@@ -0,0 +1,70 @@
1/*
2 * __get_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 *
6 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they
8 * return an error value in addition to the "real"
9 * return value.
10 */
11#include <asm/thread_info.h>
12
13
14/*
15 * __get_user_X
16 *
17 * Inputs: %eax contains the address
18 *
19 * Outputs: %eax is error code (0 or -EFAULT)
20 * %edx contains zero-extended value
21 *
22 * These functions should not modify any other registers,
23 * as they get called from within inline assembly.
24 */
25
# All three entry points take the user address in %eax and return the
# error code in %eax (0 or -EFAULT) with the zero-extended value in
# %edx.  Faulting loads recover through the __ex_table entries at the
# bottom, which redirect execution to bad_get_user.
26.text
27.align 4
28.globl __get_user_1
29__get_user_1:
30	GET_THREAD_INFO(%edx)
31	cmpl TI_addr_limit(%edx),%eax
32	jae bad_get_user
331:	movzbl (%eax),%edx
34	xorl %eax,%eax
35	ret
36
37.align 4
38.globl __get_user_2
39__get_user_2:
	# Point %eax at the LAST byte of the access so one compare
	# catches both address wrap-around (jc) and the addr_limit.
40	addl $1,%eax
41	jc bad_get_user
42	GET_THREAD_INFO(%edx)
43	cmpl TI_addr_limit(%edx),%eax
44	jae bad_get_user
452:	movzwl -1(%eax),%edx
46	xorl %eax,%eax
47	ret
48
49.align 4
50.globl __get_user_4
51__get_user_4:
52	addl $3,%eax
53	jc bad_get_user
54	GET_THREAD_INFO(%edx)
55	cmpl TI_addr_limit(%edx),%eax
56	jae bad_get_user
573:	movl -3(%eax),%edx
58	xorl %eax,%eax
59	ret
60
61bad_get_user:
62	xorl %edx,%edx
63	movl $-14,%eax	# -EFAULT
64	ret
65
66.section __ex_table,"a"
67	.long 1b,bad_get_user
68	.long 2b,bad_get_user
69	.long 3b,bad_get_user
70.previous
diff --git a/arch/i386/lib/memcpy.c b/arch/i386/lib/memcpy.c
new file mode 100644
index 000000000000..891b2359d18a
--- /dev/null
+++ b/arch/i386/lib/memcpy.c
@@ -0,0 +1,44 @@
1#include <linux/config.h>
2#include <linux/string.h>
3#include <linux/module.h>
4
5#undef memcpy
6#undef memset
7
/*
 * memcpy - out-of-line memory copy.  When the kernel is built for a
 * 3DNow!-capable CPU the MMX-accelerated __memcpy3d is used instead
 * of the plain __memcpy primitive.
 */
8void *memcpy(void *to, const void *from, size_t n)
9{
10#ifdef CONFIG_X86_USE_3DNOW
11	return __memcpy3d(to, from, n);
12#else
13	return __memcpy(to, from, n);
14#endif
15}
16EXPORT_SYMBOL(memcpy);
17
/* memset - out-of-line wrapper around the inline __memset primitive. */
18void *memset(void *s, int c, size_t count)
19{
20	return __memset(s, c, count);
21}
22EXPORT_SYMBOL(memset);
23
/*
 * memmove - copy that tolerates overlapping source and destination.
 * Forward copy is safe when dest is below src; otherwise copy
 * backwards so the overlapping tail is read before it is clobbered.
 */
24void *memmove(void *dest, const void *src, size_t n)
25{
26	int d0, d1, d2;
27
28	if (dest < src) {
29		memcpy(dest,src,n);
30	} else {
		/*
		 * std sets the direction flag so rep movsb walks from
		 * the last byte of each buffer downwards; cld restores
		 * the normal direction before returning.
		 */
31		__asm__ __volatile__(
32			"std\n\t"
33			"rep\n\t"
34			"movsb\n\t"
35			"cld"
36			: "=&c" (d0), "=&S" (d1), "=&D" (d2)
37			:"0" (n),
38			"1" (n-1+(const char *)src),
39			"2" (n-1+(char *)dest)
40			:"memory");
41	}
42	return dest;
43}
44EXPORT_SYMBOL(memmove);
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
new file mode 100644
index 000000000000..01f8b1a2cc84
--- /dev/null
+++ b/arch/i386/lib/mmx.c
@@ -0,0 +1,399 @@
1#include <linux/config.h>
2#include <linux/types.h>
3#include <linux/string.h>
4#include <linux/sched.h>
5#include <linux/hardirq.h>
6
7#include <asm/i387.h>
8
9
10/*
11 * MMX 3DNow! library helper functions
12 *
13 * To do:
14 * We can use MMX just for prefetch in IRQ's. This may be a win.
15 * (reported so on K6-III)
16 * We should use a better code neutral filler for the short jump
17 * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
18 * We also want to clobber the filler register so we don't get any
19 * register forwarding stalls on the filler.
20 *
21 * Add *user handling. Checksums are not a win with MMX on any CPU
22 * tested so far for any MMX solution figured.
23 *
24 * 22/09/2000 - Arjan van de Ven
25 * Improved for non-egineering-sample Athlons
26 *
27 */
28
/*
 * _mmx_memcpy - copy "len" bytes with 64-byte MMX register blocks.
 * MMX state cannot be used from interrupt context, so fall back to
 * the plain __memcpy there.
 */
29void *_mmx_memcpy(void *to, const void *from, size_t len)
30{
31	void *p;
32	int i;
33
34	if (unlikely(in_interrupt()))
35		return __memcpy(to, from, len);
36
37	p = to;
38	i = len >> 6; /* len/64 */
39
40	kernel_fpu_begin();
41
	/*
	 * Warm-up prefetch of the first 320 bytes.  If prefetch faults
	 * on this CPU, the fixup patches the first prefetch into a
	 * short jump (0x1AEB is "jmp +26" little-endian), permanently
	 * disabling this prefetch block -- NOTE(review): self-modifying
	 * one-shot disable, presumably for CPUs lacking 3DNow prefetch.
	 */
42	__asm__ __volatile__ (
43		"1: prefetch (%0)\n"		/* This set is 28 bytes */
44		"   prefetch 64(%0)\n"
45		"   prefetch 128(%0)\n"
46		"   prefetch 192(%0)\n"
47		"   prefetch 256(%0)\n"
48		"2:  \n"
49		".section .fixup, \"ax\"\n"
50		"3: movw $0x1AEB, 1b\n"	/* jmp on 26 bytes */
51		"   jmp 2b\n"
52		".previous\n"
53		".section __ex_table,\"a\"\n"
54		" .align 4\n"
55		" .long 1b, 3b\n"
56		".previous"
57		: : "r" (from) );
58
59
	/*
	 * Copy 64-byte blocks while prefetching 320 bytes ahead; the
	 * final 5 blocks go through the loop below instead, so the
	 * prefetch never reads past the end of the source buffer.
	 */
60	for(; i>5; i--)
61	{
62		__asm__ __volatile__ (
63		"1:  prefetch 320(%0)\n"
64		"2:  movq (%0), %%mm0\n"
65		"  movq 8(%0), %%mm1\n"
66		"  movq 16(%0), %%mm2\n"
67		"  movq 24(%0), %%mm3\n"
68		"  movq %%mm0, (%1)\n"
69		"  movq %%mm1, 8(%1)\n"
70		"  movq %%mm2, 16(%1)\n"
71		"  movq %%mm3, 24(%1)\n"
72		"  movq 32(%0), %%mm0\n"
73		"  movq 40(%0), %%mm1\n"
74		"  movq 48(%0), %%mm2\n"
75		"  movq 56(%0), %%mm3\n"
76		"  movq %%mm0, 32(%1)\n"
77		"  movq %%mm1, 40(%1)\n"
78		"  movq %%mm2, 48(%1)\n"
79		"  movq %%mm3, 56(%1)\n"
80		".section .fixup, \"ax\"\n"
81		"3: movw $0x05EB, 1b\n"	/* jmp on 5 bytes */
82		"   jmp 2b\n"
83		".previous\n"
84		".section __ex_table,\"a\"\n"
85		" .align 4\n"
86		" .long 1b, 3b\n"
87		".previous"
88		: : "r" (from), "r" (to) : "memory");
89		from+=64;
90		to+=64;
91	}
92
	/* Last few 64-byte blocks: same copy, no prefetch. */
93	for(; i>0; i--)
94	{
95		__asm__ __volatile__ (
96		"  movq (%0), %%mm0\n"
97		"  movq 8(%0), %%mm1\n"
98		"  movq 16(%0), %%mm2\n"
99		"  movq 24(%0), %%mm3\n"
100		"  movq %%mm0, (%1)\n"
101		"  movq %%mm1, 8(%1)\n"
102		"  movq %%mm2, 16(%1)\n"
103		"  movq %%mm3, 24(%1)\n"
104		"  movq 32(%0), %%mm0\n"
105		"  movq 40(%0), %%mm1\n"
106		"  movq 48(%0), %%mm2\n"
107		"  movq 56(%0), %%mm3\n"
108		"  movq %%mm0, 32(%1)\n"
109		"  movq %%mm1, 40(%1)\n"
110		"  movq %%mm2, 48(%1)\n"
111		"  movq %%mm3, 56(%1)\n"
112		: : "r" (from), "r" (to) : "memory");
113		from+=64;
114		to+=64;
115	}
116	/*
117	 * Now do the tail of the block
118	 */
119	__memcpy(to, from, len&63);
120	kernel_fpu_end();
121	return p;
122}
123
124#ifdef CONFIG_MK7
125
126/*
127 * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
128 * other MMX using processors do not.
129 */
130
/* Zero one 4096-byte page with non-temporal (cache-bypassing) stores. */
131static void fast_clear_page(void *page)
132{
133	int i;
134
135	kernel_fpu_begin();
136
	/* %mm0 = 0; it is the 8-byte source for every store below. */
137	__asm__ __volatile__ (
138		" pxor %%mm0, %%mm0\n" : :
139	);
140
	/* 64 bytes of movntq stores per pass over the page. */
141	for(i=0;i<4096/64;i++)
142	{
143		__asm__ __volatile__ (
144		" movntq %%mm0, (%0)\n"
145		" movntq %%mm0, 8(%0)\n"
146		" movntq %%mm0, 16(%0)\n"
147		" movntq %%mm0, 24(%0)\n"
148		" movntq %%mm0, 32(%0)\n"
149		" movntq %%mm0, 40(%0)\n"
150		" movntq %%mm0, 48(%0)\n"
151		" movntq %%mm0, 56(%0)\n"
152		: : "r" (page) : "memory");
153		page+=64;
154	}
155	/* since movntq is weakly-ordered, a "sfence" is needed to become
156	 * ordered again.
157	 */
158	__asm__ __volatile__ (
159		" sfence \n" : :
160	);
161	kernel_fpu_end();
162}
163
164static void fast_copy_page(void *to, void *from)
165{
166 int i;
167
168 kernel_fpu_begin();
169
170 /* maybe the prefetch stuff can go before the expensive fnsave...
171 * but that is for later. -AV
172 */
173 __asm__ __volatile__ (
174 "1: prefetch (%0)\n"
175 " prefetch 64(%0)\n"
176 " prefetch 128(%0)\n"
177 " prefetch 192(%0)\n"
178 " prefetch 256(%0)\n"
179 "2: \n"
180 ".section .fixup, \"ax\"\n"
181 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
182 " jmp 2b\n"
183 ".previous\n"
184 ".section __ex_table,\"a\"\n"
185 " .align 4\n"
186 " .long 1b, 3b\n"
187 ".previous"
188 : : "r" (from) );
189
190 for(i=0; i<(4096-320)/64; i++)
191 {
192 __asm__ __volatile__ (
193 "1: prefetch 320(%0)\n"
194 "2: movq (%0), %%mm0\n"
195 " movntq %%mm0, (%1)\n"
196 " movq 8(%0), %%mm1\n"
197 " movntq %%mm1, 8(%1)\n"
198 " movq 16(%0), %%mm2\n"
199 " movntq %%mm2, 16(%1)\n"
200 " movq 24(%0), %%mm3\n"
201 " movntq %%mm3, 24(%1)\n"
202 " movq 32(%0), %%mm4\n"
203 " movntq %%mm4, 32(%1)\n"
204 " movq 40(%0), %%mm5\n"
205 " movntq %%mm5, 40(%1)\n"
206 " movq 48(%0), %%mm6\n"
207 " movntq %%mm6, 48(%1)\n"
208 " movq 56(%0), %%mm7\n"
209 " movntq %%mm7, 56(%1)\n"
210 ".section .fixup, \"ax\"\n"
211 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
212 " jmp 2b\n"
213 ".previous\n"
214 ".section __ex_table,\"a\"\n"
215 " .align 4\n"
216 " .long 1b, 3b\n"
217 ".previous"
218 : : "r" (from), "r" (to) : "memory");
219 from+=64;
220 to+=64;
221 }
222 for(i=(4096-320)/64; i<4096/64; i++)
223 {
224 __asm__ __volatile__ (
225 "2: movq (%0), %%mm0\n"
226 " movntq %%mm0, (%1)\n"
227 " movq 8(%0), %%mm1\n"
228 " movntq %%mm1, 8(%1)\n"
229 " movq 16(%0), %%mm2\n"
230 " movntq %%mm2, 16(%1)\n"
231 " movq 24(%0), %%mm3\n"
232 " movntq %%mm3, 24(%1)\n"
233 " movq 32(%0), %%mm4\n"
234 " movntq %%mm4, 32(%1)\n"
235 " movq 40(%0), %%mm5\n"
236 " movntq %%mm5, 40(%1)\n"
237 " movq 48(%0), %%mm6\n"
238 " movntq %%mm6, 48(%1)\n"
239 " movq 56(%0), %%mm7\n"
240 " movntq %%mm7, 56(%1)\n"
241 : : "r" (from), "r" (to) : "memory");
242 from+=64;
243 to+=64;
244 }
245 /* since movntq is weakly-ordered, a "sfence" is needed to become
246 * ordered again.
247 */
248 __asm__ __volatile__ (
249 " sfence \n" : :
250 );
251 kernel_fpu_end();
252}
253
254#else
255
256/*
257 * Generic MMX implementation without K7 specific streaming
258 */
259
/*
 * Zero one 4096-byte page with ordinary (cacheable) movq stores --
 * generic MMX version for CPUs without movntq; no sfence needed.
 */
260static void fast_clear_page(void *page)
261{
262	int i;
263
264	kernel_fpu_begin();
265
	/* %mm0 = 0; it is the 8-byte source for every store below. */
266	__asm__ __volatile__ (
267		" pxor %%mm0, %%mm0\n" : :
268	);
269
	/* 128 bytes of movq stores per pass over the page. */
270	for(i=0;i<4096/128;i++)
271	{
272		__asm__ __volatile__ (
273		" movq %%mm0, (%0)\n"
274		" movq %%mm0, 8(%0)\n"
275		" movq %%mm0, 16(%0)\n"
276		" movq %%mm0, 24(%0)\n"
277		" movq %%mm0, 32(%0)\n"
278		" movq %%mm0, 40(%0)\n"
279		" movq %%mm0, 48(%0)\n"
280		" movq %%mm0, 56(%0)\n"
281		" movq %%mm0, 64(%0)\n"
282		" movq %%mm0, 72(%0)\n"
283		" movq %%mm0, 80(%0)\n"
284		" movq %%mm0, 88(%0)\n"
285		" movq %%mm0, 96(%0)\n"
286		" movq %%mm0, 104(%0)\n"
287		" movq %%mm0, 112(%0)\n"
288		" movq %%mm0, 120(%0)\n"
289		: : "r" (page) : "memory");
290		page+=128;
291	}
292
293	kernel_fpu_end();
294}
295
296static void fast_copy_page(void *to, void *from)
297{
298 int i;
299
300
301 kernel_fpu_begin();
302
303 __asm__ __volatile__ (
304 "1: prefetch (%0)\n"
305 " prefetch 64(%0)\n"
306 " prefetch 128(%0)\n"
307 " prefetch 192(%0)\n"
308 " prefetch 256(%0)\n"
309 "2: \n"
310 ".section .fixup, \"ax\"\n"
311 "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
312 " jmp 2b\n"
313 ".previous\n"
314 ".section __ex_table,\"a\"\n"
315 " .align 4\n"
316 " .long 1b, 3b\n"
317 ".previous"
318 : : "r" (from) );
319
320 for(i=0; i<4096/64; i++)
321 {
322 __asm__ __volatile__ (
323 "1: prefetch 320(%0)\n"
324 "2: movq (%0), %%mm0\n"
325 " movq 8(%0), %%mm1\n"
326 " movq 16(%0), %%mm2\n"
327 " movq 24(%0), %%mm3\n"
328 " movq %%mm0, (%1)\n"
329 " movq %%mm1, 8(%1)\n"
330 " movq %%mm2, 16(%1)\n"
331 " movq %%mm3, 24(%1)\n"
332 " movq 32(%0), %%mm0\n"
333 " movq 40(%0), %%mm1\n"
334 " movq 48(%0), %%mm2\n"
335 " movq 56(%0), %%mm3\n"
336 " movq %%mm0, 32(%1)\n"
337 " movq %%mm1, 40(%1)\n"
338 " movq %%mm2, 48(%1)\n"
339 " movq %%mm3, 56(%1)\n"
340 ".section .fixup, \"ax\"\n"
341 "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
342 " jmp 2b\n"
343 ".previous\n"
344 ".section __ex_table,\"a\"\n"
345 " .align 4\n"
346 " .long 1b, 3b\n"
347 ".previous"
348 : : "r" (from), "r" (to) : "memory");
349 from+=64;
350 to+=64;
351 }
352 kernel_fpu_end();
353}
354
355
356#endif
357
358/*
359 * Favour MMX for page clear and copy.
360 */
361
/*
 * Fallback page clear for interrupt context: rep stosl of 1024 zero
 * dwords = one 4096-byte page, no FPU/MMX state touched.
 */
362static void slow_zero_page(void * page)
363{
364	int d0, d1;
365	__asm__ __volatile__( \
366		"cld\n\t" \
367		"rep ; stosl" \
368		: "=&c" (d0), "=&D" (d1)
369		:"a" (0),"1" (page),"0" (1024)
370		:"memory");
371}
372
/*
 * Clear one 4096-byte page, preferring the MMX fast path.  MMX/FPU
 * state must not be touched from interrupt context, so fall back to
 * the rep-stosl clear when in_interrupt().
 */
void mmx_clear_page(void * page)
{
	if (unlikely(in_interrupt())) {
		slow_zero_page(page);
		return;
	}
	fast_clear_page(page);
}
380
/*
 * Fallback page copy for interrupt context: rep movsl of 1024 dwords
 * = one 4096-byte page, no FPU/MMX state touched.
 */
381static void slow_copy_page(void *to, void *from)
382{
383	int d0, d1, d2;
384	__asm__ __volatile__( \
385		"cld\n\t" \
386		"rep ; movsl" \
387		: "=&c" (d0), "=&D" (d1), "=&S" (d2) \
388		: "0" (1024),"1" ((long) to),"2" ((long) from) \
389		: "memory");
390}
391
392
/*
 * Copy one 4096-byte page, preferring the MMX fast path.  MMX/FPU
 * state must not be touched from interrupt context, so fall back to
 * the rep-movsl copy when in_interrupt().
 */
void mmx_copy_page(void *to, void *from)
{
	if (unlikely(in_interrupt())) {
		slow_copy_page(to, from);
		return;
	}
	fast_copy_page(to, from);
}
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
new file mode 100644
index 000000000000..a32d9f570f48
--- /dev/null
+++ b/arch/i386/lib/putuser.S
@@ -0,0 +1,87 @@
1/*
2 * __put_user functions.
3 *
4 * (C) Copyright 2005 Linus Torvalds
5 *
6 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they
8 * return an error value in addition to the "real"
9 * return value.
10 */
11#include <asm/thread_info.h>
12
13
14/*
15 * __put_user_X
16 *
17 * Inputs: %eax[:%edx] contains the data
18 * %ecx contains the address
19 *
20 * Outputs: %eax is error code (0 or -EFAULT)
21 *
22 * These functions should not modify any other registers,
23 * as they get called from within inline assembly.
24 */
25
# Data arrives in %eax (plus %edx for the 8-byte case), the user
# address in %ecx; %eax returns 0 or -EFAULT.  Faulting stores recover
# through the __ex_table entries at the bottom.
26#define ENTER	pushl %ebx ; GET_THREAD_INFO(%ebx)
27#define EXIT	popl %ebx ; ret
28
29.text
30.align 4
31.globl __put_user_1
32__put_user_1:
33	ENTER
34	cmpl TI_addr_limit(%ebx),%ecx
35	jae bad_put_user
361:	movb %al,(%ecx)
37	xorl %eax,%eax
38	EXIT
39
40.align 4
41.globl __put_user_2
42__put_user_2:
43	ENTER
	# Compare against addr_limit - (size-1) so the entire store,
	# not just its first byte, stays below the limit.
44	movl TI_addr_limit(%ebx),%ebx
45	subl $1,%ebx
46	cmpl %ebx,%ecx
47	jae bad_put_user
482:	movw %ax,(%ecx)
49	xorl %eax,%eax
50	EXIT
51
52.align 4
53.globl __put_user_4
54__put_user_4:
55	ENTER
56	movl TI_addr_limit(%ebx),%ebx
57	subl $3,%ebx
58	cmpl %ebx,%ecx
59	jae bad_put_user
603:	movl %eax,(%ecx)
61	xorl %eax,%eax
62	EXIT
63
64.align 4
65.globl __put_user_8
66__put_user_8:
67	ENTER
68	movl TI_addr_limit(%ebx),%ebx
69	subl $7,%ebx
70	cmpl %ebx,%ecx
71	jae bad_put_user
	# Two 4-byte stores; either one may fault, hence two labels.
724:	movl %eax,(%ecx)
735:	movl %edx,4(%ecx)
74	xorl %eax,%eax
75	EXIT
76
77bad_put_user:
78	movl $-14,%eax	# -EFAULT
79	EXIT
80
81.section __ex_table,"a"
82	.long 1b,bad_put_user
83	.long 2b,bad_put_user
84	.long 3b,bad_put_user
85	.long 4b,bad_put_user
86	.long 5b,bad_put_user
87.previous
diff --git a/arch/i386/lib/strstr.c b/arch/i386/lib/strstr.c
new file mode 100644
index 000000000000..a3dafbf59dae
--- /dev/null
+++ b/arch/i386/lib/strstr.c
@@ -0,0 +1,31 @@
1#include <linux/string.h>
2
/*
 * strstr - locate substring ct within cs; returns a pointer to the
 * first occurrence, or NULL if ct does not occur in cs.
 */
3char * strstr(const char * cs,const char * ct)
4{
5int d0, d1;
6register char * __res;
/*
 * Phase 1: repne scasb scans ct for its NUL; notl/decl turn the
 * remaining counter into strlen(ct), parked in %edx (Z is set here
 * when ct is empty).  Phase 2: for each start position in cs, repe
 * cmpsb compares strlen(ct) bytes; on a full match %eax holds the
 * match address, otherwise advance one byte and stop when the NUL of
 * cs is reached, yielding NULL.
 */
7__asm__ __volatile__(
8	"movl %6,%%edi\n\t"
9	"repne\n\t"
10	"scasb\n\t"
11	"notl %%ecx\n\t"
12	"decl %%ecx\n\t"	/* NOTE! This also sets Z if searchstring='' */
13	"movl %%ecx,%%edx\n"
14	"1:\tmovl %6,%%edi\n\t"
15	"movl %%esi,%%eax\n\t"
16	"movl %%edx,%%ecx\n\t"
17	"repe\n\t"
18	"cmpsb\n\t"
19	"je 2f\n\t"		/* also works for empty string, see above */
20	"xchgl %%eax,%%esi\n\t"
21	"incl %%esi\n\t"
22	"cmpb $0,-1(%%eax)\n\t"
23	"jne 1b\n\t"
24	"xorl %%eax,%%eax\n\t"
25	"2:"
26	:"=a" (__res), "=&c" (d0), "=&S" (d1)
27	:"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
28	:"dx", "di");
29return __res;
30}
31
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
new file mode 100644
index 000000000000..51aa2bbb0269
--- /dev/null
+++ b/arch/i386/lib/usercopy.c
@@ -0,0 +1,636 @@
1/*
2 * User address space access functions.
3 * The non inlined parts of asm-i386/uaccess.h are here.
4 *
5 * Copyright 1997 Andi Kleen <ak@muc.de>
6 * Copyright 1997 Linus Torvalds
7 */
8#include <linux/config.h>
9#include <linux/mm.h>
10#include <linux/highmem.h>
11#include <linux/blkdev.h>
12#include <linux/module.h>
13#include <asm/uaccess.h>
14#include <asm/mmx.h>
15
/*
 * Decide whether a plain rep;movsl copy is acceptable for a copy of
 * 'n' bytes between addresses a1 and a2.  On CPUs flagged by
 * CONFIG_X86_INTEL_USERCOPY, large copies whose source and destination
 * differ in the alignment bits selected by movsl_mask are slow with
 * movsl and should use the unrolled __copy_user_intel path instead.
 * Returns nonzero when movsl is fine.
 */
static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
{
#ifdef CONFIG_X86_INTEL_USERCOPY
	if (n >= 64)
		return ((a1 ^ a2) & movsl_mask.mask) == 0;
#endif
	return 1;
}
/* Pointer-friendly wrapper around __movsl_is_ok(). */
#define movsl_is_ok(a1,a2,n) \
	__movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n))
26
27/*
28 * Copy a null terminated string from userspace.
29 */
30
/*
 * __do_strncpy_from_user(dst, src, count, res)
 *
 * Byte-by-byte lodsb/stosb copy of at most 'count' bytes from the user
 * pointer 'src' to 'dst', stopping after the terminating NUL has been
 * stored.  On completion 'res' holds the string length excluding the
 * NUL (original count minus the count remaining), or 'count' itself
 * when the limit was hit before a NUL.  On a fault while reading user
 * memory the .fixup entry at label 3 sets 'res' to -EFAULT.  'count'
 * is clobbered.  Label 0 (the lodsb) is the only instruction touching
 * user memory, hence the single __ex_table entry.
 */
31#define __do_strncpy_from_user(dst,src,count,res)			   \
32do {									   \
33	int __d0, __d1, __d2;						   \
34	might_sleep();							   \
35	__asm__ __volatile__(						   \
36		"	testl %1,%1\n"					   \
37		"	jz 2f\n"					   \
38		"0:	lodsb\n"					   \
39		"	stosb\n"					   \
40		"	testb %%al,%%al\n"				   \
41		"	jz 1f\n"					   \
42		"	decl %1\n"					   \
43		"	jnz 0b\n"					   \
44		"1:	subl %1,%0\n"					   \
45		"2:\n"							   \
46		".section .fixup,\"ax\"\n"				   \
47		"3:	movl %5,%0\n"					   \
48		"	jmp 2b\n"					   \
49		".previous\n"						   \
50		".section __ex_table,\"a\"\n"				   \
51		"	.align 4\n"					   \
52		"	.long 0b,3b\n"					   \
53		".previous"						   \
54		: "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),	   \
55		  "=&D" (__d2)						   \
56		: "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
57		: "memory");						   \
58} while (0)
59
60/**
61 * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking.
62 * @dst: Destination address, in kernel space. This buffer must be at
63 * least @count bytes long.
64 * @src: Source address, in user space.
65 * @count: Maximum number of bytes to copy, including the trailing NUL.
66 *
67 * Copies a NUL-terminated string from userspace to kernel space.
68 * Caller must check the specified block with access_ok() before calling
69 * this function.
70 *
71 * On success, returns the length of the string (not including the trailing
72 * NUL).
73 *
74 * If access to userspace fails, returns -EFAULT (some data may have been
75 * copied).
76 *
77 * If @count is smaller than the length of the string, copies @count bytes
78 * and returns @count.
79 */
80long
81__strncpy_from_user(char *dst, const char __user *src, long count)
82{
83	long res;
	/* res is written unconditionally by the asm: the copied length
	 * on success, or -EFAULT via the fixup path on a fault. */
84	__do_strncpy_from_user(dst, src, count, res);
85	return res;
86}
87
88/**
89 * strncpy_from_user: - Copy a NUL terminated string from userspace.
90 * @dst: Destination address, in kernel space. This buffer must be at
91 * least @count bytes long.
92 * @src: Source address, in user space.
93 * @count: Maximum number of bytes to copy, including the trailing NUL.
94 *
95 * Copies a NUL-terminated string from userspace to kernel space.
96 *
97 * On success, returns the length of the string (not including the trailing
98 * NUL).
99 *
100 * If access to userspace fails, returns -EFAULT (some data may have been
101 * copied).
102 *
103 * If @count is smaller than the length of the string, copies @count bytes
104 * and returns @count.
105 */
106long
107strncpy_from_user(char *dst, const char __user *src, long count)
108{
109 long res = -EFAULT;
110 if (access_ok(VERIFY_READ, src, 1))
111 __do_strncpy_from_user(dst, src, count, res);
112 return res;
113}
114
115
116/*
117 * Zero Userspace
118 */
119
/*
 * __do_clear_user(addr, size)
 *
 * Zero 'size' bytes at the user pointer 'addr': size/4 dword stores
 * with rep;stosl followed by a rep;stosb for the size%4 tail (%eax is
 * 0 throughout).  Afterwards 'size' holds the number of bytes NOT
 * cleared -- 0 on success.  On a fault in the stosl phase the .fixup
 * at label 3 rebuilds the byte count as tail + 4*remaining_dwords; a
 * fault in the stosb phase just returns the remaining %ecx.
 */
120#define __do_clear_user(addr,size)					\
121do {									\
122	int __d0;							\
123  	might_sleep();							\
124	__asm__ __volatile__(						\
125		"0:	rep; stosl\n"					\
126		"	movl %2,%0\n"					\
127		"1:	rep; stosb\n"					\
128		"2:\n"							\
129		".section .fixup,\"ax\"\n"				\
130		"3:	lea 0(%2,%0,4),%0\n"				\
131		"	jmp 2b\n"					\
132		".previous\n"						\
133		".section __ex_table,\"a\"\n"				\
134		"	.align 4\n"					\
135		"	.long 0b,3b\n"					\
136		"	.long 1b,2b\n"					\
137		".previous"						\
138		: "=&c"(size), "=&D" (__d0)				\
139		: "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));	\
140} while (0)
141
142/**
143 * clear_user: - Zero a block of memory in user space.
144 * @to: Destination address, in user space.
145 * @n: Number of bytes to zero.
146 *
147 * Zero a block of memory in user space.
148 *
149 * Returns number of bytes that could not be cleared.
150 * On success, this will be zero.
151 */
152unsigned long
153clear_user(void __user *to, unsigned long n)
154{
155 might_sleep();
156 if (access_ok(VERIFY_WRITE, to, n))
157 __do_clear_user(to, n);
158 return n;
159}
160
161/**
162 * __clear_user: - Zero a block of memory in user space, with less checking.
163 * @to: Destination address, in user space.
164 * @n: Number of bytes to zero.
165 *
166 * Zero a block of memory in user space.  Caller must check
167 * the specified block with access_ok() before calling this function.
168 *
169 * Returns number of bytes that could not be cleared.
170 * On success, this will be zero.
171 */
172unsigned long
173__clear_user(void __user *to, unsigned long n)
174{
	/* __do_clear_user leaves the number of uncleared bytes in n */
175	__do_clear_user(to, n);
176	return n;
177}
178
179/**
180 * strnlen_user: - Get the size of a string in user space.
181 * @s: The string to measure.
182 * @n: The maximum valid length
183 *
184 * Get the size of a NUL-terminated string in user space.
185 *
186 * Returns the size of the string INCLUDING the terminating NUL.
187 * On exception, returns 0.
188 * If the string is too long, returns a value greater than @n.
189 */
190long strnlen_user(const char __user *s, long n)
191{
	/* mask is ~0UL when s passes __addr_ok(), else 0; the final
	 * "res & mask" forces a 0 return for an obviously bad pointer. */
192	unsigned long mask = -__addr_ok(s);
193	unsigned long res, tmp;
194
195	might_sleep();
196
	/* repne;scasb scans at most n bytes for the NUL.  res ends up as
	 * bytes scanned, plus 1 (via setne) when no NUL was found so the
	 * result exceeds n.  Fixup 2 returns 0 on a fault; path 3
	 * handles n == 0 by returning 1. */
197	__asm__ __volatile__(
198		" testl %0, %0\n"
199		" jz 3f\n"
200		" andl %0,%%ecx\n"
201		"0: repne; scasb\n"
202		" setne %%al\n"
203		" subl %%ecx,%0\n"
204		" addl %0,%%eax\n"
205		"1:\n"
206		".section .fixup,\"ax\"\n"
207		"2: xorl %%eax,%%eax\n"
208		" jmp 1b\n"
209		"3: movb $1,%%al\n"
210		" jmp 1b\n"
211		".previous\n"
212		".section __ex_table,\"a\"\n"
213		" .align 4\n"
214		" .long 0b,2b\n"
215		".previous"
216		:"=r" (n), "=D" (s), "=a" (res), "=c" (tmp)
217		:"0" (n), "1" (s), "2" (0), "3" (mask)
218		:"cc");
219	return res & mask;
220}
221
222#ifdef CONFIG_X86_INTEL_USERCOPY
/*
 * __copy_user_intel - copy 'size' bytes from kernel 'from' to user 'to'.
 *
 * Unrolled variant used (via movsl_is_ok) when a plain rep;movsl would
 * be slow: the main loop moves 64 bytes per iteration while more than
 * 63 bytes remain, then finishes the tail with rep;movsl + rep;movsb
 * (labels 35..37).  The loads at labels 1 and 2 read 32/64 bytes
 * ahead and their results are overwritten at label 3 -- NOTE(review):
 * they appear to serve only as cache-line touches (prefetch); confirm
 * against the Intel-usercopy rationale.
 *
 * Returns the number of bytes NOT copied (0 on success).  Every
 * instruction that can fault has an __ex_table entry landing at label
 * 100 (or 101 for the movsl tail, which converts the remaining dword
 * count back into bytes).
 */
223static unsigned long
224__copy_user_intel(void __user *to, const void *from, unsigned long size)
225{
226	int d0, d1;
227	__asm__ __volatile__(
228		       "       .align 2,0x90\n"
229		       "1:     movl 32(%4), %%eax\n"
230		       "       cmpl $67, %0\n"
231		       "       jbe 3f\n"
232		       "2:     movl 64(%4), %%eax\n"
233		       "       .align 2,0x90\n"
234		       "3:     movl 0(%4), %%eax\n"
235		       "4:     movl 4(%4), %%edx\n"
236		       "5:     movl %%eax, 0(%3)\n"
237		       "6:     movl %%edx, 4(%3)\n"
238		       "7:     movl 8(%4), %%eax\n"
239		       "8:     movl 12(%4),%%edx\n"
240		       "9:     movl %%eax, 8(%3)\n"
241		       "10:    movl %%edx, 12(%3)\n"
242		       "11:    movl 16(%4), %%eax\n"
243		       "12:    movl 20(%4), %%edx\n"
244		       "13:    movl %%eax, 16(%3)\n"
245		       "14:    movl %%edx, 20(%3)\n"
246		       "15:    movl 24(%4), %%eax\n"
247		       "16:    movl 28(%4), %%edx\n"
248		       "17:    movl %%eax, 24(%3)\n"
249		       "18:    movl %%edx, 28(%3)\n"
250		       "19:    movl 32(%4), %%eax\n"
251		       "20:    movl 36(%4), %%edx\n"
252		       "21:    movl %%eax, 32(%3)\n"
253		       "22:    movl %%edx, 36(%3)\n"
254		       "23:    movl 40(%4), %%eax\n"
255		       "24:    movl 44(%4), %%edx\n"
256		       "25:    movl %%eax, 40(%3)\n"
257		       "26:    movl %%edx, 44(%3)\n"
258		       "27:    movl 48(%4), %%eax\n"
259		       "28:    movl 52(%4), %%edx\n"
260		       "29:    movl %%eax, 48(%3)\n"
261		       "30:    movl %%edx, 52(%3)\n"
262		       "31:    movl 56(%4), %%eax\n"
263		       "32:    movl 60(%4), %%edx\n"
264		       "33:    movl %%eax, 56(%3)\n"
265		       "34:    movl %%edx, 60(%3)\n"
266		       "       addl $-64, %0\n"
267		       "       addl $64, %4\n"
268		       "       addl $64, %3\n"
269		       "       cmpl $63, %0\n"
270		       "       ja  1b\n"
271		       "35:    movl  %0, %%eax\n"
272		       "       shrl  $2, %0\n"
273		       "       andl  $3, %%eax\n"
274		       "       cld\n"
275		       "99:    rep; movsl\n"
276		       "36:    movl %%eax, %0\n"
277		       "37:    rep; movsb\n"
278		       "100:\n"
279		       ".section .fixup,\"ax\"\n"
280		       "101:   lea 0(%%eax,%0,4),%0\n"
281		       "       jmp 100b\n"
282		       ".previous\n"
283		       ".section __ex_table,\"a\"\n"
284		       "       .align 4\n"
285		       "       .long 1b,100b\n"
286		       "       .long 2b,100b\n"
287		       "       .long 3b,100b\n"
288		       "       .long 4b,100b\n"
289		       "       .long 5b,100b\n"
290		       "       .long 6b,100b\n"
291		       "       .long 7b,100b\n"
292		       "       .long 8b,100b\n"
293		       "       .long 9b,100b\n"
294		       "       .long 10b,100b\n"
295		       "       .long 11b,100b\n"
296		       "       .long 12b,100b\n"
297		       "       .long 13b,100b\n"
298		       "       .long 14b,100b\n"
299		       "       .long 15b,100b\n"
300		       "       .long 16b,100b\n"
301		       "       .long 17b,100b\n"
302		       "       .long 18b,100b\n"
303		       "       .long 19b,100b\n"
304		       "       .long 20b,100b\n"
305		       "       .long 21b,100b\n"
306		       "       .long 22b,100b\n"
307		       "       .long 23b,100b\n"
308		       "       .long 24b,100b\n"
309		       "       .long 25b,100b\n"
310		       "       .long 26b,100b\n"
311		       "       .long 27b,100b\n"
312		       "       .long 28b,100b\n"
313		       "       .long 29b,100b\n"
314		       "       .long 30b,100b\n"
315		       "       .long 31b,100b\n"
316		       "       .long 32b,100b\n"
317		       "       .long 33b,100b\n"
318		       "       .long 34b,100b\n"
319		       "       .long 35b,100b\n"
320		       "       .long 36b,100b\n"
321		       "       .long 37b,100b\n"
322		       "       .long 99b,101b\n"
323		       ".previous"
324		       : "=&c"(size), "=&D" (d0), "=&S" (d1)
325		       :  "1"(to), "2"(from), "0"(size)
326		       : "eax", "edx", "memory");
327	return size;
328}
329
/*
 * __copy_user_zeroing_intel - copy 'size' bytes from user 'from' to
 * kernel 'to', zero-filling the destination tail on a fault.
 *
 * Same 64-byte unrolled structure as __copy_user_intel, but only the
 * user-side loads carry __ex_table entries.  On a fault the fixup at
 * label 16 computes the residue and then zeroes the uncopied part of
 * the kernel buffer with rep;stosb (%eax = 0), so callers never see
 * stale kernel memory.  Returns the number of bytes not copied
 * (0 on success).
 */
330static unsigned long
331__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
332{
333	int d0, d1;
334	__asm__ __volatile__(
335		       "        .align 2,0x90\n"
336		       "0:      movl 32(%4), %%eax\n"
337		       "        cmpl $67, %0\n"
338		       "        jbe 2f\n"
339		       "1:      movl 64(%4), %%eax\n"
340		       "        .align 2,0x90\n"
341		       "2:      movl 0(%4), %%eax\n"
342		       "21:     movl 4(%4), %%edx\n"
343		       "        movl %%eax, 0(%3)\n"
344		       "        movl %%edx, 4(%3)\n"
345		       "3:      movl 8(%4), %%eax\n"
346		       "31:     movl 12(%4),%%edx\n"
347		       "        movl %%eax, 8(%3)\n"
348		       "        movl %%edx, 12(%3)\n"
349		       "4:      movl 16(%4), %%eax\n"
350		       "41:     movl 20(%4), %%edx\n"
351		       "        movl %%eax, 16(%3)\n"
352		       "        movl %%edx, 20(%3)\n"
353		       "10:     movl 24(%4), %%eax\n"
354		       "51:     movl 28(%4), %%edx\n"
355		       "        movl %%eax, 24(%3)\n"
356		       "        movl %%edx, 28(%3)\n"
357		       "11:     movl 32(%4), %%eax\n"
358		       "61:     movl 36(%4), %%edx\n"
359		       "        movl %%eax, 32(%3)\n"
360		       "        movl %%edx, 36(%3)\n"
361		       "12:     movl 40(%4), %%eax\n"
362		       "71:     movl 44(%4), %%edx\n"
363		       "        movl %%eax, 40(%3)\n"
364		       "        movl %%edx, 44(%3)\n"
365		       "13:     movl 48(%4), %%eax\n"
366		       "81:     movl 52(%4), %%edx\n"
367		       "        movl %%eax, 48(%3)\n"
368		       "        movl %%edx, 52(%3)\n"
369		       "14:     movl 56(%4), %%eax\n"
370		       "91:     movl 60(%4), %%edx\n"
371		       "        movl %%eax, 56(%3)\n"
372		       "        movl %%edx, 60(%3)\n"
373		       "        addl $-64, %0\n"
374		       "        addl $64, %4\n"
375		       "        addl $64, %3\n"
376		       "        cmpl $63, %0\n"
377		       "        ja  0b\n"
378		       "5:      movl  %0, %%eax\n"
379		       "        shrl  $2, %0\n"
380		       "        andl  $3, %%eax\n"
381		       "        cld\n"
382		       "6:      rep; movsl\n"
383		       "        movl %%eax,%0\n"
384		       "7:      rep; movsb\n"
385		       "8:\n"
386		       ".section .fixup,\"ax\"\n"
387		       "9:      lea 0(%%eax,%0,4),%0\n"
388		       "16:     pushl %0\n"
389		       "        pushl %%eax\n"
390		       "        xorl %%eax,%%eax\n"
391		       "        rep; stosb\n"
392		       "        popl %%eax\n"
393		       "        popl %0\n"
394		       "        jmp 8b\n"
395		       ".previous\n"
396		       ".section __ex_table,\"a\"\n"
397		       "	.align 4\n"
398		       "	.long 0b,16b\n"
399		       "	.long 1b,16b\n"
400		       "	.long 2b,16b\n"
401		       "	.long 21b,16b\n"
402		       "	.long 3b,16b\n"
403		       "	.long 31b,16b\n"
404		       "	.long 4b,16b\n"
405		       "	.long 41b,16b\n"
406		       "	.long 10b,16b\n"
407		       "	.long 51b,16b\n"
408		       "	.long 11b,16b\n"
409		       "	.long 61b,16b\n"
410		       "	.long 12b,16b\n"
411		       "	.long 71b,16b\n"
412		       "	.long 13b,16b\n"
413		       "	.long 81b,16b\n"
414		       "	.long 14b,16b\n"
415		       "	.long 91b,16b\n"
416		       "	.long 6b,9b\n"
417		       "        .long 7b,16b\n"
418		       ".previous"
419		       : "=&c"(size), "=&D" (d0), "=&S" (d1)
420		       :  "1"(to), "2"(from), "0"(size)
421		       : "eax", "edx", "memory");
422	return size;
423}
424#else
425/*
426 * Leave these declared but undefined. There should not be any references to
427 * them.
428 */
429unsigned long
430__copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size);
431unsigned long
432__copy_user_intel(void __user *to, const void *from, unsigned long size);
433#endif /* CONFIG_X86_INTEL_USERCOPY */
434
435/* Generic arbitrary sized copy.  */
/*
 * __copy_user(to, from, size)
 *
 * For copies larger than 7 bytes: first movsb up to 7 bytes so the
 * destination becomes 8-byte aligned, then rep;movsl for the bulk,
 * then rep;movsb for the <=3 byte tail.  Afterwards 'size' holds the
 * number of bytes NOT copied: 0 on success, or a residue rebuilt by
 * the .fixup code after a fault (label 5 adds back the not-yet-copied
 * remainder after an alignment-phase fault; label 3 converts the
 * remaining dword count back into bytes).
 */
436#define __copy_user(to,from,size)					\
437do {									\
438	int __d0, __d1, __d2;						\
439	__asm__ __volatile__(						\
440		"	cmp  $7,%0\n"					\
441		"	jbe  1f\n"					\
442		"	movl %1,%0\n"					\
443		"	negl %0\n"					\
444		"	andl $7,%0\n"					\
445		"	subl %0,%3\n"					\
446		"4:	rep; movsb\n"					\
447		"	movl %3,%0\n"					\
448		"	shrl $2,%0\n"					\
449		"	andl $3,%3\n"					\
450		"	.align 2,0x90\n"				\
451		"0:	rep; movsl\n"					\
452		"	movl %3,%0\n"					\
453		"1:	rep; movsb\n"					\
454		"2:\n"							\
455		".section .fixup,\"ax\"\n"				\
456		"5:	addl %3,%0\n"					\
457		"	jmp 2b\n"					\
458		"3:	lea 0(%3,%0,4),%0\n"				\
459		"	jmp 2b\n"					\
460		".previous\n"						\
461		".section __ex_table,\"a\"\n"				\
462		"	.align 4\n"					\
463		"	.long 4b,5b\n"					\
464		"	.long 0b,3b\n"					\
465		"	.long 1b,2b\n"					\
466		".previous"						\
467		: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2)	\
468		: "3"(size), "0"(size), "1"(to), "2"(from)		\
469		: "memory");						\
470} while (0)
471
/*
 * __copy_user_zeroing(to, from, size)
 *
 * Same copy structure as __copy_user (align destination with movsb,
 * bulk rep;movsl, byte tail), used for user-to-kernel copies.  The
 * difference is in the fault path: after rebuilding the residue, the
 * fixup at label 6 zeroes the remaining destination bytes with
 * rep;stosb (%eax = 0) so the kernel buffer never exposes stale data.
 * 'size' ends up holding the number of bytes not copied (0 on
 * success).
 */
472#define __copy_user_zeroing(to,from,size)				\
473do {									\
474	int __d0, __d1, __d2;						\
475	__asm__ __volatile__(						\
476		"	cmp  $7,%0\n"					\
477		"	jbe  1f\n"					\
478		"	movl %1,%0\n"					\
479		"	negl %0\n"					\
480		"	andl $7,%0\n"					\
481		"	subl %0,%3\n"					\
482		"4:	rep; movsb\n"					\
483		"	movl %3,%0\n"					\
484		"	shrl $2,%0\n"					\
485		"	andl $3,%3\n"					\
486		"	.align 2,0x90\n"				\
487		"0:	rep; movsl\n"					\
488		"	movl %3,%0\n"					\
489		"1:	rep; movsb\n"					\
490		"2:\n"							\
491		".section .fixup,\"ax\"\n"				\
492		"5:	addl %3,%0\n"					\
493		"	jmp 6f\n"					\
494		"3:	lea 0(%3,%0,4),%0\n"				\
495		"6:	pushl %0\n"					\
496		"	pushl %%eax\n"					\
497		"	xorl %%eax,%%eax\n"				\
498		"	rep; stosb\n"					\
499		"	popl %%eax\n"					\
500		"	popl %0\n"					\
501		"	jmp 2b\n"					\
502		".previous\n"						\
503		".section __ex_table,\"a\"\n"				\
504		"	.align 4\n"					\
505		"	.long 4b,5b\n"					\
506		"	.long 0b,3b\n"					\
507		"	.long 1b,6b\n"					\
508		".previous"						\
509		: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2)	\
510		: "3"(size), "0"(size), "1"(to), "2"(from)		\
511		: "memory");						\
512} while (0)
513
514
/*
 * __copy_to_user_ll - low-level copy of 'n' bytes from kernel 'from'
 * to user 'to'.  Returns the number of bytes that could not be copied
 * (0 on success).  Normally just picks between the generic string
 * copy (__copy_user) and the unrolled Intel variant via movsl_is_ok().
 */
515unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n)
516{
517	BUG_ON((long) n < 0);
518#ifndef CONFIG_X86_WP_WORKS_OK
519	if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
520			((unsigned long )to) < TASK_SIZE) {
521		/*
522		 * CPU does not honor the WP bit when writing
523		 * from supervisory mode, and due to preemption or SMP,
524		 * the page tables can change at any time.
525		 * Do it manually.	Manfred <manfred@colorfullife.com>
526		 */
		/* Copy page by page: pin each user page writable, then
		 * write through its kernel mapping. */
527		while (n) {
528			unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
529			unsigned long len = PAGE_SIZE - offset;
530			int retval;
531			struct page *pg;
532			void *maddr;
533
534			if (len > n)
535				len = n;
536
537survive:
			/* write=1 forces the permission check / COW that the
			 * broken WP bit would otherwise have skipped */
538			down_read(&current->mm->mmap_sem);
539			retval = get_user_pages(current, current->mm,
540					(unsigned long )to, 1, 1, 0, &pg, NULL);
541
			/* init (pid 1) must not fail here: wait for writeback
			 * congestion to ease and retry the page pin */
542			if (retval == -ENOMEM && current->pid == 1) {
543				up_read(&current->mm->mmap_sem);
544				blk_congestion_wait(WRITE, HZ/50);
545				goto survive;
546			}
547
548			if (retval != 1) {
549				up_read(&current->mm->mmap_sem);
550				break;
551			}
552
553			maddr = kmap_atomic(pg, KM_USER0);
554			memcpy(maddr + offset, from, len);
555			kunmap_atomic(maddr, KM_USER0);
556			set_page_dirty_lock(pg);
557			put_page(pg);
558			up_read(&current->mm->mmap_sem);
559
560			from += len;
561			to += len;
562			n -= len;
563		}
		/* n is the residue: nonzero if a page could not be pinned */
564		return n;
565	}
566#endif
567	if (movsl_is_ok(to, from, n))
568		__copy_user(to, from, n);
569	else
570		n = __copy_user_intel(to, from, n);
571	return n;
572}
573
574unsigned long
575__copy_from_user_ll(void *to, const void __user *from, unsigned long n)
576{
577 BUG_ON((long)n < 0);
578 if (movsl_is_ok(to, from, n))
579 __copy_user_zeroing(to, from, n);
580 else
581 n = __copy_user_zeroing_intel(to, from, n);
582 return n;
583}
584
585/**
586 * copy_to_user: - Copy a block of data into user space.
587 * @to: Destination address, in user space.
588 * @from: Source address, in kernel space.
589 * @n: Number of bytes to copy.
590 *
591 * Context: User context only. This function may sleep.
592 *
593 * Copy data from kernel space to user space.
594 *
595 * Returns number of bytes that could not be copied.
596 * On success, this will be zero.
597 */
598unsigned long
599copy_to_user(void __user *to, const void *from, unsigned long n)
600{
601 might_sleep();
602 BUG_ON((long) n < 0);
603 if (access_ok(VERIFY_WRITE, to, n))
604 n = __copy_to_user(to, from, n);
605 return n;
606}
607EXPORT_SYMBOL(copy_to_user);
608
609/**
610 * copy_from_user: - Copy a block of data from user space.
611 * @to: Destination address, in kernel space.
612 * @from: Source address, in user space.
613 * @n: Number of bytes to copy.
614 *
615 * Context: User context only. This function may sleep.
616 *
617 * Copy data from user space to kernel space.
618 *
619 * Returns number of bytes that could not be copied.
620 * On success, this will be zero.
621 *
622 * If some data could not be copied, this function will pad the copied
623 * data to the requested size using zero bytes.
624 */
625unsigned long
626copy_from_user(void *to, const void __user *from, unsigned long n)
627{
628 might_sleep();
629 BUG_ON((long) n < 0);
630 if (access_ok(VERIFY_READ, from, n))
631 n = __copy_from_user(to, from, n);
632 else
633 memset(to, 0, n);
634 return n;
635}
636EXPORT_SYMBOL(copy_from_user);