author     Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>   2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/i386/lib
tag        v2.6.12-rc2 (Linux-2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/i386/lib')
-rw-r--r--   arch/i386/lib/Makefile          10
-rw-r--r--   arch/i386/lib/bitops.c          70
-rw-r--r--   arch/i386/lib/checksum.S       496
-rw-r--r--   arch/i386/lib/dec_and_lock.c    40
-rw-r--r--   arch/i386/lib/delay.c           49
-rw-r--r--   arch/i386/lib/getuser.S         70
-rw-r--r--   arch/i386/lib/memcpy.c          44
-rw-r--r--   arch/i386/lib/mmx.c            399
-rw-r--r--   arch/i386/lib/putuser.S         87
-rw-r--r--   arch/i386/lib/strstr.c          31
-rw-r--r--   arch/i386/lib/usercopy.c       636
11 files changed, 1932 insertions, 0 deletions
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
new file mode 100644
index 000000000000..7b1932d20f96
--- /dev/null
+++ b/arch/i386/lib/Makefile
@@ -0,0 +1,10 @@
1 | # | ||
2 | # Makefile for i386-specific library files.. | ||
3 | # | ||
4 | |||
5 | |||
6 | lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \ | ||
7 | bitops.o | ||
8 | |||
9 | lib-$(CONFIG_X86_USE_3DNOW) += mmx.o | ||
10 | lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o | ||
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c
new file mode 100644
index 000000000000..97db3853dc82
--- /dev/null
+++ b/arch/i386/lib/bitops.c
@@ -0,0 +1,70 @@
1 | #include <linux/bitops.h> | ||
2 | #include <linux/module.h> | ||
3 | |||
4 | /** | ||
5 | * find_next_bit - find the next set bit in a memory region | ||
6 | * @addr: The address to base the search on | ||
7 | * @offset: The bitnumber to start searching at | ||
8 | * @size: The maximum size to search | ||
9 | */ | ||
10 | int find_next_bit(const unsigned long *addr, int size, int offset) | ||
11 | { | ||
12 | const unsigned long *p = addr + (offset >> 5); | ||
13 | int set = 0, bit = offset & 31, res; | ||
14 | |||
15 | if (bit) { | ||
16 | /* | ||
17 | * Look for nonzero in the first 32 bits: | ||
18 | */ | ||
19 | __asm__("bsfl %1,%0\n\t" | ||
20 | "jne 1f\n\t" | ||
21 | "movl $32, %0\n" | ||
22 | "1:" | ||
23 | : "=r" (set) | ||
24 | : "r" (*p >> bit)); | ||
25 | if (set < (32 - bit)) | ||
26 | return set + offset; | ||
27 | set = 32 - bit; | ||
28 | p++; | ||
29 | } | ||
30 | /* | ||
31 | * No set bit yet, search remaining full words for a bit | ||
32 | */ | ||
33 | res = find_first_bit (p, size - 32 * (p - addr)); | ||
34 | return (offset + set + res); | ||
35 | } | ||
36 | EXPORT_SYMBOL(find_next_bit); | ||
37 | |||
38 | /** | ||
39 | * find_next_zero_bit - find the next zero bit in a memory region | ||
40 | * @addr: The address to base the search on | ||
41 | * @offset: The bitnumber to start searching at | ||
42 | * @size: The maximum size to search | ||
43 | */ | ||
44 | int find_next_zero_bit(const unsigned long *addr, int size, int offset) | ||
45 | { | ||
46 | unsigned long * p = ((unsigned long *) addr) + (offset >> 5); | ||
47 | int set = 0, bit = offset & 31, res; | ||
48 | |||
49 | if (bit) { | ||
50 | /* | ||
51 | * Look for zero in the first 32 bits. | ||
52 | */ | ||
53 | __asm__("bsfl %1,%0\n\t" | ||
54 | "jne 1f\n\t" | ||
55 | "movl $32, %0\n" | ||
56 | "1:" | ||
57 | : "=r" (set) | ||
58 | : "r" (~(*p >> bit))); | ||
59 | if (set < (32 - bit)) | ||
60 | return set + offset; | ||
61 | set = 32 - bit; | ||
62 | p++; | ||
63 | } | ||
64 | /* | ||
65 | * No zero yet, search remaining full bytes for a zero | ||
66 | */ | ||
67 | res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); | ||
68 | return (offset + set + res); | ||
69 | } | ||
70 | EXPORT_SYMBOL(find_next_zero_bit); | ||
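For reference, the search that find_next_bit() performs can be written in portable C. The sketch below is an editorial illustration only (not part of the patch); it assumes 32-bit longs, as on i386, whereas the assembly above scans the partial word with a single bsfl instruction.

static int find_next_bit_ref(const unsigned long *addr, int size, int offset)
{
	int i;

	for (i = offset; i < size; i++)		/* scan bit by bit from offset */
		if (addr[i >> 5] & (1UL << (i & 31)))
			return i;		/* bit number of the next set bit */
	return size;				/* no set bit at or after offset */
}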
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S
new file mode 100644
index 000000000000..94c7867ddc33
--- /dev/null
+++ b/arch/i386/lib/checksum.S
@@ -0,0 +1,496 @@
1 | /* | ||
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | ||
3 | * operating system. INET is implemented using the BSD Socket | ||
4 | * interface as the means of communication with the user level. | ||
5 | * | ||
6 | * IP/TCP/UDP checksumming routines | ||
7 | * | ||
8 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | ||
9 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | ||
10 | * Tom May, <ftom@netcom.com> | ||
11 | * Pentium Pro/II routines: | ||
12 | * Alexander Kjeldaas <astor@guardian.no> | ||
13 | * Finn Arne Gangstad <finnag@guardian.no> | ||
14 | * Lots of code moved from tcp.c and ip.c; see those files | ||
15 | * for more names. | ||
16 | * | ||
17 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | ||
18 | * handling. | ||
19 | * Andi Kleen, add zeroing on error | ||
20 | * converted to pure assembler | ||
21 | * | ||
22 | * This program is free software; you can redistribute it and/or | ||
23 | * modify it under the terms of the GNU General Public License | ||
24 | * as published by the Free Software Foundation; either version | ||
25 | * 2 of the License, or (at your option) any later version. | ||
26 | */ | ||
27 | |||
28 | #include <linux/config.h> | ||
29 | #include <asm/errno.h> | ||
30 | |||
31 | /* | ||
32 | * computes a partial checksum, e.g. for TCP/UDP fragments | ||
33 | */ | ||
34 | |||
35 | /* | ||
36 | unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | ||
37 | */ | ||
38 | |||
39 | .text | ||
40 | .align 4 | ||
41 | .globl csum_partial | ||
42 | |||
43 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | ||
44 | |||
45 | /* | ||
46 | * Experiments with Ethernet and SLIP connections show that buff | ||
47 | * is aligned on either a 2-byte or 4-byte boundary. We get at | ||
48 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | ||
49 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | ||
50 | * alignment for the unrolled loop. | ||
51 | */ | ||
52 | csum_partial: | ||
53 | pushl %esi | ||
54 | pushl %ebx | ||
55 | movl 20(%esp),%eax # Function arg: unsigned int sum | ||
56 | movl 16(%esp),%ecx # Function arg: int len | ||
57 | movl 12(%esp),%esi # Function arg: unsigned char *buff | ||
58 | testl $3, %esi # Check alignment. | ||
59 | jz 2f # Jump if alignment is ok. | ||
60 | testl $1, %esi # Check alignment. | ||
61 | jz 10f # Jump if alignment is boundary of 2bytes. | ||
62 | |||
63 | # buf is odd | ||
64 | dec %ecx | ||
65 | jl 8f | ||
66 | movzbl (%esi), %ebx | ||
67 | adcl %ebx, %eax | ||
68 | roll $8, %eax | ||
69 | inc %esi | ||
70 | testl $2, %esi | ||
71 | jz 2f | ||
72 | 10: | ||
73 | subl $2, %ecx # Alignment uses up two bytes. | ||
74 | jae 1f # Jump if we had at least two bytes. | ||
75 | addl $2, %ecx # ecx was < 2. Deal with it. | ||
76 | jmp 4f | ||
77 | 1: movw (%esi), %bx | ||
78 | addl $2, %esi | ||
79 | addw %bx, %ax | ||
80 | adcl $0, %eax | ||
81 | 2: | ||
82 | movl %ecx, %edx | ||
83 | shrl $5, %ecx | ||
84 | jz 2f | ||
85 | testl %esi, %esi | ||
86 | 1: movl (%esi), %ebx | ||
87 | adcl %ebx, %eax | ||
88 | movl 4(%esi), %ebx | ||
89 | adcl %ebx, %eax | ||
90 | movl 8(%esi), %ebx | ||
91 | adcl %ebx, %eax | ||
92 | movl 12(%esi), %ebx | ||
93 | adcl %ebx, %eax | ||
94 | movl 16(%esi), %ebx | ||
95 | adcl %ebx, %eax | ||
96 | movl 20(%esi), %ebx | ||
97 | adcl %ebx, %eax | ||
98 | movl 24(%esi), %ebx | ||
99 | adcl %ebx, %eax | ||
100 | movl 28(%esi), %ebx | ||
101 | adcl %ebx, %eax | ||
102 | lea 32(%esi), %esi | ||
103 | dec %ecx | ||
104 | jne 1b | ||
105 | adcl $0, %eax | ||
106 | 2: movl %edx, %ecx | ||
107 | andl $0x1c, %edx | ||
108 | je 4f | ||
109 | shrl $2, %edx # This clears CF | ||
110 | 3: adcl (%esi), %eax | ||
111 | lea 4(%esi), %esi | ||
112 | dec %edx | ||
113 | jne 3b | ||
114 | adcl $0, %eax | ||
115 | 4: andl $3, %ecx | ||
116 | jz 7f | ||
117 | cmpl $2, %ecx | ||
118 | jb 5f | ||
119 | movw (%esi),%cx | ||
120 | leal 2(%esi),%esi | ||
121 | je 6f | ||
122 | shll $16,%ecx | ||
123 | 5: movb (%esi),%cl | ||
124 | 6: addl %ecx,%eax | ||
125 | adcl $0, %eax | ||
126 | 7: | ||
127 | testl $1, 12(%esp) | ||
128 | jz 8f | ||
129 | roll $8, %eax | ||
130 | 8: | ||
131 | popl %ebx | ||
132 | popl %esi | ||
133 | ret | ||
134 | |||
135 | #else | ||
136 | |||
137 | /* Version for PentiumII/PPro */ | ||
138 | |||
139 | csum_partial: | ||
140 | pushl %esi | ||
141 | pushl %ebx | ||
142 | movl 20(%esp),%eax # Function arg: unsigned int sum | ||
143 | movl 16(%esp),%ecx # Function arg: int len | ||
144 | movl 12(%esp),%esi # Function arg: const unsigned char *buf | ||
145 | |||
146 | testl $3, %esi | ||
147 | jnz 25f | ||
148 | 10: | ||
149 | movl %ecx, %edx | ||
150 | movl %ecx, %ebx | ||
151 | andl $0x7c, %ebx | ||
152 | shrl $7, %ecx | ||
153 | addl %ebx,%esi | ||
154 | shrl $2, %ebx | ||
155 | negl %ebx | ||
156 | lea 45f(%ebx,%ebx,2), %ebx | ||
157 | testl %esi, %esi | ||
158 | jmp *%ebx | ||
159 | |||
160 | # Handle 2-byte-aligned regions | ||
161 | 20: addw (%esi), %ax | ||
162 | lea 2(%esi), %esi | ||
163 | adcl $0, %eax | ||
164 | jmp 10b | ||
165 | 25: | ||
166 | testl $1, %esi | ||
167 | jz 30f | ||
168 | # buf is odd | ||
169 | dec %ecx | ||
170 | jl 90f | ||
171 | movzbl (%esi), %ebx | ||
172 | addl %ebx, %eax | ||
173 | adcl $0, %eax | ||
174 | roll $8, %eax | ||
175 | inc %esi | ||
176 | testl $2, %esi | ||
177 | jz 10b | ||
178 | |||
179 | 30: subl $2, %ecx | ||
180 | ja 20b | ||
181 | je 32f | ||
182 | addl $2, %ecx | ||
183 | jz 80f | ||
184 | movzbl (%esi),%ebx # csumming 1 byte, 2-aligned | ||
185 | addl %ebx, %eax | ||
186 | adcl $0, %eax | ||
187 | jmp 80f | ||
188 | 32: | ||
189 | addw (%esi), %ax # csumming 2 bytes, 2-aligned | ||
190 | adcl $0, %eax | ||
191 | jmp 80f | ||
192 | |||
193 | 40: | ||
194 | addl -128(%esi), %eax | ||
195 | adcl -124(%esi), %eax | ||
196 | adcl -120(%esi), %eax | ||
197 | adcl -116(%esi), %eax | ||
198 | adcl -112(%esi), %eax | ||
199 | adcl -108(%esi), %eax | ||
200 | adcl -104(%esi), %eax | ||
201 | adcl -100(%esi), %eax | ||
202 | adcl -96(%esi), %eax | ||
203 | adcl -92(%esi), %eax | ||
204 | adcl -88(%esi), %eax | ||
205 | adcl -84(%esi), %eax | ||
206 | adcl -80(%esi), %eax | ||
207 | adcl -76(%esi), %eax | ||
208 | adcl -72(%esi), %eax | ||
209 | adcl -68(%esi), %eax | ||
210 | adcl -64(%esi), %eax | ||
211 | adcl -60(%esi), %eax | ||
212 | adcl -56(%esi), %eax | ||
213 | adcl -52(%esi), %eax | ||
214 | adcl -48(%esi), %eax | ||
215 | adcl -44(%esi), %eax | ||
216 | adcl -40(%esi), %eax | ||
217 | adcl -36(%esi), %eax | ||
218 | adcl -32(%esi), %eax | ||
219 | adcl -28(%esi), %eax | ||
220 | adcl -24(%esi), %eax | ||
221 | adcl -20(%esi), %eax | ||
222 | adcl -16(%esi), %eax | ||
223 | adcl -12(%esi), %eax | ||
224 | adcl -8(%esi), %eax | ||
225 | adcl -4(%esi), %eax | ||
226 | 45: | ||
227 | lea 128(%esi), %esi | ||
228 | adcl $0, %eax | ||
229 | dec %ecx | ||
230 | jge 40b | ||
231 | movl %edx, %ecx | ||
232 | 50: andl $3, %ecx | ||
233 | jz 80f | ||
234 | |||
235 | # Handle the last 1-3 bytes without jumping | ||
236 | notl %ecx # 1->2, 2->1, 3->0, higher bits are masked | ||
237 | movl $0xffffff,%ebx # by the shll and shrl instructions | ||
238 | shll $3,%ecx | ||
239 | shrl %cl,%ebx | ||
240 | andl -128(%esi),%ebx # esi is 4-aligned so should be ok | ||
241 | addl %ebx,%eax | ||
242 | adcl $0,%eax | ||
243 | 80: | ||
244 | testl $1, 12(%esp) | ||
245 | jz 90f | ||
246 | roll $8, %eax | ||
247 | 90: | ||
248 | popl %ebx | ||
249 | popl %esi | ||
250 | ret | ||
251 | |||
252 | #endif | ||
253 | |||
254 | /* | ||
255 | unsigned int csum_partial_copy_generic (const char *src, char *dst, | ||
256 | int len, int sum, int *src_err_ptr, int *dst_err_ptr) | ||
257 | */ | ||
258 | |||
259 | /* | ||
260 | * Copy from ds while checksumming, otherwise like csum_partial | ||
261 | * | ||
262 | * The macros SRC and DST specify the type of access for the instruction. | ||
263 | * thus we can call a custom exception handler for all access types. | ||
264 | * | ||
265 | * FIXME: could someone double-check whether I haven't mixed up some SRC and | ||
266 | * DST definitions? It's damn hard to trigger all cases. I hope I got | ||
267 | * them all but there's no guarantee. | ||
268 | */ | ||
269 | |||
270 | #define SRC(y...) \ | ||
271 | 9999: y; \ | ||
272 | .section __ex_table, "a"; \ | ||
273 | .long 9999b, 6001f ; \ | ||
274 | .previous | ||
275 | |||
276 | #define DST(y...) \ | ||
277 | 9999: y; \ | ||
278 | .section __ex_table, "a"; \ | ||
279 | .long 9999b, 6002f ; \ | ||
280 | .previous | ||
281 | |||
282 | .align 4 | ||
283 | .globl csum_partial_copy_generic | ||
284 | |||
285 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | ||
286 | |||
287 | #define ARGBASE 16 | ||
288 | #define FP 12 | ||
289 | |||
290 | csum_partial_copy_generic: | ||
291 | subl $4,%esp | ||
292 | pushl %edi | ||
293 | pushl %esi | ||
294 | pushl %ebx | ||
295 | movl ARGBASE+16(%esp),%eax # sum | ||
296 | movl ARGBASE+12(%esp),%ecx # len | ||
297 | movl ARGBASE+4(%esp),%esi # src | ||
298 | movl ARGBASE+8(%esp),%edi # dst | ||
299 | |||
300 | testl $2, %edi # Check alignment. | ||
301 | jz 2f # Jump if alignment is ok. | ||
302 | subl $2, %ecx # Alignment uses up two bytes. | ||
303 | jae 1f # Jump if we had at least two bytes. | ||
304 | addl $2, %ecx # ecx was < 2. Deal with it. | ||
305 | jmp 4f | ||
306 | SRC(1: movw (%esi), %bx ) | ||
307 | addl $2, %esi | ||
308 | DST( movw %bx, (%edi) ) | ||
309 | addl $2, %edi | ||
310 | addw %bx, %ax | ||
311 | adcl $0, %eax | ||
312 | 2: | ||
313 | movl %ecx, FP(%esp) | ||
314 | shrl $5, %ecx | ||
315 | jz 2f | ||
316 | testl %esi, %esi | ||
317 | SRC(1: movl (%esi), %ebx ) | ||
318 | SRC( movl 4(%esi), %edx ) | ||
319 | adcl %ebx, %eax | ||
320 | DST( movl %ebx, (%edi) ) | ||
321 | adcl %edx, %eax | ||
322 | DST( movl %edx, 4(%edi) ) | ||
323 | |||
324 | SRC( movl 8(%esi), %ebx ) | ||
325 | SRC( movl 12(%esi), %edx ) | ||
326 | adcl %ebx, %eax | ||
327 | DST( movl %ebx, 8(%edi) ) | ||
328 | adcl %edx, %eax | ||
329 | DST( movl %edx, 12(%edi) ) | ||
330 | |||
331 | SRC( movl 16(%esi), %ebx ) | ||
332 | SRC( movl 20(%esi), %edx ) | ||
333 | adcl %ebx, %eax | ||
334 | DST( movl %ebx, 16(%edi) ) | ||
335 | adcl %edx, %eax | ||
336 | DST( movl %edx, 20(%edi) ) | ||
337 | |||
338 | SRC( movl 24(%esi), %ebx ) | ||
339 | SRC( movl 28(%esi), %edx ) | ||
340 | adcl %ebx, %eax | ||
341 | DST( movl %ebx, 24(%edi) ) | ||
342 | adcl %edx, %eax | ||
343 | DST( movl %edx, 28(%edi) ) | ||
344 | |||
345 | lea 32(%esi), %esi | ||
346 | lea 32(%edi), %edi | ||
347 | dec %ecx | ||
348 | jne 1b | ||
349 | adcl $0, %eax | ||
350 | 2: movl FP(%esp), %edx | ||
351 | movl %edx, %ecx | ||
352 | andl $0x1c, %edx | ||
353 | je 4f | ||
354 | shrl $2, %edx # This clears CF | ||
355 | SRC(3: movl (%esi), %ebx ) | ||
356 | adcl %ebx, %eax | ||
357 | DST( movl %ebx, (%edi) ) | ||
358 | lea 4(%esi), %esi | ||
359 | lea 4(%edi), %edi | ||
360 | dec %edx | ||
361 | jne 3b | ||
362 | adcl $0, %eax | ||
363 | 4: andl $3, %ecx | ||
364 | jz 7f | ||
365 | cmpl $2, %ecx | ||
366 | jb 5f | ||
367 | SRC( movw (%esi), %cx ) | ||
368 | leal 2(%esi), %esi | ||
369 | DST( movw %cx, (%edi) ) | ||
370 | leal 2(%edi), %edi | ||
371 | je 6f | ||
372 | shll $16,%ecx | ||
373 | SRC(5: movb (%esi), %cl ) | ||
374 | DST( movb %cl, (%edi) ) | ||
375 | 6: addl %ecx, %eax | ||
376 | adcl $0, %eax | ||
377 | 7: | ||
378 | 5000: | ||
379 | |||
380 | # Exception handler: | ||
381 | .section .fixup, "ax" | ||
382 | |||
383 | 6001: | ||
384 | movl ARGBASE+20(%esp), %ebx # src_err_ptr | ||
385 | movl $-EFAULT, (%ebx) | ||
386 | |||
387 | # zero the complete destination - computing the rest | ||
388 | # is too much work | ||
389 | movl ARGBASE+8(%esp), %edi # dst | ||
390 | movl ARGBASE+12(%esp), %ecx # len | ||
391 | xorl %eax,%eax | ||
392 | rep ; stosb | ||
393 | |||
394 | jmp 5000b | ||
395 | |||
396 | 6002: | ||
397 | movl ARGBASE+24(%esp), %ebx # dst_err_ptr | ||
398 | movl $-EFAULT,(%ebx) | ||
399 | jmp 5000b | ||
400 | |||
401 | .previous | ||
402 | |||
403 | popl %ebx | ||
404 | popl %esi | ||
405 | popl %edi | ||
406 | popl %ecx # equivalent to addl $4,%esp | ||
407 | ret | ||
408 | |||
409 | #else | ||
410 | |||
411 | /* Version for PentiumII/PPro */ | ||
412 | |||
413 | #define ROUND1(x) \ | ||
414 | SRC(movl x(%esi), %ebx ) ; \ | ||
415 | addl %ebx, %eax ; \ | ||
416 | DST(movl %ebx, x(%edi) ) ; | ||
417 | |||
418 | #define ROUND(x) \ | ||
419 | SRC(movl x(%esi), %ebx ) ; \ | ||
420 | adcl %ebx, %eax ; \ | ||
421 | DST(movl %ebx, x(%edi) ) ; | ||
422 | |||
423 | #define ARGBASE 12 | ||
424 | |||
425 | csum_partial_copy_generic: | ||
426 | pushl %ebx | ||
427 | pushl %edi | ||
428 | pushl %esi | ||
429 | movl ARGBASE+4(%esp),%esi #src | ||
430 | movl ARGBASE+8(%esp),%edi #dst | ||
431 | movl ARGBASE+12(%esp),%ecx #len | ||
432 | movl ARGBASE+16(%esp),%eax #sum | ||
433 | # movl %ecx, %edx | ||
434 | movl %ecx, %ebx | ||
435 | movl %esi, %edx | ||
436 | shrl $6, %ecx | ||
437 | andl $0x3c, %ebx | ||
438 | negl %ebx | ||
439 | subl %ebx, %esi | ||
440 | subl %ebx, %edi | ||
441 | lea -1(%esi),%edx | ||
442 | andl $-32,%edx | ||
443 | lea 3f(%ebx,%ebx), %ebx | ||
444 | testl %esi, %esi | ||
445 | jmp *%ebx | ||
446 | 1: addl $64,%esi | ||
447 | addl $64,%edi | ||
448 | SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) | ||
449 | ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) | ||
450 | ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) | ||
451 | ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) | ||
452 | ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) | ||
453 | 3: adcl $0,%eax | ||
454 | addl $64, %edx | ||
455 | dec %ecx | ||
456 | jge 1b | ||
457 | 4: movl ARGBASE+12(%esp),%edx #len | ||
458 | andl $3, %edx | ||
459 | jz 7f | ||
460 | cmpl $2, %edx | ||
461 | jb 5f | ||
462 | SRC( movw (%esi), %dx ) | ||
463 | leal 2(%esi), %esi | ||
464 | DST( movw %dx, (%edi) ) | ||
465 | leal 2(%edi), %edi | ||
466 | je 6f | ||
467 | shll $16,%edx | ||
468 | 5: | ||
469 | SRC( movb (%esi), %dl ) | ||
470 | DST( movb %dl, (%edi) ) | ||
471 | 6: addl %edx, %eax | ||
472 | adcl $0, %eax | ||
473 | 7: | ||
474 | .section .fixup, "ax" | ||
475 | 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr | ||
476 | movl $-EFAULT, (%ebx) | ||
477 | # zero the complete destination (computing the rest is too much work) | ||
478 | movl ARGBASE+8(%esp),%edi # dst | ||
479 | movl ARGBASE+12(%esp),%ecx # len | ||
480 | xorl %eax,%eax | ||
481 | rep; stosb | ||
482 | jmp 7b | ||
483 | 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr | ||
484 | movl $-EFAULT, (%ebx) | ||
485 | jmp 7b | ||
486 | .previous | ||
487 | |||
488 | popl %esi | ||
489 | popl %edi | ||
490 | popl %ebx | ||
491 | ret | ||
492 | |||
493 | #undef ROUND | ||
494 | #undef ROUND1 | ||
495 | |||
496 | #endif | ||
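What csum_partial() accumulates is the Internet (RFC 1071) ones'-complement sum. As a reading aid only, and not part of the patch, here is a simple C reference; alignment tricks aside (the roll $8 used for odd start addresses), it agrees with the assembly up to the final 16-bit fold performed later by csum_fold().

static unsigned int csum_partial_ref(const unsigned char *buff, int len,
				     unsigned int sum)
{
	unsigned long long acc = sum;		/* wide accumulator for carries */
	int i;

	for (i = 0; i + 1 < len; i += 2)	/* little-endian 16-bit words */
		acc += (unsigned int)(buff[i] | (buff[i + 1] << 8));
	if (len & 1)
		acc += buff[len - 1];		/* trailing odd byte */
	while (acc >> 32)			/* fold carries back into 32 bits */
		acc = (acc & 0xffffffffULL) + (acc >> 32);
	return (unsigned int)acc;
}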
diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c
new file mode 100644
index 000000000000..ab43394dc775
--- /dev/null
+++ b/arch/i386/lib/dec_and_lock.c
@@ -0,0 +1,40 @@
1 | /* | ||
2 | * x86 version of "atomic_dec_and_lock()" using | ||
3 | * the atomic "cmpxchg" instruction. | ||
4 | * | ||
5 | * (For CPU's lacking cmpxchg, we use the slow | ||
6 | * generic version, and this one never even gets | ||
7 | * compiled). | ||
8 | */ | ||
9 | |||
10 | #include <linux/spinlock.h> | ||
11 | #include <asm/atomic.h> | ||
12 | |||
13 | int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) | ||
14 | { | ||
15 | int counter; | ||
16 | int newcount; | ||
17 | |||
18 | repeat: | ||
19 | counter = atomic_read(atomic); | ||
20 | newcount = counter-1; | ||
21 | |||
22 | if (!newcount) | ||
23 | goto slow_path; | ||
24 | |||
25 | asm volatile("lock; cmpxchgl %1,%2" | ||
26 | :"=a" (newcount) | ||
27 | :"r" (newcount), "m" (atomic->counter), "0" (counter)); | ||
28 | |||
29 | /* If the above failed, "eax" will have changed */ | ||
30 | if (newcount != counter) | ||
31 | goto repeat; | ||
32 | return 0; | ||
33 | |||
34 | slow_path: | ||
35 | spin_lock(lock); | ||
36 | if (atomic_dec_and_test(atomic)) | ||
37 | return 1; | ||
38 | spin_unlock(lock); | ||
39 | return 0; | ||
40 | } | ||
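The comment above describes the cmpxchg-based fast path. Purely as an illustration (the helper name is made up and this is not kernel API), the same "decrement unless the result would hit zero" loop can be written with the GCC __sync compare-and-swap builtin; the slow path that takes the spinlock is left to the caller, as in the code above.

static int dec_unless_last(int *counter)
{
	int old, new;

	do {
		old = *counter;			/* like atomic_read() */
		new = old - 1;
		if (new == 0)
			return 0;		/* would hit zero: caller must lock and re-check */
	} while (!__sync_bool_compare_and_swap(counter, old, new));
	return 1;				/* decremented without reaching zero */
}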
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
new file mode 100644
index 000000000000..080639f262b1
--- /dev/null
+++ b/arch/i386/lib/delay.c
@@ -0,0 +1,49 @@
1 | /* | ||
2 | * Precise Delay Loops for i386 | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
6 | * | ||
7 | * The __delay function must _NOT_ be inlined as its execution time | ||
8 | * depends wildly on alignment on many x86 processors. The additional | ||
9 | * jump magic is needed to get the timing stable on all the CPU's | ||
10 | * we have to worry about. | ||
11 | */ | ||
12 | |||
13 | #include <linux/config.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <asm/processor.h> | ||
17 | #include <asm/delay.h> | ||
18 | #include <asm/timer.h> | ||
19 | |||
20 | #ifdef CONFIG_SMP | ||
21 | #include <asm/smp.h> | ||
22 | #endif | ||
23 | |||
24 | extern struct timer_opts* timer; | ||
25 | |||
26 | void __delay(unsigned long loops) | ||
27 | { | ||
28 | cur_timer->delay(loops); | ||
29 | } | ||
30 | |||
31 | inline void __const_udelay(unsigned long xloops) | ||
32 | { | ||
33 | int d0; | ||
34 | xloops *= 4; | ||
35 | __asm__("mull %0" | ||
36 | :"=d" (xloops), "=&a" (d0) | ||
37 | :"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4))); | ||
38 | __delay(++xloops); | ||
39 | } | ||
40 | |||
41 | void __udelay(unsigned long usecs) | ||
42 | { | ||
43 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | ||
44 | } | ||
45 | |||
46 | void __ndelay(unsigned long nsecs) | ||
47 | { | ||
48 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | ||
49 | } | ||
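The magic numbers are fixed-point scale factors: 0x10c7 is 2^32/1,000,000 rounded up and 0x5 is 2^32/1,000,000,000 rounded up, so usecs * 0x10c7 expresses microseconds as a 32.32-bit fraction of a second. The mull against loops_per_jiffy * (HZ/4) (xloops is pre-multiplied by 4 to compensate for the /4) leaves the wanted loop count in the high half of the product, %edx. A quick host-side check of the two constants, for illustration only:

#include <stdio.h>

int main(void)
{
	unsigned long long us = (1ULL << 32) / 1000000 + 1;	/* 2^32/1e6, rounded up */
	unsigned long long ns = (1ULL << 32) / 1000000000 + 1;	/* 2^32/1e9, rounded up */

	printf("%#llx %#llx\n", us, ns);	/* prints 0x10c7 0x5 */
	return 0;
}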
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S
new file mode 100644
index 000000000000..62d7f178a326
--- /dev/null
+++ b/arch/i386/lib/getuser.S
@@ -0,0 +1,70 @@
1 | /* | ||
2 | * __get_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * | ||
6 | * These functions have a non-standard call interface | ||
7 | * to make them more efficient, especially as they | ||
8 | * return an error value in addition to the "real" | ||
9 | * return value. | ||
10 | */ | ||
11 | #include <asm/thread_info.h> | ||
12 | |||
13 | |||
14 | /* | ||
15 | * __get_user_X | ||
16 | * | ||
17 | * Inputs: %eax contains the address | ||
18 | * | ||
19 | * Outputs: %eax is error code (0 or -EFAULT) | ||
20 | * %edx contains zero-extended value | ||
21 | * | ||
22 | * These functions should not modify any other registers, | ||
23 | * as they get called from within inline assembly. | ||
24 | */ | ||
25 | |||
26 | .text | ||
27 | .align 4 | ||
28 | .globl __get_user_1 | ||
29 | __get_user_1: | ||
30 | GET_THREAD_INFO(%edx) | ||
31 | cmpl TI_addr_limit(%edx),%eax | ||
32 | jae bad_get_user | ||
33 | 1: movzbl (%eax),%edx | ||
34 | xorl %eax,%eax | ||
35 | ret | ||
36 | |||
37 | .align 4 | ||
38 | .globl __get_user_2 | ||
39 | __get_user_2: | ||
40 | addl $1,%eax | ||
41 | jc bad_get_user | ||
42 | GET_THREAD_INFO(%edx) | ||
43 | cmpl TI_addr_limit(%edx),%eax | ||
44 | jae bad_get_user | ||
45 | 2: movzwl -1(%eax),%edx | ||
46 | xorl %eax,%eax | ||
47 | ret | ||
48 | |||
49 | .align 4 | ||
50 | .globl __get_user_4 | ||
51 | __get_user_4: | ||
52 | addl $3,%eax | ||
53 | jc bad_get_user | ||
54 | GET_THREAD_INFO(%edx) | ||
55 | cmpl TI_addr_limit(%edx),%eax | ||
56 | jae bad_get_user | ||
57 | 3: movl -3(%eax),%edx | ||
58 | xorl %eax,%eax | ||
59 | ret | ||
60 | |||
61 | bad_get_user: | ||
62 | xorl %edx,%edx | ||
63 | movl $-14,%eax | ||
64 | ret | ||
65 | |||
66 | .section __ex_table,"a" | ||
67 | .long 1b,bad_get_user | ||
68 | .long 2b,bad_get_user | ||
69 | .long 3b,bad_get_user | ||
70 | .previous | ||
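To illustrate the register interface documented at the top of this file (address passed in %eax, error code returned in %eax, zero-extended value in %edx), a caller-side wrapper might look like the hypothetical macro below; this is a sketch only, not the kernel's actual get_user() implementation.

#define demo_get_user_4(val, uaddr)				\
({								\
	long __err;						\
	unsigned long __v;					\
	asm volatile("call __get_user_4"			\
		     : "=a" (__err), "=d" (__v)			\
		     : "0" (uaddr)				\
		     : "cc");					\
	(val) = (__typeof__(val))__v;				\
	__err;							\
})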
diff --git a/arch/i386/lib/memcpy.c b/arch/i386/lib/memcpy.c
new file mode 100644
index 000000000000..891b2359d18a
--- /dev/null
+++ b/arch/i386/lib/memcpy.c
@@ -0,0 +1,44 @@
1 | #include <linux/config.h> | ||
2 | #include <linux/string.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | #undef memcpy | ||
6 | #undef memset | ||
7 | |||
8 | void *memcpy(void *to, const void *from, size_t n) | ||
9 | { | ||
10 | #ifdef CONFIG_X86_USE_3DNOW | ||
11 | return __memcpy3d(to, from, n); | ||
12 | #else | ||
13 | return __memcpy(to, from, n); | ||
14 | #endif | ||
15 | } | ||
16 | EXPORT_SYMBOL(memcpy); | ||
17 | |||
18 | void *memset(void *s, int c, size_t count) | ||
19 | { | ||
20 | return __memset(s, c, count); | ||
21 | } | ||
22 | EXPORT_SYMBOL(memset); | ||
23 | |||
24 | void *memmove(void *dest, const void *src, size_t n) | ||
25 | { | ||
26 | int d0, d1, d2; | ||
27 | |||
28 | if (dest < src) { | ||
29 | memcpy(dest,src,n); | ||
30 | } else { | ||
31 | __asm__ __volatile__( | ||
32 | "std\n\t" | ||
33 | "rep\n\t" | ||
34 | "movsb\n\t" | ||
35 | "cld" | ||
36 | : "=&c" (d0), "=&S" (d1), "=&D" (d2) | ||
37 | :"0" (n), | ||
38 | "1" (n-1+(const char *)src), | ||
39 | "2" (n-1+(char *)dest) | ||
40 | :"memory"); | ||
41 | } | ||
42 | return dest; | ||
43 | } | ||
44 | EXPORT_SYMBOL(memmove); | ||
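memmove() above relies on the copy direction: when the destination starts below the source a forward copy is safe, otherwise the asm copies backwards with std followed by rep movsb so that overlapping bytes are not clobbered before they are read. A portable C sketch of the same logic, for reference only:

#include <stddef.h>

static void *memmove_ref(void *dest, const void *src, size_t n)
{
	unsigned char *d = dest;
	const unsigned char *s = src;

	if (d < s) {
		while (n--)
			*d++ = *s++;		/* forward copy is safe */
	} else {
		while (n--)
			d[n] = s[n];		/* copy from the end backwards */
	}
	return dest;
}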
diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c
new file mode 100644
index 000000000000..01f8b1a2cc84
--- /dev/null
+++ b/arch/i386/lib/mmx.c
@@ -0,0 +1,399 @@
1 | #include <linux/config.h> | ||
2 | #include <linux/types.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/hardirq.h> | ||
6 | |||
7 | #include <asm/i387.h> | ||
8 | |||
9 | |||
10 | /* | ||
11 | * MMX 3DNow! library helper functions | ||
12 | * | ||
13 | * To do: | ||
14 | * We can use MMX just for prefetch in IRQ's. This may be a win. | ||
15 | * (reported so on K6-III) | ||
16 | * We should use a better code neutral filler for the short jump | ||
17 | * leal ebx. [ebx] is apparently best for K6-2, but Cyrix ?? | ||
18 | * We also want to clobber the filler register so we don't get any | ||
19 | * register forwarding stalls on the filler. | ||
20 | * | ||
21 | * Add *user handling. Checksums are not a win with MMX on any CPU | ||
22 | * tested so far for any MMX solution figured. | ||
23 | * | ||
24 | * 22/09/2000 - Arjan van de Ven | ||
25 | * Improved for non-engineering-sample Athlons | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | void *_mmx_memcpy(void *to, const void *from, size_t len) | ||
30 | { | ||
31 | void *p; | ||
32 | int i; | ||
33 | |||
34 | if (unlikely(in_interrupt())) | ||
35 | return __memcpy(to, from, len); | ||
36 | |||
37 | p = to; | ||
38 | i = len >> 6; /* len/64 */ | ||
39 | |||
40 | kernel_fpu_begin(); | ||
41 | |||
42 | __asm__ __volatile__ ( | ||
43 | "1: prefetch (%0)\n" /* This set is 28 bytes */ | ||
44 | " prefetch 64(%0)\n" | ||
45 | " prefetch 128(%0)\n" | ||
46 | " prefetch 192(%0)\n" | ||
47 | " prefetch 256(%0)\n" | ||
48 | "2: \n" | ||
49 | ".section .fixup, \"ax\"\n" | ||
50 | "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ | ||
51 | " jmp 2b\n" | ||
52 | ".previous\n" | ||
53 | ".section __ex_table,\"a\"\n" | ||
54 | " .align 4\n" | ||
55 | " .long 1b, 3b\n" | ||
56 | ".previous" | ||
57 | : : "r" (from) ); | ||
58 | |||
59 | |||
60 | for(; i>5; i--) | ||
61 | { | ||
62 | __asm__ __volatile__ ( | ||
63 | "1: prefetch 320(%0)\n" | ||
64 | "2: movq (%0), %%mm0\n" | ||
65 | " movq 8(%0), %%mm1\n" | ||
66 | " movq 16(%0), %%mm2\n" | ||
67 | " movq 24(%0), %%mm3\n" | ||
68 | " movq %%mm0, (%1)\n" | ||
69 | " movq %%mm1, 8(%1)\n" | ||
70 | " movq %%mm2, 16(%1)\n" | ||
71 | " movq %%mm3, 24(%1)\n" | ||
72 | " movq 32(%0), %%mm0\n" | ||
73 | " movq 40(%0), %%mm1\n" | ||
74 | " movq 48(%0), %%mm2\n" | ||
75 | " movq 56(%0), %%mm3\n" | ||
76 | " movq %%mm0, 32(%1)\n" | ||
77 | " movq %%mm1, 40(%1)\n" | ||
78 | " movq %%mm2, 48(%1)\n" | ||
79 | " movq %%mm3, 56(%1)\n" | ||
80 | ".section .fixup, \"ax\"\n" | ||
81 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
82 | " jmp 2b\n" | ||
83 | ".previous\n" | ||
84 | ".section __ex_table,\"a\"\n" | ||
85 | " .align 4\n" | ||
86 | " .long 1b, 3b\n" | ||
87 | ".previous" | ||
88 | : : "r" (from), "r" (to) : "memory"); | ||
89 | from+=64; | ||
90 | to+=64; | ||
91 | } | ||
92 | |||
93 | for(; i>0; i--) | ||
94 | { | ||
95 | __asm__ __volatile__ ( | ||
96 | " movq (%0), %%mm0\n" | ||
97 | " movq 8(%0), %%mm1\n" | ||
98 | " movq 16(%0), %%mm2\n" | ||
99 | " movq 24(%0), %%mm3\n" | ||
100 | " movq %%mm0, (%1)\n" | ||
101 | " movq %%mm1, 8(%1)\n" | ||
102 | " movq %%mm2, 16(%1)\n" | ||
103 | " movq %%mm3, 24(%1)\n" | ||
104 | " movq 32(%0), %%mm0\n" | ||
105 | " movq 40(%0), %%mm1\n" | ||
106 | " movq 48(%0), %%mm2\n" | ||
107 | " movq 56(%0), %%mm3\n" | ||
108 | " movq %%mm0, 32(%1)\n" | ||
109 | " movq %%mm1, 40(%1)\n" | ||
110 | " movq %%mm2, 48(%1)\n" | ||
111 | " movq %%mm3, 56(%1)\n" | ||
112 | : : "r" (from), "r" (to) : "memory"); | ||
113 | from+=64; | ||
114 | to+=64; | ||
115 | } | ||
116 | /* | ||
117 | * Now do the tail of the block | ||
118 | */ | ||
119 | __memcpy(to, from, len&63); | ||
120 | kernel_fpu_end(); | ||
121 | return p; | ||
122 | } | ||
123 | |||
124 | #ifdef CONFIG_MK7 | ||
125 | |||
126 | /* | ||
127 | * The K7 has streaming cache bypass load/store. The Cyrix III, K6 and | ||
128 | * other MMX using processors do not. | ||
129 | */ | ||
130 | |||
131 | static void fast_clear_page(void *page) | ||
132 | { | ||
133 | int i; | ||
134 | |||
135 | kernel_fpu_begin(); | ||
136 | |||
137 | __asm__ __volatile__ ( | ||
138 | " pxor %%mm0, %%mm0\n" : : | ||
139 | ); | ||
140 | |||
141 | for(i=0;i<4096/64;i++) | ||
142 | { | ||
143 | __asm__ __volatile__ ( | ||
144 | " movntq %%mm0, (%0)\n" | ||
145 | " movntq %%mm0, 8(%0)\n" | ||
146 | " movntq %%mm0, 16(%0)\n" | ||
147 | " movntq %%mm0, 24(%0)\n" | ||
148 | " movntq %%mm0, 32(%0)\n" | ||
149 | " movntq %%mm0, 40(%0)\n" | ||
150 | " movntq %%mm0, 48(%0)\n" | ||
151 | " movntq %%mm0, 56(%0)\n" | ||
152 | : : "r" (page) : "memory"); | ||
153 | page+=64; | ||
154 | } | ||
155 | /* since movntq is weakly-ordered, a "sfence" is needed to become | ||
156 | * ordered again. | ||
157 | */ | ||
158 | __asm__ __volatile__ ( | ||
159 | " sfence \n" : : | ||
160 | ); | ||
161 | kernel_fpu_end(); | ||
162 | } | ||
163 | |||
164 | static void fast_copy_page(void *to, void *from) | ||
165 | { | ||
166 | int i; | ||
167 | |||
168 | kernel_fpu_begin(); | ||
169 | |||
170 | /* maybe the prefetch stuff can go before the expensive fnsave... | ||
171 | * but that is for later. -AV | ||
172 | */ | ||
173 | __asm__ __volatile__ ( | ||
174 | "1: prefetch (%0)\n" | ||
175 | " prefetch 64(%0)\n" | ||
176 | " prefetch 128(%0)\n" | ||
177 | " prefetch 192(%0)\n" | ||
178 | " prefetch 256(%0)\n" | ||
179 | "2: \n" | ||
180 | ".section .fixup, \"ax\"\n" | ||
181 | "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ | ||
182 | " jmp 2b\n" | ||
183 | ".previous\n" | ||
184 | ".section __ex_table,\"a\"\n" | ||
185 | " .align 4\n" | ||
186 | " .long 1b, 3b\n" | ||
187 | ".previous" | ||
188 | : : "r" (from) ); | ||
189 | |||
190 | for(i=0; i<(4096-320)/64; i++) | ||
191 | { | ||
192 | __asm__ __volatile__ ( | ||
193 | "1: prefetch 320(%0)\n" | ||
194 | "2: movq (%0), %%mm0\n" | ||
195 | " movntq %%mm0, (%1)\n" | ||
196 | " movq 8(%0), %%mm1\n" | ||
197 | " movntq %%mm1, 8(%1)\n" | ||
198 | " movq 16(%0), %%mm2\n" | ||
199 | " movntq %%mm2, 16(%1)\n" | ||
200 | " movq 24(%0), %%mm3\n" | ||
201 | " movntq %%mm3, 24(%1)\n" | ||
202 | " movq 32(%0), %%mm4\n" | ||
203 | " movntq %%mm4, 32(%1)\n" | ||
204 | " movq 40(%0), %%mm5\n" | ||
205 | " movntq %%mm5, 40(%1)\n" | ||
206 | " movq 48(%0), %%mm6\n" | ||
207 | " movntq %%mm6, 48(%1)\n" | ||
208 | " movq 56(%0), %%mm7\n" | ||
209 | " movntq %%mm7, 56(%1)\n" | ||
210 | ".section .fixup, \"ax\"\n" | ||
211 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
212 | " jmp 2b\n" | ||
213 | ".previous\n" | ||
214 | ".section __ex_table,\"a\"\n" | ||
215 | " .align 4\n" | ||
216 | " .long 1b, 3b\n" | ||
217 | ".previous" | ||
218 | : : "r" (from), "r" (to) : "memory"); | ||
219 | from+=64; | ||
220 | to+=64; | ||
221 | } | ||
222 | for(i=(4096-320)/64; i<4096/64; i++) | ||
223 | { | ||
224 | __asm__ __volatile__ ( | ||
225 | "2: movq (%0), %%mm0\n" | ||
226 | " movntq %%mm0, (%1)\n" | ||
227 | " movq 8(%0), %%mm1\n" | ||
228 | " movntq %%mm1, 8(%1)\n" | ||
229 | " movq 16(%0), %%mm2\n" | ||
230 | " movntq %%mm2, 16(%1)\n" | ||
231 | " movq 24(%0), %%mm3\n" | ||
232 | " movntq %%mm3, 24(%1)\n" | ||
233 | " movq 32(%0), %%mm4\n" | ||
234 | " movntq %%mm4, 32(%1)\n" | ||
235 | " movq 40(%0), %%mm5\n" | ||
236 | " movntq %%mm5, 40(%1)\n" | ||
237 | " movq 48(%0), %%mm6\n" | ||
238 | " movntq %%mm6, 48(%1)\n" | ||
239 | " movq 56(%0), %%mm7\n" | ||
240 | " movntq %%mm7, 56(%1)\n" | ||
241 | : : "r" (from), "r" (to) : "memory"); | ||
242 | from+=64; | ||
243 | to+=64; | ||
244 | } | ||
245 | /* since movntq is weakly-ordered, a "sfence" is needed to become | ||
246 | * ordered again. | ||
247 | */ | ||
248 | __asm__ __volatile__ ( | ||
249 | " sfence \n" : : | ||
250 | ); | ||
251 | kernel_fpu_end(); | ||
252 | } | ||
253 | |||
254 | #else | ||
255 | |||
256 | /* | ||
257 | * Generic MMX implementation without K7 specific streaming | ||
258 | */ | ||
259 | |||
260 | static void fast_clear_page(void *page) | ||
261 | { | ||
262 | int i; | ||
263 | |||
264 | kernel_fpu_begin(); | ||
265 | |||
266 | __asm__ __volatile__ ( | ||
267 | " pxor %%mm0, %%mm0\n" : : | ||
268 | ); | ||
269 | |||
270 | for(i=0;i<4096/128;i++) | ||
271 | { | ||
272 | __asm__ __volatile__ ( | ||
273 | " movq %%mm0, (%0)\n" | ||
274 | " movq %%mm0, 8(%0)\n" | ||
275 | " movq %%mm0, 16(%0)\n" | ||
276 | " movq %%mm0, 24(%0)\n" | ||
277 | " movq %%mm0, 32(%0)\n" | ||
278 | " movq %%mm0, 40(%0)\n" | ||
279 | " movq %%mm0, 48(%0)\n" | ||
280 | " movq %%mm0, 56(%0)\n" | ||
281 | " movq %%mm0, 64(%0)\n" | ||
282 | " movq %%mm0, 72(%0)\n" | ||
283 | " movq %%mm0, 80(%0)\n" | ||
284 | " movq %%mm0, 88(%0)\n" | ||
285 | " movq %%mm0, 96(%0)\n" | ||
286 | " movq %%mm0, 104(%0)\n" | ||
287 | " movq %%mm0, 112(%0)\n" | ||
288 | " movq %%mm0, 120(%0)\n" | ||
289 | : : "r" (page) : "memory"); | ||
290 | page+=128; | ||
291 | } | ||
292 | |||
293 | kernel_fpu_end(); | ||
294 | } | ||
295 | |||
296 | static void fast_copy_page(void *to, void *from) | ||
297 | { | ||
298 | int i; | ||
299 | |||
300 | |||
301 | kernel_fpu_begin(); | ||
302 | |||
303 | __asm__ __volatile__ ( | ||
304 | "1: prefetch (%0)\n" | ||
305 | " prefetch 64(%0)\n" | ||
306 | " prefetch 128(%0)\n" | ||
307 | " prefetch 192(%0)\n" | ||
308 | " prefetch 256(%0)\n" | ||
309 | "2: \n" | ||
310 | ".section .fixup, \"ax\"\n" | ||
311 | "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ | ||
312 | " jmp 2b\n" | ||
313 | ".previous\n" | ||
314 | ".section __ex_table,\"a\"\n" | ||
315 | " .align 4\n" | ||
316 | " .long 1b, 3b\n" | ||
317 | ".previous" | ||
318 | : : "r" (from) ); | ||
319 | |||
320 | for(i=0; i<4096/64; i++) | ||
321 | { | ||
322 | __asm__ __volatile__ ( | ||
323 | "1: prefetch 320(%0)\n" | ||
324 | "2: movq (%0), %%mm0\n" | ||
325 | " movq 8(%0), %%mm1\n" | ||
326 | " movq 16(%0), %%mm2\n" | ||
327 | " movq 24(%0), %%mm3\n" | ||
328 | " movq %%mm0, (%1)\n" | ||
329 | " movq %%mm1, 8(%1)\n" | ||
330 | " movq %%mm2, 16(%1)\n" | ||
331 | " movq %%mm3, 24(%1)\n" | ||
332 | " movq 32(%0), %%mm0\n" | ||
333 | " movq 40(%0), %%mm1\n" | ||
334 | " movq 48(%0), %%mm2\n" | ||
335 | " movq 56(%0), %%mm3\n" | ||
336 | " movq %%mm0, 32(%1)\n" | ||
337 | " movq %%mm1, 40(%1)\n" | ||
338 | " movq %%mm2, 48(%1)\n" | ||
339 | " movq %%mm3, 56(%1)\n" | ||
340 | ".section .fixup, \"ax\"\n" | ||
341 | "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ | ||
342 | " jmp 2b\n" | ||
343 | ".previous\n" | ||
344 | ".section __ex_table,\"a\"\n" | ||
345 | " .align 4\n" | ||
346 | " .long 1b, 3b\n" | ||
347 | ".previous" | ||
348 | : : "r" (from), "r" (to) : "memory"); | ||
349 | from+=64; | ||
350 | to+=64; | ||
351 | } | ||
352 | kernel_fpu_end(); | ||
353 | } | ||
354 | |||
355 | |||
356 | #endif | ||
357 | |||
358 | /* | ||
359 | * Favour MMX for page clear and copy. | ||
360 | */ | ||
361 | |||
362 | static void slow_zero_page(void * page) | ||
363 | { | ||
364 | int d0, d1; | ||
365 | __asm__ __volatile__( \ | ||
366 | "cld\n\t" \ | ||
367 | "rep ; stosl" \ | ||
368 | : "=&c" (d0), "=&D" (d1) | ||
369 | :"a" (0),"1" (page),"0" (1024) | ||
370 | :"memory"); | ||
371 | } | ||
372 | |||
373 | void mmx_clear_page(void * page) | ||
374 | { | ||
375 | if(unlikely(in_interrupt())) | ||
376 | slow_zero_page(page); | ||
377 | else | ||
378 | fast_clear_page(page); | ||
379 | } | ||
380 | |||
381 | static void slow_copy_page(void *to, void *from) | ||
382 | { | ||
383 | int d0, d1, d2; | ||
384 | __asm__ __volatile__( \ | ||
385 | "cld\n\t" \ | ||
386 | "rep ; movsl" \ | ||
387 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) \ | ||
388 | : "0" (1024),"1" ((long) to),"2" ((long) from) \ | ||
389 | : "memory"); | ||
390 | } | ||
391 | |||
392 | |||
393 | void mmx_copy_page(void *to, void *from) | ||
394 | { | ||
395 | if(unlikely(in_interrupt())) | ||
396 | slow_copy_page(to, from); | ||
397 | else | ||
398 | fast_copy_page(to, from); | ||
399 | } | ||
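A note on the loop structure in _mmx_memcpy() and fast_copy_page(): the data moves in 64-byte blocks (i = len >> 6) while prefetching 320 bytes ahead, the prefetch is dropped for the last five blocks (5 * 64 = 320 bytes) so it never reads past the source buffer, and the sub-64-byte tail is handed to plain __memcpy(). A plain-C sketch of that block/tail split, for illustration only:

static void copy_in_64byte_blocks(unsigned char *to, const unsigned char *from,
				  unsigned long len)
{
	unsigned long i, blocks = len >> 6;	/* number of full 64-byte blocks */

	for (i = 0; i < blocks; i++)		/* the MMX movq loop in the real code */
		__builtin_memcpy(to + i * 64, from + i * 64, 64);

	__builtin_memcpy(to + blocks * 64, from + blocks * 64, len & 63);	/* tail */
}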
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S
new file mode 100644
index 000000000000..a32d9f570f48
--- /dev/null
+++ b/arch/i386/lib/putuser.S
@@ -0,0 +1,87 @@
1 | /* | ||
2 | * __put_user functions. | ||
3 | * | ||
4 | * (C) Copyright 2005 Linus Torvalds | ||
5 | * | ||
6 | * These functions have a non-standard call interface | ||
7 | * to make them more efficient, especially as they | ||
8 | * return an error value in addition to the "real" | ||
9 | * return value. | ||
10 | */ | ||
11 | #include <asm/thread_info.h> | ||
12 | |||
13 | |||
14 | /* | ||
15 | * __put_user_X | ||
16 | * | ||
17 | * Inputs: %eax[:%edx] contains the data | ||
18 | * %ecx contains the address | ||
19 | * | ||
20 | * Outputs: %eax is error code (0 or -EFAULT) | ||
21 | * | ||
22 | * These functions should not modify any other registers, | ||
23 | * as they get called from within inline assembly. | ||
24 | */ | ||
25 | |||
26 | #define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx) | ||
27 | #define EXIT popl %ebx ; ret | ||
28 | |||
29 | .text | ||
30 | .align 4 | ||
31 | .globl __put_user_1 | ||
32 | __put_user_1: | ||
33 | ENTER | ||
34 | cmpl TI_addr_limit(%ebx),%ecx | ||
35 | jae bad_put_user | ||
36 | 1: movb %al,(%ecx) | ||
37 | xorl %eax,%eax | ||
38 | EXIT | ||
39 | |||
40 | .align 4 | ||
41 | .globl __put_user_2 | ||
42 | __put_user_2: | ||
43 | ENTER | ||
44 | movl TI_addr_limit(%ebx),%ebx | ||
45 | subl $1,%ebx | ||
46 | cmpl %ebx,%ecx | ||
47 | jae bad_put_user | ||
48 | 2: movw %ax,(%ecx) | ||
49 | xorl %eax,%eax | ||
50 | EXIT | ||
51 | |||
52 | .align 4 | ||
53 | .globl __put_user_4 | ||
54 | __put_user_4: | ||
55 | ENTER | ||
56 | movl TI_addr_limit(%ebx),%ebx | ||
57 | subl $3,%ebx | ||
58 | cmpl %ebx,%ecx | ||
59 | jae bad_put_user | ||
60 | 3: movl %eax,(%ecx) | ||
61 | xorl %eax,%eax | ||
62 | EXIT | ||
63 | |||
64 | .align 4 | ||
65 | .globl __put_user_8 | ||
66 | __put_user_8: | ||
67 | ENTER | ||
68 | movl TI_addr_limit(%ebx),%ebx | ||
69 | subl $7,%ebx | ||
70 | cmpl %ebx,%ecx | ||
71 | jae bad_put_user | ||
72 | 4: movl %eax,(%ecx) | ||
73 | 5: movl %edx,4(%ecx) | ||
74 | xorl %eax,%eax | ||
75 | EXIT | ||
76 | |||
77 | bad_put_user: | ||
78 | movl $-14,%eax | ||
79 | EXIT | ||
80 | |||
81 | .section __ex_table,"a" | ||
82 | .long 1b,bad_put_user | ||
83 | .long 2b,bad_put_user | ||
84 | .long 3b,bad_put_user | ||
85 | .long 4b,bad_put_user | ||
86 | .long 5b,bad_put_user | ||
87 | .previous | ||
diff --git a/arch/i386/lib/strstr.c b/arch/i386/lib/strstr.c
new file mode 100644
index 000000000000..a3dafbf59dae
--- /dev/null
+++ b/arch/i386/lib/strstr.c
@@ -0,0 +1,31 @@
1 | #include <linux/string.h> | ||
2 | |||
3 | char * strstr(const char * cs,const char * ct) | ||
4 | { | ||
5 | int d0, d1; | ||
6 | register char * __res; | ||
7 | __asm__ __volatile__( | ||
8 | "movl %6,%%edi\n\t" | ||
9 | "repne\n\t" | ||
10 | "scasb\n\t" | ||
11 | "notl %%ecx\n\t" | ||
12 | "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ | ||
13 | "movl %%ecx,%%edx\n" | ||
14 | "1:\tmovl %6,%%edi\n\t" | ||
15 | "movl %%esi,%%eax\n\t" | ||
16 | "movl %%edx,%%ecx\n\t" | ||
17 | "repe\n\t" | ||
18 | "cmpsb\n\t" | ||
19 | "je 2f\n\t" /* also works for empty string, see above */ | ||
20 | "xchgl %%eax,%%esi\n\t" | ||
21 | "incl %%esi\n\t" | ||
22 | "cmpb $0,-1(%%eax)\n\t" | ||
23 | "jne 1b\n\t" | ||
24 | "xorl %%eax,%%eax\n\t" | ||
25 | "2:" | ||
26 | :"=a" (__res), "=&c" (d0), "=&S" (d1) | ||
27 | :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) | ||
28 | :"dx", "di"); | ||
29 | return __res; | ||
30 | } | ||
31 | |||
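The assembly first measures strlen(ct) with repne scasb and then runs a repe cmpsb comparison of ct against each successive starting position in cs. The equivalent portable C, given here for reference only:

static char *strstr_ref(const char *cs, const char *ct)
{
	unsigned long n = 0, i;

	while (ct[n])				/* strlen(ct), as repne scasb computes it */
		n++;
	for (;; cs++) {
		for (i = 0; i < n; i++)		/* repe cmpsb over n bytes */
			if (cs[i] != ct[i])
				break;
		if (i == n)
			return (char *)cs;	/* match; also handles an empty ct */
		if (*cs == '\0')
			return (char *)0;	/* ran off the end of cs: no match */
	}
}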
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c
new file mode 100644
index 000000000000..51aa2bbb0269
--- /dev/null
+++ b/arch/i386/lib/usercopy.c
@@ -0,0 +1,636 @@
1 | /* | ||
2 | * User address space access functions. | ||
3 | * The non inlined parts of asm-i386/uaccess.h are here. | ||
4 | * | ||
5 | * Copyright 1997 Andi Kleen <ak@muc.de> | ||
6 | * Copyright 1997 Linus Torvalds | ||
7 | */ | ||
8 | #include <linux/config.h> | ||
9 | #include <linux/mm.h> | ||
10 | #include <linux/highmem.h> | ||
11 | #include <linux/blkdev.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <asm/uaccess.h> | ||
14 | #include <asm/mmx.h> | ||
15 | |||
16 | static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) | ||
17 | { | ||
18 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
19 | if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask)) | ||
20 | return 0; | ||
21 | #endif | ||
22 | return 1; | ||
23 | } | ||
24 | #define movsl_is_ok(a1,a2,n) \ | ||
25 | __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n)) | ||
26 | |||
27 | /* | ||
28 | * Copy a null terminated string from userspace. | ||
29 | */ | ||
30 | |||
31 | #define __do_strncpy_from_user(dst,src,count,res) \ | ||
32 | do { \ | ||
33 | int __d0, __d1, __d2; \ | ||
34 | might_sleep(); \ | ||
35 | __asm__ __volatile__( \ | ||
36 | " testl %1,%1\n" \ | ||
37 | " jz 2f\n" \ | ||
38 | "0: lodsb\n" \ | ||
39 | " stosb\n" \ | ||
40 | " testb %%al,%%al\n" \ | ||
41 | " jz 1f\n" \ | ||
42 | " decl %1\n" \ | ||
43 | " jnz 0b\n" \ | ||
44 | "1: subl %1,%0\n" \ | ||
45 | "2:\n" \ | ||
46 | ".section .fixup,\"ax\"\n" \ | ||
47 | "3: movl %5,%0\n" \ | ||
48 | " jmp 2b\n" \ | ||
49 | ".previous\n" \ | ||
50 | ".section __ex_table,\"a\"\n" \ | ||
51 | " .align 4\n" \ | ||
52 | " .long 0b,3b\n" \ | ||
53 | ".previous" \ | ||
54 | : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ | ||
55 | "=&D" (__d2) \ | ||
56 | : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ | ||
57 | : "memory"); \ | ||
58 | } while (0) | ||
59 | |||
60 | /** | ||
61 | * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. | ||
62 | * @dst: Destination address, in kernel space. This buffer must be at | ||
63 | * least @count bytes long. | ||
64 | * @src: Source address, in user space. | ||
65 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
66 | * | ||
67 | * Copies a NUL-terminated string from userspace to kernel space. | ||
68 | * Caller must check the specified block with access_ok() before calling | ||
69 | * this function. | ||
70 | * | ||
71 | * On success, returns the length of the string (not including the trailing | ||
72 | * NUL). | ||
73 | * | ||
74 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
75 | * copied). | ||
76 | * | ||
77 | * If @count is smaller than the length of the string, copies @count bytes | ||
78 | * and returns @count. | ||
79 | */ | ||
80 | long | ||
81 | __strncpy_from_user(char *dst, const char __user *src, long count) | ||
82 | { | ||
83 | long res; | ||
84 | __do_strncpy_from_user(dst, src, count, res); | ||
85 | return res; | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * strncpy_from_user: - Copy a NUL terminated string from userspace. | ||
90 | * @dst: Destination address, in kernel space. This buffer must be at | ||
91 | * least @count bytes long. | ||
92 | * @src: Source address, in user space. | ||
93 | * @count: Maximum number of bytes to copy, including the trailing NUL. | ||
94 | * | ||
95 | * Copies a NUL-terminated string from userspace to kernel space. | ||
96 | * | ||
97 | * On success, returns the length of the string (not including the trailing | ||
98 | * NUL). | ||
99 | * | ||
100 | * If access to userspace fails, returns -EFAULT (some data may have been | ||
101 | * copied). | ||
102 | * | ||
103 | * If @count is smaller than the length of the string, copies @count bytes | ||
104 | * and returns @count. | ||
105 | */ | ||
106 | long | ||
107 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
108 | { | ||
109 | long res = -EFAULT; | ||
110 | if (access_ok(VERIFY_READ, src, 1)) | ||
111 | __do_strncpy_from_user(dst, src, count, res); | ||
112 | return res; | ||
113 | } | ||
114 | |||
115 | |||
116 | /* | ||
117 | * Zero Userspace | ||
118 | */ | ||
119 | |||
120 | #define __do_clear_user(addr,size) \ | ||
121 | do { \ | ||
122 | int __d0; \ | ||
123 | might_sleep(); \ | ||
124 | __asm__ __volatile__( \ | ||
125 | "0: rep; stosl\n" \ | ||
126 | " movl %2,%0\n" \ | ||
127 | "1: rep; stosb\n" \ | ||
128 | "2:\n" \ | ||
129 | ".section .fixup,\"ax\"\n" \ | ||
130 | "3: lea 0(%2,%0,4),%0\n" \ | ||
131 | " jmp 2b\n" \ | ||
132 | ".previous\n" \ | ||
133 | ".section __ex_table,\"a\"\n" \ | ||
134 | " .align 4\n" \ | ||
135 | " .long 0b,3b\n" \ | ||
136 | " .long 1b,2b\n" \ | ||
137 | ".previous" \ | ||
138 | : "=&c"(size), "=&D" (__d0) \ | ||
139 | : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0)); \ | ||
140 | } while (0) | ||
141 | |||
142 | /** | ||
143 | * clear_user: - Zero a block of memory in user space. | ||
144 | * @to: Destination address, in user space. | ||
145 | * @n: Number of bytes to zero. | ||
146 | * | ||
147 | * Zero a block of memory in user space. | ||
148 | * | ||
149 | * Returns number of bytes that could not be cleared. | ||
150 | * On success, this will be zero. | ||
151 | */ | ||
152 | unsigned long | ||
153 | clear_user(void __user *to, unsigned long n) | ||
154 | { | ||
155 | might_sleep(); | ||
156 | if (access_ok(VERIFY_WRITE, to, n)) | ||
157 | __do_clear_user(to, n); | ||
158 | return n; | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * __clear_user: - Zero a block of memory in user space, with less checking. | ||
163 | * @to: Destination address, in user space. | ||
164 | * @n: Number of bytes to zero. | ||
165 | * | ||
166 | * Zero a block of memory in user space. Caller must check | ||
167 | * the specified block with access_ok() before calling this function. | ||
168 | * | ||
169 | * Returns number of bytes that could not be cleared. | ||
170 | * On success, this will be zero. | ||
171 | */ | ||
172 | unsigned long | ||
173 | __clear_user(void __user *to, unsigned long n) | ||
174 | { | ||
175 | __do_clear_user(to, n); | ||
176 | return n; | ||
177 | } | ||
178 | |||
179 | /** | ||
180 | * strlen_user: - Get the size of a string in user space. | ||
181 | * @s: The string to measure. | ||
182 | * @n: The maximum valid length | ||
183 | * | ||
184 | * Get the size of a NUL-terminated string in user space. | ||
185 | * | ||
186 | * Returns the size of the string INCLUDING the terminating NUL. | ||
187 | * On exception, returns 0. | ||
188 | * If the string is too long, returns a value greater than @n. | ||
189 | */ | ||
190 | long strnlen_user(const char __user *s, long n) | ||
191 | { | ||
192 | unsigned long mask = -__addr_ok(s); | ||
193 | unsigned long res, tmp; | ||
194 | |||
195 | might_sleep(); | ||
196 | |||
197 | __asm__ __volatile__( | ||
198 | " testl %0, %0\n" | ||
199 | " jz 3f\n" | ||
200 | " andl %0,%%ecx\n" | ||
201 | "0: repne; scasb\n" | ||
202 | " setne %%al\n" | ||
203 | " subl %%ecx,%0\n" | ||
204 | " addl %0,%%eax\n" | ||
205 | "1:\n" | ||
206 | ".section .fixup,\"ax\"\n" | ||
207 | "2: xorl %%eax,%%eax\n" | ||
208 | " jmp 1b\n" | ||
209 | "3: movb $1,%%al\n" | ||
210 | " jmp 1b\n" | ||
211 | ".previous\n" | ||
212 | ".section __ex_table,\"a\"\n" | ||
213 | " .align 4\n" | ||
214 | " .long 0b,2b\n" | ||
215 | ".previous" | ||
216 | :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) | ||
217 | :"0" (n), "1" (s), "2" (0), "3" (mask) | ||
218 | :"cc"); | ||
219 | return res & mask; | ||
220 | } | ||
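The mask trick above avoids a branch for invalid pointers: __addr_ok(s) is 1 for a plausible user address, so mask = -__addr_ok(s) is either all ones or all zeros, and "res & mask" collapses the result to 0 (the error value) without a conditional. A C sketch of the same idea, with an addr_ok parameter standing in for __addr_ok(s), for illustration only:

static long strnlen_user_ref(const char *s, long n, int addr_ok)
{
	unsigned long mask = -(unsigned long)addr_ok;	/* ~0UL if ok, 0UL if not */
	long res = 0;

	while (res < n && s[res])		/* repne scasb over at most n bytes */
		res++;
	if (res < n)
		res++;				/* count the terminating NUL */
	else
		res = n + 1;			/* no NUL within n bytes: return > n */
	return res & mask;			/* forced to 0 for a bad address */
}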
221 | |||
222 | #ifdef CONFIG_X86_INTEL_USERCOPY | ||
223 | static unsigned long | ||
224 | __copy_user_intel(void __user *to, const void *from, unsigned long size) | ||
225 | { | ||
226 | int d0, d1; | ||
227 | __asm__ __volatile__( | ||
228 | " .align 2,0x90\n" | ||
229 | "1: movl 32(%4), %%eax\n" | ||
230 | " cmpl $67, %0\n" | ||
231 | " jbe 3f\n" | ||
232 | "2: movl 64(%4), %%eax\n" | ||
233 | " .align 2,0x90\n" | ||
234 | "3: movl 0(%4), %%eax\n" | ||
235 | "4: movl 4(%4), %%edx\n" | ||
236 | "5: movl %%eax, 0(%3)\n" | ||
237 | "6: movl %%edx, 4(%3)\n" | ||
238 | "7: movl 8(%4), %%eax\n" | ||
239 | "8: movl 12(%4),%%edx\n" | ||
240 | "9: movl %%eax, 8(%3)\n" | ||
241 | "10: movl %%edx, 12(%3)\n" | ||
242 | "11: movl 16(%4), %%eax\n" | ||
243 | "12: movl 20(%4), %%edx\n" | ||
244 | "13: movl %%eax, 16(%3)\n" | ||
245 | "14: movl %%edx, 20(%3)\n" | ||
246 | "15: movl 24(%4), %%eax\n" | ||
247 | "16: movl 28(%4), %%edx\n" | ||
248 | "17: movl %%eax, 24(%3)\n" | ||
249 | "18: movl %%edx, 28(%3)\n" | ||
250 | "19: movl 32(%4), %%eax\n" | ||
251 | "20: movl 36(%4), %%edx\n" | ||
252 | "21: movl %%eax, 32(%3)\n" | ||
253 | "22: movl %%edx, 36(%3)\n" | ||
254 | "23: movl 40(%4), %%eax\n" | ||
255 | "24: movl 44(%4), %%edx\n" | ||
256 | "25: movl %%eax, 40(%3)\n" | ||
257 | "26: movl %%edx, 44(%3)\n" | ||
258 | "27: movl 48(%4), %%eax\n" | ||
259 | "28: movl 52(%4), %%edx\n" | ||
260 | "29: movl %%eax, 48(%3)\n" | ||
261 | "30: movl %%edx, 52(%3)\n" | ||
262 | "31: movl 56(%4), %%eax\n" | ||
263 | "32: movl 60(%4), %%edx\n" | ||
264 | "33: movl %%eax, 56(%3)\n" | ||
265 | "34: movl %%edx, 60(%3)\n" | ||
266 | " addl $-64, %0\n" | ||
267 | " addl $64, %4\n" | ||
268 | " addl $64, %3\n" | ||
269 | " cmpl $63, %0\n" | ||
270 | " ja 1b\n" | ||
271 | "35: movl %0, %%eax\n" | ||
272 | " shrl $2, %0\n" | ||
273 | " andl $3, %%eax\n" | ||
274 | " cld\n" | ||
275 | "99: rep; movsl\n" | ||
276 | "36: movl %%eax, %0\n" | ||
277 | "37: rep; movsb\n" | ||
278 | "100:\n" | ||
279 | ".section .fixup,\"ax\"\n" | ||
280 | "101: lea 0(%%eax,%0,4),%0\n" | ||
281 | " jmp 100b\n" | ||
282 | ".previous\n" | ||
283 | ".section __ex_table,\"a\"\n" | ||
284 | " .align 4\n" | ||
285 | " .long 1b,100b\n" | ||
286 | " .long 2b,100b\n" | ||
287 | " .long 3b,100b\n" | ||
288 | " .long 4b,100b\n" | ||
289 | " .long 5b,100b\n" | ||
290 | " .long 6b,100b\n" | ||
291 | " .long 7b,100b\n" | ||
292 | " .long 8b,100b\n" | ||
293 | " .long 9b,100b\n" | ||
294 | " .long 10b,100b\n" | ||
295 | " .long 11b,100b\n" | ||
296 | " .long 12b,100b\n" | ||
297 | " .long 13b,100b\n" | ||
298 | " .long 14b,100b\n" | ||
299 | " .long 15b,100b\n" | ||
300 | " .long 16b,100b\n" | ||
301 | " .long 17b,100b\n" | ||
302 | " .long 18b,100b\n" | ||
303 | " .long 19b,100b\n" | ||
304 | " .long 20b,100b\n" | ||
305 | " .long 21b,100b\n" | ||
306 | " .long 22b,100b\n" | ||
307 | " .long 23b,100b\n" | ||
308 | " .long 24b,100b\n" | ||
309 | " .long 25b,100b\n" | ||
310 | " .long 26b,100b\n" | ||
311 | " .long 27b,100b\n" | ||
312 | " .long 28b,100b\n" | ||
313 | " .long 29b,100b\n" | ||
314 | " .long 30b,100b\n" | ||
315 | " .long 31b,100b\n" | ||
316 | " .long 32b,100b\n" | ||
317 | " .long 33b,100b\n" | ||
318 | " .long 34b,100b\n" | ||
319 | " .long 35b,100b\n" | ||
320 | " .long 36b,100b\n" | ||
321 | " .long 37b,100b\n" | ||
322 | " .long 99b,101b\n" | ||
323 | ".previous" | ||
324 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
325 | : "1"(to), "2"(from), "0"(size) | ||
326 | : "eax", "edx", "memory"); | ||
327 | return size; | ||
328 | } | ||
329 | |||
330 | static unsigned long | ||
331 | __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size) | ||
332 | { | ||
333 | int d0, d1; | ||
334 | __asm__ __volatile__( | ||
335 | " .align 2,0x90\n" | ||
336 | "0: movl 32(%4), %%eax\n" | ||
337 | " cmpl $67, %0\n" | ||
338 | " jbe 2f\n" | ||
339 | "1: movl 64(%4), %%eax\n" | ||
340 | " .align 2,0x90\n" | ||
341 | "2: movl 0(%4), %%eax\n" | ||
342 | "21: movl 4(%4), %%edx\n" | ||
343 | " movl %%eax, 0(%3)\n" | ||
344 | " movl %%edx, 4(%3)\n" | ||
345 | "3: movl 8(%4), %%eax\n" | ||
346 | "31: movl 12(%4),%%edx\n" | ||
347 | " movl %%eax, 8(%3)\n" | ||
348 | " movl %%edx, 12(%3)\n" | ||
349 | "4: movl 16(%4), %%eax\n" | ||
350 | "41: movl 20(%4), %%edx\n" | ||
351 | " movl %%eax, 16(%3)\n" | ||
352 | " movl %%edx, 20(%3)\n" | ||
353 | "10: movl 24(%4), %%eax\n" | ||
354 | "51: movl 28(%4), %%edx\n" | ||
355 | " movl %%eax, 24(%3)\n" | ||
356 | " movl %%edx, 28(%3)\n" | ||
357 | "11: movl 32(%4), %%eax\n" | ||
358 | "61: movl 36(%4), %%edx\n" | ||
359 | " movl %%eax, 32(%3)\n" | ||
360 | " movl %%edx, 36(%3)\n" | ||
361 | "12: movl 40(%4), %%eax\n" | ||
362 | "71: movl 44(%4), %%edx\n" | ||
363 | " movl %%eax, 40(%3)\n" | ||
364 | " movl %%edx, 44(%3)\n" | ||
365 | "13: movl 48(%4), %%eax\n" | ||
366 | "81: movl 52(%4), %%edx\n" | ||
367 | " movl %%eax, 48(%3)\n" | ||
368 | " movl %%edx, 52(%3)\n" | ||
369 | "14: movl 56(%4), %%eax\n" | ||
370 | "91: movl 60(%4), %%edx\n" | ||
371 | " movl %%eax, 56(%3)\n" | ||
372 | " movl %%edx, 60(%3)\n" | ||
373 | " addl $-64, %0\n" | ||
374 | " addl $64, %4\n" | ||
375 | " addl $64, %3\n" | ||
376 | " cmpl $63, %0\n" | ||
377 | " ja 0b\n" | ||
378 | "5: movl %0, %%eax\n" | ||
379 | " shrl $2, %0\n" | ||
380 | " andl $3, %%eax\n" | ||
381 | " cld\n" | ||
382 | "6: rep; movsl\n" | ||
383 | " movl %%eax,%0\n" | ||
384 | "7: rep; movsb\n" | ||
385 | "8:\n" | ||
386 | ".section .fixup,\"ax\"\n" | ||
387 | "9: lea 0(%%eax,%0,4),%0\n" | ||
388 | "16: pushl %0\n" | ||
389 | " pushl %%eax\n" | ||
390 | " xorl %%eax,%%eax\n" | ||
391 | " rep; stosb\n" | ||
392 | " popl %%eax\n" | ||
393 | " popl %0\n" | ||
394 | " jmp 8b\n" | ||
395 | ".previous\n" | ||
396 | ".section __ex_table,\"a\"\n" | ||
397 | " .align 4\n" | ||
398 | " .long 0b,16b\n" | ||
399 | " .long 1b,16b\n" | ||
400 | " .long 2b,16b\n" | ||
401 | " .long 21b,16b\n" | ||
402 | " .long 3b,16b\n" | ||
403 | " .long 31b,16b\n" | ||
404 | " .long 4b,16b\n" | ||
405 | " .long 41b,16b\n" | ||
406 | " .long 10b,16b\n" | ||
407 | " .long 51b,16b\n" | ||
408 | " .long 11b,16b\n" | ||
409 | " .long 61b,16b\n" | ||
410 | " .long 12b,16b\n" | ||
411 | " .long 71b,16b\n" | ||
412 | " .long 13b,16b\n" | ||
413 | " .long 81b,16b\n" | ||
414 | " .long 14b,16b\n" | ||
415 | " .long 91b,16b\n" | ||
416 | " .long 6b,9b\n" | ||
417 | " .long 7b,16b\n" | ||
418 | ".previous" | ||
419 | : "=&c"(size), "=&D" (d0), "=&S" (d1) | ||
420 | : "1"(to), "2"(from), "0"(size) | ||
421 | : "eax", "edx", "memory"); | ||
422 | return size; | ||
423 | } | ||
424 | #else | ||
425 | /* | ||
426 | * Leave these declared but undefined; there should not be any references | ||
427 | * to them. | ||
428 | */ | ||
429 | unsigned long | ||
430 | __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size); | ||
431 | unsigned long | ||
432 | __copy_user_intel(void __user *to, const void *from, unsigned long size); | ||
433 | #endif /* CONFIG_X86_INTEL_USERCOPY */ | ||
434 | |||
435 | /* Generic arbitrary sized copy. */ | ||
436 | #define __copy_user(to,from,size) \ | ||
437 | do { \ | ||
438 | int __d0, __d1, __d2; \ | ||
439 | __asm__ __volatile__( \ | ||
440 | " cmp $7,%0\n" \ | ||
441 | " jbe 1f\n" \ | ||
442 | " movl %1,%0\n" \ | ||
443 | " negl %0\n" \ | ||
444 | " andl $7,%0\n" \ | ||
445 | " subl %0,%3\n" \ | ||
446 | "4: rep; movsb\n" \ | ||
447 | " movl %3,%0\n" \ | ||
448 | " shrl $2,%0\n" \ | ||
449 | " andl $3,%3\n" \ | ||
450 | " .align 2,0x90\n" \ | ||
451 | "0: rep; movsl\n" \ | ||
452 | " movl %3,%0\n" \ | ||
453 | "1: rep; movsb\n" \ | ||
454 | "2:\n" \ | ||
455 | ".section .fixup,\"ax\"\n" \ | ||
456 | "5: addl %3,%0\n" \ | ||
457 | " jmp 2b\n" \ | ||
458 | "3: lea 0(%3,%0,4),%0\n" \ | ||
459 | " jmp 2b\n" \ | ||
460 | ".previous\n" \ | ||
461 | ".section __ex_table,\"a\"\n" \ | ||
462 | " .align 4\n" \ | ||
463 | " .long 4b,5b\n" \ | ||
464 | " .long 0b,3b\n" \ | ||
465 | " .long 1b,2b\n" \ | ||
466 | ".previous" \ | ||
467 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | ||
468 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | ||
469 | : "memory"); \ | ||
470 | } while (0) | ||
471 | |||
472 | #define __copy_user_zeroing(to,from,size) \ | ||
473 | do { \ | ||
474 | int __d0, __d1, __d2; \ | ||
475 | __asm__ __volatile__( \ | ||
476 | " cmp $7,%0\n" \ | ||
477 | " jbe 1f\n" \ | ||
478 | " movl %1,%0\n" \ | ||
479 | " negl %0\n" \ | ||
480 | " andl $7,%0\n" \ | ||
481 | " subl %0,%3\n" \ | ||
482 | "4: rep; movsb\n" \ | ||
483 | " movl %3,%0\n" \ | ||
484 | " shrl $2,%0\n" \ | ||
485 | " andl $3,%3\n" \ | ||
486 | " .align 2,0x90\n" \ | ||
487 | "0: rep; movsl\n" \ | ||
488 | " movl %3,%0\n" \ | ||
489 | "1: rep; movsb\n" \ | ||
490 | "2:\n" \ | ||
491 | ".section .fixup,\"ax\"\n" \ | ||
492 | "5: addl %3,%0\n" \ | ||
493 | " jmp 6f\n" \ | ||
494 | "3: lea 0(%3,%0,4),%0\n" \ | ||
495 | "6: pushl %0\n" \ | ||
496 | " pushl %%eax\n" \ | ||
497 | " xorl %%eax,%%eax\n" \ | ||
498 | " rep; stosb\n" \ | ||
499 | " popl %%eax\n" \ | ||
500 | " popl %0\n" \ | ||
501 | " jmp 2b\n" \ | ||
502 | ".previous\n" \ | ||
503 | ".section __ex_table,\"a\"\n" \ | ||
504 | " .align 4\n" \ | ||
505 | " .long 4b,5b\n" \ | ||
506 | " .long 0b,3b\n" \ | ||
507 | " .long 1b,6b\n" \ | ||
508 | ".previous" \ | ||
509 | : "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \ | ||
510 | : "3"(size), "0"(size), "1"(to), "2"(from) \ | ||
511 | : "memory"); \ | ||
512 | } while (0) | ||
513 | |||
514 | |||
515 | unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n) | ||
516 | { | ||
517 | BUG_ON((long) n < 0); | ||
518 | #ifndef CONFIG_X86_WP_WORKS_OK | ||
519 | if (unlikely(boot_cpu_data.wp_works_ok == 0) && | ||
520 | ((unsigned long )to) < TASK_SIZE) { | ||
521 | /* | ||
522 | * CPU does not honor the WP bit when writing | ||
523 | * from supervisory mode, and due to preemption or SMP, | ||
524 | * the page tables can change at any time. | ||
525 | * Do it manually. Manfred <manfred@colorfullife.com> | ||
526 | */ | ||
527 | while (n) { | ||
528 | unsigned long offset = ((unsigned long)to)%PAGE_SIZE; | ||
529 | unsigned long len = PAGE_SIZE - offset; | ||
530 | int retval; | ||
531 | struct page *pg; | ||
532 | void *maddr; | ||
533 | |||
534 | if (len > n) | ||
535 | len = n; | ||
536 | |||
537 | survive: | ||
538 | down_read(¤t->mm->mmap_sem); | ||
539 | retval = get_user_pages(current, current->mm, | ||
540 | (unsigned long )to, 1, 1, 0, &pg, NULL); | ||
541 | |||
542 | if (retval == -ENOMEM && current->pid == 1) { | ||
543 | up_read(¤t->mm->mmap_sem); | ||
544 | blk_congestion_wait(WRITE, HZ/50); | ||
545 | goto survive; | ||
546 | } | ||
547 | |||
548 | if (retval != 1) { | ||
549 | up_read(¤t->mm->mmap_sem); | ||
550 | break; | ||
551 | } | ||
552 | |||
553 | maddr = kmap_atomic(pg, KM_USER0); | ||
554 | memcpy(maddr + offset, from, len); | ||
555 | kunmap_atomic(maddr, KM_USER0); | ||
556 | set_page_dirty_lock(pg); | ||
557 | put_page(pg); | ||
558 | up_read(¤t->mm->mmap_sem); | ||
559 | |||
560 | from += len; | ||
561 | to += len; | ||
562 | n -= len; | ||
563 | } | ||
564 | return n; | ||
565 | } | ||
566 | #endif | ||
567 | if (movsl_is_ok(to, from, n)) | ||
568 | __copy_user(to, from, n); | ||
569 | else | ||
570 | n = __copy_user_intel(to, from, n); | ||
571 | return n; | ||
572 | } | ||
573 | |||
574 | unsigned long | ||
575 | __copy_from_user_ll(void *to, const void __user *from, unsigned long n) | ||
576 | { | ||
577 | BUG_ON((long)n < 0); | ||
578 | if (movsl_is_ok(to, from, n)) | ||
579 | __copy_user_zeroing(to, from, n); | ||
580 | else | ||
581 | n = __copy_user_zeroing_intel(to, from, n); | ||
582 | return n; | ||
583 | } | ||
584 | |||
585 | /** | ||
586 | * copy_to_user: - Copy a block of data into user space. | ||
587 | * @to: Destination address, in user space. | ||
588 | * @from: Source address, in kernel space. | ||
589 | * @n: Number of bytes to copy. | ||
590 | * | ||
591 | * Context: User context only. This function may sleep. | ||
592 | * | ||
593 | * Copy data from kernel space to user space. | ||
594 | * | ||
595 | * Returns number of bytes that could not be copied. | ||
596 | * On success, this will be zero. | ||
597 | */ | ||
598 | unsigned long | ||
599 | copy_to_user(void __user *to, const void *from, unsigned long n) | ||
600 | { | ||
601 | might_sleep(); | ||
602 | BUG_ON((long) n < 0); | ||
603 | if (access_ok(VERIFY_WRITE, to, n)) | ||
604 | n = __copy_to_user(to, from, n); | ||
605 | return n; | ||
606 | } | ||
607 | EXPORT_SYMBOL(copy_to_user); | ||
608 | |||
609 | /** | ||
610 | * copy_from_user: - Copy a block of data from user space. | ||
611 | * @to: Destination address, in kernel space. | ||
612 | * @from: Source address, in user space. | ||
613 | * @n: Number of bytes to copy. | ||
614 | * | ||
615 | * Context: User context only. This function may sleep. | ||
616 | * | ||
617 | * Copy data from user space to kernel space. | ||
618 | * | ||
619 | * Returns number of bytes that could not be copied. | ||
620 | * On success, this will be zero. | ||
621 | * | ||
622 | * If some data could not be copied, this function will pad the copied | ||
623 | * data to the requested size using zero bytes. | ||
624 | */ | ||
625 | unsigned long | ||
626 | copy_from_user(void *to, const void __user *from, unsigned long n) | ||
627 | { | ||
628 | might_sleep(); | ||
629 | BUG_ON((long) n < 0); | ||
630 | if (access_ok(VERIFY_READ, from, n)) | ||
631 | n = __copy_from_user(to, from, n); | ||
632 | else | ||
633 | memset(to, 0, n); | ||
634 | return n; | ||
635 | } | ||
636 | EXPORT_SYMBOL(copy_from_user); | ||
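Typical use of the entry points above, sketched as a hypothetical driver helper; the struct and function names are made up, and the error code assumes the usual <asm/errno.h> definitions.

struct demo_config {
	int threshold;
	int flags;
};

static int demo_set_config(struct demo_config *cfg, const void __user *ubuf)
{
	/* copy_from_user() returns the number of bytes it could NOT copy */
	if (copy_from_user(cfg, ubuf, sizeof(*cfg)))
		return -EFAULT;			/* partial or failed copy */
	return 0;
}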