Linux-2.6.12-rc2 (v2.6.12-rc2)

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/arm/lib
49 files changed, 5136 insertions, 0 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
new file mode 100644
index 000000000000..c0e65833ffc4
--- /dev/null
+++ b/arch/arm/lib/Makefile
@@ -0,0 +1,29 @@
+#
+# linux/arch/arm/lib/Makefile
+#
+# Copyright (C) 1995-2000 Russell King
+#
+# Objects named in lib-y are always listed for this directory.  Each
+# lib-$(CONFIG_...) line further down appends to lib-y only when the
+# named option expands to "y".
+#
+lib-y           := backtrace.o changebit.o csumipv6.o csumpartial.o   \
+                   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
+                   copy_page.o delay.o findbit.o memchr.o memcpy.o    \
+                   memset.o memzero.o setbit.o strncpy_from_user.o    \
+                   strnlen_user.o strchr.o strrchr.o testchangebit.o  \
+                   testclearbit.o testsetbit.o uaccess.o getuser.o    \
+                   putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o   \
+                   ucmpdi2.o udivdi3.o lib1funcs.o div64.o            \
+                   io-readsb.o io-writesb.o io-readsl.o io-writesl.o
+# Exactly one io-readsw/io-writesw pair is selected: the -armv3 objects
+# when CONFIG_CPU_32v3 is "y", the -armv4 objects otherwise.
+ifeq ($(CONFIG_CPU_32v3),y)
+  lib-y += io-readsw-armv3.o io-writesw-armv3.o
+else
+  lib-y += io-readsw-armv4.o io-writesw-armv4.o
+endif
+# Machine-specific additions.
+lib-$(CONFIG_ARCH_RPC)          += ecard.o io-acorn.o floppydma.o
+lib-$(CONFIG_ARCH_CLPS7500)     += io-acorn.o
+lib-$(CONFIG_ARCH_L7200)        += io-acorn.o
+lib-$(CONFIG_ARCH_SHARK)        += io-shark.o
+# csumpartialcopy.S pulls in csumpartialcopygeneric.S with #include, so
+# its object must be rebuilt when the generic file changes.
+# csumpartialcopyuser.S presumably does the same - confirm.
+$(obj)/csumpartialcopy.o:       $(obj)/csumpartialcopygeneric.S
+$(obj)/csumpartialcopyuser.o:   $(obj)/csumpartialcopygeneric.S
diff --git a/arch/arm/lib/ashldi3.c b/arch/arm/lib/ashldi3.c
new file mode 100644
index 000000000000..130f5a839669
--- /dev/null
+++ b/arch/arm/lib/ashldi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/* Left shift of a DItype by B bits, assembled from shifts on the two
+   word-sized halves exposed by DIunion (s.low / s.high).  */
+DItype
+__ashldi3 (DItype u, word_type b)
+{
+  DIunion src;
+  DIunion res;
+  word_type rem;
+  /* A zero count returns the operand unchanged.  */
+  if (b == 0)
+    return u;
+  src.ll = u;
+  /* rem = word width minus count; it stays positive while the count
+     is smaller than one word.  */
+  rem = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (rem > 0)
+    {
+      /* Bits leaving the top of the low word enter the bottom of the
+         high word.  */
+      USItype spill = (USItype)src.s.low >> rem;
+      res.s.low = (USItype)src.s.low << b;
+      res.s.high = ((USItype)src.s.high << b) | spill;
+    }
+  else
+    {
+      /* Count of a word or more: the low word empties and the high
+         word is the old low word shifted by the excess.  */
+      res.s.high = (USItype)src.s.low << -rem;
+      res.s.low = 0;
+    }
+  return res.ll;
+}
diff --git a/arch/arm/lib/ashrdi3.c b/arch/arm/lib/ashrdi3.c
new file mode 100644
index 000000000000..71625d218f8d
--- /dev/null
+++ b/arch/arm/lib/ashrdi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/* Right shift of a DItype by B bits, assembled from shifts on the two
+   word-sized halves exposed by DIunion (s.low / s.high).  The high
+   half is shifted without a cast, so it keeps whatever sign handling
+   the s.high field type (declared in gcclib.h) gives it.  */
+DItype
+__ashrdi3 (DItype u, word_type b)
+{
+  DIunion src;
+  DIunion res;
+  word_type rem;
+  /* A zero count returns the operand unchanged.  */
+  if (b == 0)
+    return u;
+  src.ll = u;
+  /* rem = word width minus count; it stays positive while the count
+     is smaller than one word.  */
+  rem = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (rem > 0)
+    {
+      /* Bits leaving the bottom of the high word enter the top of
+         the low word.  */
+      USItype spill = (USItype)src.s.high << rem;
+      res.s.high = src.s.high >> b;
+      res.s.low = ((USItype)src.s.low >> b) | spill;
+    }
+  else
+    {
+      /* Count of a word or more: the low word is the old high word
+         shifted by the excess, the high word becomes 1..1 or 0..0.  */
+      res.s.low = src.s.high >> -rem;
+      res.s.high = src.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1);
+    }
+  return res.ll;
+}
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
new file mode 100644
index 000000000000..68a21c0f3f52
--- /dev/null
+++ b/arch/arm/lib/backtrace.S
@@ -0,0 +1,157 @@
+/*
+ *  linux/arch/arm/lib/backtrace.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 27/03/03 Ian Molton Clean up CONFIG_CPU
+ *
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+@ fp is 0 or stack frame
+#define frame   r4
+#define next    r5
+#define save    r6
+#define mask    r7
+#define offset  r8
+ENTRY(__backtrace)                              /* backtrace starting at the current fp */
+                mov     r1, #0x10               /* r1 = 0x10, so the tst below takes the "ne" side */
+                mov     r0, fp                  /* r0 = current frame pointer; falls through */
+ENTRY(c_backtrace)                              /* in: r0 = frame pointer to start at, r1 = flags (bit 0x10 tested) */
+#ifndef CONFIG_FRAME_POINTER
+                mov     pc, lr                  /* built without frame pointers: return at once */
+#else
+                stmfd   sp!, {r4 - r8, lr}      @ Save an extra register so we have a location...
+                tst     r1, #0x10               @ 26 or 32-bit?
+                moveq   mask, #0xfc000003       /* bits that must be clear in a frame pointer ... */
+                movne   mask, #0                /* ... none at all when bit 0x10 of r1 is set */
+                tst     mask, r0                /* any masked bit set in the frame pointer? */
+                movne   r0, #0                  /* then treat it as "no frame" */
+                movs    frame, r0
+1:              moveq   r0, #-2                 /* no usable frame: return -2 */
+                LOADREGS(eqfd, sp!, {r4 - r8, pc})
+2:              stmfd   sp!, {pc}               @ calculate offset of PC in STMIA instruction
+                ldr     r0, [sp], #4
+                adr     r1, 2b - 4
+                sub     offset, r0, r1          /* offset = value stored for pc - (address of 2: - 4) */
+3:              tst     frame, mask             @ Check for address exceptions...
+                bne     1b
+1001:           ldr     next, [frame, #-12]     @ get fp
+1002:           ldr     r2, [frame, #-4]        @ get lr
+1003:           ldr     r3, [frame, #0]         @ get pc
+                sub     save, r3, offset        @ Correct PC for prefetching
+                bic     save, save, mask
+1004:           ldr     r1, [save, #0]          @ get instruction at function
+                mov     r1, r1, lsr #10         /* compare only the bits above bit 9 */
+                ldr     r3, .Ldsi+4             /* pattern from the second .Ldsi word */
+                teq     r1, r3
+                subeq   save, save, #4          /* on a match, step save back one instruction */
+                mov     r0, save                /* r0 = save for dump_backtrace_entry */
+                bic     r1, r2, mask            /* r1 = saved lr with the mask bits cleared */
+                bl      dump_backtrace_entry
+                ldr     r0, [frame, #-8]        @ get sp
+                sub     r0, r0, #4              /* r0 = saved sp - 4: first word .Ldumpstm reads */
+1005:           ldr     r1, [save, #4]          @ get instruction at function+4
+                mov     r3, r1, lsr #10
+                ldr     r2, .Ldsi+4
+                teq     r3, r2                  @ Check for stmia sp!, {args}
+                addeq   save, save, #4          @ next instruction
+                bleq    .Ldumpstm
+                sub     r0, frame, #16          /* r0 = frame - 16: first word .Ldumpstm reads */
+1006:           ldr     r1, [save, #4]          @ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction
+                mov     r3, r1, lsr #10
+                ldr     r2, .Ldsi               /* pattern from the first .Ldsi word */
+                teq     r3, r2
+                bleq    .Ldumpstm
+                /*
+                 * A zero next framepointer means we're done.
+                 */
+                teq     next, #0
+                LOADREGS(eqfd, sp!, {r4 - r8, pc})
+                /*
+                 * The next framepointer must be above the
+                 * current framepointer.
+                 */
+                cmp     next, frame
+                mov     frame, next             /* mov (no "s") keeps the flags from cmp */
+                bhi     3b
+                b       1007f                   /* not above: report it and stop */
+/*
+ * Fixup for LDMDB
+ *
+ * Reached from the loop above when the next frame pointer is not above
+ * the current one, and listed in __ex_table below as the fixup for the
+ * loads at 1001-1006.  Calls printk with .Lbad and the value currently
+ * in frame, then returns through the registers saved on entry.
+ */
+                .section .fixup,"ax"
+                .align  0
+1007:           ldr     r0, =.Lbad
+                mov     r1, frame
+                bl      printk
+                LOADREGS(fd, sp!, {r4 - r8, pc})
+                .ltorg
+                .previous
+                
+                .section __ex_table,"a"
+                .align  3
+                .long   1001b, 1007b
+                .long   1002b, 1007b
+                .long   1003b, 1007b
+                .long   1004b, 1007b
+                .long   1005b, 1007b
+                .long   1006b, 1007b
+                .previous
+#define instr r4
+#define reg   r5
+#define stack r6
+.Ldumpstm:      stmfd   sp!, {instr, reg, stack, r7, lr}        /* in: r0 = address to read from, r1 = instruction word */
+                mov     stack, r0
+                mov     instr, r1
+                mov     reg, #9                 /* walk register-list bits 9 down to 0 */
+                mov     r7, #0                  /* r7 = values printed on the current line */
+1:              mov     r3, #1
+                tst     instr, r3, lsl reg      /* is this register's bit set in instr? */
+                beq     2f
+                add     r7, r7, #1
+                teq     r7, #4                  /* every fourth value ends the line */
+                moveq   r7, #0
+                moveq   r3, #'\n'
+                movne   r3, #' '
+                ldr     r2, [stack], #-4        /* r2 = value to print; step down one word */
+                mov     r1, reg                 /* r1 = register number */
+                adr     r0, .Lfp                /* r0 = format, r3 = separator chosen above */
+                bl      printk
+2:              subs    reg, reg, #1
+                bpl     1b
+                teq     r7, #0                  /* partly filled line left over? */
+                adrne   r0, .Lcr                /* then finish it with a newline */
+                blne    printk
+                mov     r0, stack               /* out: r0 = updated read address */
+                LOADREGS(fd, sp!, {instr, reg, stack, r7, pc})
+.Lfp:           .asciz  " r%d = %08X%c"
+.Lcr:           .asciz  "\n"
+.Lbad:          .asciz  "Backtrace aborted due to bad frame pointer <%p>\n"
+                .align
+.Ldsi:          .word   0x00e92dd8 >> 2         /* matched against (instruction >> 10) at 1006 */
+                .word   0x00e92d00 >> 2         /* matched against (instruction >> 10) at 1004 and 1005 */
+#endif
diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S
new file mode 100644
index 000000000000..3af45cab70e1
--- /dev/null
+++ b/arch/arm/lib/changebit.S
@@ -0,0 +1,28 @@
+/*
+ *  linux/arch/arm/lib/changebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/* Purpose  : Function to change a bit
+ * Prototype: int change_bit(int bit, void *addr)
+ *
+ * r0 = bit number, r1 = base address.  The byte at addr + (bit >> 3)
+ * has bit (bit & 7) inverted; the load/modify/store sequence sits
+ * between the save_and_disable_irqs and restore_irqs macros.
+ * _change_bit_be XORs the bit number with 0x18 and then falls through
+ * into _change_bit_le.
+ * NOTE(review): nothing in this code sets up a result in r0 although
+ * the prototype above says int - confirm what callers expect.
+ */
+ENTRY(_change_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_change_bit_le)
+                and     r2, r0, #7              /* bit position inside the byte */
+                mov     r3, #1
+                mov     r3, r3, lsl r2          /* r3 = single-bit mask */
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1, r0, lsr #3]    /* byte index = bit number / 8 */
+                eor     r2, r2, r3              /* invert the selected bit */
+                strb    r2, [r1, r0, lsr #3]
+                restore_irqs ip
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S
new file mode 100644
index 000000000000..069a2ce413f0
--- /dev/null
+++ b/arch/arm/lib/clearbit.S
@@ -0,0 +1,31 @@
+/*
+ *  linux/arch/arm/lib/clearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Purpose  : Function to clear a bit
+ * Prototype: int clear_bit(int bit, void *addr)
+ *
+ * r0 = bit number, r1 = base address.  The byte at addr + (bit >> 3)
+ * has bit (bit & 7) cleared; the load/modify/store sequence sits
+ * between the save_and_disable_irqs and restore_irqs macros.
+ * _clear_bit_be XORs the bit number with 0x18 and then falls through
+ * into _clear_bit_le.
+ * NOTE(review): nothing in this code sets up a result in r0 although
+ * the prototype above says int - confirm what callers expect.
+ */
+ENTRY(_clear_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_clear_bit_le)
+                and     r2, r0, #7              /* bit position inside the byte */
+                mov     r3, #1
+                mov     r3, r3, lsl r2          /* r3 = single-bit mask */
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1, r0, lsr #3]    /* byte index = bit number / 8 */
+                bic     r2, r2, r3              /* clear the selected bit */
+                strb    r2, [r1, r0, lsr #3]
+                restore_irqs ip
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
new file mode 100644
index 000000000000..4c38abdbe497
--- /dev/null
+++ b/arch/arm/lib/copy_page.S
@@ -0,0 +1,46 @@
+/*
+ *  linux/arch/arm/lib/copy_page.S
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/constants.h>
+#define COPY_COUNT (PAGE_SZ/64 PLD( -1 ))
+                .text
+                .align  5
+/*
+ * StrongARM optimised copy_page routine
+ * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s)
+ * Note that we probably achieve closer to the 100MB/s target with
+ * the core clock switching.
+ *
+ * r0 = destination (written with stmia r0!), r1 = source (read with
+ * ldmia r1!).  One pass through the loop moves 64 bytes as four
+ * 16-byte load/store pairs; r2 counts the passes.
+ *
+ * COPY_COUNT is PAGE_SZ/64, less one wherever PLD() emits its
+ * argument.  In that configuration the main loop ends with r2 == 0 and
+ * the two PLD() lines after it run one last pass from label 2, which
+ * skips the pld instructions at label 1 - presumably so that nothing
+ * past the source page is preloaded.
+ * NOTE(review): PLD() and PAGE_SZ are defined outside this file;
+ * confirm that PLD() expands to nothing when pld is unavailable.
+ */
+ENTRY(copy_page)
+                stmfd   sp!, {r4, lr}                   @       2
+        PLD(    pld     [r1, #0]                )
+        PLD(    pld     [r1, #32]               )
+                mov     r2, #COPY_COUNT                 @       1
+                ldmia   r1!, {r3, r4, ip, lr}           @       4+1
+1:      PLD(    pld     [r1, #64]               )
+        PLD(    pld     [r1, #96]               )
+2:              stmia   r0!, {r3, r4, ip, lr}           @       4
+                ldmia   r1!, {r3, r4, ip, lr}           @       4+1
+                stmia   r0!, {r3, r4, ip, lr}           @       4
+                ldmia   r1!, {r3, r4, ip, lr}           @       4+1
+                stmia   r0!, {r3, r4, ip, lr}           @       4
+                ldmia   r1!, {r3, r4, ip, lr}           @       4
+                subs    r2, r2, #1                      @       1
+                stmia   r0!, {r3, r4, ip, lr}           @       4
+                ldmgtia r1!, {r3, r4, ip, lr}           @       4
+                bgt     1b                              @       1
+        PLD(    ldmeqia r1!, {r3, r4, ip, lr}   )
+        PLD(    beq     2b                      )
+                LOADREGS(fd, sp!, {r4, pc})             @       3
diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S
new file mode 100644
index 000000000000..7065a20ee8ad
--- /dev/null
+++ b/arch/arm/lib/csumipv6.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/arm/lib/csumipv6.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+ENTRY(__csum_ipv6_magic)                        /* out: r0 = sum of all inputs, added with carry */
+                str     lr, [sp, #-4]!          /* lr is used as a scratch register below */
+                adds    ip, r2, r3              /* start the sum from the r2 and r3 arguments */
+                ldmia   r1, {r1 - r3, lr}       /* four words at [r1] */
+                adcs    ip, ip, r1
+                adcs    ip, ip, r2
+                adcs    ip, ip, r3
+                adcs    ip, ip, lr
+                ldmia   r0, {r0 - r3}           /* four words at [r0] */
+                adcs    r0, ip, r0
+                adcs    r0, r0, r1
+                adcs    r0, r0, r2
+                ldr     r2, [sp, #4]            /* word just above the saved lr - presumably the fifth C argument; confirm */
+                adcs    r0, r0, r3
+                adcs    r0, r0, r2
+                adcs    r0, r0, #0              /* add in the carry left by the last addition */
+                LOADREGS(fd, sp!, {pc})
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
new file mode 100644
index 000000000000..cb5e3708f118
--- /dev/null
+++ b/arch/arm/lib/csumpartial.S
@@ -0,0 +1,137 @@
+/*
+ *  linux/arch/arm/lib/csumpartial.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Function: __u32 csum_partial(const char *src, int len, __u32 sum)
+ * Params  : r0 = buffer, r1 = len, r2 = checksum
+ * Returns : r0 = new checksum
+ *
+ * The entry point pushes {buf, lr}.  Both exits below (.zero and
+ * .done) take that saved buf off the stack and then load pc from the
+ * saved lr.  .done also inspects the saved buf and rotates the result
+ * by 8 bits when the buffer started at an odd address.
+ * .not_aligned is called with blne and returns with mov pc, lr.
+ * NOTE(review): put_byte_0/put_byte_1 are not defined in this file -
+ * presumably byte-lane shifts from asm/assembler.h; confirm.
+ */
+buf     .req    r0
+len     .req    r1
+sum     .req    r2
+td0     .req    r3
+td1     .req    r4      @ save before use
+td2     .req    r5      @ save before use
+td3     .req    lr
+.zero:          mov     r0, sum                 /* len == 0: return the incoming sum unchanged */
+                add     sp, sp, #4              /* discard the saved buf */
+                ldr     pc, [sp], #4
+                /*
+                 * Handle 0 to 7 bytes, with any alignment of source and
+                 * destination pointers.  Note that when we get here, C = 0
+                 */
+.less8:         teq     len, #0                 @ check for zero count
+                beq     .zero
+                /* we must have at least one byte. */
+                tst     buf, #1                 @ odd address?
+                ldrneb  td0, [buf], #1
+                subne   len, len, #1
+                adcnes  sum, sum, td0, put_byte_1
+.less4:         tst     len, #6                 /* any 2-byte units left? */
+                beq     .less8_byte
+                /* we are now half-word aligned */
+.less8_wordlp:
+#if __LINUX_ARM_ARCH__ >= 4
+                ldrh    td0, [buf], #2
+                sub     len, len, #2
+#else
+                ldrb    td0, [buf], #1
+                ldrb    td3, [buf], #1
+                sub     len, len, #2
+#ifndef __ARMEB__
+                orr     td0, td0, td3, lsl #8
+#else
+                orr     td0, td3, td0, lsl #8
+#endif
+#endif
+                adcs    sum, sum, td0
+                tst     len, #6
+                bne     .less8_wordlp
+.less8_byte:    tst     len, #1                 @ odd number of bytes
+                ldrneb  td0, [buf], #1          @ include last byte
+                adcnes  sum, sum, td0, put_byte_0       @ update checksum
+.done:          adc     r0, sum, #0             @ collect up the last carry
+                ldr     td0, [sp], #4           /* buf as it was pushed on entry */
+                tst     td0, #1                 @ check buffer alignment
+                movne   r0, r0, ror #8          @ rotate checksum by 8 bits
+                ldr     pc, [sp], #4            @ return
+.not_aligned:   tst     buf, #1                 @ odd address
+                ldrneb  td0, [buf], #1          @ make even
+                subne   len, len, #1
+                adcnes  sum, sum, td0, put_byte_1       @ update checksum
+                tst     buf, #2                 @ 32-bit aligned?
+#if __LINUX_ARM_ARCH__ >= 4
+                ldrneh  td0, [buf], #2          @ make 32-bit aligned
+                subne   len, len, #2
+#else
+                ldrneb  td0, [buf], #1
+                ldrneb  ip, [buf], #1
+                subne   len, len, #2
+#ifndef __ARMEB__
+                orrne   td0, td0, ip, lsl #8
+#else
+                orrne   td0, ip, td0, lsl #8
+#endif
+#endif
+                adcnes  sum, sum, td0           @ update checksum
+                mov     pc, lr                  /* back to the instruction after the blne */
+ENTRY(csum_partial)
+                stmfd   sp!, {buf, lr}          /* saved buf is read again at .done */
+                cmp     len, #8                 @ Ensure that we have at least
+                blo     .less8                  @ 8 bytes to copy.
+                adds    sum, sum, #0            @ C = 0
+                tst     buf, #3                 @ Test destination alignment
+                blne    .not_aligned            @ align destination, return here
+1:              bics    ip, len, #31            /* ip = len with the low five bits cleared */
+                beq     3f
+                stmfd   sp!, {r4 - r5}          /* td1/td2 are r4/r5 */
+2:              ldmia   buf!, {td0, td1, td2, td3}
+                adcs    sum, sum, td0
+                adcs    sum, sum, td1
+                adcs    sum, sum, td2
+                adcs    sum, sum, td3
+                ldmia   buf!, {td0, td1, td2, td3}
+                adcs    sum, sum, td0
+                adcs    sum, sum, td1
+                adcs    sum, sum, td2
+                adcs    sum, sum, td3
+                sub     ip, ip, #32             /* only ip counts down here; len is left alone */
+                teq     ip, #0
+                bne     2b
+                ldmfd   sp!, {r4 - r5}
+3:              tst     len, #0x1c              @ should not change C
+                beq     .less4
+4:              ldr     td0, [buf], #4
+                sub     len, len, #4
+                adcs    sum, sum, td0
+                tst     len, #0x1c
+                bne     4b
+                b       .less4                  /* remaining halfwords/bytes, then .done */
diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S
new file mode 100644
index 000000000000..990ee63b2465
--- /dev/null
+++ b/arch/arm/lib/csumpartialcopy.S
@@ -0,0 +1,52 @@
+/*
+ *  linux/arch/arm/lib/csumpartialcopy.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum)
+ * Params  : r0 = src, r1 = dst, r2 = len, r3 = checksum
+ * Returns : r0 = new checksum
+ *
+ * This file supplies only the pieces that csumpartialcopygeneric.S
+ * uses by name: save_regs/load_regs for the register save area, the
+ * loadNb/loadNl macros that read N bytes/words from [r0] with
+ * post-increment, and FN_ENTRY, which names the entry point.  The
+ * code itself comes from the #include on the last line.
+ */
+                .macro  save_regs
+                stmfd   sp!, {r1, r4 - r8, fp, ip, lr, pc}
+                .endm
+                .macro  load_regs,flags
+                LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc})
+                .endm
+                .macro  load1b, reg1
+                ldrb    \reg1, [r0], #1
+                .endm
+                .macro  load2b, reg1, reg2
+                ldrb    \reg1, [r0], #1
+                ldrb    \reg2, [r0], #1
+                .endm
+                .macro  load1l, reg1
+                ldr     \reg1, [r0], #4
+                .endm
+                .macro  load2l, reg1, reg2
+                ldr     \reg1, [r0], #4
+                ldr     \reg2, [r0], #4
+                .endm
+                .macro  load4l, reg1, reg2, reg3, reg4
+                ldmia   r0!, {\reg1, \reg2, \reg3, \reg4}
+                .endm
+#define FN_ENTRY        ENTRY(csum_partial_copy_nocheck)
+#include "csumpartialcopygeneric.S"
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
new file mode 100644
index 000000000000..d3a2f4667db4
--- /dev/null
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -0,0 +1,331 @@
+/*
+ *  linux/arch/arm/lib/csumpartialcopygeneric.S
+ *
+ *  Copyright (C) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/*
+ * unsigned int
+ * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
+ *  r0 = src, r1 = dst, r2 = len, r3 = sum
+ *  Returns : r0 = checksum
+ *
+ * Note that 'tst' and 'teq' preserve the carry flag.
+ *
+ * This file is meant to be #included: it uses save_regs, load_regs,
+ * load1b, load2b, load1l, load2l, load4l and FN_ENTRY, none of which
+ * are defined here.
+ * NOTE(review): put_byte_N, get_byte_N, push and pull are also defined
+ * elsewhere - presumably endian-dependent shifts from asm/assembler.h;
+ * confirm.
+ */
+src     .req    r0
+dst     .req    r1
+len     .req    r2
+sum     .req    r3
+.zero:          mov     r0, sum                 /* len == 0: return the incoming sum */
+                load_regs       ea
+                /*
+                 * Align an unaligned destination pointer.  We know that
+                 * we have >= 8 bytes here, so we don't need to check
+                 * the length.  Note that the source pointer hasn't been
+                 * aligned yet.
+                 *
+                 * Called with blne from the entry point; returns with
+                 * mov pc, lr.  Copies and sums one byte when dst is
+                 * odd, then two more bytes unless dst is already word
+                 * aligned.
+                 */
+.dst_unaligned: tst     dst, #1
+                beq     .dst_16bit
+                load1b  ip
+                sub     len, len, #1
+                adcs    sum, sum, ip, put_byte_1        @ update checksum
+                strb    ip, [dst], #1
+                tst     dst, #2
+                moveq   pc, lr                  @ dst is now 32bit aligned
+.dst_16bit:     load2b  r8, ip
+                sub     len, len, #2
+                adcs    sum, sum, r8, put_byte_0
+                strb    r8, [dst], #1
+                adcs    sum, sum, ip, put_byte_1
+                strb    ip, [dst], #1
+                mov     pc, lr                  @ dst is now 32bit aligned
+                /*
+                 * Handle 0 to 7 bytes, with any alignment of source and
+                 * destination pointers.  Note that when we get here, C = 0
+                 *
+                 * Everything is moved with byte loads and stores: one
+                 * byte first if dst is odd, then byte pairs while
+                 * len & 6 is non-zero, then a final byte if len is odd.
+                 */
+.less8:         teq     len, #0                 @ check for zero count
+                beq     .zero
+                /* we must have at least one byte. */
+                tst     dst, #1                 @ dst 16-bit aligned
+                beq     .less8_aligned
+                /* Align dst */
+                load1b  ip
+                sub     len, len, #1
+                adcs    sum, sum, ip, put_byte_1        @ update checksum
+                strb    ip, [dst], #1
+                tst     len, #6
+                beq     .less8_byteonly
+1:              load2b  r8, ip
+                sub     len, len, #2
+                adcs    sum, sum, r8, put_byte_0
+                strb    r8, [dst], #1
+                adcs    sum, sum, ip, put_byte_1
+                strb    ip, [dst], #1
+.less8_aligned: tst     len, #6
+                bne     1b
+.less8_byteonly:
+                tst     len, #1
+                beq     .done
+                load1b  r8
+                adcs    sum, sum, r8, put_byte_0        @ update checksum
+                strb    r8, [dst], #1
+                b       .done
+FN_ENTRY
+                mov     ip, sp                  /* ip = sp at entry; save_regs is expanded next */
+                save_regs
+                sub     fp, ip, #4              /* fp = entry sp - 4, the base register handed to load_regs */
+                cmp     len, #8                 @ Ensure that we have at least
+                blo     .less8                  @ 8 bytes to copy.
+                adds    sum, sum, #0            @ C = 0
+                tst     dst, #3                 @ Test destination alignment
+                blne    .dst_unaligned          @ align destination, return here
+                /*
+                 * Ok, the dst pointer is now 32bit aligned, and we know
+                 * that we must have more than 4 bytes to copy.  Note
+                 * that C contains the carry from the dst alignment above.
+                 */
+                tst     src, #3                 @ Test source alignment
+                bne     .src_not_aligned
+                /* Routine for src & dst aligned */
+                bics    ip, len, #15            /* ip = len with the low four bits cleared */
+                beq     2f
+1:              load4l  r4, r5, r6, r7
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12            /* 4, 8 or 12 bytes of whole words left? */
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r4, r5
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                tst     ip, #4
+                beq     4f
+3:              load1l  r4
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+4:              ands    len, len, #3            /* 1-3 trailing bytes left? */
+                beq     .done
+                load1l  r4                      /* a whole word is loaded; only len bytes of it are used */
+                tst     len, #2
+                mov     r5, r4, get_byte_0
+                beq     .exit
+                adcs    sum, sum, r4, push #16
+                strb    r5, [dst], #1
+                mov     r5, r4, get_byte_1
+                strb    r5, [dst], #1
+                mov     r5, r4, get_byte_2
+.exit:          tst     len, #1                 /* r5 holds the last byte when len is odd */
+                strneb  r5, [dst], #1
+                andne   r5, r5, #255
+                adcnes  sum, sum, r5, put_byte_0
+                /*
+                 * If the dst pointer was not 16-bit aligned, we
+                 * need to rotate the checksum here to get around
+                 * the inefficient byte manipulations in the
+                 * architecture independent code.
+                 */
+.done:          adc     r0, sum, #0             /* add in the last carry */
+                ldr     sum, [sp, #0]           @ dst
+                tst     sum, #1
+                movne   r0, r0, ror #8
+                load_regs       ea
+.src_not_aligned:                               /* dst is word aligned here, src is not */
+                adc     sum, sum, #0            @ include C from dst alignment
+                and     ip, src, #3             /* ip = src & 3, non-zero on this path */
+                bic     src, src, #3            /* continue from the word-aligned address at or below src */
+                load1l  r5
+                cmp     ip, #2
+                beq     .src2_aligned
+                bhi     .src3_aligned
+                mov     r4, r5, pull #8         @ C = 0
+                bics    ip, len, #15            /* ip = len with the low four bits cleared */
+                beq     2f
+1:              load4l  r5, r6, r7, r8
+                orr     r4, r4, r5, push #24    /* each output word joins the carried-over part with the next source word */
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                mov     r7, r7, pull #8
+                orr     r7, r7, r8, push #24
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                mov     r4, r8, pull #8         /* carry the rest of r8 into the next step */
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12            /* 4, 8 or 12 bytes of whole words left? */
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r5, r6
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                mov     r4, r6, pull #8
+                tst     ip, #4
+                beq     4f
+3:              load1l  r5
+                orr     r4, r4, r5, push #24
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+                mov     r4, r5, pull #8
+4:              ands    len, len, #3            /* 1-3 trailing bytes left? they are taken from r4 */
+                beq     .done
+                mov     r5, r4, get_byte_0
+                tst     len, #2
+                beq     .exit
+                adcs    sum, sum, r4, push #16
+                strb    r5, [dst], #1
+                mov     r5, r4, get_byte_1
+                strb    r5, [dst], #1
+                mov     r5, r4, get_byte_2
+                b       .exit                   /* .exit stores and sums r5 when len is odd */
+.src2_aligned:  mov     r4, r5, pull #16
+                adds    sum, sum, #0
+                bics    ip, len, #15
+                beq     2f
+1:              load4l  r5, r6, r7, r8
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                mov     r7, r7, pull #16
+                orr     r7, r7, r8, push #16
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                mov     r4, r8, pull #16
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r5, r6
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                mov     r4, r6, pull #16
+                tst     ip, #4
+                beq     4f
+3:              load1l  r5
+                orr     r4, r4, r5, push #16
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+                mov     r4, r5, pull #16
+4:              ands    len, len, #3
+                beq     .done
+                mov     r5, r4, get_byte_0
+                tst     len, #2
+                beq     .exit
+                adcs    sum, sum, r4
+                strb    r5, [dst], #1
+                mov     r5, r4, get_byte_1
+                strb    r5, [dst], #1
+                tst     len, #1
+                beq     .done
+                load1b  r5
+                b       .exit
+.src3_aligned:  mov     r4, r5, pull #24
+                adds    sum, sum, #0
+                bics    ip, len, #15
+                beq     2f
+1:              load4l  r5, r6, r7, r8
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                mov     r7, r7, pull #24
+                orr     r7, r7, r8, push #8
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                mov     r4, r8, pull #24
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r5, r6
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                mov     r4, r6, pull #24
+                tst     ip, #4
+                beq     4f
+3:              load1l  r5
+                orr     r4, r4, r5, push #8
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+                mov     r4, r5, pull #24
+4:              ands    len, len, #3
+                beq     .done
+                mov     r5, r4, get_byte_0
+                tst     len, #2
+                beq     .exit
+                strb    r5, [dst], #1
+                adcs    sum, sum, r4
+                load1l  r4
+                mov     r5, r4, get_byte_0
+                strb    r5, [dst], #1
+                adcs    sum, sum, r4, push #24
+                mov     r5, r4, get_byte_1
+                b       .exit
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
new file mode 100644
index 000000000000..46a2dc962e9d
--- /dev/null
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -0,0 +1,104 @@
+/*
+ *  linux/arch/arm/lib/csumpartialcopyuser.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 27/03/03 Ian Molton Clean up CONFIG_CPU
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/constants.h>
+                .text
+/*
+ * Register save/restore helpers for the generic copy code included below.
+ * save_regs pushes r1, r2, r4-r8, fp, ip, lr and pc with one store-multiple
+ * (full-descending, sp written back).
+ * load_regs reloads r1, r2, r4-r8, fp, sp and pc relative to fp; because pc
+ * is in the list it also returns.  The addressing-mode suffix of the ldm is
+ * passed in by the user of the macro through the flags argument.
+ * NOTE(review): fp is presumably set up by the included generic code so that
+ * it points into the frame built by save_regs - confirm there.
+ */
+                .macro  save_regs
+                stmfd   sp!, {r1 - r2, r4 - r8, fp, ip, lr, pc}
+                .endm
+                .macro  load_regs,flags
+                ldm\flags       fp, {r1, r2, r4-r8, fp, sp, pc}
+                .endm
+/*
+ * Source-load helpers for the generic copy code.  Every load reads from the
+ * address in r0 with post-increment, using the T-suffixed (translated)
+ * load instructions, and adds one { load address, 6001f } pair to the
+ * __ex_table section so that local label 6001 is the fixup for that load.
+ *
+ *   load1b / load2b          : one / two bytes  (ldrbt, r0 advances by 1 each)
+ *   load1l / load2l / load4l : one / two / four words (ldrt, r0 advances by 4)
+ *
+ * Only load1b and load1l emit ".align 3" before their table entry.
+ */
+                .macro  load1b, reg1
+9999:           ldrbt   \reg1, [r0], $1
+                .section __ex_table, "a"
+                .align  3
+                .long   9999b, 6001f
+                .previous
+                .endm
+                .macro  load2b, reg1, reg2
+9999:           ldrbt   \reg1, [r0], $1
+9998:           ldrbt   \reg2, [r0], $1
+                .section __ex_table, "a"
+                .long   9999b, 6001f
+                .long   9998b, 6001f
+                .previous
+                .endm
+                .macro  load1l, reg1
+9999:           ldrt    \reg1, [r0], $4
+                .section __ex_table, "a"
+                .align  3
+                .long   9999b, 6001f
+                .previous
+                .endm
+                .macro  load2l, reg1, reg2
+9999:           ldrt    \reg1, [r0], $4
+9998:           ldrt    \reg2, [r0], $4
+                .section __ex_table, "a"
+                .long   9999b, 6001f
+                .long   9998b, 6001f
+                .previous
+                .endm
+                .macro  load4l, reg1, reg2, reg3, reg4
+9999:           ldrt    \reg1, [r0], $4
+9998:           ldrt    \reg2, [r0], $4
+9997:           ldrt    \reg3, [r0], $4
+9996:           ldrt    \reg4, [r0], $4
+                .section __ex_table, "a"
+                .long   9999b, 6001f
+                .long   9998b, 6001f
+                .long   9997b, 6001f
+                .long   9996b, 6001f
+                .previous
+                .endm
+/*
+ * unsigned int
+ * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr)
+ *  r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr
+ *  Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT
+ */
+#define FN_ENTRY        ENTRY(csum_partial_copy_from_user)
+#include "csumpartialcopygeneric.S"
+/*
+ * FIXME: minor buglet here
+ * We don't return the checksum for the data present in the buffer.  To do
+ * so properly, we would have to add in whatever registers were loaded before
+ * the fault, which, with the current asm above is not predictable.
+ */
+/*
+ * Fault fixup for the user loads above (label 6001 is the target named in
+ * every __ex_table entry emitted by the load macros).  It stores -EFAULT
+ * through err_ptr, clears the whole destination buffer and returns with
+ * r0 = 0.
+ */
+                .section .fixup,"ax"
+                .align  4
+6001:           mov     r4, #-EFAULT
+                ldr     r5, [fp, #4]            @ *err_ptr
+                str     r4, [r5]                @ report the fault to the caller
+                ldmia   sp, {r1, r2}            @ retrieve dst, len
+                add     r2, r2, r1              @ r2 = dst + len = end of buffer
+                mov     r0, #0                  @ zero the buffer
+6002:           teq     r2, r1                  @ reached the end yet?
+                strneb  r0, [r1], #1            @ no: clear one byte, advance dst
+                bne     6002b
+                @ r0 is still zero here and is what the caller receives
+                load_regs       ea
+                .previous
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S
new file mode 100644
index 000000000000..3c7f7e675dd8
--- /dev/null
+++ b/arch/arm/lib/delay.S
@@ -0,0 +1,58 @@
+/*
+ *  linux/arch/arm/lib/delay.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+LC0:            .word   loops_per_jiffy         @ literal holding the address of loops_per_jiffy
+/*
+ * __udelay multiplies r0 by 0x68db and falls through into __const_udelay.
+ * __const_udelay drops the low 11 bits of both r0 and loops_per_jiffy,
+ * multiplies them and shifts the product right by 6 to get a loop count.
+ * A zero count returns at once; otherwise execution continues into the
+ * code that follows (__delay).
+ *
+ * 0 <= r0 <= 2000
+ *
+ * NOTE(review): 2000 * 0x68db = 53686000, which is larger than the
+ * 0x01ffffff (33554431) bound quoted on __const_udelay below - confirm
+ * which of the two stated ranges is the intended one.
+ */
+ENTRY(__udelay)
+                mov     r2,     #0x6800
+                orr     r2, r2, #0x00db         @ r2 = 0x68db
+                mul     r0, r2, r0
+ENTRY(__const_udelay)                           @ 0 <= r0 <= 0x01ffffff
+                ldr     r2, LC0
+                ldr     r2, [r2]                @ max = 0x0fffffff
+                mov     r0, r0, lsr #11         @ max = 0x00003fff
+                mov     r2, r2, lsr #11         @ max = 0x0003ffff
+                mul     r0, r2, r0              @ max = 2^32-1
+                movs    r0, r0, lsr #6          @ sets Z when the loop count is zero
+                RETINSTR(moveq,pc,lr)
+/*
+ * loops = (r0 * 0x10c6 * 100 * loops_per_jiffy) / 2^32
+ *
+ * Oh, if only we had a cycle counter...
+ */
+@ Delay routine
+@ Busy-waits by decrementing r0 once per pass and branching back to the
+@ entry point while the value before the decrement was greater than 1.
+@ The region between the if-0 and endif lines is a disabled, unrolled
+@ variant of the same countdown and is never assembled.
+ENTRY(__delay)
+                subs    r0, r0, #1
+#if 0
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+#endif
+                bhi     __delay                 @ hi = no borrow and result non-zero
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
new file mode 100644
index 000000000000..ec9a1cd6176f
--- /dev/null
+++ b/arch/arm/lib/div64.S
@@ -0,0 +1,200 @@
+/*
+ *  linux/arch/arm/lib/div64.S
+ *
+ *  Optimized computation of 64-bit dividend / 32-bit divisor
+ *
+ *  Author:     Nicolas Pitre
+ *  Created:    Oct 5, 2003
+ *  Copyright:  Monta Vista Software, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+/*
+ * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
+ *
+ * Note: Calling convention is totally non standard for optimal code.
+ *       This is meant to be used by do_div() from include/asm/div64.h only.
+ *
+ * Input parameters:
+ *      xh-xl   = dividend (clobbered)
+ *      r4      = divisor (preserved)
+ *
+ * Output values:
+ *      yh-yl   = result
+ *      xh      = remainder
+ *
+ * Clobbered regs: xl, ip
+ */
+ENTRY(__do_div64)
+        @ Test for easy paths first.
+        subs    ip, r4, #1
+        bls     9f                      @ divisor is 0 or 1
+        tst     ip, r4
+        beq     8f                      @ divisor is power of 2
+        @ See if we need to handle upper 32-bit result.
+        cmp     xh, r4
+        mov     yh, #0
+        blo     3f                      @ xh < divisor: upper result word is 0
+        @ Align divisor with upper part of dividend.
+        @ The aligned divisor is stored in yl preserving the original.
+        @ The bit position is stored in ip.
+#if __LINUX_ARM_ARCH__ >= 5
+        clz     yl, r4
+        clz     ip, xh
+        sub     yl, yl, ip              @ yl = shift needed to line the top bits up
+        mov     ip, #1
+        mov     ip, ip, lsl yl
+        mov     yl, r4, lsl yl
+#else
+        mov     yl, r4
+        mov     ip, #1
+1:      cmp     yl, #0x80000000
+        cmpcc   yl, xh
+        movcc   yl, yl, lsl #1
+        movcc   ip, ip, lsl #1
+        bcc     1b
+#endif
+        @ The division loop for needed upper bit positions.
+        @ Break out early if dividend reaches 0.
+2:      cmp     xh, yl
+        orrcs   yh, yh, ip
+        subcss  xh, xh, yl
+        movnes  ip, ip, lsr #1
+        mov     yl, yl, lsr #1
+        bne     2b
+        @ See if we need to handle lower 32-bit result.
+3:      cmp     xh, #0
+        mov     yl, #0
+        cmpeq   xl, r4
+        movlo   xh, xl                  @ xh == 0 and xl < divisor: remainder is xl
+        movlo   pc, lr
+        @ The division loop for lower bit positions.
+        @ Here we shift remainder bits leftwards rather than moving the
+        @ divisor for comparisons, considering the carry-out bit as well.
+        mov     ip, #0x80000000
+4:      movs    xl, xl, lsl #1
+        adcs    xh, xh, xh
+        beq     6f
+        cmpcc   xh, r4
+5:      orrcs   yl, yl, ip
+        subcs   xh, xh, r4
+        movs    ip, ip, lsr #1
+        bne     4b
+        mov     pc, lr
+        @ The top part of remainder became zero.  If carry is set
+        @ (the 33rd bit) this is a false positive so resume the loop.
+        @ Otherwise, if lower part is also null then we are done.
+6:      bcs     5b
+        cmp     xl, #0
+        moveq   pc, lr
+        @ We still have remainder bits in the low part.  Bring them up.
+#if __LINUX_ARM_ARCH__ >= 5
+        clz     xh, xl                  @ we know xh is zero here so...
+        add     xh, xh, #1
+        mov     xl, xl, lsl xh
+        mov     ip, ip, lsr xh
+#else
+7:      movs    xl, xl, lsl #1
+        mov     ip, ip, lsr #1
+        bcc     7b
+#endif
+        @ Current remainder is now 1.  It is worthless to compare with
+        @ divisor at this point since divisor can not be smaller than 3 here.
+        @ If possible, branch for another shift in the division loop.
+        @ If no bit position left then we are done.
+        movs    ip, ip, lsr #1
+        mov     xh, #1
+        bne     4b
+        mov     pc, lr
+8:      @ Division by a power of 2: determine what that divisor order is
+        @ then simply shift values around
+#if __LINUX_ARM_ARCH__ >= 5
+        clz     ip, r4
+        rsb     ip, ip, #31             @ ip = log2(divisor)
+#else
+        mov     yl, r4
+        cmp     r4, #(1 << 16)
+        mov     ip, #0
+        movhs   yl, yl, lsr #16
+        movhs   ip, #16
+        cmp     yl, #(1 << 8)
+        movhs   yl, yl, lsr #8
+        addhs   ip, ip, #8
+        cmp     yl, #(1 << 4)
+        movhs   yl, yl, lsr #4
+        addhs   ip, ip, #4
+        cmp     yl, #(1 << 2)
+        addhi   ip, ip, #3
+        addls   ip, ip, yl, lsr #1
+#endif
+        mov     yh, xh, lsr ip          @ quotient = dividend >> ip
+        mov     yl, xl, lsr ip
+        rsb     ip, ip, #32
+        orr     yl, yl, xh, lsl ip
+        mov     xh, xl, lsl ip          @ remainder = low ip bits of xl
+        mov     xh, xh, lsr ip
+        mov     pc, lr
+        @ eq -> division by 1: obvious enough...
+        @ (the flags tested here are still those of the subs at the entry)
+9:      moveq   yl, xl
+        moveq   yh, xh
+        moveq   xh, #0
+        moveq   pc, lr
+        @ Division by 0:
+        str     lr, [sp, #-4]!          @ keep the return address across the call
+        bl      __div0
+        @ as wrong as it could be...
+        mov     yl, #0
+        mov     yh, #0
+        mov     xh, #0
+        ldr     pc, [sp], #4            @ pop the saved lr straight into pc
diff --git a/arch/arm/lib/ecard.S b/arch/arm/lib/ecard.S
new file mode 100644
index 000000000000..fb7b602a6f76
--- /dev/null
+++ b/arch/arm/lib/ecard.S
@@ -0,0 +1,45 @@
+/*
+ *  linux/arch/arm/lib/ecard.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 27/03/03 Ian Molton Clean up CONFIG_CPU
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+#define CPSR2SPSR(rt) \
+                mrs     rt, cpsr; \
+                msr     spsr_cxsf, rt
+@ Purpose: call an expansion card loader to read bytes.
+@ Proto  : char ecard_loader_read(int offset, char *card_base, char *loader);
+@ Returns: byte read
+@ The loader is entered at its first instruction with r1 = offset and
+@ r11 = card_base; r0 is left as the loader sets it.
+ENTRY(ecard_loader_read)
+                stmfd   sp!, {r4 - r12, lr}
+                mov     r11, r1                 @ r11 = card_base
+                mov     r1, r0                  @ r1 = offset
+                CPSR2SPSR(r0)                   @ SPSR = CPSR, r0 used as scratch
+                mov     lr, pc                  @ lr = address of the LOADREGS line
+                mov     pc, r2                  @ jump to the loader
+                LOADREGS(fd, sp!, {r4 - r12, pc})
+@ Purpose: call an expansion card loader to reset the card
+@ Proto  : void ecard_loader_reset(int card_base, char *loader);
+@ Returns: nothing is computed here; r0 is left as the loader sets it
+@ The loader is entered 8 bytes past its start with r11 = card_base.
+ENTRY(ecard_loader_reset)
+                stmfd   sp!, {r4 - r12, lr}
+                mov     r11, r0                 @ r11 = card_base
+                CPSR2SPSR(r0)                   @ SPSR = CPSR, r0 used as scratch
+                mov     lr, pc                  @ lr = address of the LOADREGS line
+                add     pc, r1, #8              @ jump to loader + 8
+                LOADREGS(fd, sp!, {r4 - r12, pc})
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
new file mode 100644
index 000000000000..f055d56ea68a
--- /dev/null
+++ b/arch/arm/lib/findbit.S
@@ -0,0 +1,168 @@
+/*
+ *  linux/arch/arm/lib/findbit.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16th March 2001 - John Ripley <jripley@sonicblue.com>
+ *   Fixed so that "size" is an exclusive not an inclusive quantity.
+ *   All users of these functions expect exclusive sizes, and may
+ *   also call with zero size.
+ * Reworked by rmk.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Purpose  : Find a 'zero' bit
+ * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
+ *
+ * r0 = addr, r1 = maxbit.  The bitmap is read one byte at a time and r2
+ * holds the bit number of the byte being examined.  A byte containing a
+ * zero bit is handed, inverted, to .found.  r0 = maxbit is returned when
+ * maxbit is zero or when no such byte starts below maxbit.
+ * Local labels 1, 2 and 3 are also entered from _find_next_zero_bit_le,
+ * which follows.
+ */
+ENTRY(_find_first_zero_bit_le)
+                teq     r1, #0                  @ nothing to search?
+                beq     3f
+                mov     r2, #0
+1:              ldrb    r3, [r0, r2, lsr #3]
+                eors    r3, r3, #0xff           @ invert bits
+                bne     .found                  @ any now set - found zero bit
+                add     r2, r2, #8              @ next bit pointer
+2:              cmp     r2, r1                  @ any more?
+                blo     1b
+3:              mov     r0, r1                  @ no free bits
+                RETINSTR(mov,pc,lr)
+/*
+ * Purpose  : Find next 'zero' bit
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ *
+ * r0 = addr, r1 = maxbit, r2 = offset.  A byte-aligned offset joins the
+ * byte loop of _find_first_zero_bit_le directly.  Otherwise the partial
+ * first byte is inverted and shifted right by (offset & 7) so that bit 0
+ * of r3 corresponds to bit number r2, which is the form .found expects.
+ * The backward references 1b, 2b and 3b are the labels of the preceding
+ * routine.
+ */
+ENTRY(_find_next_zero_bit_le)
+                teq     r1, #0
+                beq     3b
+                ands    ip, r2, #7
+                beq     1b                      @ If new byte, goto old routine
+                ldrb    r3, [r0, r2, lsr #3]
+                eor     r3, r3, #0xff           @ now looking for a 1 bit
+                movs    r3, r3, lsr ip          @ shift off unused bits
+                bne     .found
+                orr     r2, r2, #7              @ if zero, then no bits here
+                add     r2, r2, #1              @ align bit pointer
+                b       2b                      @ loop for next bit
+/*
+ * Purpose  : Find a 'one' bit
+ * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
+ *
+ * r0 = addr, r1 = maxbit.  Same byte-wise scan as the zero-bit search but
+ * without the inversion: the first non-zero byte is handed to .found.
+ * r0 = maxbit is returned when maxbit is zero or when no non-zero byte
+ * starts below maxbit.  Local labels 1, 2 and 3 are also entered from
+ * _find_next_bit_le, which follows.
+ */
+ENTRY(_find_first_bit_le)
+                teq     r1, #0                  @ nothing to search?
+                beq     3f
+                mov     r2, #0
+1:              ldrb    r3, [r0, r2, lsr #3]
+                movs    r3, r3                  @ set flags on the byte
+                bne     .found                  @ any set - found a one bit
+                add     r2, r2, #8              @ next bit pointer
+2:              cmp     r2, r1                  @ any more?
+                blo     1b
+3:              mov     r0, r1                  @ no set bits
+                RETINSTR(mov,pc,lr)
+/*
+ * Purpose  : Find next 'one' bit
+ * Prototype: int find_next_bit(void *addr, unsigned int maxbit, int offset)
+ *
+ * r0 = addr, r1 = maxbit, r2 = offset.  A byte-aligned offset joins the
+ * byte loop of _find_first_bit_le directly.  Otherwise the partial first
+ * byte is shifted right by (offset & 7) so that bit 0 of r3 corresponds
+ * to bit number r2.  The backward references 1b, 2b and 3b are the labels
+ * of the preceding routine.
+ */
+ENTRY(_find_next_bit_le)
+                teq     r1, #0
+                beq     3b
+                ands    ip, r2, #7
+                beq     1b                      @ If new byte, goto old routine
+                ldrb    r3, [r0, r2, lsr #3]
+                movs    r3, r3, lsr ip          @ shift off unused bits
+                bne     .found
+                orr     r2, r2, #7              @ if zero, then no bits here
+                add     r2, r2, #1              @ align bit pointer
+                b       2b                      @ loop for next bit
+#ifdef __ARMEB__
+/*
+ * Big-endian variants of the four routines above, with the same register
+ * use and the same sharing of local labels 1, 2 and 3 between each
+ * first/next pair.  The only difference is that the bit number is XORed
+ * with 0x18 before it is turned into a byte offset, which flips the low
+ * two bits of that offset (byte 0 <-> 3, 1 <-> 2 within each word).
+ */
+ENTRY(_find_first_zero_bit_be)
+                teq     r1, #0
+                beq     3f
+                mov     r2, #0
+1:              eor     r3, r2, #0x18           @ big endian byte ordering
+                ldrb    r3, [r0, r3, lsr #3]
+                eors    r3, r3, #0xff           @ invert bits
+                bne     .found                  @ any now set - found zero bit
+                add     r2, r2, #8              @ next bit pointer
+2:              cmp     r2, r1                  @ any more?
+                blo     1b
+3:              mov     r0, r1                  @ no free bits
+                RETINSTR(mov,pc,lr)
+ENTRY(_find_next_zero_bit_be)
+                teq     r1, #0
+                beq     3b
+                ands    ip, r2, #7
+                beq     1b                      @ If new byte, goto old routine
+                eor     r3, r2, #0x18           @ big endian byte ordering
+                ldrb    r3, [r0, r3, lsr #3]
+                eor     r3, r3, #0xff           @ now looking for a 1 bit
+                movs    r3, r3, lsr ip          @ shift off unused bits
+                bne     .found
+                orr     r2, r2, #7              @ if zero, then no bits here
+                add     r2, r2, #1              @ align bit pointer
+                b       2b                      @ loop for next bit
+ENTRY(_find_first_bit_be)
+                teq     r1, #0
+                beq     3f
+                mov     r2, #0
+1:              eor     r3, r2, #0x18           @ big endian byte ordering
+                ldrb    r3, [r0, r3, lsr #3]
+                movs    r3, r3                  @ set flags on the byte
+                bne     .found                  @ any set - found a one bit
+                add     r2, r2, #8              @ next bit pointer
+2:              cmp     r2, r1                  @ any more?
+                blo     1b
+3:              mov     r0, r1                  @ no set bits
+                RETINSTR(mov,pc,lr)
+ENTRY(_find_next_bit_be)
+                teq     r1, #0
+                beq     3b
+                ands    ip, r2, #7
+                beq     1b                      @ If new byte, goto old routine
+                eor     r3, r2, #0x18           @ big endian byte ordering
+                ldrb    r3, [r0, r3, lsr #3]
+                movs    r3, r3, lsr ip          @ shift off unused bits
+                bne     .found
+                orr     r2, r2, #7              @ if zero, then no bits here
+                add     r2, r2, #1              @ align bit pointer
+                b       2b                      @ loop for next bit
+#endif
+/*
+ * One or more bits in the LSB of r3 are assumed to be set.
+ *
+ * Entry : r1 = maxbit, r2 = bit number that bit 0 of r3 corresponds to,
+ *         r3 = non-zero byte to examine.
+ * Return: r0 = bit number of the lowest set bit of r3, clamped to maxbit.
+ *
+ * The callers scan whole bytes, so when maxbit is not a multiple of 8 the
+ * hit can lie in the unused tail of the final byte.  Clamping turns such a
+ * hit into the "nothing found" value (maxbit) instead of returning a bit
+ * number beyond the end of the bitmap.  r1 therefore has to survive until
+ * the clamp; r0 (the bitmap address, no longer needed) is the scratch
+ * register on the clz path.
+ */
+.found:
+#if __LINUX_ARM_ARCH__ >= 5
+                rsb     r0, r3, #0
+                and     r3, r3, r0              @ isolate the lowest set bit
+                clz     r3, r3
+                rsb     r3, r3, #31             @ r3 = index of that bit
+                add     r0, r2, r3
+#else
+                tst     r3, #0x0f               @ anything in the low nibble?
+                addeq   r2, r2, #4              @ no: the bit is 4 or more
+                movne   r3, r3, lsl #4          @ yes: move it up to bits 4-7
+                tst     r3, #0x30
+                addeq   r2, r2, #2
+                movne   r3, r3, lsl #2
+                tst     r3, #0x40
+                addeq   r2, r2, #1
+                mov     r0, r2
+#endif
+                cmp     r1, r0                  @ clamp to maxbit
+                movlo   r0, r1
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/floppydma.S b/arch/arm/lib/floppydma.S
new file mode 100644
index 000000000000..617150b1baef
--- /dev/null
+++ b/arch/arm/lib/floppydma.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/arm/lib/floppydma.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Two short FIQ handlers, each bracketed by a _start/_end label pair.
+ * Both use r9 as a down-counter, r10 as a post-incremented memory pointer
+ * and r11 as the base of the two device locations accessed (r11 - 4 and
+ * r11 itself); r12 is the data byte.  They return with "subs pc, lr, 4",
+ * the exception-return form that also restores CPSR from SPSR.
+ * NOTE(review): presumably r11 - 4 is the normal data port and r11 the
+ * port used for the final transfer, and the label pairs exist so the code
+ * can be copied elsewhere - confirm against the floppy driver.
+ */
+                .global floppy_fiqin_end
+ENTRY(floppy_fiqin_start)
+                subs    r9, r9, #1              @ one byte fewer to go
+                ldrgtb  r12, [r11, #-4]         @ count still positive: read r11 - 4
+                ldrleb  r12, [r11], #0          @ count exhausted: read r11
+                strb    r12, [r10], #1          @ store the byte, advance r10
+                subs    pc, lr, #4
+floppy_fiqin_end:
+                .global floppy_fiqout_end
+ENTRY(floppy_fiqout_start)
+                subs    r9, r9, #1              @ one byte fewer to go
+                ldrgeb  r12, [r10], #1          @ count not negative: fetch next byte
+                movlt   r12, #0                 @ count negative: send a zero
+                strleb  r12, [r11], #0          @ count exhausted: write to r11
+                subles  pc, lr, #4              @ ... and return
+                strb    r12, [r11, #-4]         @ count still positive: write to r11 - 4
+                subs    pc, lr, #4
+floppy_fiqout_end:
diff --git a/arch/arm/lib/gcclib.h b/arch/arm/lib/gcclib.h
new file mode 100644
index 000000000000..65314a3d9e27
--- /dev/null
+++ b/arch/arm/lib/gcclib.h
@@ -0,0 +1,25 @@
+/* gcclib.h -- definitions for various functions 'borrowed' from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+/* Number of bits in one addressable unit (byte). */
+#define BITS_PER_UNIT  8
+/* Width of SItype in bits. */
+#define SI_TYPE_SIZE (sizeof (SItype) * BITS_PER_UNIT)
+/*
+ * Integer types whose width is fixed through GCC machine modes rather
+ * than through the base type named in the typedef:
+ * QI = one byte, SI = four bytes, DI = eight bytes, __word__ = one
+ * machine word.  A leading U marks the unsigned variant.
+ */
+typedef unsigned int UQItype    __attribute__ ((mode (QI)));
+typedef          int SItype     __attribute__ ((mode (SI)));
+typedef unsigned int USItype    __attribute__ ((mode (SI)));
+typedef          int DItype     __attribute__ ((mode (DI)));
+typedef          int word_type  __attribute__ ((mode (__word__)));
+typedef unsigned int UDItype    __attribute__ ((mode (DI)));
+/*
+ * The two SItype halves of a DItype.  The member order follows the byte
+ * order selected by __ARMEB__ so that, inside DIunion, "low" overlays the
+ * less significant half of "ll" and "high" the more significant half.
+ */
+#ifdef __ARMEB__
+  struct DIstruct {SItype high, low;};
+#else
+  struct DIstruct {SItype low, high;};
+#endif
+/* Access to a DItype either as a whole (ll) or as its two halves (s). */
+typedef union
+{
+  struct DIstruct s;
+  DItype ll;
+} DIunion;
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
new file mode 100644
index 000000000000..64aa6f4fe5e4
--- /dev/null
+++ b/arch/arm/lib/getuser.S
@@ -0,0 +1,78 @@
+/*
+ *  linux/arch/arm/lib/getuser.S
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make them more
+ * efficient, especially as they return an error value in addition to
+ * the "real" return value.
+ *
+ * __get_user_X
+ *
+ * Inputs:      r0 contains the address
+ * Outputs:     r0 is the error code
+ *              r2, r3 contains the zero-extended value
+ *              lr corrupted
+ *
+ * No other registers must be altered.  (see include/asm-arm/uaccess.h
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000.
+ * Note also that it is intended that __get_user_bad is not global.
+ */
+#include <asm/constants.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+        @ Each routine loads from the address in r0 with the T-suffixed
+        @ load instructions, then returns r0 = 0.  Every load carries a
+        @ numeric label that the __ex_table entries at the end pair with
+        @ a fixup routine.
+        .global __get_user_1
+__get_user_1:
+1:      ldrbt   r2, [r0]                @ one byte, zero-extended into r2
+        mov     r0, #0
+        mov     pc, lr
+        .global __get_user_2
+__get_user_2:
+2:      ldrbt   r2, [r0], #1            @ byte at the lower address
+3:      ldrbt   r3, [r0]                @ byte at the higher address
+#ifndef __ARMEB__
+        orr     r2, r2, r3, lsl #8      @ little endian: second byte is the high half
+#else
+        orr     r2, r3, r2, lsl #8      @ big endian: first byte is the high half
+#endif
+        mov     r0, #0
+        mov     pc, lr
+        .global __get_user_4
+__get_user_4:
+4:      ldrt    r2, [r0]                @ one word into r2
+        mov     r0, #0
+        mov     pc, lr
+        .global __get_user_8
+__get_user_8:
+5:      ldrt    r2, [r0], #4            @ word at the lower address into r2
+6:      ldrt    r3, [r0]                @ word at the higher address into r3
+        mov     r0, #0
+        mov     pc, lr
+        @ Fixups: return r0 = -EFAULT with the value register(s) zeroed.
+        @ The 8-byte entry also clears r3 and falls through.
+__get_user_bad_8:
+        mov     r3, #0
+__get_user_bad:
+        mov     r2, #0
+        mov     r0, #-EFAULT
+        mov     pc, lr
+.section __ex_table, "a"
+        .long   1b, __get_user_bad
+        .long   2b, __get_user_bad
+        .long   3b, __get_user_bad
+        .long   4b, __get_user_bad
+        .long   5b, __get_user_bad_8
+        .long   6b, __get_user_bad_8
+.previous
diff --git a/arch/arm/lib/io-acorn.S b/arch/arm/lib/io-acorn.S
new file mode 100644
index 000000000000..3aacd01d40e1
--- /dev/null
+++ b/arch/arm/lib/io-acorn.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/arm/lib/io-acorn.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 27/03/03 Ian Molton Clean up CONFIG_CPU
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+                .text
+                .align
+.iosl_warning:
+                .ascii  "<4>insl/outsl not implemented, called from %08lX\0"
+                .align
+/*
+ * These make no sense on Acorn machines.
+ * Print a warning message.
+ *
+ * insl and outsl are two names for the same stub.  It loads the address of
+ * the warning format into r0 and the return address (lr) into r1 as the
+ * value for the %08lX conversion, then branches to printk without linking,
+ * so printk returns directly to the original caller.
+ */
+ENTRY(insl)
+ENTRY(outsl)
+                adr     r0, .iosl_warning       @ r0 = format string
+                mov     r1, lr                  @ r1 = address we were called from
+                b       printk                  @ tail call
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
new file mode 100644
index 000000000000..081ef749298a
--- /dev/null
+++ b/arch/arm/lib/io-readsb.S
@@ -0,0 +1,122 @@
+/*
+ *  linux/arch/arm/lib/io-readsb.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * __raw_readsb: r0 = address that every byte is read from (r0 is never
+ * advanced), r1 = destination buffer, r2 = number of bytes.
+ *
+ * The destination is first brought to a word boundary one byte at a time
+ * (.insb_align, placed ahead of the entry point), then filled 16, 8 and 4
+ * bytes at a time by packing the bytes read into registers with the
+ * put_byte_N helpers and storing whole words; the last 1-3 bytes are
+ * stored individually.
+ * NOTE(review): put_byte_N come from asm/assembler.h and presumably expand
+ * to endian-dependent shifts - confirm there.
+ */
+.insb_align:    rsb     ip, ip, #4              @ ip = bytes needed to align r1
+                cmp     ip, r2
+                movgt   ip, r2                  @ ... but never more than the length
+                cmp     ip, #2
+                ldrb    r3, [r0]                @ always at least one byte
+                strb    r3, [r1], #1
+                ldrgeb  r3, [r0]                @ second byte when ip >= 2
+                strgeb  r3, [r1], #1
+                ldrgtb  r3, [r0]                @ third byte when ip == 3
+                strgtb  r3, [r1], #1
+                subs    r2, r2, ip
+                bne     .insb_aligned
+                @ r2 is zero here: fall into the entry point, which returns
+ENTRY(__raw_readsb)
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                ands    ip, r1, #3              @ ip = misalignment of the destination
+                bne     .insb_align
+.insb_aligned:  stmfd   sp!, {r4 - r6, lr}
+                subs    r2, r2, #16             @ r2 is biased by -16 from here on
+                bmi     .insb_no_16
+.insb_16_lp:    ldrb    r3, [r0]
+                ldrb    r4, [r0]
+                ldrb    r5, [r0]
+                mov     r3, r3,     put_byte_0
+                ldrb    r6, [r0]
+                orr     r3, r3, r4, put_byte_1
+                ldrb    r4, [r0]
+                orr     r3, r3, r5, put_byte_2
+                ldrb    r5, [r0]
+                orr     r3, r3, r6, put_byte_3
+                ldrb    r6, [r0]
+                mov     r4, r4,     put_byte_0
+                ldrb    ip, [r0]
+                orr     r4, r4, r5, put_byte_1
+                ldrb    r5, [r0]
+                orr     r4, r4, r6, put_byte_2
+                ldrb    r6, [r0]
+                orr     r4, r4, ip, put_byte_3
+                ldrb    ip, [r0]
+                mov     r5, r5,     put_byte_0
+                ldrb    lr, [r0]
+                orr     r5, r5, r6, put_byte_1
+                ldrb    r6, [r0]
+                orr     r5, r5, ip, put_byte_2
+                ldrb    ip, [r0]
+                orr     r5, r5, lr, put_byte_3
+                ldrb    lr, [r0]
+                mov     r6, r6,     put_byte_0
+                orr     r6, r6, ip, put_byte_1
+                ldrb    ip, [r0]
+                orr     r6, r6, lr, put_byte_2
+                orr     r6, r6, ip, put_byte_3
+                stmia   r1!, {r3 - r6}          @ store the 16 bytes just read
+                subs    r2, r2, #16
+                bpl     .insb_16_lp
+                tst     r2, #15                 @ low four bits = bytes still to do
+                LOADREGS(eqfd, sp!, {r4 - r6, pc})
+.insb_no_16:    tst     r2, #8
+                beq     .insb_no_8
+                ldrb    r3, [r0]
+                ldrb    r4, [r0]
+                ldrb    r5, [r0]
+                mov     r3, r3,     put_byte_0
+                ldrb    r6, [r0]
+                orr     r3, r3, r4, put_byte_1
+                ldrb    r4, [r0]
+                orr     r3, r3, r5, put_byte_2
+                ldrb    r5, [r0]
+                orr     r3, r3, r6, put_byte_3
+                ldrb    r6, [r0]
+                mov     r4, r4,     put_byte_0
+                ldrb    ip, [r0]
+                orr     r4, r4, r5, put_byte_1
+                orr     r4, r4, r6, put_byte_2
+                orr     r4, r4, ip, put_byte_3
+                stmia   r1!, {r3, r4}           @ store 8 bytes
+.insb_no_8:     tst     r2, #4
+                beq     .insb_no_4
+                ldrb    r3, [r0]
+                ldrb    r4, [r0]
+                ldrb    r5, [r0]
+                ldrb    r6, [r0]
+                mov     r3, r3,     put_byte_0
+                orr     r3, r3, r4, put_byte_1
+                orr     r3, r3, r5, put_byte_2
+                orr     r3, r3, r6, put_byte_3
+                str     r3, [r1], #4            @ store 4 bytes
+.insb_no_4:     ands    r2, r2, #3              @ 0-3 trailing bytes
+                LOADREGS(eqfd, sp!, {r4 - r6, pc})
+                cmp     r2, #2
+                ldrb    r3, [r0]                @ always at least one byte
+                strb    r3, [r1], #1
+                ldrgeb  r3, [r0]                @ second byte when r2 >= 2
+                strgeb  r3, [r1], #1
+                ldrgtb  r3, [r0]                @ third byte when r2 == 3
+                strgtb  r3, [r1]
+                LOADREGS(fd, sp!, {r4 - r6, pc})
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
new file mode 100644
index 000000000000..75a9121cb23f
--- /dev/null
+++ b/arch/arm/lib/io-readsl.S
@@ -0,0 +1,78 @@
+/*
+ *  linux/arch/arm/lib/io-readsl.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+ENTRY(__raw_readsl)             @ in: r0 = port address, r1 = buffer, r2 = count of 32-bit words
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr          @ zero count: return at once
+                ands    ip, r1, #3      @ ip = byte offset of the buffer within a word
+                bne     3f              @ non-zero offset: unaligned-buffer path
+                subs    r2, r2, #4      @ bias the count by 4 for the unrolled loop
+                bmi     2f              @ fewer than 4 words: tail only
+                stmfd   sp!, {r4, lr}   @ r4 and lr are used as data registers below
+1:              ldr     r3, [r0, #0]    @ four reads of the same, non-incrementing address
+                ldr     r4, [r0, #0]
+                ldr     ip, [r0, #0]
+                ldr     lr, [r0, #0]
+                subs    r2, r2, #4
+                stmia   r1!, {r3, r4, ip, lr}   @ store 16 bytes and advance the buffer
+                bpl     1b
+                ldmfd   sp!, {r4, lr}
+2:              movs    r2, r2, lsl #31 @ C = count bit 1, Z clear when count bit 0 is set
+                ldrcs   r3, [r0, #0]    @ bit 1 set: two more words
+                ldrcs   ip, [r0, #0]
+                stmcsia r1!, {r3, ip}
+                ldrne   r3, [r0, #0]    @ bit 0 set: one final word
+                strne   r3, [r1, #0]
+                mov     pc, lr
+3:              ldr     r3, [r0]        @ unaligned buffer: first word is stored bytewise
+                cmp     ip, #2          @ flags decide how many bytes bring r1 to alignment
+                mov     ip, r3, get_byte_0
+                strb    ip, [r1], #1
+                bgt     6f              @ offset 3: one byte was enough
+                mov     ip, r3, get_byte_1
+                strb    ip, [r1], #1
+                beq     5f              @ offset 2: two bytes were enough
+                mov     ip, r3, get_byte_2
+                strb    ip, [r1], #1    @ offset 1: three bytes were needed
+4:              subs    r2, r2, #1      @ offset 1 loop: one byte of r3 is carried over
+                mov     ip, r3, pull #24
+                ldrne   r3, [r0]        @ words remain: merge the carry with the next read
+                orrne   ip, ip, r3, push #8
+                strne   ip, [r1], #4
+                bne     4b
+                b       8f              @ count exhausted: flush the one carried byte
+5:              subs    r2, r2, #1      @ offset 2 loop: two bytes of r3 are carried over
+                mov     ip, r3, pull #16
+                ldrne   r3, [r0]
+                orrne   ip, ip, r3, push #16
+                strne   ip, [r1], #4
+                bne     5b
+                b       7f              @ count exhausted: flush the two carried bytes
+6:              subs    r2, r2, #1      @ offset 3 loop: three bytes of r3 are carried over
+                mov     ip, r3, pull #8
+                ldrne   r3, [r0]
+                orrne   ip, ip, r3, push #24
+                strne   ip, [r1], #4
+                bne     6b
+                mov     r3, ip, get_byte_2      @ flush the carried bytes, highest offset first
+                strb    r3, [r1, #2]
+7:              mov     r3, ip, get_byte_1
+                strb    r3, [r1, #1]
+8:              mov     r3, ip, get_byte_0
+                strb    r3, [r1, #0]
+                mov     pc, lr
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
new file mode 100644
index 000000000000..476cf7f8a633
--- /dev/null
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -0,0 +1,107 @@
+/*
+ *  linux/arch/arm/lib/io-readsw-armv3.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+.insw_bad_alignment:            @ reached from .insw_align when the buffer address in r1 is odd
+                adr     r0, .insw_bad_align_msg @ r0 = address of the format string
+                mov     r2, lr          @ r2 = caller return address; r1 is left holding the buffer
+                b       panic           @ tail-branch, no link register update
+.insw_bad_align_msg:
+                .asciz  "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+                .align
+.insw_align:    tst     r1, #1          @ entered when the buffer is not word aligned
+                bne     .insw_bad_alignment     @ odd address is not supported here
+                ldr     r3, [r0]        @ halfword-aligned buffer: move one halfword bytewise
+                strb    r3, [r1], #1
+                mov     r3, r3, lsr #8
+                strb    r3, [r1], #1    @ r1 is now word aligned
+                subs    r2, r2, #1
+                RETINSTR(moveq, pc, lr) @ presumably returns when the count hit zero, else falls through
+ENTRY(__raw_readsw)             @ in: r0 = port address, r1 = buffer, r2 = count of 16-bit halfwords
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                tst     r1, #3
+                bne     .insw_align     @ align the buffer first
+.insw_aligned:  mov     ip, #0xff
+                orr     ip, ip, ip, lsl #8      @ ip = 0xffff, mask for bits 0-15
+                stmfd   sp!, {r4, r5, r6, lr}
+                subs    r2, r2, #8      @ bias the count for the 8-halfword loop
+                bmi     .no_insw_8
+.insw_8_lp:     ldr     r3, [r0]        @ every two reads are packed into one word:
+                and     r3, r3, ip      @   first read supplies bits 0-15
+                ldr     r4, [r0]
+                orr     r3, r3, r4, lsl #16     @   second read supplies bits 16-31
+                ldr     r4, [r0]
+                and     r4, r4, ip
+                ldr     r5, [r0]
+                orr     r4, r4, r5, lsl #16
+                ldr     r5, [r0]
+                and     r5, r5, ip
+                ldr     r6, [r0]
+                orr     r5, r5, r6, lsl #16
+                ldr     r6, [r0]
+                and     r6, r6, ip
+                ldr     lr, [r0]
+                orr     r6, r6, lr, lsl #16
+                stmia   r1!, {r3 - r6}  @ store 8 halfwords as 4 words
+                subs    r2, r2, #8
+                bpl     .insw_8_lp
+                tst     r2, #7          @ low three bits = halfwords still to do
+                LOADREGS(eqfd, sp!, {r4, r5, r6, pc})   @ presumably pops and returns when none remain
+.no_insw_8:     tst     r2, #4          @ four halfwords left?
+                beq     .no_insw_4
+                ldr     r3, [r0]
+                and     r3, r3, ip
+                ldr     r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                ldr     r4, [r0]
+                and     r4, r4, ip
+                ldr     r5, [r0]
+                orr     r4, r4, r5, lsl #16
+                stmia   r1!, {r3, r4}
+.no_insw_4:     tst     r2, #2          @ two halfwords left?
+                beq     .no_insw_2
+                ldr     r3, [r0]
+                and     r3, r3, ip
+                ldr     r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                str     r3, [r1], #4
+.no_insw_2:     tst     r2, #1          @ one last halfword: stored as two bytes
+                ldrne   r3, [r0]
+                strneb  r3, [r1], #1
+                movne   r3, r3, lsr #8
+                strneb  r3, [r1]
+                LOADREGS(fd, sp!, {r4, r5, r6, pc})
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
new file mode 100644
index 000000000000..c92b66ecbe86
--- /dev/null
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -0,0 +1,130 @@
+/*
+ *  linux/arch/arm/lib/io-readsw-armv4.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .macro  pack, rd, hw1, hw2      /* rd = hw1 and hw2 combined into one 32-bit word */
+#ifndef __ARMEB__                               /* __ARMEB__ undefined: hw1 in bits 0-15, hw2 in bits 16-31 */
+                orr     \rd, \hw1, \hw2, lsl #16
+#else                                           /* __ARMEB__ defined: hw1 in bits 16-31, hw2 in bits 0-15 */
+                orr     \rd, \hw2, \hw1, lsl #16
+#endif
+                .endm
+.insw_align:    movs    ip, r1, lsl #31         @ Z clear if buffer bit 0 set, C = buffer bit 1
+                bne     .insw_noalign           @ odd address: byte-merging path
+                ldrh    ip, [r0]                @ halfword-aligned: move a single halfword
+                sub     r2, r2, #1
+                strh    ip, [r1], #2            @ r1 now word aligned; fall through to the entry
+ENTRY(__raw_readsw)             @ in: r0 = port address, r1 = buffer, r2 = count of 16-bit halfwords
+                teq     r2, #0                  @ nothing (left) to read?
+                moveq   pc, lr
+                tst     r1, #3
+                bne     .insw_align             @ buffer not word aligned yet
+                stmfd   sp!, {r4, r5, lr}
+                subs    r2, r2, #8              @ bias the count for the 8-halfword loop
+                bmi     .no_insw_8
+.insw_8_lp:     ldrh    r3, [r0]                @ eight reads, packed pairwise into four words
+                ldrh    r4, [r0]
+                pack    r3, r3, r4
+                ldrh    r4, [r0]
+                ldrh    r5, [r0]
+                pack    r4, r4, r5
+                ldrh    r5, [r0]
+                ldrh    ip, [r0]
+                pack    r5, r5, ip
+                ldrh    ip, [r0]
+                ldrh    lr, [r0]
+                pack    ip, ip, lr
+                subs    r2, r2, #8
+                stmia   r1!, {r3 - r5, ip}
+                bpl     .insw_8_lp
+.no_insw_8:     tst     r2, #4                  @ four halfwords left?
+                beq     .no_insw_4
+                ldrh    r3, [r0]
+                ldrh    r4, [r0]
+                pack    r3, r3, r4
+                ldrh    r4, [r0]
+                ldrh    ip, [r0]
+                pack    r4, r4, ip
+                stmia   r1!, {r3, r4}
+.no_insw_4:     movs    r2, r2, lsl #31         @ C = count bit 1, Z clear when count bit 0 is set
+                bcc     .no_insw_2
+                ldrh    r3, [r0]                @ two halfwords left
+                ldrh    ip, [r0]
+                pack    r3, r3, ip
+                str     r3, [r1], #4
+.no_insw_2:     ldrneh  r3, [r0]                @ one last halfword (flags still from the movs)
+                strneh  r3, [r1]
+                ldmfd   sp!, {r4, r5, pc}
+#ifdef __ARMEB__                /* helpers for .insw_noalign when __ARMEB__ is defined */
+#define _BE_ONLY_(code...)      code    /* keep the wrapped code */
+#define _LE_ONLY_(code...)              /* drop the wrapped code */
+#define push_hbyte0             lsr #8  /* shift applied to the second halfword when merging it into a word */
+#define pull_hbyte1             lsl #24 /* shift that keeps the leftover byte of that halfword */
+#else                           /* same helpers when __ARMEB__ is not defined */
+#define _BE_ONLY_(code...)
+#define _LE_ONLY_(code...) code
+#define push_hbyte0             lsl #24
+#define pull_hbyte1             lsr #8
+#endif
+.insw_noalign:  stmfd   sp!, {r4, lr}           @ odd buffer address; C still = buffer bit 1 from .insw_align
+                ldrccb  ip, [r1, #-1]!          @ offset 1: step back to the word boundary, reloading the byte there
+                bcc     1f
+                ldrh    ip, [r0]                @ offset 3: read one halfword now
+                sub     r2, r2, #1
+   _BE_ONLY_(   mov     ip, ip, ror #8          )
+                strb    ip, [r1], #1            @ its first byte completes the word; r1 is now aligned
+   _LE_ONLY_(   mov     ip, ip, lsr #8          )
+   _BE_ONLY_(   mov     ip, ip, lsr #24         )
+1:              subs    r2, r2, #2              @ ip holds one pending byte; two halfwords per pass
+                bmi     3f
+   _BE_ONLY_(   mov     ip, ip, lsl #24         )
+2:              ldrh    r3, [r0]
+                ldrh    r4, [r0]
+                subs    r2, r2, #2
+                orr     ip, ip, r3, lsl #8      @ word = pending byte + all of r3 + one byte of r4
+                orr     ip, ip, r4, push_hbyte0
+                str     ip, [r1], #4
+                mov     ip, r4, pull_hbyte1     @ other byte of r4 becomes the new pending byte
+                bpl     2b
+   _BE_ONLY_(   mov     ip, ip, lsr #24         )
+3:              tst     r2, #1                  @ odd remainder means one more halfword to read
+                strb    ip, [r1], #1            @ always flush the pending byte
+                ldrneh  ip, [r0]
+   _BE_ONLY_(   movne   ip, ip, ror #8          )
+                strneb  ip, [r1], #1            @ last halfword is stored as two single bytes
+   _LE_ONLY_(   movne   ip, ip, lsr #8          )
+   _BE_ONLY_(   movne   ip, ip, lsr #24         )
+                strneb  ip, [r1]
+                ldmfd   sp!, {r4, pc}
diff --git a/arch/arm/lib/io-shark.c b/arch/arm/lib/io-shark.c
new file mode 100644
index 000000000000..108d4573e970
--- /dev/null
+++ b/arch/arm/lib/io-shark.c
@@ -0,0 +1,83 @@
+/*
+ *  linux/arch/arm/lib/io-shark.c
+ *
+ *  by Alexander Schulz
+ *
+ * derived from:
+ * linux/arch/arm/lib/io-ebsa.S
+ * Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <asm/io.h>
+void print_warning(void)        /* log a KERN_WARNING line; called by the stub routines in this file */
+{
+        printk(KERN_WARNING "ins?/outs? not implemented on this architecture\n");
+}
+void insl(unsigned int port, void *to, int len)         /* stub: arguments unused, no data is transferred */
+{
+        print_warning();
+}
+void insb(unsigned int port, void *to, int len)         /* stub: arguments unused, no data is transferred */
+{
+        print_warning();
+}
+void outsl(unsigned int port, const void *from, int len)        /* stub: arguments unused, nothing is written */
+{
+        print_warning();
+}
+void outsb(unsigned int port, const void *from, int len)        /* stub: arguments unused, nothing is written */
+{
+        print_warning();
+}
+/* these should be in assembler again */
+/*
+ * Purpose: read a block of data from a hardware register to memory.
+ * Proto  : insw(int from_port, void *to, int len_in_words);
+ * Proto  : inswb(int from_port, void *to, int len_in_bytes);
+ * Notes  : increment to
+ */
+void insw(unsigned int port, void *to, int len)         /* read len 16-bit values from port into to */
+{
+        unsigned short *dst = to, *end = dst + (len > 0 ? len : 0);    /* non-positive len reads nothing */
+        for (; dst != end; dst++)
+                *dst = inw(port);       /* one port read per 16-bit slot, in ascending order */
+}
+void inswb(unsigned int port, void *to, int len)        /* like insw(), but len is a byte count */
+{
+        insw(port, to, len >> 1);       /* insw() counts 16-bit units, two bytes each (was >> 2, which read only half) */
+}
+/*
+ * Purpose: write a block of data from memory to a hardware register.
+ * Proto  : outsw(int to_reg, void *from, int len_in_words);
+ * Proto  : outswb(int to_reg, void *from, int len_in_bytes);
+ * Notes  : increments from
+ */
+void outsw(unsigned int port, const void *from, int len)        /* write len 16-bit values from from to port */
+{
+        unsigned short *src = (unsigned short *) from, *end = src + (len > 0 ? len : 0);
+        for (; src != end; src++)       /* non-positive len writes nothing */
+                outw(*src, port);       /* one port write per 16-bit slot, in ascending order */
+}
+void outswb(unsigned int port, const void *from, int len)       /* like outsw(), but len is a byte count */
+{
+        outsw(port, from, len >> 1);    /* outsw() counts 16-bit units, two bytes each (was >> 2, which wrote only half) */
+}
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
new file mode 100644
index 000000000000..70b2561bdb09
--- /dev/null
+++ b/arch/arm/lib/io-writesb.S
@@ -0,0 +1,92 @@
+/*
+ *  linux/arch/arm/lib/io-writesb.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .macro  outword, rd             /* write the four bytes of rd to [r0], one strb each */
+#ifndef __ARMEB__                               /* __ARMEB__ undefined: bits 0-7 go out first; rd is destroyed */
+                strb    \rd, [r0]
+                mov     \rd, \rd, lsr #8
+                strb    \rd, [r0]
+                mov     \rd, \rd, lsr #8
+                strb    \rd, [r0]
+                mov     \rd, \rd, lsr #8
+                strb    \rd, [r0]
+#else                                           /* __ARMEB__ defined: bits 24-31 go out first; lr is scratch */
+                mov     lr, \rd, lsr #24
+                strb    lr, [r0]
+                mov     lr, \rd, lsr #16
+                strb    lr, [r0]
+                mov     lr, \rd, lsr #8
+                strb    lr, [r0]
+                strb    \rd, [r0]
+#endif
+                .endm
+.outsb_align:   rsb     ip, ip, #4      @ ip = bytes needed to reach word alignment (1..3)
+                cmp     ip, r2
+                movgt   ip, r2          @ never more than the total byte count
+                cmp     ip, #2          @ flags: one byte always, GE a second, GT a third
+                ldrb    r3, [r1], #1
+                strb    r3, [r0]
+                ldrgeb  r3, [r1], #1
+                strgeb  r3, [r0]
+                ldrgtb  r3, [r1], #1
+                strgtb  r3, [r0]
+                subs    r2, r2, ip
+                bne     .outsb_aligned  @ bytes remain; otherwise fall through and return via the zero check
+ENTRY(__raw_writesb)            @ in: r0 = port address, r1 = source buffer, r2 = byte count
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                ands    ip, r1, #3      @ ip = byte offset of the buffer within a word
+                bne     .outsb_align
+.outsb_aligned: stmfd   sp!, {r4, r5, lr}
+                subs    r2, r2, #16     @ bias the count for the 16-byte loop
+                bmi     .outsb_no_16
+.outsb_16_lp:   ldmia   r1!, {r3, r4, r5, ip}   @ fetch four words, emit them bytewise
+                outword r3
+                outword r4
+                outword r5
+                outword ip
+                subs    r2, r2, #16
+                bpl     .outsb_16_lp
+                tst     r2, #15         @ low four bits = bytes still to do
+                LOADREGS(eqfd, sp!, {r4, r5, pc})       @ presumably pops and returns when none remain
+.outsb_no_16:   tst     r2, #8          @ eight bytes left?
+                beq     .outsb_no_8
+                ldmia   r1!, {r3, r4}
+                outword r3
+                outword r4
+.outsb_no_8:    tst     r2, #4          @ four bytes left?
+                beq     .outsb_no_4
+                ldr     r3, [r1], #4
+                outword r3
+.outsb_no_4:    ands    r2, r2, #3      @ 0..3 trailing bytes
+                LOADREGS(eqfd, sp!, {r4, r5, pc})
+                cmp     r2, #2          @ flags: one byte always, GE a second, GT a third
+                ldrb    r3, [r1], #1
+                strb    r3, [r0]
+                ldrgeb  r3, [r1], #1
+                strgeb  r3, [r0]
+                ldrgtb  r3, [r1]
+                strgtb  r3, [r0]
+                LOADREGS(fd, sp!, {r4, r5, pc})
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
new file mode 100644
index 000000000000..f8f14dd227ca
--- /dev/null
+++ b/arch/arm/lib/io-writesl.S
@@ -0,0 +1,66 @@
+/*
+ *  linux/arch/arm/lib/io-writesl.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+ENTRY(__raw_writesl)            @ in: r0 = port address, r1 = source buffer, r2 = count of 32-bit words
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                ands    ip, r1, #3      @ ip = byte offset of the buffer within a word
+                bne     3f              @ non-zero offset: unaligned-buffer path
+                subs    r2, r2, #4      @ bias the count by 4 for the unrolled loop
+                bmi     2f
+                stmfd   sp!, {r4, lr}   @ r4 and lr are used as data registers below
+1:              ldmia   r1!, {r3, r4, ip, lr}   @ fetch four words, advance the buffer
+                subs    r2, r2, #4
+                str     r3, [r0, #0]    @ four writes to the same, non-incrementing address
+                str     r4, [r0, #0]
+                str     ip, [r0, #0]
+                str     lr, [r0, #0]
+                bpl     1b
+                ldmfd   sp!, {r4, lr}
+2:              movs    r2, r2, lsl #31 @ C = count bit 1, Z clear when count bit 0 is set
+                ldmcsia r1!, {r3, ip}   @ bit 1 set: two more words
+                strcs   r3, [r0, #0]
+                ldrne   r3, [r1, #0]    @ bit 0 set: one final word (r3 is reloaded after its store)
+                strcs   ip, [r0, #0]
+                strne   r3, [r0, #0]
+                mov     pc, lr
+3:              bic     r1, r1, #3      @ unaligned buffer: read from the containing aligned word
+                ldr     r3, [r1], #4
+                cmp     ip, #2
+                blt     5f              @ offset 1
+                bgt     6f              @ offset 3; offset 2 falls through
+4:              mov     ip, r3, pull #16        @ offset 2: join halves of two consecutive words
+                ldr     r3, [r1], #4
+                subs    r2, r2, #1
+                orr     ip, ip, r3, push #16
+                str     ip, [r0]
+                bne     4b
+                mov     pc, lr
+5:              mov     ip, r3, pull #8         @ offset 1: three bytes from r3, one from the next word
+                ldr     r3, [r1], #4
+                subs    r2, r2, #1
+                orr     ip, ip, r3, push #24
+                str     ip, [r0]
+                bne     5b
+                mov     pc, lr
+6:              mov     ip, r3, pull #24        @ offset 3: one byte from r3, three from the next word
+                ldr     r3, [r1], #4
+                subs    r2, r2, #1
+                orr     ip, ip, r3, push #8
+                str     ip, [r0]
+                bne     6b
+                mov     pc, lr
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
new file mode 100644
index 000000000000..950e7e310f1a
--- /dev/null
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -0,0 +1,127 @@
+/*
+ *  linux/arch/arm/lib/io-writesw-armv3.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+.outsw_bad_alignment:           @ reached from .outsw_align when the buffer address in r1 is odd
+                adr     r0, .outsw_bad_align_msg        @ r0 = address of the format string
+                mov     r2, lr          @ r2 = caller return address; r1 is left holding the buffer
+                b       panic           @ tail-branch, no link register update
+.outsw_bad_align_msg:
+                .asciz  "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+                .align
+.outsw_align:   tst     r1, #1          @ entered when the buffer is not word aligned
+                bne     .outsw_bad_alignment    @ odd address is not supported here
+                add     r1, r1, #2      @ halfword-aligned buffer: r1 moves up to the next word boundary
+                ldr     r3, [r1, #-4]   @ load the aligned word that contains the first halfword
+                mov     r3, r3, lsr #16 @ keep bits 16-31 ...
+                orr     r3, r3, r3, lsl #16     @ ... and replicate them into both halves
+                str     r3, [r0]
+                subs    r2, r2, #1
+                RETINSTR(moveq, pc, lr) @ presumably returns when the count hit zero, else falls through
+ENTRY(__raw_writesw)            @ in: r0 = port address, r1 = source buffer, r2 = count of 16-bit halfwords
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                tst     r1, #3
+                bne     .outsw_align    @ align the buffer first
+.outsw_aligned: stmfd   sp!, {r4, r5, r6, lr}
+                subs    r2, r2, #8      @ bias the count for the 8-halfword loop
+                bmi     .no_outsw_8
+.outsw_8_lp:    ldmia   r1!, {r3, r4, r5, r6}   @ four words = eight halfwords
+                mov     ip, r3, lsl #16 @ bits 0-15 of the word, replicated into both halves
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]        @ each halfword goes out as a full word store
+                mov     ip, r3, lsr #16 @ bits 16-31 of the word, replicated into both halves
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r4, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r4, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r5, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r5, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r6, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r6, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                subs    r2, r2, #8
+                bpl     .outsw_8_lp
+                tst     r2, #7          @ low three bits = halfwords still to do
+                LOADREGS(eqfd, sp!, {r4, r5, r6, pc})   @ presumably pops and returns when none remain
+.no_outsw_8:    tst     r2, #4          @ four halfwords left?
+                beq     .no_outsw_4
+                ldmia   r1!, {r3, r4}
+                mov     ip, r3, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r3, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r4, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r4, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+.no_outsw_4:    tst     r2, #2          @ two halfwords left?
+                beq     .no_outsw_2
+                ldr     r3, [r1], #4
+                mov     ip, r3, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r3, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+.no_outsw_2:    tst     r2, #1          @ one last halfword: bits 0-15 of the next word
+                ldrne   r3, [r1]
+                movne   ip, r3, lsl #16
+                orrne   ip, ip, ip, lsr #16
+                strne   ip, [r0]
+                LOADREGS(fd, sp!, {r4, r5, r6, pc})
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
new file mode 100644
index 000000000000..6d1d7c27806e
--- /dev/null
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -0,0 +1,95 @@
+/*
+ *  linux/arch/arm/lib/io-writesw-armv4.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .macro  outword, rd             /* write both halfwords of rd to [r0], one strh each */
+#ifndef __ARMEB__                               /* __ARMEB__ undefined: bits 0-15 first; rd is destroyed */
+                strh    \rd, [r0]
+                mov     \rd, \rd, lsr #16
+                strh    \rd, [r0]
+#else                                           /* __ARMEB__ defined: bits 16-31 first; lr is scratch */
+                mov     lr, \rd, lsr #16
+                strh    lr, [r0]
+                strh    \rd, [r0]
+#endif
+                .endm
+.outsw_align:   movs    ip, r1, lsl #31         @ Z clear if buffer bit 0 set, C = buffer bit 1
+                bne     .outsw_noalign          @ odd address: byte-merging path (expects r3 = offset, C as set here)
+                ldrh    r3, [r1], #2            @ halfword-aligned: move a single halfword
+                sub     r2, r2, #1
+                strh    r3, [r0]                @ r1 now word aligned; fall through to the entry
+ENTRY(__raw_writesw)            @ in: r0 = port address, r1 = source buffer, r2 = count of 16-bit halfwords
+                teq     r2, #0                  @ nothing (left) to write?
+                moveq   pc, lr
+                ands    r3, r1, #3              @ r3 = byte offset of the buffer within a word
+                bne     .outsw_align
+                stmfd   sp!, {r4, r5, lr}
+                subs    r2, r2, #8              @ bias the count for the 8-halfword loop
+                bmi     .no_outsw_8
+.outsw_8_lp:    ldmia   r1!, {r3, r4, r5, ip}   @ four words = eight halfwords
+                subs    r2, r2, #8
+                outword r3
+                outword r4
+                outword r5
+                outword ip
+                bpl     .outsw_8_lp             @ flags still from the subs; outword sets none
+.no_outsw_8:    tst     r2, #4                  @ four halfwords left?
+                beq     .no_outsw_4
+                ldmia   r1!, {r3, ip}
+                outword r3
+                outword ip
+.no_outsw_4:    movs    r2, r2, lsl #31         @ C = count bit 1, Z clear when count bit 0 is set
+                bcc     .no_outsw_2
+                ldr     r3, [r1], #4            @ two halfwords left
+                outword r3
+.no_outsw_2:    ldrneh  r3, [r1]                @ one last halfword (flags still from the movs)
+                strneh  r3, [r0]
+                ldmfd   sp!, {r4, r5, pc}
+#ifdef __ARMEB__                /* shifts used by .outsw_noalign when __ARMEB__ is defined */
+#define pull_hbyte0     lsl #8  /* positions the last byte of the current word for a straddling halfword */
+#define push_hbyte1     lsr #24 /* positions the first byte of the next word to complete that halfword */
+#else                           /* same shifts when __ARMEB__ is not defined */
+#define pull_hbyte0     lsr #24
+#define push_hbyte1     lsl #8
+#endif
+.outsw_noalign: ldr     r3, [r1, -r3]!  @ odd buffer: r3 = offset (1 or 3); load the containing word, r1 = its aligned address
+                subcs   r2, r2, #1      @ C set (offset 3): the first halfword straddles two words
+                bcs     2f
+                subs    r2, r2, #2      @ offset 1: each pass below writes two halfwords
+                bmi     3f              @ count was 1: only the middle halfword of r3 (flags are NE here)
+1:              mov     ip, r3, lsr #8  @ halfword held in the middle two bytes of r3
+                strh    ip, [r0]
+2:              mov     ip, r3, pull_hbyte0     @ halfword straddling r3 and the following word
+                ldr     r3, [r1, #4]!
+                subs    r2, r2, #2
+                orr     ip, ip, r3, push_hbyte1
+                strh    ip, [r0]
+                bpl     1b              @ must loop to 1, not 2: otherwise the middle halfword of every word is skipped
+                tst     r2, #1          @ odd remainder: one halfword is left in the middle of r3
+3:              movne   ip, r3, lsr #8
+                strneh  ip, [r0]
+                mov     pc, lr
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
new file mode 100644
index 000000000000..59026029d017
--- /dev/null
+++ b/arch/arm/lib/lib1funcs.S
@@ -0,0 +1,314 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico@cam.org>
+ *   - contributed to gcc-3.4 on Sep 30, 2003
+ *   - adapted for the Linux kernel on Oct 2, 2003
+ */
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+.macro ARM_DIV_BODY dividend, divisor, result, curbit   /* unsigned divide: quotient in result, dividend reduced in place */
+#if __LINUX_ARM_ARCH__ >= 5     /* clz variant: align the top set bit of divisor with that of dividend */
+        clz     \curbit, \divisor
+        clz     \result, \dividend
+        sub     \result, \curbit, \result       @ shift distance between the two top bits
+        mov     \curbit, #1
+        mov     \divisor, \divisor, lsl \result
+        mov     \curbit, \curbit, lsl \result   @ curbit = quotient bit matching the shifted divisor
+        mov     \result, #0
+        
+#else
+        @ Initially shift the divisor left 3 bits if possible,
+        @ set curbit accordingly.  This allows for curbit to be located
+        @ at the left end of each 4 bit nibbles in the division loop
+        @ to save one loop in most cases.
+        tst     \divisor, #0xe0000000
+        moveq   \divisor, \divisor, lsl #3
+        moveq   \curbit, #8
+        movne   \curbit, #1
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+1:      cmp     \divisor, #0x10000000
+        cmplo   \divisor, \dividend
+        movlo   \divisor, \divisor, lsl #4
+        movlo   \curbit, \curbit, lsl #4
+        blo     1b
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+1:      cmp     \divisor, #0x80000000
+        cmplo   \divisor, \dividend
+        movlo   \divisor, \divisor, lsl #1
+        movlo   \curbit, \curbit, lsl #1
+        blo     1b
+        mov     \result, #0
+#endif
+        @ Division loop
+1:      cmp     \dividend, \divisor             @ four trial subtractions per pass, at shifts 0..3
+        subhs   \dividend, \dividend, \divisor
+        orrhs   \result,   \result,   \curbit   @ record a quotient bit for each successful subtraction
+        cmp     \dividend, \divisor,  lsr #1
+        subhs   \dividend, \dividend, \divisor, lsr #1
+        orrhs   \result,   \result,   \curbit,  lsr #1
+        cmp     \dividend, \divisor,  lsr #2
+        subhs   \dividend, \dividend, \divisor, lsr #2
+        orrhs   \result,   \result,   \curbit,  lsr #2
+        cmp     \dividend, \divisor,  lsr #3
+        subhs   \dividend, \dividend, \divisor, lsr #3
+        orrhs   \result,   \result,   \curbit,  lsr #3
+        cmp     \dividend, #0                   @ Early termination?
+        movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
+        movne   \divisor,  \divisor, lsr #4
+        bne     1b
+.endm
+.macro ARM_DIV2_ORDER divisor, order    /* order = bit index of a power-of-two divisor (used as a shift count) */
+#if __LINUX_ARM_ARCH__ >= 5
+        clz     \order, \divisor
+        rsb     \order, \order, #31             @ order = 31 - leading zero count
+#else                                   /* no clz: narrow down by 16, 8, 4 bits; divisor is modified */
+        cmp     \divisor, #(1 << 16)
+        movhs   \divisor, \divisor, lsr #16
+        movhs   \order, #16
+        movlo   \order, #0
+        cmp     \divisor, #(1 << 8)
+        movhs   \divisor, \divisor, lsr #8
+        addhs   \order, \order, #8
+        cmp     \divisor, #(1 << 4)
+        movhs   \divisor, \divisor, lsr #4
+        addhs   \order, \order, #4
+        cmp     \divisor, #(1 << 2)             @ remaining value is below 16
+        addhi   \order, \order, #3              @ above 4: add 3
+        addls   \order, \order, \divisor, lsr #1        @ 1, 2 or 4: add 0, 1 or 2
+#endif
+.endm
+.macro ARM_MOD_BODY dividend, divisor, order, spare     /* unsigned modulo: dividend is reduced in place to the remainder */
+#if __LINUX_ARM_ARCH__ >= 5     /* clz variant: order = distance between the top set bits */
+        clz     \order, \divisor
+        clz     \spare, \dividend
+        sub     \order, \order, \spare
+        mov     \divisor, \divisor, lsl \order
+#else
+        mov     \order, #0
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+1:      cmp     \divisor, #0x10000000
+        cmplo   \divisor, \dividend
+        movlo   \divisor, \divisor, lsl #4
+        addlo   \order, \order, #4
+        blo     1b
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+1:      cmp     \divisor, #0x80000000
+        cmplo   \divisor, \dividend
+        movlo   \divisor, \divisor, lsl #1
+        addlo   \order, \order, #1
+        blo     1b
+#endif
+        @ Perform all needed subtractions to keep only the remainder.
+        @ Do comparisons in batch of 4 first.
+        subs    \order, \order, #3              @ yes, 3 is intended here
+        blt     2f
+1:      cmp     \dividend, \divisor             @ four trial subtractions per pass, at shifts 0..3
+        subhs   \dividend, \dividend, \divisor
+        cmp     \dividend, \divisor,  lsr #1
+        subhs   \dividend, \dividend, \divisor, lsr #1
+        cmp     \dividend, \divisor,  lsr #2
+        subhs   \dividend, \dividend, \divisor, lsr #2
+        cmp     \dividend, \divisor,  lsr #3
+        subhs   \dividend, \dividend, \divisor, lsr #3
+        cmp     \dividend, #1
+        mov     \divisor, \divisor, lsr #4
+        subges  \order, \order, #4
+        bge     1b
+        tst     \order, #3                      @ done if no partial batch is left ...
+        teqne   \dividend, #0                   @ ... or if the dividend is already zero
+        beq     5f
+        @ Either 1, 2 or 3 comparison/subtractions are left.
+2:      cmn     \order, #2                      @ compare order with -2 to pick the entry point
+        blt     4f
+        beq     3f
+        cmp     \dividend, \divisor
+        subhs   \dividend, \dividend, \divisor
+        mov     \divisor,  \divisor,  lsr #1
+3:      cmp     \dividend, \divisor
+        subhs   \dividend, \dividend, \divisor
+        mov     \divisor,  \divisor,  lsr #1
+4:      cmp     \dividend, \divisor
+        subhs   \dividend, \dividend, \divisor
+5:
+.endm
+ENTRY(__udivsi3)                                @ unsigned divide: r0 = r0 / r1
+        subs    r2, r1, #1                      @ r2 = divisor - 1
+        moveq   pc, lr                          @ divisor is 1: r0 is already the quotient
+        bcc     Ldiv0                           @ borrow: divisor was 0
+        cmp     r0, r1
+        bls     11f                             @ dividend not above divisor: quotient is 0 or 1
+        tst     r1, r2
+        beq     12f                             @ divisor is a power of two: plain shift
+        ARM_DIV_BODY r0, r1, r2, r3
+        mov     r0, r2                          @ quotient was built in r2
+        mov     pc, lr
+11:     moveq   r0, #1                          @ equal operands give 1
+        movne   r0, #0                          @ smaller dividend gives 0
+        mov     pc, lr
+12:     ARM_DIV2_ORDER r1, r2
+        mov     r0, r0, lsr r2
+        mov     pc, lr
+ENTRY(__umodsi3)                                @ unsigned modulo: r0 = r0 % r1
+        subs    r2, r1, #1                      @ compare divisor with 1
+        bcc     Ldiv0                           @ borrow: divisor was 0
+        cmpne   r0, r1                          @ compare dividend with divisor
+        moveq   r0, #0                          @ divisor is 1 or equals dividend: remainder 0
+        tsthi   r1, r2                          @ see if divisor is power of 2
+        andeq   r0, r0, r2                      @ power of two: mask with divisor - 1
+        movls   pc, lr                          @ return for all easy cases; a smaller dividend is returned as is
+        ARM_MOD_BODY r0, r1, r2, r3
+        mov     pc, lr
+ENTRY(__divsi3)                                 @ signed divide: r0 = r0 / r1
+        cmp     r1, #0
+        eor     ip, r0, r1                      @ save the sign of the result.
+        beq     Ldiv0
+        rsbmi   r1, r1, #0                      @ loops below use unsigned.
+        subs    r2, r1, #1                      @ division by 1 or -1 ?
+        beq     10f
+        movs    r3, r0
+        rsbmi   r3, r0, #0                      @ positive dividend value
+        cmp     r3, r1
+        bls     11f                             @ magnitude of dividend not above that of divisor
+        tst     r1, r2                          @ divisor is power of 2 ?
+        beq     12f
+        ARM_DIV_BODY r3, r1, r0, r2
+        cmp     ip, #0                          @ negate the quotient when the signs differed
+        rsbmi   r0, r0, #0
+        mov     pc, lr
+10:     teq     ip, r0                          @ same sign ?
+        rsbmi   r0, r0, #0                      @ divisor was -1: negate the dividend
+        mov     pc, lr
+11:     movlo   r0, #0                          @ smaller magnitude: quotient 0
+        moveq   r0, ip, asr #31                 @ equal magnitude: 0 or -1 from the sign ...
+        orreq   r0, r0, #1                      @ ... turned into +1 or -1
+        mov     pc, lr
+12:     ARM_DIV2_ORDER r1, r2
+        cmp     ip, #0
+        mov     r0, r3, lsr r2                  @ shift the magnitude, then apply the sign
+        rsbmi   r0, r0, #0
+        mov     pc, lr
+ENTRY(__modsi3)                                 @ signed modulo: r0 = r0 % r1, sign taken from the dividend
+        cmp     r1, #0
+        beq     Ldiv0
+        rsbmi   r1, r1, #0                      @ loops below use unsigned.
+        movs    ip, r0                          @ preserve sign of dividend
+        rsbmi   r0, r0, #0                      @ if negative make positive
+        subs    r2, r1, #1                      @ compare divisor with 1
+        cmpne   r0, r1                          @ compare dividend with divisor
+        moveq   r0, #0
+        tsthi   r1, r2                          @ see if divisor is power of 2
+        andeq   r0, r0, r2
+        bls     10f                             @ easy cases are finished; only the sign is left to apply
+        ARM_MOD_BODY r0, r1, r2, r3
+10:     cmp     ip, #0
+        rsbmi   r0, r0, #0                      @ negative dividend: negate the remainder
+        mov     pc, lr
+Ldiv0:                                  @ common divide-by-zero exit for the four routines above
+        str     lr, [sp, #-4]!          @ push the return address
+        bl      __div0                  @ external handler, not defined in this file
+        mov     r0, #0                  @ About as wrong as it could be.
+        ldr     pc, [sp], #4            @ pop the return address straight into pc
diff --git a/arch/arm/lib/longlong.h b/arch/arm/lib/longlong.h
new file mode 100644
index 000000000000..179eea4edc35
--- /dev/null
+++ b/arch/arm/lib/longlong.h
@@ -0,0 +1,183 @@
+/* longlong.h -- based on code from gcc-2.95.3
+   definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991, 92, 94, 95, 96, 1997, 1998 Free Software Foundation, Inc.
+   This definition file is free software; you can redistribute it
+   and/or modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2, or (at your option) any later version.
+   This definition file is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied
+   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+/* Borrowed from GCC 2.95.3, I Molton 29/07/01 */
+#ifndef SI_TYPE_SIZE
+#define SI_TYPE_SIZE 32         /* word width in bits, unless the includer defined it first */
+#endif
+#define __BITS4 (SI_TYPE_SIZE / 4)                      /* a quarter of the word width, in bits */
+#define __ll_B (1L << (SI_TYPE_SIZE / 2))               /* 2^(half the word width): radix of a half-word digit */
+#define __ll_lowpart(t) ((USItype) (t) % __ll_B)        /* low half-word digit of t */
+#define __ll_highpart(t) ((USItype) (t) / __ll_B)       /* high half-word digit of t */
+/* Define auxiliary asm macros.
+   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand)
+   multiplies two USItype integers MULTIPLIER and MULTIPLICAND,
+   and generates a two-part USItype product in HIGH_PROD and
+   LOW_PROD.
+   2) __umulsidi3(a,b) multiplies two USItype integers A and B,
+   and returns a UDItype product.  This is just a variant of umul_ppmm.
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a two-word unsigned integer, composed by the
+   integers HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and
+   places the quotient in QUOTIENT and the remainder in REMAINDER.
+   HIGH_NUMERATOR must be less than DENOMINATOR for correct operation.
+   If, in addition, the most significant bit of DENOMINATOR must be 1,
+   then the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The
+   quotient is rounded towards 0.
+   5) count_leading_zeros(count, x) counts the number of zero-bits from
+   the msb to the first non-zero bit.  This is the number of steps X
+   needs to be shifted left to set the msb.  Undefined for X == 0.
+   6) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two two-word unsigned integers,
+   composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and
+   LOW_ADDEND_2 respectively.  The result is placed in HIGH_SUM and
+   LOW_SUM.  Overflow (i.e. carry out) is not stored anywhere, and is
+   lost.
+   7) sub_ddmmss(high_difference, low_difference, high_minuend,
+   low_minuend, high_subtrahend, low_subtrahend) subtracts two
+   two-word unsigned integers, composed by HIGH_MINUEND and
+   LOW_MINUEND, and HIGH_SUBTRAHEND and LOW_SUBTRAHEND
+   respectively.  The result is placed in HIGH_DIFFERENCE and
+   LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+   and is lost.
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.  */
+#if defined (__arm__)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) /* (sh:sl) = (ah:al) + (bh:bl): adds sums the low words and sets carry, adc folds it into the high words */ \
+  __asm__ ("adds        %1, %4, %5                                      \n\
+        adc     %0, %2, %3"                                             \
+           : "=r" ((USItype) (sh)),                                     \
+             "=&r" ((USItype) (sl))                                     \
+           : "%r" ((USItype) (ah)),                                     \
+             "rI" ((USItype) (bh)),                                     \
+             "%r" ((USItype) (al)),                                     \
+             "rI" ((USItype) (bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (sh:sl) = (ah:al) - (bh:bl): subs handles the low words and sets the borrow, sbc applies it to the high words */ \
+  __asm__ ("subs        %1, %4, %5                                      \n\
+        sbc     %0, %2, %3"                                             \
+           : "=r" ((USItype) (sh)),                                     \
+             "=&r" ((USItype) (sl))                                     \
+           : "r" ((USItype) (ah)),                                      \
+             "rI" ((USItype) (bh)),                                     \
+             "r" ((USItype) (al)),                                      \
+             "rI" ((USItype) (bl)))
+#define umul_ppmm(xh, xl, a, b) /* (xh:xl) = a * b: both operands are split into 16-bit halves, four mul partial products are formed and the cross terms are added in with carry */ \
+{register USItype __t0, __t1, __t2;                                     \
+  __asm__ ("%@ Inlined umul_ppmm                                        \n\
+        mov     %2, %5, lsr #16                                         \n\
+        mov     %0, %6, lsr #16                                         \n\
+        bic     %3, %5, %2, lsl #16                                     \n\
+        bic     %4, %6, %0, lsl #16                                     \n\
+        mul     %1, %3, %4                                              \n\
+        mul     %4, %2, %4                                              \n\
+        mul     %3, %0, %3                                              \n\
+        mul     %0, %2, %0                                              \n\
+        adds    %3, %4, %3                                              \n\
+        addcs   %0, %0, #65536                                          \n\
+        adds    %1, %1, %3, lsl #16                                     \n\
+        adc     %0, %0, %3, lsr #16"                                    \
+           : "=&r" ((USItype) (xh)),                                    \
+             "=r" ((USItype) (xl)),                                     \
+             "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
+           : "r" ((USItype) (a)),                                       \
+             "r" ((USItype) (b)));}
+#define UMUL_TIME 20    /* NOTE(review): presumably a relative cost estimate inherited from GCC; no user is visible in this file - confirm */
+#define UDIV_TIME 100   /* NOTE(review): presumably the matching cost estimate for udiv_qrnnd - confirm */
+#endif /* __arm__ */
+#define __umulsidi3(u, v) \
+  ({DIunion __prod;     /* double-word result, filled one word at a time */ \
+    umul_ppmm (__prod.s.high, __prod.s.low, u, v);                      \
+    __prod.ll; })
+#define __udiv_qrnnd_c(q, r, n1, n0, d) /* portable C: divide the two-word value (n1:n0) by d, giving quotient q and remainder r; the arguments are evaluated more than once */ \
+  do {                                                                  \
+    USItype __d1, __d0, __q1, __q0;                                     \
+    USItype __r1, __r0, __m;                                            \
+    __d1 = __ll_highpart (d);                                           \
+    __d0 = __ll_lowpart (d);                                            \
+    /* Upper quotient digit: estimate it from the high half of d, then correct.  */ \
+    __r1 = (n1) % __d1;                                                 \
+    __q1 = (n1) / __d1;                                                 \
+    __m = (USItype) __q1 * __d0;                                        \
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
+    if (__r1 < __m)                                                     \
+      {                                                                 \
+        __q1--, __r1 += (d);                                            \
+        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+          if (__r1 < __m)                                               \
+            __q1--, __r1 += (d);                                        \
+      }                                                                 \
+    __r1 -= __m;                                                        \
+    /* Lower quotient digit: the same steps applied to the partial remainder.  */ \
+    __r0 = __r1 % __d1;                                                 \
+    __q0 = __r1 / __d1;                                                 \
+    __m = (USItype) __q0 * __d0;                                        \
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
+    if (__r0 < __m)                                                     \
+      {                                                                 \
+        __q0--, __r0 += (d);                                            \
+        if (__r0 >= (d))                                                \
+          if (__r0 < __m)                                               \
+            __q0--, __r0 += (d);                                        \
+      }                                                                 \
+    __r0 -= __m;                                                        \
+    /* Join the two half-word digits; __r0 is the final remainder.  */  \
+    (q) = (USItype) __q1 * __ll_B | __q0;                               \
+    (r) = __r0;                                                         \
+  } while (0)
+#define UDIV_NEEDS_NORMALIZATION 1      /* per the macro overview above: set when the divisor's most significant bit must be 1 */
+#define udiv_qrnnd __udiv_qrnnd_c       /* udiv_qrnnd is provided by the C implementation */
+#define count_leading_zeros(count, x) \
+  do {                                                                  \
+    USItype __clz_in = (x);     /* evaluate the argument only once */   \
+    USItype __clz_sh;           /* shift bringing the top non-zero chunk down */ \
+                                                                        \
+    if (SI_TYPE_SIZE > 32)                                              \
+      {                                                                 \
+        /* Wide words: step down eight bits at a time.  */              \
+        __clz_sh = SI_TYPE_SIZE - 8;                                    \
+        while (__clz_sh > 0 && ((__clz_in >> __clz_sh) & 0xff) == 0)    \
+          __clz_sh -= 8;                                                \
+      }                                                                 \
+    else if (__clz_in < ((USItype)1<<__BITS4))                          \
+      __clz_sh = 0;                                                     \
+    else if (__clz_in < ((USItype)1<<2*__BITS4))                        \
+      __clz_sh = __BITS4;                                               \
+    else if (__clz_in < ((USItype)1<<3*__BITS4))                        \
+      __clz_sh = 2*__BITS4;                                             \
+    else                                                                \
+      __clz_sh = 3*__BITS4;                                             \
+                                                                        \
+    /* __clz_tab resolves the bit position inside the selected chunk.  */ \
+    (count) = SI_TYPE_SIZE - (__clz_tab[__clz_in >> __clz_sh] + __clz_sh); \
+  } while (0)
diff --git a/arch/arm/lib/lshrdi3.c b/arch/arm/lib/lshrdi3.c
new file mode 100644
index 000000000000..b666f1bad451
--- /dev/null
+++ b/arch/arm/lib/lshrdi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/* Logical (zero-filling) right shift of the double-word U by B bits,
+   built from single-word shifts.  B == 0 returns U unchanged.  */
+DItype
+__lshrdi3 (DItype u, word_type b)
+{
+  DIunion in;
+  DIunion out;
+  word_type left;
+  if (b == 0)
+    return u;
+  in.ll = u;
+  /* Word width minus the shift count: positive while the shift is
+     smaller than one word.  */
+  left = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (left > 0)
+    {
+      /* Shift inside both words; the bits leaving the high word are
+         moved into the top of the low word.  */
+      out.s.high = (USItype)in.s.high >> b;
+      out.s.low = ((USItype)in.s.high << left) | ((USItype)in.s.low >> b);
+    }
+  else
+    {
+      /* Shift by a whole word or more: only the old high word
+         contributes and the new high word is empty.  */
+      out.s.high = 0;
+      out.s.low = (USItype)in.s.high >> -left;
+    }
+  return out.ll;
+}
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
new file mode 100644
index 000000000000..ac34fe55d21a
--- /dev/null
+++ b/arch/arm/lib/memchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm/lib/memchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+        .text
+        .align  5
+/*
+ * Prototype: void *memchr(const void *s, int c, size_t n);
+ *
+ * In:  r0 = s, r1 = c, r2 = n
+ * Out: r0 = address of the first byte equal to (unsigned char)c among the
+ *      first n bytes at s, or 0 when there is no such byte.
+ *
+ * c is reduced to its low 8 bits first, as ISO C requires; without that a
+ * sign-extended or otherwise wide value could never equal a byte fetched
+ * with ldrb.  The count is tested with bmi, so values of n above
+ * 0x80000000 are treated as already exhausted.
+ */
+ENTRY(memchr)
+        and     r1, r1, #0xff           @ compare as unsigned char
+1:      subs    r2, r2, #1              @ one byte fewer left to examine
+        bmi     2f                      @ none left: flags are NE here
+        ldrb    r3, [r0], #1            @ fetch next byte, advance pointer
+        teq     r3, r1                  @ is it the byte we want?
+        bne     1b
+        sub     r0, r0, #1              @ undo the post-increment (flags stay EQ)
+2:      movne   r0, #0                  @ executed on the not-found path only
+        RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
new file mode 100644
index 000000000000..f5a593ceb8cc
--- /dev/null
+++ b/arch/arm/lib/memcpy.S
@@ -0,0 +1,393 @@
+/*
+ *  linux/arch/arm/lib/memcpy.S
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+#define ENTER   /* build a stack frame: pushes r0, r4-r9, fp, the entry sp (held in ip), lr and pc, then points fp at the top of the frame */ \
+                mov     ip,sp   ;\
+                stmfd   sp!,{r0,r4-r9,fp,ip,lr,pc}      ;\
+                sub     fp,ip,#4
+#define EXIT    /* unwind the ENTER frame relative to fp and return; r0 is in the reload list. LOADREGS comes from asm/assembler.h (not shown here) */ \
+                LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
+#define EXITEQ  /* as EXIT, presumably executed only when the Z flag is set (eq condition) */ \
+                LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
+/*
+ * Prototype: void *memcpy(void *to, const void *from, unsigned long n);
+ *            void *memmove(void *to, const void *from, unsigned long n);
+ *
+ * Both names enter the same code with r0 = to, r1 = from, r2 = n.  The
+ * copy runs forwards when from >= to and backwards (label 23) otherwise.
+ * ENTER saves r0 and EXIT/EXITEQ list it for reloading, so r0 on return
+ * is the original destination pointer.
+ */
+ENTRY(memcpy)
+ENTRY(memmove)
+                ENTER
+                cmp     r1, r0
+                bcc     23f                     /* from < to: copy backwards, starting at the end */
+                subs    r2, r2, #4              /* r2 is kept biased by -4 from here on */
+                blt     6f                      /* fewer than 4 bytes: byte tail only */
+        PLD(    pld     [r1, #0]                )
+                ands    ip, r0, #3
+                bne     7f                      /* destination is not word aligned */
+                ands    ip, r1, #3
+                bne     8f                      /* source is not word aligned */
+1:              subs    r2, r2, #8              /* both pointers are word aligned from here */
+                blt     5f
+                subs    r2, r2, #20
+                blt     4f
+        PLD(    pld     [r1, #28]               )
+        PLD(    subs    r2, r2, #64             )
+        PLD(    blt     3f                      )
+2:      PLD(    pld     [r1, #60]               )
+        PLD(    pld     [r1, #92]               )
+                ldmia   r1!, {r3 - r9, ip}      /* 32 bytes per load/store pair */
+                subs    r2, r2, #32
+                stmgeia r0!, {r3 - r9, ip}
+                ldmgeia r1!, {r3 - r9, ip}
+                subges  r2, r2, #32
+                stmia   r0!, {r3 - r9, ip}
+                bge     2b
+3:      PLD(    ldmia   r1!, {r3 - r9, ip}      )
+        PLD(    adds    r2, r2, #32             )
+        PLD(    stmgeia r0!, {r3 - r9, ip}      )
+        PLD(    ldmgeia r1!, {r3 - r9, ip}      )
+        PLD(    subges  r2, r2, #32             )
+        PLD(    stmia   r0!, {r3 - r9, ip}      )
+4:              cmn     r2, #16                 /* fewer than 32 bytes left: try a 16 byte step, then a 12 byte step */
+                ldmgeia r1!, {r3 - r6}
+                subge   r2, r2, #16
+                stmgeia r0!, {r3 - r6}
+                adds    r2, r2, #20
+                ldmgeia r1!, {r3 - r5}
+                subge   r2, r2, #12
+                stmgeia r0!, {r3 - r5}
+5:              adds    r2, r2, #8              /* fewer than 12 bytes left: copy one or two whole words */
+                blt     6f
+                subs    r2, r2, #4
+                ldrlt   r3, [r1], #4
+                ldmgeia r1!, {r4, r5}
+                subge   r2, r2, #4
+                strlt   r3, [r0], #4
+                stmgeia r0!, {r4, r5}
+6:              adds    r2, r2, #4              /* remove the bias: r2 = 0..3 trailing bytes */
+                EXITEQ
+                cmp     r2, #2                  /* copy 1, 2 (ge) or 3 (gt) bytes */
+                ldrb    r3, [r1], #1
+                ldrgeb  r4, [r1], #1
+                ldrgtb  r5, [r1], #1
+                strb    r3, [r0], #1
+                strgeb  r4, [r0], #1
+                strgtb  r5, [r0], #1
+                EXIT
+7:              rsb     ip, ip, #4              /* ip = number of bytes needed to word-align the destination */
+                cmp     ip, #2
+                ldrb    r3, [r1], #1
+                ldrgeb  r4, [r1], #1
+                ldrgtb  r5, [r1], #1
+                strb    r3, [r0], #1
+                strgeb  r4, [r0], #1
+                strgtb  r5, [r0], #1
+                subs    r2, r2, ip
+                blt     6b
+                ands    ip, r1, #3
+                beq     1b
+8:              bic     r1, r1, #3              /* source is ip bytes past a word boundary: read whole words and merge them */
+                ldr     r7, [r1], #4
+                cmp     ip, #2
+                bgt     18f                     /* ip == 3 */
+                beq     13f                     /* ip == 2; ip == 1 falls through */
+                cmp     r2, #12
+                blt     11f
+        PLD(    pld     [r1, #12]               )
+                sub     r2, r2, #12
+        PLD(    subs    r2, r2, #32             )
+        PLD(    blt     10f                     )
+        PLD(    pld     [r1, #28]               )
+9:      PLD(    pld     [r1, #44]               )
+10:             mov     r3, r7, pull #8         /* pull/push come from asm/assembler.h (not shown); presumably endian-dependent shifts */
+                ldmia   r1!, {r4 - r7}
+                subs    r2, r2, #16
+                orr     r3, r3, r4, push #24
+                mov     r4, r4, pull #8
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                stmia   r0!, {r3 - r6}
+                bge     9b
+        PLD(    cmn     r2, #32                 )
+        PLD(    bge     10b                     )
+        PLD(    add     r2, r2, #32             )
+                adds    r2, r2, #12
+                blt     12f
+11:             mov     r3, r7, pull #8
+                ldr     r7, [r1], #4
+                subs    r2, r2, #4
+                orr     r3, r3, r7, push #24
+                str     r3, [r0], #4
+                bge     11b
+12:             sub     r1, r1, #3              /* step back to the next uncopied source byte */
+                b       6b
+13:             cmp     r2, #12                 /* source offset 2 within its word */
+                blt     16f
+        PLD(    pld     [r1, #12]               )
+                sub     r2, r2, #12
+        PLD(    subs    r2, r2, #32             )
+        PLD(    blt     15f                     )
+        PLD(    pld     [r1, #28]               )
+14:     PLD(    pld     [r1, #44]               )
+15:             mov     r3, r7, pull #16
+                ldmia   r1!, {r4 - r7}
+                subs    r2, r2, #16
+                orr     r3, r3, r4, push #16
+                mov     r4, r4, pull #16
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                stmia   r0!, {r3 - r6}
+                bge     14b
+        PLD(    cmn     r2, #32                 )
+        PLD(    bge     15b                     )
+        PLD(    add     r2, r2, #32             )
+                adds    r2, r2, #12
+                blt     17f
+16:             mov     r3, r7, pull #16
+                ldr     r7, [r1], #4
+                subs    r2, r2, #4
+                orr     r3, r3, r7, push #16
+                str     r3, [r0], #4
+                bge     16b
+17:             sub     r1, r1, #2              /* step back to the next uncopied source byte */
+                b       6b
+18:             cmp     r2, #12                 /* source offset 3 within its word */
+                blt     21f
+        PLD(    pld     [r1, #12]               )
+                sub     r2, r2, #12
+        PLD(    subs    r2, r2, #32             )
+        PLD(    blt     20f                     )
+        PLD(    pld     [r1, #28]               )
+19:     PLD(    pld     [r1, #44]               )
+20:             mov     r3, r7, pull #24
+                ldmia   r1!, {r4 - r7}
+                subs    r2, r2, #16
+                orr     r3, r3, r4, push #8
+                mov     r4, r4, pull #24
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                stmia   r0!, {r3 - r6}
+                bge     19b
+        PLD(    cmn     r2, #32                 )
+        PLD(    bge     20b                     )
+        PLD(    add     r2, r2, #32             )
+                adds    r2, r2, #12
+                blt     22f
+21:             mov     r3, r7, pull #24
+                ldr     r7, [r1], #4
+                subs    r2, r2, #4
+                orr     r3, r3, r7, push #8
+                str     r3, [r0], #4
+                bge     21b
+22:             sub     r1, r1, #1              /* step back to the next uncopied source byte */
+                b       6b
+23:             add     r1, r1, r2              /* backward copy: start one past the end of each buffer */
+                add     r0, r0, r2
+                subs    r2, r2, #4              /* same -4 bias as the forward path */
+                blt     29f
+        PLD(    pld     [r1, #-4]               )
+                ands    ip, r0, #3
+                bne     30f                     /* destination end is not word aligned */
+                ands    ip, r1, #3
+                bne     31f                     /* source end is not word aligned */
+24:             subs    r2, r2, #8              /* both end pointers are word aligned from here */
+                blt     28f
+                subs    r2, r2, #20
+                blt     27f
+        PLD(    pld     [r1, #-32]              )
+        PLD(    subs    r2, r2, #64             )
+        PLD(    blt     26f                     )
+25:     PLD(    pld     [r1, #-64]              )
+        PLD(    pld     [r1, #-96]              )
+                ldmdb   r1!, {r3 - r9, ip}      /* 32 bytes per load/store pair, descending */
+                subs    r2, r2, #32
+                stmgedb r0!, {r3 - r9, ip}
+                ldmgedb r1!, {r3 - r9, ip}
+                subges  r2, r2, #32
+                stmdb   r0!, {r3 - r9, ip}
+                bge     25b
+26:     PLD(    ldmdb   r1!, {r3 - r9, ip}      )
+        PLD(    adds    r2, r2, #32             )
+        PLD(    stmgedb r0!, {r3 - r9, ip}      )
+        PLD(    ldmgedb r1!, {r3 - r9, ip}      )
+        PLD(    subges  r2, r2, #32             )
+        PLD(    stmdb   r0!, {r3 - r9, ip}      )
+27:             cmn     r2, #16                 /* fewer than 32 bytes left: try a 16 byte step, then a 12 byte step */
+                ldmgedb r1!, {r3 - r6}
+                subge   r2, r2, #16
+                stmgedb r0!, {r3 - r6}
+                adds    r2, r2, #20
+                ldmgedb r1!, {r3 - r5}
+                subge   r2, r2, #12
+                stmgedb r0!, {r3 - r5}
+28:             adds    r2, r2, #8              /* fewer than 12 bytes left: copy one or two whole words */
+                blt     29f
+                subs    r2, r2, #4
+                ldrlt   r3, [r1, #-4]!
+                ldmgedb r1!, {r4, r5}
+                subge   r2, r2, #4
+                strlt   r3, [r0, #-4]!
+                stmgedb r0!, {r4, r5}
+29:             adds    r2, r2, #4              /* remove the bias: r2 = 0..3 bytes still to copy */
+                EXITEQ
+                cmp     r2, #2                  /* copy 1, 2 (ge) or 3 (gt) bytes, descending */
+                ldrb    r3, [r1, #-1]!
+                ldrgeb  r4, [r1, #-1]!
+                ldrgtb  r5, [r1, #-1]!
+                strb    r3, [r0, #-1]!
+                strgeb  r4, [r0, #-1]!
+                strgtb  r5, [r0, #-1]!
+                EXIT
+30:             cmp     ip, #2                  /* copy ip bytes so that the destination end becomes word aligned */
+                ldrb    r3, [r1, #-1]!
+                ldrgeb  r4, [r1, #-1]!
+                ldrgtb  r5, [r1, #-1]!
+                strb    r3, [r0, #-1]!
+                strgeb  r4, [r0, #-1]!
+                strgtb  r5, [r0, #-1]!
+                subs    r2, r2, ip
+                blt     29b
+                ands    ip, r1, #3
+                beq     24b
+31:             bic     r1, r1, #3              /* source end is ip bytes past a word boundary: read whole words and merge them */
+                ldr     r3, [r1], #0
+                cmp     ip, #2
+                blt     41f                     /* ip == 1 */
+                beq     36f                     /* ip == 2; ip == 3 falls through */
+                cmp     r2, #12
+                blt     34f
+        PLD(    pld     [r1, #-16]              )
+                sub     r2, r2, #12
+        PLD(    subs    r2, r2, #32             )
+        PLD(    blt     33f                     )
+        PLD(    pld     [r1, #-32]              )
+32:     PLD(    pld     [r1, #-48]              )
+33:             mov     r7, r3, push #8
+                ldmdb   r1!, {r3, r4, r5, r6}
+                subs    r2, r2, #16
+                orr     r7, r7, r6, pull #24
+                mov     r6, r6, push #8
+                orr     r6, r6, r5, pull #24
+                mov     r5, r5, push #8
+                orr     r5, r5, r4, pull #24
+                mov     r4, r4, push #8
+                orr     r4, r4, r3, pull #24
+                stmdb   r0!, {r4, r5, r6, r7}
+                bge     32b
+        PLD(    cmn     r2, #32                 )
+        PLD(    bge     33b                     )
+        PLD(    add     r2, r2, #32             )
+                adds    r2, r2, #12
+                blt     35f
+34:             mov     ip, r3, push #8
+                ldr     r3, [r1, #-4]!
+                subs    r2, r2, #4
+                orr     ip, ip, r3, pull #24
+                str     ip, [r0, #-4]!
+                bge     34b
+35:             add     r1, r1, #3              /* restore the unaligned source position */
+                b       29b
+36:             cmp     r2, #12                 /* source end offset 2 within its word */
+                blt     39f
+        PLD(    pld     [r1, #-16]              )
+                sub     r2, r2, #12
+        PLD(    subs    r2, r2, #32             )
+        PLD(    blt     38f                     )
+        PLD(    pld     [r1, #-32]              )
+37:     PLD(    pld     [r1, #-48]              )
+38:             mov     r7, r3, push #16
+                ldmdb   r1!, {r3, r4, r5, r6}
+                subs    r2, r2, #16
+                orr     r7, r7, r6, pull #16
+                mov     r6, r6, push #16
+                orr     r6, r6, r5, pull #16
+                mov     r5, r5, push #16
+                orr     r5, r5, r4, pull #16
+                mov     r4, r4, push #16
+                orr     r4, r4, r3, pull #16
+                stmdb   r0!, {r4, r5, r6, r7}
+                bge     37b
+        PLD(    cmn     r2, #32                 )
+        PLD(    bge     38b                     )
+        PLD(    add     r2, r2, #32             )
+                adds    r2, r2, #12
+                blt     40f
+39:             mov     ip, r3, push #16
+                ldr     r3, [r1, #-4]!
+                subs    r2, r2, #4
+                orr     ip, ip, r3, pull #16
+                str     ip, [r0, #-4]!
+                bge     39b
+40:             add     r1, r1, #2              /* restore the unaligned source position */
+                b       29b
+41:             cmp     r2, #12                 /* source end offset 1 within its word */
+                blt     44f
+        PLD(    pld     [r1, #-16]              )
+                sub     r2, r2, #12
+        PLD(    subs    r2, r2, #32             )
+        PLD(    blt     43f                     )
+        PLD(    pld     [r1, #-32]              )
+42:     PLD(    pld     [r1, #-48]              )
+43:             mov     r7, r3, push #24
+                ldmdb   r1!, {r3, r4, r5, r6}
+                subs    r2, r2, #16
+                orr     r7, r7, r6, pull #8
+                mov     r6, r6, push #24
+                orr     r6, r6, r5, pull #8
+                mov     r5, r5, push #24
+                orr     r5, r5, r4, pull #8
+                mov     r4, r4, push #24
+                orr     r4, r4, r3, pull #8
+                stmdb   r0!, {r4, r5, r6, r7}
+                bge     42b
+        PLD(    cmn     r2, #32                 )
+        PLD(    bge     43b                     )
+        PLD(    add     r2, r2, #32             )
+                adds    r2, r2, #12
+                blt     45f
+44:             mov     ip, r3, push #24
+                ldr     r3, [r1, #-4]!
+                subs    r2, r2, #4
+                orr     ip, ip, r3, pull #8
+                str     ip, [r0, #-4]!
+                bge     44b
+45:             add     r1, r1, #1              /* restore the unaligned source position */
+                b       29b
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
new file mode 100644
index 000000000000..a1795f599937
--- /dev/null
+++ b/arch/arm/lib/memset.S
@@ -0,0 +1,80 @@
+/*
+ *  linux/arch/arm/lib/memset.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+        .text
+        .align  5
+        .word   0
+1:      subs    r2, r2, #4              @ 1 do we have enough
+        blt     5f                      @ 1 bytes to align with?
+        cmp     r3, #2                  @ 1
+        strltb  r1, [r0], #1            @ 1
+        strleb  r1, [r0], #1            @ 1
+        strb    r1, [r0], #1            @ 1
+        add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
+/*
+ * The pointer is now aligned and the length is adjusted.  Try doing the
+ * memset again.
+ *
+ * memset entry point.  As used below: r0 = destination pointer, r1 = fill
+ * byte, r2 = byte count.  The code at label 1 above is entered from here
+ * when r0 is not word aligned (r3 = r0 & 3); it stores 4 - r3 single bytes
+ * and then falls through into memset again.
+ *
+ * NOTE(review): r0 is post-incremented by every store and is not restored
+ * before returning, so on exit it points past the filled area rather than
+ * at the start of the buffer - confirm no caller relies on the ISO C
+ * return value of memset.
+ */
+ENTRY(memset)
+        ands    r3, r0, #3              @ 1 unaligned?
+        bne     1b                      @ 1
+/*
+ * we know that the pointer in r0 is aligned to a word boundary.
+ */
+        orr     r1, r1, r1, lsl #8      /* replicate the fill byte into a halfword ... */
+        orr     r1, r1, r1, lsl #16     /* ... and into a full word; NOTE(review): assumes r1 < 256 on entry - confirm */
+        mov     r3, r1
+        cmp     r2, #16
+        blt     4f                      /* fewer than 16 bytes: skip the multi-register stores */
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+        str     lr, [sp, #-4]!
+        mov     ip, r1
+        mov     lr, r1
+2:      subs    r2, r2, #64             /* at least 64 bytes left? */
+        stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
+        stmgeia r0!, {r1, r3, ip, lr}
+        stmgeia r0!, {r1, r3, ip, lr}
+        stmgeia r0!, {r1, r3, ip, lr}
+        bgt     2b
+        LOADREGS(eqfd, sp!, {pc})       @ Now <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+        tst     r2, #32
+        stmneia r0!, {r1, r3, ip, lr}
+        stmneia r0!, {r1, r3, ip, lr}
+        tst     r2, #16
+        stmneia r0!, {r1, r3, ip, lr}
+        ldr     lr, [sp], #4            /* restore the return address saved before the loop */
+4:      tst     r2, #8
+        stmneia r0!, {r1, r3}
+        tst     r2, #4
+        strne   r1, [r0], #4
+/*
+ * When we get here, we've got less than 4 bytes to set.  We
+ * may have an unaligned pointer as well.
+ */
+5:      tst     r2, #2
+        strneb  r1, [r0], #1
+        strneb  r1, [r0], #1
+        tst     r2, #1
+        strneb  r1, [r0], #1
+        RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
new file mode 100644
index 000000000000..51ccc60160fd
--- /dev/null
+++ b/arch/arm/lib/memzero.S
@@ -0,0 +1,80 @@
+/*
+ *  linux/arch/arm/lib/memzero.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+        .text
+        .align  5
+        .word   0                       @ pad: 4 + 7 insns * 4 = 32 bytes, presumably keeps the entry 32-byte aligned
+/*
+ * Align the pointer in r0.  r3 contains the number of bytes that we are
+ * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
+ * don't bother; we use byte stores instead.
+ *
+ * This stub is entered through the "bne 1b" in __memzero below, so r2
+ * already holds zero.  It stores 4 - r3 zero bytes and then falls
+ * through into __memzero, which re-checks the (now aligned) pointer.
+ */
+1:      subs    r1, r1, #4              @ 1 do we have enough
+        blt     5f                      @ 1 bytes to align with?
+        cmp     r3, #2                  @ 1 r3 is 1, 2 or 3 here
+        strltb  r2, [r0], #1            @ 1 only when r3 == 1
+        strleb  r2, [r0], #1            @ 1 only when r3 <= 2
+        strb    r2, [r0], #1            @ 1 always
+        add     r1, r1, r3              @ 1 (r1 = r1 - (4 - r3))
+/*
+ * The pointer is now aligned and the length is adjusted.  Try doing the
+ * memzero again.
+ *
+ * __memzero: store r1 zero bytes starting at the address in r0.
+ *
+ * Entry : r0 = pointer, r1 = number of bytes
+ * Exit  : r0 has been advanced by the post-incrementing stores
+ * Uses  : r1, r2, r3 and (for 16 bytes or more) ip; lr is saved on the
+ *         stack while it is borrowed as a fourth zero register.
+ */
+ENTRY(__memzero)
+        mov     r2, #0                  @ 1
+        ands    r3, r0, #3              @ 1 unaligned?
+        bne     1b                      @ 1
+/*
+ * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
+ */
+        cmp     r1, #16                 @ 1 we can skip this chunk if we
+        blt     4f                      @ 1 have < 16 bytes
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+        str     lr, [sp, #-4]!          @ 1
+        mov     ip, r2                  @ 1
+        mov     lr, r2                  @ 1
+3:      subs    r1, r1, #64             @ 1 write 64 bytes out per loop
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        bgt     3b                      @ 1
+        LOADREGS(eqfd, sp!, {pc})       @ 1/2 quick exit
+/*
+ * No need to correct the count; we're only testing bits from now on
+ * (r1 is now the remaining count minus 64, so its low six bits still
+ * equal the remaining count).  The quick exit above is taken when the
+ * count reached exactly zero; LOADREGS is defined outside this file and
+ * presumably pops the saved lr straight into pc.
+ */
+        tst     r1, #32                 @ 1 32 bytes or more?
+        stmneia r0!, {r2, r3, ip, lr}   @ 4
+        stmneia r0!, {r2, r3, ip, lr}   @ 4
+        tst     r1, #16                 @ 1 16 bytes or more?
+        stmneia r0!, {r2, r3, ip, lr}   @ 4
+        ldr     lr, [sp], #4            @ 1 restore the return address
+4:      tst     r1, #8                  @ 1 8 bytes or more?
+        stmneia r0!, {r2, r3}           @ 2
+        tst     r1, #4                  @ 1 4 bytes or more?
+        strne   r2, [r0], #4            @ 1
+/*
+ * When we get here, we've got less than 4 bytes to zero.  We
+ * may have an unaligned pointer as well.
+ * (When reached from the alignment stub, r1 has already had 4 subtracted;
+ * bits 0 and 1, the only ones tested below, are unaffected by that.)
+ */
+5:      tst     r1, #2                  @ 1 2 bytes or more?
+        strneb  r2, [r0], #1            @ 1
+        strneb  r2, [r0], #1            @ 1
+        tst     r1, #1                  @ 1 a byte left over
+        strneb  r2, [r0], #1            @ 1
+        RETINSTR(mov,pc,lr)             @ 1
diff --git a/arch/arm/lib/muldi3.c b/arch/arm/lib/muldi3.c
new file mode 100644
index 000000000000..44d611b1cfdb
--- /dev/null
+++ b/arch/arm/lib/muldi3.c
@@ -0,0 +1,77 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/*
+ * umul_ppmm(xh, xl, a, b)
+ *
+ * Unsigned 32 x 32 -> 64 bit multiply.  a and b are the 32-bit operands;
+ * xh receives the high word of the product and xl the low word.
+ *
+ * The operands are split into 16-bit halves (%2 = a >> 16, %3 = a & 0xffff,
+ * %0 = b >> 16, %4 = b & 0xffff) and the four partial products are formed
+ * with plain "mul".  The two cross products are summed first; a carry out
+ * of that sum is worth 65536 in the high word.  The cross sum is then
+ * added in at bit 16, with "adc" propagating the carry from the low word.
+ *
+ * The "adds"/"addcs"/"adc" sequence modifies the condition flags, so "cc"
+ * is listed as clobbered; without it the compiler is entitled to assume
+ * the flags survive the asm.
+ */
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2;                                     \
+  __asm__ ("%@ Inlined umul_ppmm                                        \n\
+        mov     %2, %5, lsr #16                                         \n\
+        mov     %0, %6, lsr #16                                         \n\
+        bic     %3, %5, %2, lsl #16                                     \n\
+        bic     %4, %6, %0, lsl #16                                     \n\
+        mul     %1, %3, %4                                              \n\
+        mul     %4, %2, %4                                              \n\
+        mul     %3, %0, %3                                              \n\
+        mul     %0, %2, %0                                              \n\
+        adds    %3, %4, %3                                              \n\
+        addcs   %0, %0, #65536                                          \n\
+        adds    %1, %1, %3, lsl #16                                     \n\
+        adc     %0, %0, %3, lsr #16"                                    \
+           : "=&r" ((USItype) (xh)),                                    \
+             "=r" ((USItype) (xl)),                                     \
+             "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
+           : "r" ((USItype) (a)),                                       \
+             "r" ((USItype) (b))                                        \
+           : "cc");}
+/*
+ * __umulsidi3(u, v): widening unsigned multiply.  Passes u and v to
+ * umul_ppmm, which fills in the high and low halves of a DIunion, and
+ * yields that union's ll member as the value of the statement expression.
+ */
+#define __umulsidi3(u, v)                                               \
+  ({                                                                    \
+    DIunion __prod;                                                     \
+    umul_ppmm (__prod.s.high, __prod.s.low, u, v);                      \
+    __prod.ll;                                                          \
+  })
+/*
+ * __muldi3: multiply two DItype values and return the DItype product.
+ *
+ * Both operands are viewed as (high, low) word pairs through DIunion.
+ * The low words are multiplied in full by __umulsidi3; the two cross
+ * products (low * high and high * low) are computed in USItype, so only
+ * their low words are kept, and added into the high word of the result.
+ * The high * high term is not computed at all.
+ */
+DItype
+__muldi3 (DItype u, DItype v)
+{
+  DIunion lhs, rhs, prod;
+  USItype cross;
+  lhs.ll = u;
+  rhs.ll = v;
+  /* Full double-width product of the two low words.  */
+  prod.ll = __umulsidi3 (lhs.s.low, rhs.s.low);
+  /* Cross terms only contribute to the high word.  */
+  cross = (USItype) lhs.s.low * (USItype) rhs.s.high;
+  cross += (USItype) lhs.s.high * (USItype) rhs.s.low;
+  prod.s.high += cross;
+  return prod.ll;
+}
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
new file mode 100644
index 000000000000..b09398d95aac
--- /dev/null
+++ b/arch/arm/lib/putuser.S
@@ -0,0 +1,76 @@
+/*
+ *  linux/arch/arm/lib/putuser.S
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make
+ * them more efficient, especially as they return an error
+ * value in addition to the "real" return value.
+ *
+ * __put_user_X
+ *
+ * Inputs:      r0 contains the address
+ *              r2, r3 contains the value
+ * Outputs:     r0 is the error code
+ *              lr corrupted
+ *
+ * No other registers must be altered.  (see include/asm-arm/uaccess.h
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000
+ * Note also that it is intended that __put_user_bad is not global.
+ */
+#include <asm/constants.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+/*
+ * __put_user_1: store the low byte of r2 at the address in r0 using
+ * strbt, then return 0 in r0.  The store carries the local label 1 so
+ * that the __ex_table entry at the end of this file can name it and
+ * pair it with __put_user_bad.
+ */
+        .global __put_user_1
+__put_user_1:
+1:      strbt   r2, [r0]
+        mov     r0, #0                  @ success
+        mov     pc, lr
+/*
+ * __put_user_2: store the low 16 bits of r2 at the address in r0 as two
+ * separate byte stores, then return 0 in r0.  ip is used as scratch for
+ * bits 8 and up of r2 (only its low byte is stored).  Without __ARMEB__
+ * the low byte goes to the lower address; with __ARMEB__ the order is
+ * reversed.  Both stores are labelled (2 and 3) for the __ex_table at
+ * the end of this file.
+ */
+        .global __put_user_2
+__put_user_2:
+        mov     ip, r2, lsr #8          @ ip = r2 shifted down one byte
+#ifndef __ARMEB__
+2:      strbt   r2, [r0], #1
+3:      strbt   ip, [r0]
+#else
+2:      strbt   ip, [r0], #1
+3:      strbt   r2, [r0]
+#endif
+        mov     r0, #0                  @ success
+        mov     pc, lr
+/*
+ * __put_user_4: store the word in r2 at the address in r0 using strt,
+ * then return 0 in r0.  The store is labelled 4 for the __ex_table at
+ * the end of this file.
+ */
+        .global __put_user_4
+__put_user_4:
+4:      strt    r2, [r0]
+        mov     r0, #0                  @ success
+        mov     pc, lr
+/*
+ * __put_user_8: store r2 at the address in r0 and r3 at the following
+ * word, each with strt, then return 0 in r0.  The two stores are
+ * labelled 5 and 6 for the __ex_table at the end of this file; if the
+ * second one is the store named by a taken table entry, the first word
+ * has already been written.
+ */
+        .global __put_user_8
+__put_user_8:
+5:      strt    r2, [r0], #4            @ first word, r0 -> second word
+6:      strt    r3, [r0]
+        mov     r0, #0                  @ success
+        mov     pc, lr
+/*
+ * __put_user_bad: common error exit for all of the stores above.  It
+ * returns -EFAULT in r0.  It has no .global directive (the file header
+ * notes that this is intentional).
+ */
+__put_user_bad:
+        mov     r0, #-EFAULT
+        mov     pc, lr
+/*
+ * Exception table: one pair of words per labelled store above, giving
+ * the address of the store and the address of __put_user_bad.  The code
+ * that consumes this table lives outside this file; presumably it resumes
+ * execution at the second address when the first one faults.
+ */
+.section __ex_table, "a"
+        .long   1b, __put_user_bad
+        .long   2b, __put_user_bad
+        .long   3b, __put_user_bad
+        .long   4b, __put_user_bad
+        .long   5b, __put_user_bad
+        .long   6b, __put_user_bad
+.previous
diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S
new file mode 100644
index 000000000000..8f337df5d99b
--- /dev/null
+++ b/arch/arm/lib/setbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm/lib/setbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Purpose  : Function to set a bit
+ * Prototype: int set_bit(int bit, void *addr)
+ *
+ * Entry    : r0 = bit number, r1 = base address
+ * Uses     : r2, r3, ip
+ *
+ * _set_bit_be adjusts the bit number and falls through into _set_bit_le.
+ * The byte at addr + (bit >> 3) is read, modified and written back between
+ * save_and_disable_irqs and restore_irqs (both defined outside this file).
+ * No result is loaded into r0 by this code, despite the int return type in
+ * the prototype above.
+ */
+ENTRY(_set_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_set_bit_le)
+                and     r2, r0, #7              @ bit position within the byte
+                mov     r3, #1
+                mov     r3, r3, lsl r2          @ r3 = mask for that bit
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1, r0, lsr #3]    @ fetch the byte holding the bit
+                orr     r2, r2, r3
+                strb    r2, [r1, r0, lsr #3]
+                restore_irqs ip
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
new file mode 100644
index 000000000000..5b9b493733fc
--- /dev/null
+++ b/arch/arm/lib/strchr.S
@@ -0,0 +1,26 @@
+/*
+ *  linux/arch/arm/lib/strchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+                .align  5
+/*
+ * strchr: find the first occurrence of a character in a NUL-terminated
+ * string.
+ *
+ * Entry : r0 = string, r1 = character to search for
+ * Exit  : r0 = address of the first matching byte, or 0 if the terminator
+ *         is reached first.  Searching for 0 returns the address of the
+ *         terminator itself, because the match test comes first.
+ * Uses  : r1 (masked to 8 bits), r2 (current byte)
+ */
+ENTRY(strchr)
+                and     r1, r1, #0xff           @ compare as a byte
+1:              ldrb    r2, [r0], #1            @ fetch byte, r0 -> next byte
+                teq     r2, r1                  @ match?
+                teqne   r2, #0                  @ if not, terminator?
+                bne     1b                      @ neither: keep scanning
+                teq     r2, r1                  @ which of the two stopped us?
+                movne   r0, #0                  @ terminator: not found
+                subeq   r0, r0, #1              @ match: undo the post-increment
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S
new file mode 100644
index 000000000000..629cc8775276
--- /dev/null
+++ b/arch/arm/lib/strncpy_from_user.S
@@ -0,0 +1,43 @@
+/*
+ *  linux/arch/arm/lib/strncpy_from_user.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+        .text
+        .align  5
+/*
+ * Copy a string from user space to kernel space.
+ *  r0 = dst, r1 = src, r2 = byte length
+ * returns the number of characters copied (strlen of copied string),
+ *  -EFAULT on exception, or "len" if we fill the whole buffer
+ *
+ * ip keeps the original src so the result can be computed as a pointer
+ * difference; r3 holds the byte being copied.  When the buffer is filled
+ * (the "len" case) the loop exits before storing anything further, so no
+ * NUL terminator is written.  save_lr, restore_pc and USER() are defined
+ * outside this file; USER() presumably ties the load to the 9001 fixup
+ * below.
+ */
+ENTRY(__arch_strncpy_from_user)
+        save_lr
+        mov     ip, r1                  @ remember start of source
+1:      subs    r2, r2, #1              @ room for another byte?
+USER(   ldrplbt r3, [r1], #1)           @ if so fetch it - may fault
+        bmi     2f                      @ buffer full
+        strb    r3, [r0], #1
+        teq     r3, #0                  @ was that the terminator?
+        bne     1b
+        sub     r1, r1, #1      @ take NUL character out of count
+2:      sub     r0, r1, ip              @ result = bytes consumed from src
+        restore_pc
+        .section .fixup,"ax"
+        .align  0
+9001:   mov     r3, #0
+        strb    r3, [r0, #0]    @ null terminate
+        mov     r0, #-EFAULT
+        restore_pc
+        .previous
diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S
new file mode 100644
index 000000000000..67bcd8268128
--- /dev/null
+++ b/arch/arm/lib/strnlen_user.S
@@ -0,0 +1,40 @@
+/*
+ *  linux/arch/arm/lib/strnlen_user.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+        .text
+        .align  5
+/* Prototype: unsigned long __arch_strnlen_user(const char *str, long n)
+ * Purpose  : get length of a string in user memory
+ * Params   : str - address of string in user memory
+ *          : n   - maximum number of bytes to examine
+ * Returns  : length of string *including terminator*
+ *            or zero on exception, or n + 1 if too long
+ *
+ * Registers: r0 = str and r1 = n on entry; r2 keeps the start address and
+ *            r3 holds the byte just read.  The result is the difference
+ *            between the advanced pointer and the start address.
+ *
+ * save_lr, restore_pc and USER() are defined outside this file; USER()
+ * presumably ties the load to the 9001 fixup below.
+ *
+ * NOTE(review): n is only compared with zero after it has been
+ * decremented, so n == 0 does not stop the scan immediately - confirm
+ * that callers never pass 0.
+ */
+ENTRY(__arch_strnlen_user)
+        save_lr
+        mov     r2, r0                  @ remember start of string
+1:
+USER(   ldrbt   r3, [r0], #1)           @ may fault
+        teq     r3, #0                  @ terminator?
+        beq     2f
+        subs    r1, r1, #1              @ one fewer byte allowed
+        bne     1b
+        add     r0, r0, #1              @ too long: make the result n + 1
+2:      sub     r0, r0, r2
+        restore_pc
+        .section .fixup,"ax"
+        .align  0
+9001:   mov     r0, #0                  @ exception: return zero
+        restore_pc
+        .previous
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
new file mode 100644
index 000000000000..fa923f026f15
--- /dev/null
+++ b/arch/arm/lib/strrchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm/lib/strrchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+                .align  5
+/*
+ * strrchr: find the last occurrence of a character in a NUL-terminated
+ * string.
+ *
+ * Entry : r0 = string, r1 = character to search for
+ * Exit  : r0 = address of the last matching byte, or 0 if there is none.
+ *         Searching for 0 returns the address of the terminator, because
+ *         each byte is tested for a match before it is tested for NUL.
+ * Uses  : r1 (masked to 8 bits), r2 (current byte), r3 (last match so far)
+ *
+ * The search character is reduced to a byte first, as strchr does, so a
+ * value with bits set above bit 7 still matches the zero-extended bytes
+ * loaded by ldrb.
+ */
+ENTRY(strrchr)
+                and     r1, r1, #0xff           @ compare as a byte
+                mov     r3, #0                  @ no match found yet
+1:              ldrb    r2, [r0], #1            @ fetch byte, r0 -> next byte
+                teq     r2, r1
+                subeq   r3, r0, #1              @ remember address of this match
+                teq     r2, #0                  @ terminator?
+                bne     1b
+                mov     r0, r3
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
new file mode 100644
index 000000000000..4aba4676b984
--- /dev/null
+++ b/arch/arm/lib/testchangebit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm/lib/testchangebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * _test_and_change_bit_{be,le}: toggle one bit and report its old value.
+ *
+ * Entry : r0 = bit number, r1 = base address
+ * Exit  : r0 = 0 if the bit was clear beforehand, 1 if it was set
+ * Uses  : r1, r2, r3, ip
+ *
+ * The _be entry point adjusts the bit number and falls through into _le.
+ * The Z flag produced by tst has to survive until moveq; the instructions
+ * in between do not set flags, and this relies on restore_irqs (defined
+ * outside this file) leaving them alone as well.
+ */
+ENTRY(_test_and_change_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_test_and_change_bit_le)
+                add     r1, r1, r0, lsr #3      @ address of the byte
+                and     r3, r0, #7              @ bit position within it
+                mov     r0, #1
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1]
+                tst     r2, r0, lsl r3          @ Z set if the bit was clear
+                eor     r2, r2, r0, lsl r3      @ toggle the bit
+                strb    r2, [r1]
+                restore_irqs ip
+                moveq   r0, #0                  @ old value was 0
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
new file mode 100644
index 000000000000..e07c5bd24307
--- /dev/null
+++ b/arch/arm/lib/testclearbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm/lib/testclearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * _test_and_clear_bit_{be,le}: clear one bit and report its old value.
+ *
+ * Entry : r0 = bit number, r1 = base address
+ * Exit  : r0 = 0 if the bit was clear beforehand, 1 if it was set
+ * Uses  : r1, r2, r3, ip
+ *
+ * The _be entry point adjusts the bit number and falls through into _le.
+ * The Z flag produced by tst has to survive until moveq; the instructions
+ * in between do not set flags, and this relies on restore_irqs (defined
+ * outside this file) leaving them alone as well.
+ */
+ENTRY(_test_and_clear_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_test_and_clear_bit_le)
+                add     r1, r1, r0, lsr #3      @ Get byte offset
+                and     r3, r0, #7              @ Get bit offset
+                mov     r0, #1
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1]
+                tst     r2, r0, lsl r3          @ Z set if the bit was clear
+                bic     r2, r2, r0, lsl r3      @ clear the bit
+                strb    r2, [r1]
+                restore_irqs ip
+                moveq   r0, #0                  @ old value was 0
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
new file mode 100644
index 000000000000..a570fc74cddd
--- /dev/null
+++ b/arch/arm/lib/testsetbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm/lib/testsetbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * _test_and_set_bit_{be,le}: set one bit and report its old value.
+ *
+ * Entry : r0 = bit number, r1 = base address
+ * Exit  : r0 = 0 if the bit was clear beforehand, 1 if it was set
+ * Uses  : r1, r2, r3, ip
+ *
+ * The _be entry point adjusts the bit number and falls through into _le.
+ * The Z flag produced by tst has to survive until moveq; the instructions
+ * in between do not set flags, and this relies on restore_irqs (defined
+ * outside this file) leaving them alone as well.
+ */
+ENTRY(_test_and_set_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_test_and_set_bit_le)
+                add     r1, r1, r0, lsr #3      @ Get byte offset
+                and     r3, r0, #7              @ Get bit offset
+                mov     r0, #1
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1]
+                tst     r2, r0, lsl r3          @ Z set if the bit was clear
+                orr     r2, r2, r0, lsl r3      @ set the bit
+                strb    r2, [r1]
+                restore_irqs ip
+                moveq   r0, #0                  @ old value was 0
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S
new file mode 100644
index 000000000000..d3ed0636c008
--- /dev/null
+++ b/arch/arm/lib/uaccess.S
@@ -0,0 +1,697 @@
+/*
+ *  linux/arch/arm/lib/uaccess.S
+ *
+ *  Copyright (C) 1995, 1996,1997,1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Routines to block copy data to/from user memory
+ *   These are highly optimised both for the 4k page size
+ *   and for various alignments.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+                .text
+#define PAGE_SHIFT 12
+/* Prototype: int __arch_copy_to_user(void *to, const char *from, size_t n)
+ * Purpose  : copy a block to user memory from kernel memory
+ * Params   : to   - user memory
+ *          : from - kernel memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied.
+ *
+ * Registers: r0 = to, r1 = from, r2 = n on entry.  ip is the working byte
+ *            count, r3 - r6 carry data, and r7 holds the left-over source
+ *            word when the source is not word aligned.
+ *
+ * Outline  : fewer than 4 bytes are copied bytewise.  Otherwise the
+ *            destination is first brought to a word boundary, then one of
+ *            four copy loops is chosen by the source offset (0, 1, 2 or 3).
+ *            Each loop writes one word with strt, works out how many bytes
+ *            remain before the destination crosses a 4K page boundary, and
+ *            copies that many with ordinary stm stores before issuing the
+ *            next strt.  Up to three trailing bytes are written with strbt.
+ *
+ * Result   : the normal exit (.c2u_finished) returns 0.  The 9001 fixup
+ *            at the end pops the r2 value saved on entry into r0, so the
+ *            whole of n is reported as not copied however far the copy
+ *            got.  USER(), PLD(), LOADREGS(), pull, push and get_byte_N
+ *            are defined outside this file; USER() presumably ties the
+ *            wrapped store to the 9001 fixup.
+ */
+/*
+ * Destination not word aligned: ip = to & 3 on entry.  Copy 4 - ip bytes
+ * (one always, a second if that is >= 2, a third if it is 3), take them
+ * off the count and rejoin the main path.
+ */
+.c2u_dest_not_aligned:
+                rsb     ip, ip, #4
+                cmp     ip, #2
+                ldrb    r3, [r1], #1
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                ldrgeb  r3, [r1], #1
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #1
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                sub     r2, r2, ip
+                b       .c2u_dest_aligned
+ENTRY(__arch_copy_to_user)
+                stmfd   sp!, {r2, r4 - r7, lr}          @ r2 saved for the fixup
+                cmp     r2, #4
+                blt     .c2u_not_enough
+        PLD(    pld     [r1, #0]                )
+        PLD(    pld     [r0, #0]                )
+                ands    ip, r0, #3
+                bne     .c2u_dest_not_aligned
+.c2u_dest_aligned:
+                ands    ip, r1, #3
+                bne     .c2u_src_not_aligned
+/*
+ * Seeing as there has to be at least 8 bytes to copy, we can
+ * copy one word, and force a user-mode page fault...
+ */
+.c2u_0fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4                      @ under 4 left: ip = 0..3
+                bmi     .c2u_0nowords
+                ldr     r3, [r1], #4
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT    @ On each page, use a ld/st??t instruction
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    @ ip = bytes to next page boundary
+                beq     .c2u_0fupi                      @ on a boundary: strt again
+/*
+ * ip = max no. of bytes to copy before needing another "strt" insn
+ */
+                cmp     r2, ip
+                movlt   ip, r2                          @ ip = min(ip, r2)
+                sub     r2, r2, ip
+                subs    ip, ip, #32
+                blt     .c2u_0rem8lp
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+        PLD(    subs    ip, ip, #64                     )
+        PLD(    blt     .c2u_0cpynopld          )
+        PLD(    pld     [r1, #60]               )
+        PLD(    pld     [r0, #60]               )
+.c2u_0cpy8lp:
+        PLD(    pld     [r1, #92]               )
+        PLD(    pld     [r0, #92]               )
+.c2u_0cpynopld: ldmia   r1!, {r3 - r6}
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                ldmia   r1!, {r3 - r6}
+                subs    ip, ip, #32
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                bpl     .c2u_0cpy8lp
+        PLD(    cmn     ip, #64                 )
+        PLD(    bge     .c2u_0cpynopld          )
+        PLD(    add     ip, ip, #64             )
+/*
+ * Fewer than 32 bytes remain in this run.  ip is that remainder minus 32,
+ * so its low bits can be tested directly for the 8, 4 and byte parts.
+ */
+.c2u_0rem8lp:   cmn     ip, #16
+                ldmgeia r1!, {r3 - r6}
+                stmgeia r0!, {r3 - r6}                  @ Shouldnt fault
+                tst     ip, #8
+                ldmneia r1!, {r3 - r4}
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                ldrne   r3, [r1], #4
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_0fupi
+.c2u_0nowords:  teq     ip, #0
+                beq     .c2u_finished
+/*
+ * Copy the last ip (1 to 3) bytes straight from the source pointer.
+ */
+.c2u_nowords:   cmp     ip, #2
+                ldrb    r3, [r1], #1
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                ldrgeb  r3, [r1], #1
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #1
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+/*
+ * Fewer than 4 bytes were requested: copy them bytewise, or return at
+ * once if the count is zero.
+ */
+.c2u_not_enough:
+                movs    ip, r2
+                bne     .c2u_nowords
+.c2u_finished:  mov     r0, #0
+                LOADREGS(fd,sp!,{r2, r4 - r7, pc})
+/*
+ * Source not word aligned: ip = from & 3.  Round the source pointer down,
+ * preload the first word into r7 and dispatch on the offset (1, 2 or 3).
+ * Each variant below assembles every destination word from the rest of r7
+ * and the start of the next source word using the pull and push shifts.
+ */
+.c2u_src_not_aligned:
+                bic     r1, r1, #3
+                ldr     r7, [r1], #4
+                cmp     ip, #2
+                bgt     .c2u_3fupi
+                beq     .c2u_2fupi
+/*
+ * Source offset 1: same structure as the aligned loop, 16 bytes per pass.
+ */
+.c2u_1fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4
+                bmi     .c2u_1nowords
+                mov     r3, r7, pull #8
+                ldr     r7, [r1], #4
+                orr     r3, r3, r7, push #24
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT
+                beq     .c2u_1fupi
+                cmp     r2, ip
+                movlt   ip, r2
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .c2u_1rem8lp
+        PLD(    pld     [r1, #12]               )
+        PLD(    pld     [r0, #12]               )
+        PLD(    subs    ip, ip, #32             )
+        PLD(    blt     .c2u_1cpynopld          )
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+.c2u_1cpy8lp:
+        PLD(    pld     [r1, #44]               )
+        PLD(    pld     [r0, #44]               )
+.c2u_1cpynopld: mov     r3, r7, pull #8
+                ldmia   r1!, {r4 - r7}
+                subs    ip, ip, #16
+                orr     r3, r3, r4, push #24
+                mov     r4, r4, pull #8
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                bpl     .c2u_1cpy8lp
+        PLD(    cmn     ip, #32                 )
+        PLD(    bge     .c2u_1cpynopld          )
+        PLD(    add     ip, ip, #32             )
+.c2u_1rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #8
+                ldmneia r1!, {r4, r7}
+                orrne   r3, r3, r4, push #24
+                movne   r4, r4, pull #8
+                orrne   r4, r4, r7, push #24
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                movne   r3, r7, pull #8
+                ldrne   r7, [r1], #4
+                orrne   r3, r3, r7, push #24
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_1fupi
+/*
+ * Offset 1 tail: all of the last ip (0 to 3) bytes are still in r7.
+ */
+.c2u_1nowords:  mov     r3, r7, get_byte_1
+                teq     ip, #0
+                beq     .c2u_finished
+                cmp     ip, #2
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                movge   r3, r7, get_byte_2
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                movgt   r3, r7, get_byte_3
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+/*
+ * Source offset 2: as above with 16-bit shifts.
+ */
+.c2u_2fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4
+                bmi     .c2u_2nowords
+                mov     r3, r7, pull #16
+                ldr     r7, [r1], #4
+                orr     r3, r3, r7, push #16
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT
+                beq     .c2u_2fupi
+                cmp     r2, ip
+                movlt   ip, r2
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .c2u_2rem8lp
+        PLD(    pld     [r1, #12]               )
+        PLD(    pld     [r0, #12]               )
+        PLD(    subs    ip, ip, #32             )
+        PLD(    blt     .c2u_2cpynopld          )
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+.c2u_2cpy8lp:
+        PLD(    pld     [r1, #44]               )
+        PLD(    pld     [r0, #44]               )
+.c2u_2cpynopld: mov     r3, r7, pull #16
+                ldmia   r1!, {r4 - r7}
+                subs    ip, ip, #16
+                orr     r3, r3, r4, push #16
+                mov     r4, r4, pull #16
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                bpl     .c2u_2cpy8lp
+        PLD(    cmn     ip, #32                 )
+        PLD(    bge     .c2u_2cpynopld          )
+        PLD(    add     ip, ip, #32             )
+.c2u_2rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #16
+                ldmneia r1!, {r4, r7}
+                orrne   r3, r3, r4, push #16
+                movne   r4, r4, pull #16
+                orrne   r4, r4, r7, push #16
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                movne   r3, r7, pull #16
+                ldrne   r7, [r1], #4
+                orrne   r3, r3, r7, push #16
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_2fupi
+/*
+ * Offset 2 tail: two bytes are still in r7; a third, if needed, is read
+ * from memory at r1 (the start of the next source word).
+ */
+.c2u_2nowords:  mov     r3, r7, get_byte_2
+                teq     ip, #0
+                beq     .c2u_finished
+                cmp     ip, #2
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                movge   r3, r7, get_byte_3
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #0
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+/*
+ * Source offset 3: as above with the shifts swapped (pull 24, push 8).
+ */
+.c2u_3fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4
+                bmi     .c2u_3nowords
+                mov     r3, r7, pull #24
+                ldr     r7, [r1], #4
+                orr     r3, r3, r7, push #8
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT
+                beq     .c2u_3fupi
+                cmp     r2, ip
+                movlt   ip, r2
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .c2u_3rem8lp
+        PLD(    pld     [r1, #12]               )
+        PLD(    pld     [r0, #12]               )
+        PLD(    subs    ip, ip, #32             )
+        PLD(    blt     .c2u_3cpynopld          )
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+.c2u_3cpy8lp:
+        PLD(    pld     [r1, #44]               )
+        PLD(    pld     [r0, #44]               )
+.c2u_3cpynopld: mov     r3, r7, pull #24
+                ldmia   r1!, {r4 - r7}
+                subs    ip, ip, #16
+                orr     r3, r3, r4, push #8
+                mov     r4, r4, pull #24
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                bpl     .c2u_3cpy8lp
+        PLD(    cmn     ip, #32                 )
+        PLD(    bge     .c2u_3cpynopld          )
+        PLD(    add     ip, ip, #32             )
+.c2u_3rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #24
+                ldmneia r1!, {r4, r7}
+                orrne   r3, r3, r4, push #8
+                movne   r4, r4, pull #24
+                orrne   r4, r4, r7, push #8
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                movne   r3, r7, pull #24
+                ldrne   r7, [r1], #4
+                orrne   r3, r3, r7, push #8
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_3fupi
+/*
+ * Offset 3 tail: one byte is still in r7; any further bytes are read
+ * from memory at r1 (the start of the next source word).
+ */
+.c2u_3nowords:  mov     r3, r7, get_byte_3
+                teq     ip, #0
+                beq     .c2u_finished
+                cmp     ip, #2
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                ldrgeb  r3, [r1], #1
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #0
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+/*
+ * Fault fixup: unwind the frame pushed on entry, loading the saved r2
+ * (the original byte count) into r0 as the return value.
+ */
+                .section .fixup,"ax"
+                .align  0
+9001:           LOADREGS(fd,sp!, {r0, r4 - r7, pc})
+                .previous
+/* Prototype: unsigned long __arch_copy_from_user(void *to,const void *from,unsigned long n);
+ * Purpose  : copy a block from user memory to kernel memory
+ * Params   : to   - kernel memory
+ *          : from - user memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied.
+ *
+ * Registers: r0 = to, r1 = from, r2 = n on entry.  r3 - r7 and ip are
+ *            used as scratch; r4 - r7 and lr are saved on the stack.
+ * Faults   : user loads are wrapped in USER(); presumably that macro
+ *            (defined outside this chunk) makes a faulting access
+ *            resume at the local label 9001 - TODO confirm.  That
+ *            fixup zero-fills the part of the kernel buffer that was
+ *            not written and returns the number of bytes left.
+ *
+ * .cfu_dest_not_aligned is the out-of-line helper placed ahead of the
+ * entry point: it copies 1 to 3 single bytes so that the kernel
+ * destination in r0 becomes word aligned, then rejoins the main path.
+ */
+.cfu_dest_not_aligned:                                  /* entered with ip = r0 & 3, non-zero */
+                rsb     ip, ip, #4                      /* ip = 4 - ip = bytes needed to align r0 (1..3) */
+                cmp     ip, #2                          /* flags select how many of the copies below run */
+USER(           ldrbt   r3, [r1], #1)                   @ May fault
+                strb    r3, [r0], #1                    /* first byte: always copied */
+USER(           ldrgebt r3, [r1], #1)                   @ May fault
+                strgeb  r3, [r0], #1                    /* second byte: only when ip >= 2 */
+USER(           ldrgtbt r3, [r1], #1)                   @ May fault
+                strgtb  r3, [r0], #1                    /* third byte: only when ip == 3 */
+                sub     r2, r2, ip                      /* account for the bytes just copied */
+                b       .cfu_dest_aligned
+ENTRY(__arch_copy_from_user)                            /* r0 = to (kernel), r1 = from (user), r2 = n */
+                stmfd   sp!, {r0, r2, r4 - r7, lr}      /* saved r0 and r2 are consumed by the 9001 fixup on a fault */
+                cmp     r2, #4
+                blt     .cfu_not_enough                 /* fewer than 4 bytes: plain byte copy */
+        PLD(    pld     [r1, #0]                )
+        PLD(    pld     [r0, #0]                )
+                ands    ip, r0, #3                      /* ip = destination misalignment */
+                bne     .cfu_dest_not_aligned
+.cfu_dest_aligned:
+                ands    ip, r1, #3                      /* ip = source misalignment (0..3) */
+                bne     .cfu_src_not_aligned
+/*
+ * Seeing as there has to be at least 8 bytes to copy, we can
+ * copy one word, and force a user-mode page fault...
+ *
+ * NOTE(review): the entry check above only guarantees n >= 4, and the
+ * destination alignment step may already have consumed up to 3 bytes.
+ * The subs/bmi pair below handles a remaining count under 4, so the
+ * "at least 8 bytes" wording looks stale - confirm before relying on it.
+ *
+ * Source and destination are both word aligned on this path.
+ */
+.cfu_0fupi:     subs    r2, r2, #4                      /* reserve one word; negative means under 4 bytes left */
+                addmi   ip, r2, #4                      /* ip = leftover byte count (0..3) */
+                bmi     .cfu_0nowords
+USER(           ldrt    r3, [r1], #4)
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT    @ On each page, use a ld/st??t instruction
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    /* ip = (-r1) modulo (1 << PAGE_SHIFT) */
+                beq     .cfu_0fupi                      /* r1 sits on a page boundary: use ldrt again */
+/*
+ * ip = max no. of bytes to copy before needing another "ldrt" insn
+ * (the three instructions above compute the distance from r1 to the
+ * next (1 << PAGE_SHIFT) boundary; it is clamped to r2 below)
+ */
+                cmp     r2, ip
+                movlt   ip, r2                          /* ip = min(r2, ip) */
+                sub     r2, r2, ip                      /* r2 = bytes left after this chunk */
+                subs    ip, ip, #32
+                blt     .cfu_0rem8lp                    /* chunk shorter than 32 bytes: skip the bulk loop */
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+        PLD(    subs    ip, ip, #64                     )
+        PLD(    blt     .cfu_0cpynopld          )
+        PLD(    pld     [r1, #60]               )
+        PLD(    pld     [r0, #60]               )
+.cfu_0cpy8lp:                                           /* bulk loop: 32 bytes (8 words) per iteration */
+        PLD(    pld     [r1, #92]               )
+        PLD(    pld     [r0, #92]               )
+.cfu_0cpynopld: ldmia   r1!, {r3 - r6}                  @ Shouldnt fault
+                stmia   r0!, {r3 - r6}
+                ldmia   r1!, {r3 - r6}                  @ Shouldnt fault
+                subs    ip, ip, #32
+                stmia   r0!, {r3 - r6}
+                bpl     .cfu_0cpy8lp
+        PLD(    cmn     ip, #64                 )
+        PLD(    bge     .cfu_0cpynopld          )
+        PLD(    add     ip, ip, #64             )
+.cfu_0rem8lp:   cmn     ip, #16                         /* ip is negative here; ge means at least 16 bytes remain in the chunk */
+                ldmgeia r1!, {r3 - r6}                  @ Shouldnt fault
+                stmgeia r0!, {r3 - r6}
+                tst     ip, #8                          /* 8 more bytes? */
+                ldmneia r1!, {r3 - r4}                  @ Shouldnt fault
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4                          /* 4 more bytes? */
+                ldrnet  r3, [r1], #4                    @ Shouldnt fault
+                strne   r3, [r0], #4
+                ands    ip, ip, #3                      /* ip = trailing bytes of this chunk (0..3) */
+                beq     .cfu_0fupi                      /* none: start the next chunk, or finish if r2 is exhausted */
+.cfu_0nowords:  teq     ip, #0
+                beq     .cfu_finished
+.cfu_nowords:   cmp     ip, #2                          /* copy the final ip (1..3) bytes one at a time */
+USER(           ldrbt   r3, [r1], #1)                   @ May fault
+                strb    r3, [r0], #1
+USER(           ldrgebt r3, [r1], #1)                   @ May fault
+                strgeb  r3, [r0], #1
+USER(           ldrgtbt r3, [r1], #1)                   @ May fault
+                strgtb  r3, [r0], #1
+                b       .cfu_finished
+.cfu_not_enough:
+                movs    ip, r2                          /* n < 4: ip = n, zero means nothing to copy */
+                bne     .cfu_nowords
+.cfu_finished:  mov     r0, #0                          /* success: zero bytes left uncopied */
+                add     sp, sp, #8                      /* drop the saved r0 and r2 */
+                LOADREGS(fd,sp!,{r4 - r7, pc})
+.cfu_src_not_aligned:                                   /* Destination is word aligned, source is not.
+                                                         * Entered with ip = r1 & 3 (1..3).  The source
+                                                         * pointer is rounded down to a word boundary and
+                                                         * read a word at a time; r7 always holds the most
+                                                         * recently loaded source word, and each output
+                                                         * word is merged from r7 and the following word
+                                                         * using the pull/push shift macros (defined
+                                                         * outside this chunk, presumably endian
+                                                         * dependent - TODO confirm).
+                                                         */
+                bic     r1, r1, #3
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                cmp     ip, #2                          /* dispatch on the source byte offset */
+                bgt     .cfu_3fupi                      /* offset 3 */
+                beq     .cfu_2fupi                      /* offset 2 */
+.cfu_1fupi:     subs    r2, r2, #4                      /* offset 1: reserve one word; negative means under 4 bytes left */
+                addmi   ip, r2, #4                      /* ip = leftover byte count (0..3) */
+                bmi     .cfu_1nowords
+                mov     r3, r7, pull #8
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                orr     r3, r3, r7, push #24
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    /* ip = (-r1) modulo (1 << PAGE_SHIFT) */
+                beq     .cfu_1fupi                      /* r1 sits on a page boundary: use ldrt again */
+                cmp     r2, ip
+                movlt   ip, r2                          /* ip = min(r2, ip) */
+                sub     r2, r2, ip                      /* r2 = bytes left after this chunk */
+                subs    ip, ip, #16
+                blt     .cfu_1rem8lp                    /* chunk shorter than 16 bytes: skip the bulk loop */
+        PLD(    pld     [r1, #12]               )
+        PLD(    pld     [r0, #12]               )
+        PLD(    subs    ip, ip, #32             )
+        PLD(    blt     .cfu_1cpynopld          )
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+.cfu_1cpy8lp:                                           /* bulk loop: 16 bytes (4 words) per iteration */
+        PLD(    pld     [r1, #44]               )
+        PLD(    pld     [r0, #44]               )
+.cfu_1cpynopld: mov     r3, r7, pull #8
+                ldmia   r1!, {r4 - r7}                  @ Shouldnt fault
+                subs    ip, ip, #16
+                orr     r3, r3, r4, push #24
+                mov     r4, r4, pull #8
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                stmia   r0!, {r3 - r6}
+                bpl     .cfu_1cpy8lp
+        PLD(    cmn     ip, #32                 )
+        PLD(    bge     .cfu_1cpynopld          )
+        PLD(    add     ip, ip, #32             )
+.cfu_1rem8lp:   tst     ip, #8                          /* 8 more bytes in this chunk? */
+                movne   r3, r7, pull #8
+                ldmneia r1!, {r4, r7}                   @ Shouldnt fault
+                orrne   r3, r3, r4, push #24
+                movne   r4, r4, pull #8
+                orrne   r4, r4, r7, push #24
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4                          /* 4 more bytes in this chunk? */
+                movne   r3, r7, pull #8
+USER(           ldrnet  r7, [r1], #4)                   @ May fault
+                orrne   r3, r3, r7, push #24
+                strne   r3, [r0], #4
+                ands    ip, ip, #3                      /* ip = trailing bytes (0..3) */
+                beq     .cfu_1fupi                      /* none: start the next chunk, or finish if r2 is exhausted */
+.cfu_1nowords:  mov     r3, r7, get_byte_1              /* tail bytes are taken from r7; no further user load is issued here */
+                teq     ip, #0
+                beq     .cfu_finished
+                cmp     ip, #2
+                strb    r3, [r0], #1                    /* first tail byte: always stored */
+                movge   r3, r7, get_byte_2
+                strgeb  r3, [r0], #1                    /* second tail byte: only when ip >= 2 */
+                movgt   r3, r7, get_byte_3
+                strgtb  r3, [r0], #1                    /* third tail byte: only when ip == 3 */
+                b       .cfu_finished
+.cfu_2fupi:     subs    r2, r2, #4                      /* Source byte offset 2 within its word.  Same
+                                                         * structure as the offset 1 path, but output words
+                                                         * are merged with pull #16 / push #16.  r7 holds
+                                                         * the most recently loaded source word.  A negative
+                                                         * result here means fewer than 4 bytes are left.
+                                                         */
+                addmi   ip, r2, #4                      /* ip = leftover byte count (0..3) */
+                bmi     .cfu_2nowords
+                mov     r3, r7, pull #16
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                orr     r3, r3, r7, push #16
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    /* ip = (-r1) modulo (1 << PAGE_SHIFT) */
+                beq     .cfu_2fupi                      /* r1 sits on a page boundary: use ldrt again */
+                cmp     r2, ip
+                movlt   ip, r2                          /* ip = min(r2, ip) */
+                sub     r2, r2, ip                      /* r2 = bytes left after this chunk */
+                subs    ip, ip, #16
+                blt     .cfu_2rem8lp                    /* chunk shorter than 16 bytes: skip the bulk loop */
+        PLD(    pld     [r1, #12]               )
+        PLD(    pld     [r0, #12]               )
+        PLD(    subs    ip, ip, #32             )
+        PLD(    blt     .cfu_2cpynopld          )
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+.cfu_2cpy8lp:                                           /* bulk loop: 16 bytes (4 words) per iteration */
+        PLD(    pld     [r1, #44]               )
+        PLD(    pld     [r0, #44]               )
+.cfu_2cpynopld: mov     r3, r7, pull #16
+                ldmia   r1!, {r4 - r7}                  @ Shouldnt fault
+                subs    ip, ip, #16
+                orr     r3, r3, r4, push #16
+                mov     r4, r4, pull #16
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                stmia   r0!, {r3 - r6}
+                bpl     .cfu_2cpy8lp
+        PLD(    cmn     ip, #32                 )
+        PLD(    bge     .cfu_2cpynopld          )
+        PLD(    add     ip, ip, #32             )
+.cfu_2rem8lp:   tst     ip, #8                          /* 8 more bytes in this chunk? */
+                movne   r3, r7, pull #16
+                ldmneia r1!, {r4, r7}                   @ Shouldnt fault
+                orrne   r3, r3, r4, push #16
+                movne   r4, r4, pull #16
+                orrne   r4, r4, r7, push #16
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4                          /* 4 more bytes in this chunk? */
+                movne   r3, r7, pull #16
+USER(           ldrnet  r7, [r1], #4)                   @ May fault
+                orrne   r3, r3, r7, push #16
+                strne   r3, [r0], #4
+                ands    ip, ip, #3                      /* ip = trailing bytes (0..3) */
+                beq     .cfu_2fupi                      /* none: start the next chunk, or finish if r2 is exhausted */
+.cfu_2nowords:  mov     r3, r7, get_byte_2              /* first two tail bytes are taken from r7 */
+                teq     ip, #0
+                beq     .cfu_finished
+                cmp     ip, #2
+                strb    r3, [r0], #1                    /* first tail byte: always stored */
+                movge   r3, r7, get_byte_3
+                strgeb  r3, [r0], #1                    /* second tail byte: only when ip >= 2 */
+USER(           ldrgtbt r3, [r1], #0)                   @ May fault
+                strgtb  r3, [r0], #1                    /* third tail byte (ip == 3) is loaded from user memory at r1 */
+                b       .cfu_finished
+.cfu_3fupi:     subs    r2, r2, #4                      /* Source byte offset 3 within its word.  Same
+                                                         * structure as the offset 1 and 2 paths, but output
+                                                         * words are merged with pull #24 / push #8.  r7
+                                                         * holds the most recently loaded source word.  A
+                                                         * negative result here means fewer than 4 bytes
+                                                         * are left.
+                                                         */
+                addmi   ip, r2, #4                      /* ip = leftover byte count (0..3) */
+                bmi     .cfu_3nowords
+                mov     r3, r7, pull #24
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                orr     r3, r3, r7, push #8
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    /* ip = (-r1) modulo (1 << PAGE_SHIFT) */
+                beq     .cfu_3fupi                      /* r1 sits on a page boundary: use ldrt again */
+                cmp     r2, ip
+                movlt   ip, r2                          /* ip = min(r2, ip) */
+                sub     r2, r2, ip                      /* r2 = bytes left after this chunk */
+                subs    ip, ip, #16
+                blt     .cfu_3rem8lp                    /* chunk shorter than 16 bytes: skip the bulk loop */
+        PLD(    pld     [r1, #12]               )
+        PLD(    pld     [r0, #12]               )
+        PLD(    subs    ip, ip, #32             )
+        PLD(    blt     .cfu_3cpynopld          )
+        PLD(    pld     [r1, #28]               )
+        PLD(    pld     [r0, #28]               )
+.cfu_3cpy8lp:                                           /* bulk loop: 16 bytes (4 words) per iteration */
+        PLD(    pld     [r1, #44]               )
+        PLD(    pld     [r0, #44]               )
+.cfu_3cpynopld: mov     r3, r7, pull #24
+                ldmia   r1!, {r4 - r7}                  @ Shouldnt fault
+                orr     r3, r3, r4, push #8
+                mov     r4, r4, pull #24
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                stmia   r0!, {r3 - r6}
+                subs    ip, ip, #16                     /* sets the flags tested by the bpl below */
+                bpl     .cfu_3cpy8lp
+        PLD(    cmn     ip, #32                 )
+        PLD(    bge     .cfu_3cpynopld          )
+        PLD(    add     ip, ip, #32             )
+.cfu_3rem8lp:   tst     ip, #8                          /* 8 more bytes in this chunk? */
+                movne   r3, r7, pull #24
+                ldmneia r1!, {r4, r7}                   @ Shouldnt fault
+                orrne   r3, r3, r4, push #8
+                movne   r4, r4, pull #24
+                orrne   r4, r4, r7, push #8
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4                          /* 4 more bytes in this chunk? */
+                movne   r3, r7, pull #24
+USER(           ldrnet  r7, [r1], #4)                   @ May fault
+                orrne   r3, r3, r7, push #8
+                strne   r3, [r0], #4
+                ands    ip, ip, #3                      /* ip = trailing bytes (0..3) */
+                beq     .cfu_3fupi                      /* none: start the next chunk, or finish if r2 is exhausted */
+.cfu_3nowords:  mov     r3, r7, get_byte_3              /* only the first tail byte is taken from r7 */
+                teq     ip, #0
+                beq     .cfu_finished
+                cmp     ip, #2
+                strb    r3, [r0], #1                    /* first tail byte: always stored */
+USER(           ldrgebt r3, [r1], #1)                   @ May fault
+                strgeb  r3, [r0], #1                    /* second tail byte (ip >= 2) is loaded from user memory */
+USER(           ldrgtbt r3, [r1], #1)                   @ May fault
+                strgtb  r3, [r0], #1                    /* third tail byte (ip == 3) is loaded from user memory */
+                b       .cfu_finished
+                .section .fixup,"ax"
+                .align  0
+                /*
+                 * We took an exception.  r0 contains a pointer to
+                 * the byte not copied.
+                 *
+                 * The stack still holds the frame pushed at entry:
+                 * the original "to" pointer, the original count, then
+                 * r4 - r7 and lr.  The first two are popped here to
+                 * work out how much was copied; the not-yet-written
+                 * tail of the kernel buffer is zeroed via __memzero
+                 * (presumably taking the pointer in r0 and the length
+                 * in r1 - TODO confirm) and the number of bytes left
+                 * is returned in r0.
+                 */
+9001:           ldr     r2, [sp], #4                    @ void *to
+                sub     r2, r0, r2                      @ bytes copied
+                ldr     r1, [sp], #4                    @ unsigned long count
+                subs    r4, r1, r2                      @ bytes left to copy
+                movne   r1, r4                          /* length to zero, only when something is left */
+                blne    __memzero                       /* skipped when every byte was copied */
+                mov     r0, r4                          /* return value: bytes NOT copied */
+                LOADREGS(fd,sp!, {r4 - r7, pc})
+                .previous
+/* Prototype: int __arch_clear_user(void *addr, size_t sz)
+ * Purpose  : clear some user memory
+ * Params   : addr - user memory address to clear
+ *          : sz   - number of bytes to clear
+ * Returns  : number of bytes NOT cleared
+ *
+ * Registers: r0 = addr, r1 = sz on entry; r2 holds the zero that is
+ *            stored and ip is scratch.
+ * Faults   : every store is wrapped in USER(); presumably that macro
+ *            (defined outside this chunk) makes a faulting store
+ *            resume at the local label 9001 - TODO confirm.  That
+ *            fixup pops the sz value saved at entry into r0, so the
+ *            value returned after a fault is the full original sz
+ *            rather than an exact remaining count.
+ *
+ * The columns in the comments below track r1 for each possible
+ * residue as the word loop unwinds.
+ */
+ENTRY(__arch_clear_user)
+                stmfd   sp!, {r1, lr}                   /* keep the original sz for the fault path */
+                mov     r2, #0                          /* value written to user memory */
+                cmp     r1, #4
+                blt     2f                              /* under 4 bytes: byte stores only */
+                ands    ip, r0, #3                      /* ip = address misalignment */
+                beq     1f
+                cmp     ip, #2                          /* store 4 - ip bytes to reach word alignment */
+USER(           strbt   r2, [r0], #1)
+USER(           strlebt r2, [r0], #1)
+USER(           strltbt r2, [r0], #1)
+                rsb     ip, ip, #4                      /* ip = number of bytes just stored */
+                sub     r1, r1, ip              @  7  6  5  4  3  2  1
+1:              subs    r1, r1, #8              @ -1 -2 -3 -4 -5 -6 -7
+USER(           strplt  r2, [r0], #4)
+USER(           strplt  r2, [r0], #4)
+                bpl     1b                              /* two words per pass while at least 8 bytes remained */
+                adds    r1, r1, #4              @  3  2  1  0 -1 -2 -3
+USER(           strplt  r2, [r0], #4)
+2:              tst     r1, #2                  @ 1x 1x 0x 0x 1x 1x 0x
+USER(           strnebt r2, [r0], #1)
+USER(           strnebt r2, [r0], #1)
+                tst     r1, #1                  @ x1 x0 x1 x0 x1 x0 x1
+USER(           strnebt r2, [r0], #1)
+                mov     r0, #0                          /* success: nothing left uncleared */
+                LOADREGS(fd,sp!, {r1, pc})
+                .section .fixup,"ax"
+                .align  0
+9001:           LOADREGS(fd,sp!, {r0, pc})
+                .previous
diff --git a/arch/arm/lib/ucmpdi2.c b/arch/arm/lib/ucmpdi2.c
new file mode 100644
index 000000000000..6c6ae63efa02
--- /dev/null
+++ b/arch/arm/lib/ucmpdi2.c
@@ -0,0 +1,51 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/* Three-way unsigned comparison of two double-word values.
+   Each operand is split into its high and low words through DIunion
+   and the words are compared as USItype, most significant word first.
+   Returns 0 when a < b, 1 when a == b and 2 when a > b.  */
+word_type
+__ucmpdi2 (DItype a, DItype b)
+{
+  DIunion lhs, rhs;
+  USItype lhs_hi, rhs_hi, lhs_lo, rhs_lo;
+  lhs.ll = a;
+  rhs.ll = b;
+  lhs_hi = (USItype) lhs.s.high;
+  rhs_hi = (USItype) rhs.s.high;
+  lhs_lo = (USItype) lhs.s.low;
+  rhs_lo = (USItype) rhs.s.low;
+  /* The high words decide the result unless they are equal.  */
+  if (lhs_hi != rhs_hi)
+    return lhs_hi < rhs_hi ? 0 : 2;
+  /* High words equal: the low words decide.  */
+  if (lhs_lo != rhs_lo)
+    return lhs_lo < rhs_lo ? 0 : 2;
+  return 1;
+}
diff --git a/arch/arm/lib/udivdi3.c b/arch/arm/lib/udivdi3.c
new file mode 100644
index 000000000000..d25195f673f4
--- /dev/null
+++ b/arch/arm/lib/udivdi3.c
@@ -0,0 +1,242 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+#include "longlong.h"
+static const UQItype __clz_tab[] = /* 256-entry lookup table indexed by a byte
+                                      value: entry i is the number of
+                                      significant bits in i (0 for 0, 1 for 1,
+                                      2 for 2..3, ... 8 for 128..255).
+                                      Presumably consumed by the
+                                      count_leading_zeros macro from
+                                      longlong.h - TODO confirm.  */
+{
+  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
+UDItype
+__udivmoddi4 (UDItype n, UDItype d, UDItype *rp)
+{ /* Double-word unsigned division with optional remainder.
+     n  - dividend
+     d  - divisor
+     rp - if non-null, receives the remainder of n / d
+     Returns the quotient, assembled from the word-sized digits q1
+     (high) and q0 (low).
+     Both operands are split into high/low words through DIunion and
+     one of four cases is taken depending on how the divisor words
+     compare with the high dividend word; the short "q = n / d" notes
+     below use upper case for words known to be non-zero.
+     A zero divisor reaches the "1 / d0" statement, which divides by
+     zero on purpose.
+     count_leading_zeros, udiv_qrnnd, umul_ppmm and sub_ddmmss are
+     macros that are not defined in this file (presumably supplied by
+     longlong.h - TODO confirm).  */
+  DIunion ww;
+  DIunion nn, dd;
+  DIunion rr;
+  USItype d0, d1, n0, n1, n2;
+  USItype q0, q1;
+  USItype b, bm;
+  nn.ll = n;
+  dd.ll = d;
+  d0 = dd.s.low;
+  d1 = dd.s.high;
+  n0 = nn.s.low;
+  n1 = nn.s.high;
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+        {
+          /* 0q = nn / 0D */
+          count_leading_zeros (bm, d0);
+          if (bm != 0)
+            {
+              /* Normalize, i.e. make the most significant bit of the
+                 denominator set.  */
+              d0 = d0 << bm;
+              n1 = (n1 << bm) | (n0 >> (SI_TYPE_SIZE - bm));
+              n0 = n0 << bm;
+            }
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          q1 = 0;
+          /* Remainder in n0 >> bm.  */
+        }
+      else
+        {
+          /* qq = NN / 0d */
+          if (d0 == 0)
+            d0 = 1 / d0;        /* Divide intentionally by zero.  */
+          count_leading_zeros (bm, d0);
+          if (bm == 0)
+            {
+              /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 leading quotient digit q1 = 1).
+                 This special case is necessary, not an optimization.
+                 (Shifts counts of SI_TYPE_SIZE are undefined.)  */
+              n1 -= d0;
+              q1 = 1;
+            }
+          else
+            {
+              /* Normalize.  */
+              b = SI_TYPE_SIZE - bm;
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+              udiv_qrnnd (q1, n1, n2, n1, d0);
+            }
+          /* n1 != d0...  */
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          /* Remainder in n0 >> bm.  */
+        }
+      if (rp != 0) /* Shared by both single-word-divisor cases above.  */
+        {
+          rr.s.low = n0 >> bm; /* Undo the normalization shift.  */
+          rr.s.high = 0;
+          *rp = rr.ll;
+        }
+    }
+  else
+    {
+      if (d1 > n1)
+        {
+          /* 00 = nn / DD */
+          q0 = 0;
+          q1 = 0;
+          /* Remainder in n1n0.  */
+          if (rp != 0)
+            {
+              rr.s.low = n0;
+              rr.s.high = n1;
+              *rp = rr.ll;
+            }
+        }
+      else
+        {
+          /* 0q = NN / dd */
+          count_leading_zeros (bm, d1);
+          if (bm == 0)
+            {
+              /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 quotient digit q0 = 0 or 1).
+                 This special case is necessary, not an optimization.  */
+              /* The condition on the next line takes advantage of that
+                 n1 >= d1 (true due to program flow).  */
+              if (n1 > d1 || n0 >= d0)
+                {
+                  q0 = 1;
+                  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+                }
+              else
+                q0 = 0;
+              q1 = 0;
+              if (rp != 0)
+                {
+                  rr.s.low = n0;
+                  rr.s.high = n1;
+                  *rp = rr.ll;
+                }
+            }
+          else
+            {
+              USItype m1, m0;
+              /* Normalize.  */
+              b = SI_TYPE_SIZE - bm;
+              d1 = (d1 << bm) | (d0 >> b);
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+              udiv_qrnnd (q0, n1, n2, n1, d1);
+              umul_ppmm (m1, m0, q0, d0);
+              if (m1 > n1 || (m1 == n1 && m0 > n0)) /* Estimated digit is one too large.  */
+                {
+                  q0--;
+                  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+                }
+              q1 = 0;
+              /* Remainder in (n1n0 - m1m0) >> bm.  */
+              if (rp != 0)
+                {
+                  sub_ddmmss (n1, n0, n1, n0, m1, m0);
+                  rr.s.low = (n1 << b) | (n0 >> bm);
+                  rr.s.high = n1 >> bm;
+                  *rp = rr.ll;
+                }
+            }
+        }
+    }
+  ww.s.low = q0; /* Assemble the quotient from its two digits.  */
+  ww.s.high = q1;
+  return ww.ll;
+}
+/* Double-word unsigned division: returns the quotient of n / d.
+   Delegates to __udivmoddi4 with a null remainder pointer, so no
+   remainder is stored.  */
+UDItype
+__udivdi3 (UDItype n, UDItype d)
+{
+  UDItype *const no_remainder = (UDItype *) 0;
+  return __udivmoddi4 (n, d, no_remainder);
+}
+/* Double-word unsigned modulo: returns the remainder of u / v.
+   Delegates to __udivmoddi4, discarding the quotient it returns and
+   keeping only the remainder it stores through the pointer.  */
+UDItype
+__umoddi3 (UDItype u, UDItype v)
+{
+  UDItype remainder;
+  __udivmoddi4 (u, v, &remainder);
+  return remainder;
+}
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/arm/lib