Linux-2.6.12-rc2v2.6.12-rc2

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/arm26
119 files changed, 25018 insertions, 0 deletions
diff --git a/arch/arm26/ACKNOWLEDGEMENTS b/arch/arm26/ACKNOWLEDGEMENTS
new file mode 100644
index 000000000000..0a17a45110e7
--- /dev/null
+++ b/arch/arm26/ACKNOWLEDGEMENTS
@@ -0,0 +1,29 @@
+The work in this architecture (ARM26) is that of a great many people.
+This is what has happened:
+I [Ian Molton] have been trying to repair the ARM26 architecture support, but it has become an impossible task whilst it is still merged with the ARM32 (arch/arm) code. The ARM26 code is too different to be sensible to keep with the ARM32 code now, and Russell King really doesn't have the time to maintain the ARM26 code. Add to that that most ARM32 developers don't know about or care about ARM26 when writing patches, and you have a real mess.
+As a result, I've split it off into a new architecture of its own. I've named it arm26 since these CPUs have only a 26 bit address space, unlike the other ARMs.
+The upheaval in moving around so many source files and chopping out vast amounts of cruft was enormous, and the copyright of many files is sometimes unclear. Because of this, I am writing this, in order that no-one is left out / misaccredited / blamed for any of the code.
+People I KNOW have made major contributions to the code:
+David Alan Gilbert (former maintainer of ARM26 bits)
+Philip Blundell
+Russell King
+Keith Owens
+also thanks to Nicholas Pitre for hints, and for the basis of our XIP support.
+Currently maintaining the code are
+Ian Molton (Maintainer / Archimedes)
+John Appleby (kernel / A5K)
+If anyone has a problem with attributions in header files / source files, please do contact me to straighten things out.
+Ian Molton (aka spyro)  -  ARM26 maintainer
+spyro@f2s.com
diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig
new file mode 100644
index 000000000000..3955de5af4c0
--- /dev/null
+++ b/arch/arm26/Kconfig
@@ -0,0 +1,227 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/kconfig-language.txt.
+#
+mainmenu "Linux Kernel Configuration"
+config ARM
+        bool
+        default y
+config ARM26
+        bool
+        default y
+config MMU
+        bool
+        default y
+config ARCH_ACORN
+        bool
+        default y
+config CPU_26
+        bool
+        default y
+config FIQ
+        bool
+        default y
+# 9 = 512 pages 8 = 256 pages 7 = 128 pages
+config FORCE_MAX_ZONEORDER
+        int
+        default 9
+config UID16
+        bool
+        default y
+config RWSEM_GENERIC_SPINLOCK
+        bool
+        default y
+config RWSEM_XCHGADD_ALGORITHM
+        bool
+config GENERIC_CALIBRATE_DELAY
+        bool
+        default y
+config GENERIC_BUST_SPINLOCK
+        bool
+config GENERIC_ISA_DMA
+        bool
+source "init/Kconfig"
+menu "System Type"
+comment "Archimedes/A5000 Implementations (select only ONE)"
+config ARCH_ARC
+        bool "Archimedes"
+        help
+          Say Y to support the Acorn Archimedes.
+          The Acorn Archimedes was a personal computer based on an 8MHz ARM2
+          processor, released in 1987.  It supported up to 16MB of RAM in
+          later models and floppy, harddisc, ethernet etc.
+config ARCH_A5K
+        bool "A5000"
+        help
+          Say Y here to support the Acorn A5000.
+          Linux can support the
+          internal IDE disk and CD-ROM interface, serial and parallel port,
+          and the floppy drive.  Note that on some A5000s the floppy is
+          plugged into the wrong socket on the motherboard.
+config PAGESIZE_16
+        bool "2MB physical memory (broken)"
+        help
+          Say Y here if your Archimedes or A5000 system has only 2MB of
+          memory, otherwise say N.  The resulting kernel will not run on a
+          machine with 4MB of memory.
+endmenu
+menu "General setup"
+# Compressed boot loader in ROM.  Yes, we really want to ask about
+# TEXT and BSS so we preserve their values in the config files.
+config ZBOOT_ROM
+        bool "Compressed boot loader in ROM/flash"
+        help
+          Say Y here if you intend to execute your compressed kernel image (zImage)
+          directly from ROM or flash.  If unsure, say N.
+config ZBOOT_ROM_TEXT
+        depends on ZBOOT_ROM
+        hex "Compressed ROM boot loader base address"
+        default "0"
+        help
+          The base address for zImage.  Unless you have special requirements, you
+          should not change this value.
+config ZBOOT_ROM_BSS
+        depends on ZBOOT_ROM
+        hex "Compressed ROM boot loader BSS address"
+        default "0"
+        help
+          The base address of 64KiB of read/write memory, which must be available
+          while the decompressor is running.  Unless you have special requirements,
+          you should not change this value.
+config XIP_KERNEL
+        bool "Execute In Place (XIP) kernel image"
+        help
+          Select this option to create a kernel that can be programmed into
+          the OS ROMs.
+comment "At least one math emulation must be selected"
+config FPE_NWFPE
+        tristate "NWFPE math emulation"
+        ---help---
+          Say Y to include the NWFPE floating point emulator in the kernel.
+          This is necessary to run most binaries. Linux does not currently
+          support floating point hardware so you need to say Y here even if
+          your machine has an FPA or floating point co-processor podule.
+          It is also possible to say M to build the emulator as a module
+          (nwfpe) or indeed to leave it out altogether. However, unless you
+          know what you are doing this can easily render your machine
+          unbootable. Saying Y is the safe option.
+          You may say N here if you are going to load the Acorn FPEmulator
+          early in the bootup.
+source "fs/Kconfig.binfmt"
+config PREEMPT
+        bool "Preemptible Kernel (EXPERIMENTAL)"
+        depends on CPU_32 && EXPERIMENTAL
+        help
+          This option reduces the latency of the kernel when reacting to
+          real-time or interactive events by allowing a low priority process to
+          be preempted even if it is in kernel mode executing a system call.
+          This allows applications to run more reliably even when the system is
+          under load.
+          Say Y here if you are building a kernel for a desktop, embedded
+          or real-time system.  Say N if you are unsure.
+config ARTHUR
+        tristate "RISC OS personality"
+        depends on CPU_32
+        help
+          Say Y here to include the kernel code necessary if you want to run
+          Acorn RISC OS/Arthur binaries under Linux. This code is still very
+          experimental; if this sounds frightening, say N and sleep in peace.
+          You can also say M here to compile this support as a module (which
+          will be called arthur).
+config CMDLINE
+        string "Default kernel command string"
+        default ""
+        help
+          On some architectures (EBSA110 and CATS), there is currently no way
+          for the boot loader to pass arguments to the kernel. For these
+          architectures, you should supply some command-line options at build
+          time by entering them here. As a minimum, you should specify the
+          memory size and the root device (e.g., mem=64M root=/dev/nfs).
+endmenu
+source "drivers/base/Kconfig"
+source "drivers/parport/Kconfig"
+source "drivers/pnp/Kconfig"
+source "drivers/block/Kconfig"
+source "drivers/md/Kconfig"
+source "net/Kconfig"
+source "drivers/ide/Kconfig"
+source "drivers/scsi/Kconfig"
+source "drivers/isdn/Kconfig"
+#
+# input before char - char/joystick depends on it. As does USB.
+#
+source "drivers/input/Kconfig"
+source "drivers/char/Kconfig"
+source "drivers/media/Kconfig"
+source "fs/Kconfig"
+source "drivers/video/Kconfig"
+if ARCH_ACORN
+source "sound/Kconfig"
+endif
+source "drivers/misc/Kconfig"
+source "drivers/usb/Kconfig"
+source "arch/arm26/Kconfig.debug"
+source "security/Kconfig"
+source "crypto/Kconfig"
+source "lib/Kconfig"
diff --git a/arch/arm26/Kconfig.debug b/arch/arm26/Kconfig.debug
new file mode 100644
index 000000000000..611fc86503fc
--- /dev/null
+++ b/arch/arm26/Kconfig.debug
@@ -0,0 +1,50 @@
+menu "Kernel hacking"
+source "lib/Kconfig.debug"
+# RMK wants arm kernels compiled with frame pointers so hardwire this to y.
+# If you know what you are doing and are willing to live without stack
+# traces, you can get a slightly smaller kernel by setting this option to
+# n, but then RMK will have to kill you ;).
+config FRAME_POINTER
+        bool
+        default y
+        help
+          If you say N here, the resulting kernel will be slightly smaller and
+          faster. However, when a problem occurs with the kernel, the
+          information that is reported is severely limited. Most people
+          should say Y here.
+config DEBUG_USER
+        bool "Verbose user fault messages"
+        help
+          When a user program crashes due to an exception, the kernel can
+          print a brief message explaining what the problem was. This is
+          sometimes helpful for debugging but serves no purpose on a
+          production system. Most people should say N here.
+config DEBUG_WAITQ
+        bool "Wait queue debugging"
+        depends on DEBUG_KERNEL
+config DEBUG_ERRORS
+        bool "Verbose kernel error messages"
+        depends on DEBUG_KERNEL
+        help
+          This option controls verbose debugging information which can be
+          printed when the kernel detects an internal error. This debugging
+          information is useful to kernel hackers when tracking down problems,
+          but mostly meaningless to other people. It's safe to say Y unless
+          you are concerned with the code size or don't want to see these
+          messages.
+# These options are only for real kernel hackers who want to get their hands dirty.
+config DEBUG_LL
+        bool "Kernel low-level debugging functions"
+        depends on DEBUG_KERNEL
+        help
+          Say Y here to include definitions of printascii, printchar, printhex
+          in the kernel.  This is helpful if you are debugging code that
+          executes before the console is initialized.
+endmenu
diff --git a/arch/arm26/Makefile b/arch/arm26/Makefile
new file mode 100644
index 000000000000..ada8985530a5
--- /dev/null
+++ b/arch/arm26/Makefile
@@ -0,0 +1,118 @@
+#
+# arch/arm26/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995-2001 by Russell King
+# Copyright (c) 2004 Ian Molton
+LDFLAGS_vmlinux :=-p -X
+CPPFLAGS_vmlinux.lds = -DTEXTADDR=$(TEXTADDR) -DDATAADDR=$(DATAADDR)
+OBJCOPYFLAGS    :=-O binary -R .note -R .comment -S
+GZFLAGS         :=-9
+ifeq ($(CONFIG_FRAME_POINTER),y)
+CFLAGS          +=-fno-omit-frame-pointer -mno-sched-prolog
+endif
+ifeq ($(CONFIG_DEBUG_INFO),y)
+CFLAGS          +=-g
+endif
+CFLAGS_BOOT     :=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm
+CFLAGS          +=-mapcs-26 -mcpu=arm3 -msoft-float -Uarm
+AFLAGS          +=-mapcs-26 -mcpu=arm3 -msoft-float
+ifeq ($(CONFIG_XIP_KERNEL),y)
+  TEXTADDR       := 0x03880000
+  DATAADDR       := 0x02080000
+else
+  TEXTADDR       := 0x02080000
+  DATAADDR       := .
+endif
+head-y          := arch/arm26/kernel/head.o arch/arm26/kernel/init_task.o
+ifeq ($(incdir-y),)
+incdir-y :=
+endif
+INCDIR   :=
+  
+export  MACHINE TEXTADDR GZFLAGS CFLAGS_BOOT
+# If we have a machine-specific directory, then include it in the build.
+core-y                          += arch/arm26/kernel/ arch/arm26/mm/ arch/arm26/machine/
+core-$(CONFIG_FPE_NWFPE)        += arch/arm26/nwfpe/
+libs-y                          += arch/arm26/lib/
+# Default target when executing plain make
+all: zImage
+boot := arch/arm26/boot
+prepare: include/asm-$(ARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h
+.PHONY: maketools FORCE
+maketools: FORCE
+        
+# Convert bzImage to zImage
+bzImage: vmlinux
+        $(Q)$(MAKE) $(build)=$(boot) $(boot)/zImage
+zImage Image bootpImage xipImage: vmlinux
+        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+zinstall install: vmlinux
+        $(Q)$(MAKE) $(build)=$(boot) $@
+# We use MRPROPER_FILES and CLEAN_FILES now
+archclean:
+        $(Q)$(MAKE) $(clean)=$(boot)
+# My testing targets (that short circuit a few dependencies)
+zImg:;  $(Q)$(MAKE) $(build)=$(boot) $(boot)/zImage
+Img:;   $(Q)$(MAKE) $(build)=$(boot) $(boot)/Image
+bp:;    $(Q)$(MAKE) $(build)=$(boot) $(boot)/bootpImage
+i:;     $(Q)$(MAKE) $(build)=$(boot) install
+zi:;    $(Q)$(MAKE) $(build)=$(boot) zinstall
+#
+# Configuration targets.  Use these to select a
+# configuration for your architecture
+%_config:
+        @( \
+        CFG=$(@:_config=); \
+        if [ -f arch/arm26/def-configs/$$CFG ]; then \
+          [ -f .config ] && mv -f .config .config.old; \
+          cp arch/arm26/def-configs/$$CFG .config; \
+          echo "*** Default configuration for $$CFG installed"; \
+          echo "*** Next, you may run 'make oldconfig'"; \
+        else \
+          echo "$$CFG does not exist"; \
+        fi; \
+        )
+arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
+                                   include/config/MARKER
+include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
+        $(call filechk,gen-asm-offsets)
+define archhelp
+  echo  '* zImage        - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
+  echo  '  Image         - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
+  echo  '  bootpImage    - Combined zImage and initial RAM disk' 
+  echo  '  xipImage      - eXecute In Place capable image for ROM use (arch/$(ARCH)/boot/xipImage)'
+  echo  '  initrd        - Create an initial image'
+  echo  '  install       - Install uncompressed kernel'
+  echo  '  zinstall      - Install compressed kernel'
+  echo  '                  Install using (your) ~/bin/installkernel or'
+  echo  '                  (distribution) /sbin/installkernel or'
+  echo  '                  install to $$(INSTALL_PATH) and run lilo'
+endef
diff --git a/arch/arm26/boot/Makefile b/arch/arm26/boot/Makefile
new file mode 100644
index 000000000000..b5c2277654d4
--- /dev/null
+++ b/arch/arm26/boot/Makefile
@@ -0,0 +1,80 @@
+#
+# arch/arm26/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995-2002 Russell King
+#
+# Note: the following conditions must always be true:
+#   ZRELADDR == virt_to_phys(TEXTADDR)
+#   PARAMS_PHYS must be within 4MB of ZRELADDR
+#   INITRD_PHYS must be in RAM
+   zreladdr-y           := 0x02080000 
+params_phys-y           := 0x0207c000
+initrd_phys-y           := 0x02180000
+ZRELADDR    := 0x02080000
+ZTEXTADDR   := 0x0207c000
+PARAMS_PHYS := $(params_phys-y)
+INITRD_PHYS := 0x02180000
+# We now have a PIC decompressor implementation.  Decompressors running
+# from RAM should not define ZTEXTADDR.  Decompressors running directly
+# from ROM or Flash must define ZTEXTADDR (preferably via the config)
+# FIXME: The ZTEXTADDR value assigned earlier in this file is overwritten here. See SHARK
+ifeq ($(CONFIG_ZBOOT_ROM),y)
+ZTEXTADDR       := $(CONFIG_ZBOOT_ROM_TEXT)
+ZBSSADDR        := $(CONFIG_ZBOOT_ROM_BSS)
+else
+ZTEXTADDR       := 0
+ZBSSADDR        := ALIGN(4)
+endif
+export  ZTEXTADDR ZBSSADDR ZRELADDR INITRD_PHYS PARAMS_PHYS
+targets := Image zImage bootpImage xipImage
+$(obj)/Image: vmlinux FORCE
+        $(call if_changed,objcopy)
+        @echo '  Kernel: $@ is ready'
+$(obj)/zImage:  $(obj)/compressed/vmlinux FORCE
+        $(call if_changed,objcopy)
+        @echo '  Kernel: $@ is ready'
+$(obj)/compressed/vmlinux: vmlinux FORCE
+        $(Q)$(MAKE) $(build)=$(obj)/compressed $@
+ifeq ($(CONFIG_XIP_KERNEL),y)
+$(obj)/xipImage: vmlinux FORCE
+#       $(OBJCOPY) -S -O binary -R .data -R .comment vmlinux vmlinux-text.bin
+# FIXME - where has .pci_fixup crept in from?
+        $(OBJCOPY) -S -O binary -R .data -R .pci_fixup -R .comment vmlinux vmlinux-text.bin
+        $(OBJCOPY) -S -O binary -R .init -R .text -R __ex_table -R .pci_fixup -R __ksymtab -R __ksymtab_gpl -R __kcrctab -R __kcrctab_gpl -R __param -R .comment vmlinux vmlinux-data.bin
+        cat vmlinux-text.bin vmlinux-data.bin > $@
+        $(RM) -f vmlinux-text.bin vmlinux-data.bin
+        @echo '  Kernel: $@ is ready'
+endif
+.PHONY: initrd
+# Sanity-check the variables required for an initrd: fail the build with a
+# message if INITRD_PHYS or INITRD is empty.  The subshell exits with
+# status 1 because a negative exit status is not valid in POSIX sh (strict
+# shells reject "exit -1" as an illegal number).
+initrd:
+        @test "$(INITRD_PHYS)" != "" || \
+        (echo This machine does not support INITRD; exit 1)
+        @test "$(INITRD)" != "" || \
+        (echo You must specify INITRD; exit 1)
+install: $(obj)/Image
+        $(CONFIG_SHELL) $(obj)/install.sh \
+        $(KERNELRELEASE) \
+        $(obj)/Image System.map "$(INSTALL_PATH)"
+zinstall: $(obj)/zImage
+        $(CONFIG_SHELL) $(obj)/install.sh \
+        $(KERNELRELEASE) \
+        $(obj)/zImage System.map "$(INSTALL_PATH)"
+subdir-     := compressed
diff --git a/arch/arm26/boot/compressed/Makefile b/arch/arm26/boot/compressed/Makefile
new file mode 100644
index 000000000000..b1d9ddebbe74
--- /dev/null
+++ b/arch/arm26/boot/compressed/Makefile
@@ -0,0 +1,50 @@
+#
+# linux/arch/arm26/boot/compressed/Makefile
+#
+# create a compressed vmlinuz image from the original vmlinux
+#
+# Note! ZTEXTADDR, ZBSSADDR and ZRELADDR are now exported
+# from arch/arm26/boot/Makefile
+#
+HEAD    = head.o
+OBJS    = misc.o
+FONTC   = drivers/video/console/font_acorn_8x8.c
+OBJS            += ll_char_wr.o font.o
+CFLAGS_misc.o   := -DPARAMS_PHYS=$(PARAMS_PHYS)
+targets       := vmlinux vmlinux.lds piggy piggy.gz piggy.o font.o head.o $(OBJS)
+SEDFLAGS        = s/TEXT_START/$(ZTEXTADDR)/;s/LOAD_ADDR/$(ZRELADDR)/;s/BSS_START/$(ZBSSADDR)/
+EXTRA_CFLAGS  := $(CFLAGS_BOOT) -fpic
+EXTRA_AFLAGS  := -traditional
+LDFLAGS_vmlinux := -p -X \
+        $(shell $(CC) $(CFLAGS)) -T
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \
+                $(addprefix $(obj)/, $(OBJS)) FORCE
+        $(call if_changed,ld)
+        @:
+$(obj)/piggy: vmlinux FORCE
+        $(call if_changed,objcopy)
+$(obj)/piggy.gz: $(obj)/piggy FORCE
+        $(call if_changed,gzip)
+LDFLAGS_piggy.o := -r -b binary
+$(obj)/piggy.o:  $(obj)/piggy.gz FORCE
+        $(call if_changed,ld)
+$(obj)/font.o: $(FONTC)
+        $(CC) $(CFLAGS) -Dstatic= -c $(FONTC) -o $(obj)/font.o
+$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in Makefile arch/arm26/boot/Makefile .config
+        @sed "$(SEDFLAGS)" < $< > $@
+$(obj)/misc.o: $(obj)/misc.c $(obj)/uncompress.h lib/inflate.c
diff --git a/arch/arm26/boot/compressed/head.S b/arch/arm26/boot/compressed/head.S
new file mode 100644
index 000000000000..0307804a6070
--- /dev/null
+++ b/arch/arm26/boot/compressed/head.S
@@ -0,0 +1,517 @@
+/*
+ *  linux/arch/arm26/boot/compressed/head.S
+ *
+ *  Copyright (C) 1996-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+/*
+ * Debugging stuff
+ *
+ * Note that these macros must not contain any code which is not
+ * 100% relocatable.  Any attempt to do so will result in a crash.
+ * Please select one of the following when turning on debugging.
+ */
+                .macro  kputc,val
+                mov     r0, \val
+                bl      putc
+                .endm
+                .macro  kphex,val,len
+                mov     r0, \val
+                mov     r1, #\len
+                bl      phex
+                .endm
+                .macro  debug_reloc_start
+                .endm
+                .macro  debug_reloc_end
+                .endm
+                .section ".start", #alloc, #execinstr
+/*
+ * sort out different calling conventions
+ */
+                .align
+start:
+                .type   start,#function
+                .rept   8                       @ eight no-ops pad the entry point
+                mov     r0, r0                  @ no-op
+                .endr
+                b       1f                      @ branch over the header words below
+                .word   0x016f2818              @ Magic numbers to help the loader
+                .word   start                   @ absolute load/run zImage address
+                .word   _edata                  @ zImage end address
+1:              mov     r7, r1                  @ save architecture ID
+                mov     r8, #0                  @ r8 = 0 (the incoming r0 is not preserved)
+                teqp    pc, #0x0c000003         @ turn off interrupts
+                .text
+                adr     r0, LC0
+                ldmia   r0, {r1, r2, r3, r4, r5, r6, ip, sp}
+                subs    r0, r0, r1              @ calculate the delta offset
+                teq     r0, #0                  @ if delta is zero, we're
+                beq     not_relocated           @ running at the address we
+                                                @ were linked at.
+                add     r2, r2, r0              @ different address, so we
+                add     r3, r3, r0              @ need to fix up various
+                add     r5, r5, r0              @ pointers.
+                add     r6, r6, r0
+                add     ip, ip, r0
+                add     sp, sp, r0
+1:              ldr     r1, [r6, #0]            @ relocate entries in the GOT
+                add     r1, r1, r0              @ table.  This fixes up the
+                str     r1, [r6], #4            @ C references.
+                cmp     r6, ip
+                blo     1b
+not_relocated:  mov     r0, #0
+1:              str     r0, [r2], #4            @ clear bss
+                str     r0, [r2], #4
+                str     r0, [r2], #4
+                str     r0, [r2], #4
+                cmp     r2, r3
+                blo     1b
+                bl      cache_on
+                mov     r1, sp                  @ malloc space above stack
+                add     r2, sp, #0x10000        @ 64k max
+/*
+ * Check to see if we will overwrite ourselves.
+ *   r4 = final kernel address
+ *   r5 = start of this image
+ *   r2 = end of malloc space (and therefore this image)
+ * We basically want:
+ *   r4 >= r2 -> OK
+ *   r4 + image length <= r5 -> OK
+ */
+                cmp     r4, r2
+                bhs     wont_overwrite
+                add     r0, r4, #4096*1024      @ 4MB largest kernel size
+                cmp     r0, r5
+                bls     wont_overwrite
+                mov     r5, r2                  @ decompress after malloc space
+                mov     r0, r5
+                mov     r3, r7
+                bl      decompress_kernel
+                add     r0, r0, #127
+                bic     r0, r0, #127            @ align the kernel length
+/*
+ * r0     = decompressed kernel length
+ * r1-r3  = unused
+ * r4     = kernel execution address
+ * r5     = decompressed kernel start
+ * r6     = processor ID
+ * r7     = architecture ID
+ * r8-r14 = unused
+ */
+                add     r1, r5, r0              @ end of decompressed kernel
+                adr     r2, reloc_start
+                ldr     r3, LC1
+                add     r3, r2, r3
+1:              ldmia   r2!, {r8 - r13}         @ copy relocation code
+                stmia   r1!, {r8 - r13}
+                ldmia   r2!, {r8 - r13}
+                stmia   r1!, {r8 - r13}
+                cmp     r2, r3
+                blo     1b
+                bl      cache_clean_flush
+                add     pc, r5, r0              @ call relocation code
+/*
+ * We're not in danger of overwriting ourselves.  Do this the simple way.
+ *
+ * r4     = kernel execution address
+ * r7     = architecture ID
+ */
+wont_overwrite: mov     r0, r4
+                mov     r3, r7
+                bl      decompress_kernel
+                b       call_kernel
+                .type   LC0, #object
+LC0:            .word   LC0                     @ r1
+                .word   __bss_start             @ r2
+                .word   _end                    @ r3
+                .word   _load_addr              @ r4
+                .word   _start                  @ r5
+                .word   _got_start              @ r6
+                .word   _got_end                @ ip
+                .word   user_stack+4096         @ sp
+LC1:            .word   reloc_end - reloc_start
+                .size   LC0, . - LC0
+/*
+ * Turn on the cache.  We need to setup some page tables so that we
+ * can have both the I and D caches on.
+ *
+ * We place the page tables 16k down from the kernel execution address,
+ * and we hope that nothing else is using it.  If we're using it, we
+ * will go pop!
+ *
+ * On entry,
+ *  r4 = kernel execution address
+ *  r6 = processor ID
+ *  r7 = architecture number
+ *  r8 = run-time address of "start"
+ * On exit,
+ *  r1, r2, r3, r8, r9, r12 corrupted
+ * This routine must preserve:
+ *  r4, r5, r6, r7
+ */
+                .align  5
+cache_on:       mov     r3, #8                  @ cache_on function
+                b       call_cache_fn
+__setup_mmu:    sub     r3, r4, #16384          @ Page directory size
+                bic     r3, r3, #0xff           @ Align the pointer
+                bic     r3, r3, #0x3f00         @ (low 14 bits cleared: 16k aligned)
+/*
+ * Initialise the page tables, turning on the cacheable and bufferable
+ * bits for the RAM area only.
+ */
+                mov     r0, r3                  @ r0 = write cursor into the table
+                mov     r8, r0, lsr #18         @ drop the low 18 bits...
+                mov     r8, r8, lsl #18         @ start of RAM
+                add     r9, r8, #0x10000000     @ a reasonable RAM size
+                mov     r1, #0x12               @ r1 = first entry value
+                orr     r1, r1, #3 << 10
+                add     r2, r3, #16384          @ r2 = end of the 16k table
+1:              cmp     r1, r8                  @ if virt >= start of RAM (hs is unsigned >=)
+                orrhs   r1, r1, #0x0c           @ set cacheable, bufferable
+                cmp     r1, r9                  @ if virt >= end of RAM
+                bichs   r1, r1, #0x0c           @ clear cacheable, bufferable
+                str     r1, [r0], #4            @ 1:1 mapping
+                add     r1, r1, #1048576        @ advance the entry by 1MB
+                teq     r0, r2                  @ reached the end of the table?
+                bne     1b                      @ no: write the next entry
+/*
+ * If ever we are running from Flash, then we surely want the cache
+ * to be enabled also for our execution instance...  We map 2MB of it
+ * so there is no map overlap problem for up to 1 MB compressed kernel.
+ * If the execution is in RAM then we would only be duplicating the above.
+ */
+                mov     r1, #0x1e
+                orr     r1, r1, #3 << 10
+                mov     r2, pc, lsr #20         @ r2 = current pc in units of 1MB
+                orr     r1, r1, r2, lsl #20     @ entry for the 1MB we are executing in
+                add     r0, r3, r2, lsl #2      @ r0 = table slot for that 1MB (4 bytes/entry)
+                str     r1, [r0], #4            @ first of the two 1MB entries
+                add     r1, r1, #1048576
+                str     r1, [r0]                @ second 1MB entry
+                mov     pc, lr                  @ return to caller
+__armv4_cache_on:
+                mov     r12, lr
+                bl      __setup_mmu
+                mov     r0, #0
+                mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
+                mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
+                mrc     p15, 0, r0, c1, c0, 0   @ read control reg
+                orr     r0, r0, #0x1000         @ I-cache enable
+                orr     r0, r0, #0x0030
+                b       __common_cache_on
+__arm6_cache_on:
+                mov     r12, lr
+                bl      __setup_mmu
+                mov     r0, #0
+                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
+                mcr     p15, 0, r0, c5, c0, 0   @ invalidate whole TLB v3
+                mov     r0, #0x30
+__common_cache_on:
+#ifndef DEBUG
+                orr     r0, r0, #0x000d         @ Write buffer, mmu
+#endif
+                mov     r1, #-1
+                mcr     p15, 0, r3, c2, c0, 0   @ load page table pointer
+                mcr     p15, 0, r1, c3, c0, 0   @ load domain access control
+                mcr     p15, 0, r0, c1, c0, 0   @ load control register
+                mov     pc, r12
+/*
+ * All code following this line is relocatable.  It is relocated by
+ * the above code to the end of the decompressed kernel image and
+ * executed there.  During this time, we have no stacks.
+ *
+ * r0     = decompressed kernel length
+ * r1-r3  = unused
+ * r4     = kernel execution address
+ * r5     = decompressed kernel start
+ * r6     = processor ID
+ * r7     = architecture ID
+ * r8-r14 = unused
+ */
+                .align  5
+reloc_start:    add     r8, r5, r0
+                debug_reloc_start
+                mov     r1, r4
+1:
+                .rept   4
+                ldmia   r5!, {r0, r2, r3, r9 - r13}     @ relocate kernel
+                stmia   r1!, {r0, r2, r3, r9 - r13}
+                .endr
+                cmp     r5, r8
+                blo     1b
+                debug_reloc_end
+call_kernel:    bl      cache_clean_flush
+                bl      cache_off
+                mov     r0, #0
+                mov     r1, r7                  @ restore architecture number
+                mov     pc, r4                  @ call kernel
+/*
+ * Here follow the relocatable cache support functions for the
+ * various processors.  This is a generic hook for locating an
+ * entry and jumping to an instruction at the specified offset
+ * from the start of the block.  Please note this is all position
+ * independent code.
+ *
+ *  r1  = corrupted
+ *  r2  = corrupted
+ *  r3  = block offset
+ *  r6  = corrupted
+ *  r12 = corrupted
+ */
+call_cache_fn:  adr     r12, proc_types         @ r12 = first entry of the table
+                mrc     p15, 0, r6, c0, c0      @ get processor ID
+1:              ldr     r1, [r12, #0]           @ get value
+                ldr     r2, [r12, #4]           @ get mask
+                eor     r1, r1, r6              @ (real ^ match)
+                tst     r1, r2                  @       & mask
+                addeq   pc, r12, r3             @ call cache function
+                add     r12, r12, #4*5          @ no match: step to the next 5-word entry
+                b       1b                      @ retry; the final entry has mask 0 so it always matches
+/*
+ * Table for cache operations.  This is basically:
+ *   - CPU ID match
+ *   - CPU ID mask
+ *   - 'cache on' method instruction
+ *   - 'cache off' method instruction
+ *   - 'cache flush' method instruction
+ *
+ * We match an entry using: ((real_id ^ match) & mask) == 0
+ *
+ * Writethrough caches generally only need 'on' and 'off'
+ * methods.  Writeback caches _must_ have the flush method
+ * defined.
+ */
+                .type   proc_types,#object
+@ Every entry must be exactly 5 words: call_cache_fn steps by 4*5 bytes and
+@ jumps to byte offset r3 inside the matching entry.
+proc_types:
+                .word   0x41560600              @ ARM6/610
+                .word   0xffffffe0
+@ The 'on' slot below points at the 'off' routine; the real 'on' method is
+@ kept commented out underneath as untested.
+                b       __arm6_cache_off        @ works, but slow
+                b       __arm6_cache_off
+                mov     pc, lr                  @ no flush method: just return
+@               b       __arm6_cache_on         @ untested
+@               b       __arm6_cache_off
+@               b       __armv3_cache_flush
+                .word   0x41007000              @ ARM7/710
+                .word   0xfff8fe00
+                b       __arm7_cache_off        @ 'on' slot also uses the 'off' routine
+                b       __arm7_cache_off
+                mov     pc, lr
+                .word   0x41807200              @ ARM720T (writethrough)
+                .word   0xffffff00
+                b       __armv4_cache_on
+                b       __armv4_cache_off
+                mov     pc, lr
+                .word   0x41129200              @ ARM920T
+                .word   0xff00fff0
+                b       __armv4_cache_on
+                b       __armv4_cache_off
+                b       __armv4_cache_flush
+                .word   0x4401a100              @ sa110 / sa1100
+                .word   0xffffffe0
+                b       __armv4_cache_on
+                b       __armv4_cache_off
+                b       __armv4_cache_flush
+                .word   0x6901b110              @ sa1110
+                .word   0xfffffff0
+                b       __armv4_cache_on
+                b       __armv4_cache_off
+                b       __armv4_cache_flush
+                .word   0x69050000              @ xscale
+                .word   0xffff0000
+                b       __armv4_cache_on
+                b       __armv4_cache_off
+                b       __armv4_cache_flush
+@ Catch-all terminator: a zero mask makes ((id ^ 0) & 0) == 0 true for any
+@ ID, and all three methods simply return.
+                .word   0                       @ unrecognised type
+                .word   0
+                mov     pc, lr
+                mov     pc, lr
+                mov     pc, lr
+                .size   proc_types, . - proc_types
+/*
+ * Turn off the Cache and MMU.  ARMv3 does not support
+ * reading the control register, but ARMv4 does.
+ *
+ * On entry,  r6 = processor ID
+ * On exit,   r0, r1, r2, r3, r12 corrupted
+ * This routine must preserve: r4, r6, r7
+ */
+                .align  5
+@ Entry point: byte offset 12 is the fourth word of a proc_types entry,
+@ i.e. its 'cache off' method.
+cache_off:      mov     r3, #12                 @ cache_off function
+                b       call_cache_fn
+@ ARMv4: read-modify-write the control register (clears bits 0, 2 and 3),
+@ then invalidate the cache and the TLB.
+__armv4_cache_off:
+                mrc     p15, 0, r0, c1, c0
+                bic     r0, r0, #0x000d
+                mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
+                mov     r0, #0
+                mcr     p15, 0, r0, c7, c7      @ invalidate whole cache v4
+                mcr     p15, 0, r0, c8, c7      @ invalidate whole TLB v4
+                mov     pc, lr
+@ ARMv3 parts: the control register is written with a fixed per-CPU value
+@ (loaded into r0 here) instead of being read back first.
+__arm6_cache_off:
+                mov     r0, #0x00000030         @ ARM6 control reg.
+                b       __armv3_cache_off
+__arm7_cache_off:
+                mov     r0, #0x00000070         @ ARM7 control reg.
+                b       __armv3_cache_off
+@ On entry r0 = value to write to the control register.
+__armv3_cache_off:
+                mcr     p15, 0, r0, c1, c0, 0   @ turn MMU and cache off
+                mov     r0, #0
+                mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
+                mcr     p15, 0, r0, c5, c0, 0   @ invalidate whole TLB v3
+                mov     pc, lr
+/*
+ * Clean and flush the cache to maintain consistency.
+ *
+ * On entry,
+ *  r6 = processor ID
+ * On exit,
+ *  r1, r2, r3, r12 corrupted
+ * This routine must preserve:
+ *  r0, r4, r5, r6, r7
+ */
+                .align  5
+@ Entry point: byte offset 16 is the fifth word of a proc_types entry,
+@ i.e. its 'cache flush' method.
+cache_clean_flush:
+                mov     r3, #16
+                b       call_cache_fn
+@ Software flush: read one word from every 32-byte line of a 64K window
+@ that starts at the current PC rounded down to 32 bytes, then issue the
+@ CP15 flush and write-buffer drain.  Uses r1, r2 and r12 only.
+__armv4_cache_flush:
+                bic     r1, pc, #31             @ r1 = 32-byte aligned start address
+                add     r2, r1, #65536          @ 2x the largest dcache size
+1:              ldr     r12, [r1], #32          @ s/w flush D cache
+                teq     r1, r2
+                bne     1b
+                mcr     p15, 0, r1, c7, c7, 0   @ flush I cache
+                mcr     p15, 0, r1, c7, c10, 4  @ drain WB
+                mov     pc, lr
+@ ARMv3 'cache flush' method: invalidate the whole cache.
+@ r1 is the scratch register (the flush contract lists r1 as corrupted and
+@ r0 as preserved), so the zero loaded into r1 is the value written to CP15.
+__armv3_cache_flush:
+                mov     r1, #0
+                mcr     p15, 0, r1, c7, c0, 0   @ invalidate whole cache v3
+                mov     pc, lr
+/*
+ * Various debugging routines for printing hex characters and
+ * memory, which again must be relocatable.
+ */
+#ifdef DEBUG
+                .type   phexbuf,#object
+phexbuf:        .space  12                      @ scratch buffer for the digit string
+                .size   phexbuf, . - phexbuf
+@ phex: print r0 as r1 upper-case hex digits.
+@ The string is built backwards in phexbuf (least significant digit last),
+@ then printed by tail-calling puts.  Corrupts r0-r3.
+phex:           adr     r3, phexbuf
+                mov     r2, #0
+                strb    r2, [r3, r1]            @ NUL terminator after the last digit
+1:              subs    r1, r1, #1
+                movmi   r0, r3                  @ all digits stored: r0 = buffer
+                bmi     puts                    @ ... and let puts return to our caller
+                and     r2, r0, #15             @ lowest nibble
+                mov     r0, r0, lsr #4
+                cmp     r2, #10
+                addge   r2, r2, #7              @ 10..15 -> 'A'..'F' once '0' is added
+                add     r2, r2, #'0'
+                strb    r2, [r3, r1]
+                b       1b
+@ puts: print the NUL-terminated string at r0.  A '\n' is followed by a
+@ '\r'.  loadsp and writeb are macros defined outside this chunk;
+@ NOTE(review): presumably loadsp sets r3 up for writeb - confirm.
+@ Label 2 is also the entry used by putc, which arrives with r0 = 0.
+puts:           loadsp  r3
+1:              ldrb    r2, [r0], #1
+                teq     r2, #0
+                moveq   pc, lr                  @ end of string
+2:              writeb  r2
+                mov     r1, #0x00020000         @ busy-wait delay after each character
+3:              subs    r1, r1, #1
+                bne     3b
+                teq     r2, #'\n'
+                moveq   r2, #'\r'               @ newline: emit a carriage return as well
+                beq     2b
+                teq     r0, #0                  @ r0 == 0 only when entered via putc
+                bne     1b
+                mov     pc, lr
+@ putc: print the single character in r0.  Reuses the output path of puts
+@ (label 2 there); r0 is cleared so that puts returns after one character.
+putc:
+                mov     r2, r0
+                mov     r0, #0
+                loadsp  r3
+                b       2b
+@ memdump: hex-dump 64 words starting at the address in r0, eight words
+@ per line, each line prefixed with its address and a ':' and with an extra
+@ space after the fourth word.  r10 holds the return address, r11 the word
+@ index and r12 the base address across the phex/putc calls.
+memdump:        mov     r12, r0
+                mov     r10, lr
+                mov     r11, #0
+2:              mov     r0, r11, lsl #2
+                add     r0, r0, r12             @ address of the first word on this line
+                mov     r1, #8
+                bl      phex
+                mov     r0, #':'
+                bl      putc
+1:              mov     r0, #' '
+                bl      putc
+                ldr     r0, [r12, r11, lsl #2]
+                mov     r1, #8
+                bl      phex
+                and     r0, r11, #7
+                teq     r0, #3                  @ fourth word of the line?
+                moveq   r0, #' '
+                bleq    putc
+                and     r0, r11, #7
+                add     r11, r11, #1
+                teq     r0, #7                  @ eighth word: end of line
+                bne     1b
+                mov     r0, #'\n'
+                bl      putc
+                cmp     r11, #64
+                blt     2b
+                mov     pc, r10
+#endif
+reloc_end:
+                .align
+@ 4096 bytes reserved in a separate writable ".stack" section.
+                .section ".stack", "aw"
+user_stack:     .space  4096
diff --git a/arch/arm26/boot/compressed/hw-bse.c b/arch/arm26/boot/compressed/hw-bse.c
new file mode 100644
index 000000000000..3e8f07f8e08a
--- /dev/null
+++ b/arch/arm26/boot/compressed/hw-bse.c
@@ -0,0 +1,74 @@
+/*
+ * Bright Star Engineering Inc.
+ *
+ * code for reading parameters from the
+ * parameter blocks of the boot block
+ * flash memory
+ *
+ */
+/*
+ * Local strcmp(): compare two NUL-terminated strings byte by byte.
+ * Returns 0 when they are equal, otherwise the difference between the
+ * first pair of differing bytes, taken as unsigned chars.
+ */
+static int strcmp(const char *s1, const char *s2)
+{
+  for (;;)
+    {
+      if (*s1 == '\0' || *s1 != *s2)
+        break;
+      s1++;
+      s2++;
+    }
+  return (*(unsigned char *) s1) - (*(unsigned char *) s2);
+}
+/* Header found at the start of a flash parameter block. */
+struct pblk_t {
+  char type;            /* bit 0 set: bse_getflashparam() treats the block as not valid */
+  unsigned short size;  /* number of bytes of entries following the header */
+};
+/*
+ * Look up a parameter in the boot-block flash parameter area.
+ *
+ * Two parameter blocks are probed, at 0x4000 and 0x6000; the first one
+ * whose type has bit 0 clear is used.  Each entry starts with its total
+ * size: one byte, or two bytes (high bit of the first byte set, remaining
+ * 15 bits are the size).  The size is followed by a NUL-terminated name
+ * and then the value.  A size byte of 0 or 0xFF ends the list.
+ *
+ * name: entry name to match, or 0 to take the first entry with a
+ *       non-empty name.
+ * Returns a pointer to the byte after the matching name's terminator
+ * (the entry's value), or a null pointer when no block is valid or no
+ * entry matches.
+ */
+static char *bse_getflashparam(char *name) {
+  unsigned int esize;
+  char *q,*r;
+  unsigned char *p,*e;
+  struct pblk_t *thepb = (struct pblk_t *) 0x00004000;
+  struct pblk_t *altpb = (struct pblk_t *) 0x00006000;
+  if (thepb->type&1) {
+    if (altpb->type&1) {
+      /* no valid param block */
+      return (char*)0;
+    }
+    /* altpb is valid */
+    thepb = altpb;
+  }
+  /* entries are walked as raw bytes; names are read through char pointers */
+  p = (unsigned char*)thepb + sizeof(struct pblk_t);
+  e = p + thepb->size;
+  while (p < e) {
+    q = (char*)p;
+    esize = *p;
+    if (esize == 0xFF) break;
+    if (esize == 0) break;
+    if (esize > 127) {
+      /* two-byte size: skip the extra size byte as well */
+      esize = (esize&0x7F)<<8 | p[1];
+      q++;
+    }
+    q++;
+    r=q;
+    if (*r && ((name == 0) || (!strcmp(name,r)))) {
+      /* step over the name and its terminator; the value follows */
+      while (*q++) ;
+      return q;
+    }
+    p+=esize;
+  }
+  return (char*)0;
+}
+/*
+ * Copy the "linuxboot" flash parameter as a NUL-terminated string to the
+ * fixed address 0xc0000100.  When the parameter is absent an empty string
+ * is stored there instead.
+ */
+void bse_setup(void) {
+  /* extract the linux cmdline from flash */
+  char *src = bse_getflashparam("linuxboot");
+  char *dst = (char *)0xc0000100;
+  if (src) {
+    for (; *src; src++, dst++)
+      *dst = *src;
+  }
+  *dst = 0;
+}
diff --git a/arch/arm26/boot/compressed/ll_char_wr.S b/arch/arm26/boot/compressed/ll_char_wr.S
new file mode 100644
index 000000000000..f024c3ebdfa5
--- /dev/null
+++ b/arch/arm26/boot/compressed/ll_char_wr.S
@@ -0,0 +1,162 @@
+/*
+ *  linux/arch/arm26/lib/ll_char_wr.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Speedups & 1bpp code (C) 1996 Philip Blundell & Russell King.
+ *
+ *  10-04-96    RMK     Various cleanups & reduced register usage.
+ *  08-04-98    RMK     Shifts re-ordered
+ */
+@ Regs: [] = corruptible
+@       {} = used
+@       () = do not use
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+#define BOLD            0x01
+#define ITALIC          0x02
+#define UNDERLINE       0x04
+#define FLASH           0x08
+#define INVERSE         0x10
+@ Address table loaded in one ldmia by ll_write_char (r4, r5, r6, lr).
+LC0:            .word   bytes_per_char_h        @ -> r4, dereferenced to its value
+                .word   video_size_row          @ -> r5, dereferenced to its value
+                .word   acorndata_8x8           @ -> r6, font data base
+                .word   con_charconvtable       @ -> lr, pixel expansion table
+@ ll_write_char: draw one 8-row character cell.
+@   r0 = address of the top row of the destination cell
+@   r1 = character code in bits 0-8, attribute flags (BOLD..INVERSE, each
+@        shifted left by 9) and two 8-bit colours in bits 16-23 and 24-31
+@ bytes_per_char_h picks the renderer: below 4 -> Lrow1bpp, 8 -> Lrow8bpplp,
+@ anything else -> Lrow4bpplp.  The 4/8bpp paths draw from the bottom row
+@ upwards, the 1bpp path from the top row downwards.
+@ NOTE(review): LOADREGS comes from <asm/assembler.h>; presumably it pops
+@ the listed registers (optionally conditionally) and returns - confirm.
+ENTRY(ll_write_char)
+                stmfd   sp!, {r4 - r7, lr}
+@
+@ Smashable regs: {r0 - r3}, [r4 - r7], (r8 - fp), [ip], (sp), [lr], (pc)
+@
+                eor     ip, r1, #UNDERLINE << 9         @ ip = r1 with the underline flag toggled
+/*
+ * calculate colours
+ */
+                tst     r1, #INVERSE << 9
+                moveq   r2, r1, lsr #16
+                moveq   r3, r1, lsr #24
+                movne   r2, r1, lsr #24                 @ inverse: swap the two colours
+                movne   r3, r1, lsr #16
+                and     r3, r3, #255
+                and     r2, r2, #255
+/*
+ * calculate offset into character table
+ */
+                mov     r1, r1, lsl #23                 @ keep the 9-bit character code ...
+                mov     r1, r1, lsr #20                 @ ... multiplied by 8 bytes per glyph
+/*
+ * calculate offset required for each row [maybe I should make this an argument to this fn.
+ * Have to see what the register usage is like in the calling routines.]
+ */
+                adr     r4, LC0
+                ldmia   r4, {r4, r5, r6, lr}
+                ldr     r4, [r4]                        @ r4 = bytes_per_char_h
+                ldr     r5, [r5]                        @ r5 = video_size_row
+/*
+ * Go to resolution-dependent routine...
+ */
+                cmp     r4, #4
+                blt     Lrow1bpp
+                eor     r2, r3, r2                      @ Create eor mask to change colour from bg
+                orr     r3, r3, r3, lsl #8              @ to fg.
+                orr     r3, r3, r3, lsl #16
+                add     r0, r0, r5, lsl #3              @ Move to bottom of character
+                add     r1, r1, #7
+                ldrb    r7, [r6, r1]                    @ bottom row of the glyph
+                tst     ip, #UNDERLINE << 9             @ flag was toggled in ip: eq = underline requested
+                eoreq   r7, r7, #255                    @ underline: invert the bottom row
+                teq     r4, #8
+                beq     Lrow8bpplp
+@
+@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc)
+@
+                orr     r3, r3, r3, lsl #4              @ 4bpp: replicate colour into every nibble
+@ Two rows per iteration; the flags set by tst survive the mul/eor/str and
+@ end the loop once the row offset reaches a multiple of 8 (top row done).
+Lrow4bpplp:     ldr     r7, [lr, r7, lsl #2]
+                mul     r7, r2, r7
+                tst     r1, #7                          @ avoid using r7 directly after
+                eor     ip, r3, r7
+                str     ip, [r0, -r5]!                  @ step up one row, then store
+                LOADREGS(eqfd, sp!, {r4 - r7, pc})
+                sub     r1, r1, #1
+                ldrb    r7, [r6, r1]
+                ldr     r7, [lr, r7, lsl #2]
+                mul     r7, r2, r7
+                tst     r1, #7                          @ avoid using r7 directly after
+                eor     ip, r3, r7
+                str     ip, [r0, -r5]!
+                subne   r1, r1, #1
+                ldrneb  r7, [r6, r1]
+                bne     Lrow4bpplp
+                LOADREGS(fd, sp!, {r4 - r7, pc})
+@
+@ Smashable regs: {r0 - r3}, [r4], {r5 - r7}, (r8 - fp), [ip], (sp), {lr}, (pc)
+@
+@ 8bpp: each font row becomes two words, one per nibble of the font byte
+@ (high nibble in r4, low nibble in ip).  Two rows per iteration.
+Lrow8bpplp:     mov     ip, r7, lsr #4
+                ldr     ip, [lr, ip, lsl #2]
+                mul     r4, r2, ip
+                and     ip, r7, #15                     @ avoid r4
+                ldr     ip, [lr, ip, lsl #2]            @ avoid r4
+                mul     ip, r2, ip                      @ avoid r4
+                eor     r4, r3, r4                      @ avoid ip
+                tst     r1, #7                          @ avoid ip
+                sub     r0, r0, r5                      @ avoid ip
+                eor     ip, r3, ip
+                stmia   r0, {r4, ip}
+                LOADREGS(eqfd, sp!, {r4 - r7, pc})
+                sub     r1, r1, #1
+                ldrb    r7, [r6, r1]
+                mov     ip, r7, lsr #4
+                ldr     ip, [lr, ip, lsl #2]
+                mul     r4, r2, ip
+                and     ip, r7, #15                     @ avoid r4
+                ldr     ip, [lr, ip, lsl #2]            @ avoid r4
+                mul     ip, r2, ip                      @ avoid r4
+                eor     r4, r3, r4                      @ avoid ip
+                tst     r1, #7                          @ avoid ip
+                sub     r0, r0, r5                      @ avoid ip
+                eor     ip, r3, ip
+                stmia   r0, {r4, ip}
+                subne   r1, r1, #1
+                ldrneb  r7, [r6, r1]
+                bne     Lrow8bpplp
+                LOADREGS(fd, sp!, {r4 - r7, pc})
+@
+@ Smashable regs: {r0 - r3}, [r4], {r5, r6}, [r7], (r8 - fp), [ip], (sp), [lr], (pc)
+@
+@ 1bpp: load the 8 glyph bytes as two words and store one byte per row,
+@ moving down by r5 after each store.
+Lrow1bpp:       add     r6, r6, r1
+                ldmia   r6, {r4, r7}
+                tst     ip, #INVERSE << 9
+                mvnne   r4, r4
+                mvnne   r7, r7
+                strb    r4, [r0], r5
+                mov     r4, r4, lsr #8
+                strb    r4, [r0], r5
+                mov     r4, r4, lsr #8
+                strb    r4, [r0], r5
+                mov     r4, r4, lsr #8
+                strb    r4, [r0], r5
+                strb    r7, [r0], r5
+                mov     r7, r7, lsr #8
+                strb    r7, [r0], r5
+                mov     r7, r7, lsr #8
+                strb    r7, [r0], r5
+                mov     r7, r7, lsr #8
+                tst     ip, #UNDERLINE << 9             @ eq = underline requested (flag toggled in ip)
+                mvneq   r7, r7                          @ underline: invert the last row
+                strb    r7, [r0], r5
+                LOADREGS(fd, sp!, {r4 - r7, pc})
+                .bss
+@ 1024 bytes of zero-initialised storage: the word-indexed pixel expansion
+@ table read by ll_write_char through the address held in lr.
+ENTRY(con_charconvtable)
+                .space  1024
diff --git a/arch/arm26/boot/compressed/misc.c b/arch/arm26/boot/compressed/misc.c
new file mode 100644
index 000000000000..f17f50e5516f
--- /dev/null
+++ b/arch/arm26/boot/compressed/misc.c
@@ -0,0 +1,316 @@
+/*
+ * misc.c
+ * 
+ * This is a collection of several routines from gzip-1.0.3 
+ * adapted for Linux.
+ *
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ *
+ * Modified for ARM Linux by Russell King
+ *
+ * Nicolas Pitre <nico@visuaide.com>  1999/04/14 :
+ *  For this code to run directly from Flash, all constant variables must
+ *  be marked with 'const' and all other variables initialized at run-time 
+ *  only.  This way all non constant variables will end up in the bss segment,
+ *  which should point to addresses in RAM and cleared to 0 on start.
+ *  This allows for a much quicker boot time.
+ */
+unsigned int __machine_arch_type;
+#include <linux/kernel.h>
+#include <asm/uaccess.h>
+#include "uncompress.h"
+#ifdef STANDALONE_DEBUG
+#define puts printf
+#endif
+#define __ptr_t void *
+/*
+ * Optimised C version of memzero for the ARM.
+ *
+ * Clears n bytes at s.  The bulk is written eight unsigned longs per
+ * iteration (one iteration per 32 bytes of n); after that each of the low
+ * five bits of n selects one progressively smaller tail chunk.
+ * NOTE(review): the chunk sizes only add up to n when unsigned long is
+ * 4 bytes wide, and s presumably has to be long-aligned - confirm.
+ */
+void __memzero (__ptr_t s, size_t n)
+{
+        union { void *vp; unsigned long *ulp; unsigned char *ucp; } cur;
+        int blocks;
+        cur.vp = s;
+        blocks = n >> 5;
+        while (blocks > 0) {
+                cur.ulp[0] = 0;
+                cur.ulp[1] = 0;
+                cur.ulp[2] = 0;
+                cur.ulp[3] = 0;
+                cur.ulp[4] = 0;
+                cur.ulp[5] = 0;
+                cur.ulp[6] = 0;
+                cur.ulp[7] = 0;
+                cur.ulp += 8;
+                blocks--;
+        }
+        if (n & 0x10) {
+                cur.ulp[0] = 0;
+                cur.ulp[1] = 0;
+                cur.ulp[2] = 0;
+                cur.ulp[3] = 0;
+                cur.ulp += 4;
+        }
+        if (n & 0x08) {
+                cur.ulp[0] = 0;
+                cur.ulp[1] = 0;
+                cur.ulp += 2;
+        }
+        if (n & 0x04) {
+                cur.ulp[0] = 0;
+                cur.ulp += 1;
+        }
+        if (n & 0x02) {
+                cur.ucp[0] = 0;
+                cur.ucp[1] = 0;
+                cur.ucp += 2;
+        }
+        if (n & 0x01)
+                cur.ucp[0] = 0;
+}
+/*
+ * Local memcpy(): copy __n bytes from __src to __dest one byte at a time,
+ * in ascending address order.  Eight bytes are copied per loop iteration;
+ * the low three bits of __n select the 4-, 2- and 1-byte tail copies.
+ * Returns __dest.
+ */
+static inline __ptr_t memcpy(__ptr_t __dest, __const __ptr_t __src,
+                            size_t __n)
+{
+        unsigned char *out = (unsigned char *)__dest;
+        unsigned char *in = (unsigned char *)__src;
+        int chunks;
+        for (chunks = __n >> 3; chunks > 0; chunks--) {
+                out[0] = in[0];
+                out[1] = in[1];
+                out[2] = in[2];
+                out[3] = in[3];
+                out[4] = in[4];
+                out[5] = in[5];
+                out[6] = in[6];
+                out[7] = in[7];
+                out += 8;
+                in += 8;
+        }
+        if (__n & 0x4) {
+                out[0] = in[0];
+                out[1] = in[1];
+                out[2] = in[2];
+                out[3] = in[3];
+                out += 4;
+                in += 4;
+        }
+        if (__n & 0x2) {
+                out[0] = in[0];
+                out[1] = in[1];
+                out += 2;
+                in += 2;
+        }
+        if (__n & 0x1)
+                out[0] = in[0];
+        return __dest;
+}
+/*
+ * gzip declarations
+ */
+#define OF(args)  args
+#define STATIC static
+typedef unsigned char  uch;
+typedef unsigned short ush;
+typedef unsigned long  ulg;
+#define WSIZE 0x8000            /* Window size must be at least 32k, */
+                                /* and a power of two */
+static uch *inbuf;              /* input buffer */
+static uch window[WSIZE];       /* Sliding window buffer */
+static unsigned insize;         /* valid bytes in inbuf */
+static unsigned inptr;          /* index of next byte to be processed in inbuf */
+static unsigned outcnt;         /* bytes in output buffer */
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ascii text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
+#define RESERVED     0xC0 /* bit 6,7:   reserved */
+#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond,msg) {if(!(cond)) error(msg);}
+#  define Trace(x) fprintf x
+#  define Tracev(x) {if (verbose) fprintf x ;}
+#  define Tracevv(x) {if (verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+/* Helpers defined later in this file; declared before inflate.c is included */
+static int  fill_inbuf(void);
+static void flush_window(void);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+/* Start and end of the compressed input consumed by fill_inbuf() */
+extern char input_data[];
+extern char input_data_end[];
+static uch *output_data;        /* base address the decompressed data is written to */
+static ulg output_ptr;          /* offset of the next byte to write in output_data */
+static ulg bytes_out;           /* running total of bytes flushed by flush_window() */
+static void *malloc(int size);
+static void free(void *where);
+/* the next three prototypes repeat the declarations above */
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+static void puts(const char *);
+extern int end;
+static ulg free_mem_ptr;        /* next free address handed out by malloc() */
+static ulg free_mem_ptr_end;    /* malloc() fails once free_mem_ptr reaches this */
+#define HEAP_SIZE 0x2000
+#include "../../../../lib/inflate.c"
+#ifndef STANDALONE_DEBUG
+/*
+ * Bump allocator for the decompressor: rounds free_mem_ptr up to a
+ * multiple of 4, returns that address and advances free_mem_ptr by size.
+ * Calls error() for a negative size, for an unset free_mem_ptr, or when
+ * the new free_mem_ptr reaches free_mem_ptr_end.
+ */
+static void *malloc(int size)
+{
+        void *block;
+        if (size <0)
+                error("Malloc error");
+        if (free_mem_ptr <= 0)
+                error("Memory error");
+        /* Align */
+        free_mem_ptr += 3;
+        free_mem_ptr &= ~3;
+        block = (void *)free_mem_ptr;
+        free_mem_ptr = free_mem_ptr + size;
+        if (!(free_mem_ptr < free_mem_ptr_end))
+                error("Out of memory");
+        return block;
+}
+/* Intentionally empty: individual allocations are never released. */
+static void free(void *where)
+{ /* gzip_mark & gzip_release do the free */
+}
+/* Save the current allocation point in *ptr so gzip_release() can restore it. */
+static void gzip_mark(void **ptr)
+{
+        arch_decomp_wdog();
+        *ptr = (void *) free_mem_ptr;
+}
+/* Reset the allocation point to the value stored in *ptr (see gzip_mark). */
+static void gzip_release(void **ptr)
+{
+        arch_decomp_wdog();
+        free_mem_ptr = (long) *ptr;
+}
+#else
+/* STANDALONE_DEBUG build: mark/release are no-ops. */
+static void gzip_mark(void **ptr)
+{
+}
+static void gzip_release(void **ptr)
+{
+}
+#endif
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ *
+ * The first call points inbuf at the whole of input_data..input_data_end
+ * and returns its first byte.  Any later call means that input has been
+ * used up, which is reported through error().
+ */
+int fill_inbuf(void)
+{
+        if (insize != 0)
+                error("ran out of input data");
+        /* input_data is declared as char[]; inbuf holds unsigned bytes */
+        inbuf = (uch *)input_data;
+        insize = &input_data_end[0] - &input_data[0];
+        inptr = 1;
+        return inbuf[0];
+}
+/* ===========================================================================
+ * Copy window[0..outcnt-1] to the output at output_data + output_ptr,
+ * folding every byte into the running crc, then advance bytes_out and
+ * output_ptr, reset outcnt and print a progress dot.
+ * (Used for the decompressed data only.)
+ */
+void flush_window(void)
+{
+        uch *dst = output_data + output_ptr;
+        ulg running = crc;
+        unsigned i;
+        for (i = 0; i < outcnt; i++) {
+                uch byte = window[i];
+                dst[i] = byte;
+                running = crc_32_tab[((int)running ^ byte) & 0xff] ^ (running >> 8);
+        }
+        crc = running;
+        bytes_out = bytes_out + (ulg)outcnt;
+        output_ptr = output_ptr + (ulg)outcnt;
+        outcnt = 0;
+        puts(".");
+}
+/*
+ * Print the message x followed by " -- System halted", then spin forever.
+ * Never returns.
+ */
+static void error(char *x)
+{
+        puts("\n\n");
+        puts(x);
+        puts("\n\n -- System halted");
+        while(1);       /* Halt */
+}
+#ifndef STANDALONE_DEBUG
+/*
+ * Entry point of the decompressor.
+ *
+ * output_start:       address the decompressed data is written to
+ * free_mem_ptr_p:     start of the memory malloc() may hand out
+ * free_mem_ptr_end_p: end of that memory
+ * arch_id:            stored in __machine_arch_type
+ *
+ * Returns output_ptr, which flush_window() advances by the number of bytes
+ * it writes.  NOTE(review): makecrc() and gunzip() are not defined in this
+ * file; presumably they come from the included inflate.c - confirm.
+ */
+ulg
+decompress_kernel(ulg output_start, ulg free_mem_ptr_p, ulg free_mem_ptr_end_p,
+                  int arch_id)
+{
+        output_data             = (uch *)output_start;  /* Points to kernel start */
+        free_mem_ptr            = free_mem_ptr_p;
+        free_mem_ptr_end        = free_mem_ptr_end_p;
+        __machine_arch_type     = arch_id;
+        arch_decomp_setup();    /* must run before the first puts(): it sets up the video globals */
+        makecrc();
+        puts("Uncompressing Linux...");
+        gunzip();
+        puts(" done, booting the kernel.\n");
+        return output_ptr;
+}
+#else
+char output_buffer[1500*1024];
+/*
+ * STANDALONE_DEBUG entry point: decompress into the static output_buffer
+ * and return 0.
+ */
+int main()
+{
+        /* output_buffer is char[]; output_data holds unsigned bytes */
+        output_data = (uch *)output_buffer;
+        makecrc();
+        puts("Uncompressing Linux...");
+        gunzip();
+        puts("done.\n");
+        return 0;
+}
+#endif
+        
diff --git a/arch/arm26/boot/compressed/uncompress.h b/arch/arm26/boot/compressed/uncompress.h
new file mode 100644
index 000000000000..66d9b938a7a4
--- /dev/null
+++ b/arch/arm26/boot/compressed/uncompress.h
@@ -0,0 +1,110 @@
+/*
+ *
+ *  Copyright (C) 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define VIDMEM ((char *)0x02000000)
+ 
+/* Text-screen geometry and colour, filled in by arch_decomp_setup() */
+int video_num_columns, video_num_lines, video_size_row;
+int white, bytes_per_char_h;
+extern unsigned long con_charconvtable[256];
+/*
+ * Parameter block read through the params pointer below.
+ * NOTE(review): with 4-byte longs the named fields take 11 words, so the
+ * unused[] padding brings the structure to 256 bytes - confirm intent.
+ */
+struct param_struct {
+        unsigned long page_size;
+        unsigned long nr_pages;
+        unsigned long ramdisk_size;
+        unsigned long mountrootrdonly;
+        unsigned long rootdev;
+        unsigned long video_num_cols;
+        unsigned long video_num_rows;
+        unsigned long video_x;          /* cursor column, read and updated by puts() */
+        unsigned long video_y;          /* cursor row, read and updated by puts() */
+        unsigned long memc_control_reg;
+        unsigned char sounddefault;
+        unsigned char adfsdrives;
+        unsigned char bytes_per_char_h;
+        unsigned char bytes_per_char_v;
+        unsigned long unused[256/4-11];
+};
+static struct param_struct *params = (struct param_struct *)0x0207c000;
+ 
+/*
+ * Write the string s to the screen through ll_write_char(), starting at
+ * the cursor position stored in params and storing the new position back
+ * when done.  '\n' moves to column 0 of the next row; running past the
+ * last column does the same.  The row never advances beyond the last
+ * line (there is no scrolling).
+ * This does not append a newline
+ */
+static void puts(const char *s)
+{
+        extern void ll_write_char(char *, unsigned long);
+        int col = params->video_x;
+        int row = params->video_y;
+        unsigned char ch;
+        char *cell;
+        for (;;) {
+                int next_line = 0;
+                ch = *(unsigned char *)s++;
+                if (ch == '\0')
+                        break;
+                if (ch == '\n') {
+                        next_line = 1;
+                } else {
+                        cell = VIDMEM + ((row*video_num_columns*params->bytes_per_char_v+col)*bytes_per_char_h);
+                        ll_write_char(cell, ch|(white<<16));
+                        if (++col >= video_num_columns)
+                                next_line = 1;
+                }
+                if (next_line) {
+                        col = 0;
+                        /* stay on the last line once the bottom is reached */
+                        if (++row >= video_num_lines)
+                                row--;
+                }
+        }
+        params->video_x = col;
+        params->video_y = row;
+}
+static void error(char *x);
+/*
+ * Setup for decompression
+ *
+ * Copies the screen geometry out of params, builds con_charconvtable and
+ * picks the value of white.  When bytes_per_char_h is 4 the table has 256
+ * entries, each spreading the 8 bits of the index over 8 nibbles (index
+ * bit 7 lands in the lowest nibble).  Otherwise it has 16 entries, each
+ * spreading 4 index bits over 4 bytes (index bit 3 lands in the lowest
+ * byte).  Halts through error() when params describe less than 4MB.
+ */
+static void arch_decomp_setup(void)
+{
+        int i, bit;
+        video_num_lines = params->video_num_rows;
+        video_num_columns = params->video_num_cols;
+        bytes_per_char_h = params->bytes_per_char_h;
+        video_size_row = video_num_columns * bytes_per_char_h;
+        if (bytes_per_char_h == 4) {
+                for (i = 0; i < 256; i++) {
+                        unsigned long spread = 0;
+                        for (bit = 0; bit < 8; bit++)
+                                if (i & (128 >> bit))
+                                        spread |= 1 << (4 * bit);
+                        con_charconvtable[i] = spread;
+                }
+        } else {
+                for (i = 0; i < 16; i++) {
+                        unsigned long spread = 0;
+                        for (bit = 0; bit < 4; bit++)
+                                if (i & (8 >> bit))
+                                        spread |= 1 << (8 * bit);
+                        con_charconvtable[i] = spread;
+                }
+        }
+        white = bytes_per_char_h == 8 ? 0xfc : 7;
+        if (params->nr_pages * params->page_size < 4096*1024) error("<4M of mem\n");
+}
+/*
+ * nothing to do
+ */
+#define arch_decomp_wdog()
diff --git a/arch/arm26/boot/compressed/vmlinux.lds.in b/arch/arm26/boot/compressed/vmlinux.lds.in
new file mode 100644
index 000000000000..86d821d5ab70
--- /dev/null
+++ b/arch/arm26/boot/compressed/vmlinux.lds.in
@@ -0,0 +1,60 @@
+/*
+ *  linux/arch/arm26/boot/compressed/vmlinux.lds.in
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+OUTPUT_ARCH(arm)
+ENTRY(_start)
+SECTIONS
+{
+  /*
+   * LOAD_ADDR, TEXT_START and BSS_START are not defined in this script.
+   * NOTE(review): presumably substituted when the .lds is generated from
+   * this .in file - confirm against the Makefile.
+   */
+  . = LOAD_ADDR;
+  _load_addr = .;
+  . = TEXT_START;
+  _text = .;
+  .text : {
+    _start = .;
+    *(.start)
+    *(.text)
+    *(.fixup)
+    *(.gnu.warning)
+    *(.rodata)
+    *(.rodata.*)
+    *(.glue_7)
+    *(.glue_7t)
+    /* piggy.o is bracketed by the input_data / input_data_end symbols */
+    input_data = .;
+    arch/arm26/boot/compressed/piggy.o
+    input_data_end = .;
+    . = ALIGN(4);
+  }
+  _etext = .;
+  _got_start = .;
+  .got                  : { *(.got) }
+  _got_end = .;
+  .got.plt              : { *(.got.plt) }
+  .data                 : { *(.data) }
+  _edata = .;
+  /* .bss is placed at its own address, independent of where .data ended */
+  . = BSS_START;
+  __bss_start = .;
+  .bss                  : { *(.bss) }
+  _end = .;
+  /* .stack follows .bss and is not loaded from the image */
+  .stack (NOLOAD)       : { *(.stack) }
+  .stab 0               : { *(.stab) }
+  .stabstr 0            : { *(.stabstr) }
+  .stab.excl 0          : { *(.stab.excl) }
+  .stab.exclstr 0       : { *(.stab.exclstr) }
+  .stab.index 0         : { *(.stab.index) }
+  .stab.indexstr 0      : { *(.stab.indexstr) }
+  .comment 0            : { *(.comment) }
+}
diff --git a/arch/arm26/boot/install.sh b/arch/arm26/boot/install.sh
new file mode 100644
index 000000000000..c628328dd9ec
--- /dev/null
+++ b/arch/arm26/boot/install.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+#
+# arch/arm26/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+# Adapted from code in arch/i386/boot/install.sh by Russell King
+# Stolen from arm32 by Ian Molton
+#
+# "make install" script for arm architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+# User may have a custom install script
+# All expansions of $1..$4 are quoted so that versions or paths containing
+# whitespace or glob characters are passed through as single words.
+if [ -x /sbin/installkernel ]; then
+  exec /sbin/installkernel "$@"
+fi
+if [ "$2" = "zImage" ]; then
+# Compressed install
+  echo "Installing compressed kernel"
+  # keep the previous image and map as *.old
+  if [ -f "$4/vmlinuz-$1" ]; then
+    mv "$4/vmlinuz-$1" "$4/vmlinuz.old"
+  fi
+  if [ -f "$4/System.map-$1" ]; then
+    mv "$4/System.map-$1" "$4/System.old"
+  fi
+  cat "$2" > "$4/vmlinuz-$1"
+  cp "$3" "$4/System.map-$1"
+else
+# Normal install
+  echo "Installing normal kernel"
+  # keep the previous image and map as *.old
+  if [ -f "$4/vmlinux-$1" ]; then
+    mv "$4/vmlinux-$1" "$4/vmlinux.old"
+  fi
+  if [ -f "$4/System.map" ]; then
+    mv "$4/System.map" "$4/System.old"
+  fi
+  cat "$2" > "$4/vmlinux-$1"
+  cp "$3" "$4/System.map"
+fi
+if [ -x /sbin/loadmap ]; then
+  /sbin/loadmap --rdev /dev/ima
+else
+  echo "You have to install it yourself"
+fi
diff --git a/arch/arm26/defconfig b/arch/arm26/defconfig
new file mode 100644
index 000000000000..c4a89703c3d8
--- /dev/null
+++ b/arch/arm26/defconfig
@@ -0,0 +1,362 @@
+#
+# Automatically generated by make menuconfig: don't edit
+#
+CONFIG_ARM=y
+# CONFIG_EISA is not set
+# CONFIG_SBUS is not set
+# CONFIG_MCA is not set
+CONFIG_UID16=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set
+# CONFIG_GENERIC_BUST_SPINLOCK is not set
+# CONFIG_GENERIC_ISA_DMA is not set
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+#
+# General setup
+#
+# CONFIG_NET is not set
+# CONFIG_SYSVIPC is not set
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_SYSCTL is not set
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+#
+# System Type
+#
+CONFIG_ARCH_ARC=y
+# CONFIG_ARCH_A5K is not set
+CONFIG_ARCH_ACORN=y
+# CONFIG_CPU_32 is not set
+CONFIG_CPU_26=y
+# CONFIG_PAGESIZE_16 is not set
+#
+# General setup
+#
+CONFIG_FIQ=y
+# CONFIG_ZBOOT_ROM is not set
+CONFIG_ZBOOT_ROM_TEXT=0
+CONFIG_ZBOOT_ROM_BSS=0
+CONFIG_FPE_NWFPE=y
+CONFIG_KCORE_ELF=y
+# CONFIG_KCORE_AOUT is not set
+# CONFIG_BINFMT_AOUT is not set
+# CONFIG_BINFMT_ELF is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_CMDLINE=""
+# CONFIG_ALIGNMENT_TRAP is not set
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+#
+# Plug and Play configuration
+#
+# CONFIG_PNP is not set
+# CONFIG_ISAPNP is not set
+# CONFIG_PNPBIOS is not set
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_CISS_SCSI_TAPE is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_UMEM is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_BLK_DEV_INITRD is not set
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+# CONFIG_BLK_DEV_MD is not set
+# CONFIG_MD_LINEAR is not set
+# CONFIG_MD_RAID0 is not set
+# CONFIG_MD_RAID1 is not set
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_MULTIPATH is not set
+# CONFIG_BLK_DEV_LVM is not set
+#
+# Acorn-specific block devices
+#
+# CONFIG_BLK_DEV_FD1772 is not set
+# CONFIG_BLK_DEV_MFM is not set
+#
+# ATA/ATAPI/MFM/RLL support
+#
+# CONFIG_IDE is not set
+# CONFIG_BLK_DEV_HD is not set
+#
+# SCSI support
+#
+# CONFIG_SCSI is not set
+#
+# ISDN subsystem
+#
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+# CONFIG_INPUT_KEYBDEV is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_TSDEV is not set
+# CONFIG_INPUT_TSLIBDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_INPUT_EVBUG is not set
+# CONFIG_INPUT_UINPUT is not set
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+# CONFIG_GAMEPORT_NS558 is not set
+# CONFIG_GAMEPORT_L4 is not set
+# CONFIG_GAMEPORT_EMU10K1 is not set
+# CONFIG_GAMEPORT_VORTEX is not set
+# CONFIG_GAMEPORT_FM801 is not set
+# CONFIG_GAMEPORT_CS461x is not set
+# CONFIG_SERIO is not set
+# CONFIG_SERIO_I8042 is not set
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_SERIO_CT82C710 is not set
+# CONFIG_SERIO_PARKBD is not set
+# CONFIG_SERIO_ACORN is not set
+#
+# Character devices
+#
+# CONFIG_VT is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+#
+# Serial drivers
+#
+# CONFIG_SERIAL_8250 is not set
+# CONFIG_SERIAL_8250_CONSOLE is not set
+# CONFIG_SERIAL_8250_CS is not set
+# CONFIG_SERIAL_8250_EXTENDED is not set
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+# CONFIG_SERIAL_8250_SHARE_IRQ is not set
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_MULTIPORT is not set
+# CONFIG_SERIAL_8250_RSA is not set
+# CONFIG_ATOMWIDE_SERIAL is not set
+# CONFIG_DUALSP_SERIAL is not set
+# CONFIG_SERIAL_AMBA is not set
+# CONFIG_SERIAL_AMBA_CONSOLE is not set
+# CONFIG_SERIAL_CLPS711X is not set
+# CONFIG_SERIAL_CLPS711X_CONSOLE is not set
+# CONFIG_SERIAL_CLPS711X_OLD_NAME is not set
+# CONFIG_SERIAL_21285 is not set
+# CONFIG_SERIAL_21285_OLD is not set
+# CONFIG_SERIAL_21285_CONSOLE is not set
+# CONFIG_SERIAL_UART00 is not set
+# CONFIG_SERIAL_UART00_CONSOLE is not set
+# CONFIG_SERIAL_SA1100 is not set
+# CONFIG_SERIAL_SA1100_CONSOLE is not set
+# CONFIG_UNIX98_PTYS is not set
+#
+# I2C support
+#
+CONFIG_I2C=y
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_ALGOPCF=y
+# CONFIG_I2C_ELEKTOR is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_PROC is not set
+#
+# L3 serial bus support
+#
+# CONFIG_L3 is not set
+# CONFIG_L3_ALGOBIT is not set
+# CONFIG_L3_BIT_SA1100_GPIO is not set
+# CONFIG_L3_SA1111 is not set
+# CONFIG_BIT_SA1100_GPIO is not set
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+# CONFIG_PSMOUSE is not set
+# CONFIG_QIC02_TAPE is not set
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_RAW_DRIVER is not set
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_QFMT_V1 is not set
+# CONFIG_QFMT_V2 is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_JBD is not set
+# CONFIG_JBD_DEBUG is not set
+# CONFIG_FAT_FS is not set
+# CONFIG_MSDOS_FS is not set
+# CONFIG_UMSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_TMPFS is not set
+CONFIG_RAMFS=y
+# CONFIG_ISO9660_FS is not set
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_JFS_DEBUG is not set
+# CONFIG_JFS_STATISTICS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_DEBUG is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+# CONFIG_DEVPTS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UDF_RW is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_UFS_FS_WRITE is not set
+# CONFIG_NCPFS_NLS is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_ZISOFS_FS is not set
+#
+# Partition Types
+#
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_ACORN_PARTITION=y
+# CONFIG_ACORN_PARTITION_EESOX is not set
+# CONFIG_ACORN_PARTITION_ICS is not set
+CONFIG_ACORN_PARTITION_ADFS=y
+# CONFIG_ACORN_PARTITION_POWERTEC is not set
+CONFIG_ACORN_PARTITION_RISCIX=y
+# CONFIG_OSF_PARTITION is not set
+# CONFIG_AMIGA_PARTITION is not set
+# CONFIG_ATARI_PARTITION is not set
+# CONFIG_MAC_PARTITION is not set
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_LDM_PARTITION is not set
+# CONFIG_SGI_PARTITION is not set
+# CONFIG_ULTRIX_PARTITION is not set
+# CONFIG_SUN_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_SMB_NLS is not set
+# CONFIG_NLS is not set
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+#
+# Multimedia Capabilities Port drivers
+#
+# CONFIG_MCP is not set
+# CONFIG_MCP_SA1100 is not set
+# CONFIG_MCP_UCB1200 is not set
+# CONFIG_MCP_UCB1200_AUDIO is not set
+# CONFIG_MCP_UCB1200_TS is not set
+#
+# Console Switches
+#
+# CONFIG_SWITCHES is not set
+# CONFIG_SWITCHES_SA1100 is not set
+# CONFIG_SWITCHES_UCB1X00 is not set
+#
+# USB support
+#
+# CONFIG_USB is not set
+#
+# Kernel hacking
+#
+# CONFIG_NO_FRAME_POINTER is not set
+CONFIG_DEBUG_USER=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_WAITQ=y
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_ERRORS=y
+CONFIG_DEBUG_LL=y
+#
+# Security options
+#
+CONFIG_SECURITY_CAPABILITIES=y
+#
+# Library routines
+#
+CONFIG_CRC32=y
+# CONFIG_ZLIB_INFLATE is not set
+# CONFIG_ZLIB_DEFLATE is not set
diff --git a/arch/arm26/kernel/Makefile b/arch/arm26/kernel/Makefile
new file mode 100644
index 000000000000..ee9fb49fdb78
--- /dev/null
+++ b/arch/arm26/kernel/Makefile
@@ -0,0 +1,17 @@
+#
+# Makefile for the linux kernel.
+#
+# Object file lists.
+AFLAGS_head.o           := -DTEXTADDR=$(TEXTADDR)
+# Objects that are always built; each one is listed exactly once.
+obj-y           := compat.o dma.o entry.o irq.o process.o ptrace.o       \
+                   semaphore.o setup.o signal.o sys_arm.o time.o traps.o \
+                   ecard.o fiq.o
+extra-y         := head.o init_task.o vmlinux.lds
+obj-$(CONFIG_FIQ)               += fiq.o
+obj-$(CONFIG_MODULES)           += armksyms.o
diff --git a/arch/arm26/kernel/armksyms.c b/arch/arm26/kernel/armksyms.c
new file mode 100644
index 000000000000..35514b398e2e
--- /dev/null
+++ b/arch/arm26/kernel/armksyms.c
@@ -0,0 +1,220 @@
+/*
+ *  linux/arch/arm26/kernel/armksyms.c
+ *
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/user.h>
+#include <linux/string.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/delay.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>
+#include <linux/smp_lock.h>
+#include <linux/syscalls.h>
+#include <asm/byteorder.h>
+#include <asm/elf.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/processor.h>
+#include <asm/semaphore.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/mach-types.h>
+extern void dump_thread(struct pt_regs *, struct user *);
+extern int dump_fpu(struct pt_regs *, struct user_fp_struct *);
+extern void inswb(unsigned int port, void *to, int len);
+extern void outswb(unsigned int port, const void *to, int len);
+extern void __bad_xchg(volatile void *ptr, int size);
+/*
+ * libgcc functions - functions that are used internally by the
+ * compiler...  (prototypes are not correct though, but that
+ * doesn't really matter since they're not versioned).
+ */
+extern void __ashldi3(void);
+extern void __ashrdi3(void);
+extern void __divsi3(void);
+extern void __lshrdi3(void);
+extern void __modsi3(void);
+extern void __muldi3(void);
+extern void __ucmpdi2(void);
+extern void __udivdi3(void);
+extern void __umoddi3(void);
+extern void __udivmoddi4(void);
+extern void __udivsi3(void);
+extern void __umodsi3(void);
+extern void abort(void);
+extern void ret_from_exception(void);
+extern void fpundefinstr(void);
+extern void fp_enter(void);
+/*
+ * This has a special calling convention; it doesn't
+ * modify any of the usual registers, except for LR.
+ * FIXME - we used to use our own local version - looks to be in kernel/softirq now
+ */
+//extern void __do_softirq(void);
+#define EXPORT_SYMBOL_ALIAS(sym,orig)           \
+ const char __kstrtab_##sym[]                   \
+  __attribute__((section(".kstrtab"))) =        \
+    __MODULE_STRING(sym);                       \
+ const struct module_symbol __ksymtab_##sym     \
+  __attribute__((section("__ksymtab"))) =       \
+    { (unsigned long)&orig, __kstrtab_##sym }; /* table entry: address of 'orig', name string __kstrtab_<sym> */
+/*
+ * floating point math emulator support.
+ * These symbols will never change their calling convention...
+ */
+EXPORT_SYMBOL_ALIAS(kern_fp_enter,fp_enter);
+EXPORT_SYMBOL_ALIAS(fp_printk,printk);
+EXPORT_SYMBOL_ALIAS(fp_send_sig,send_sig);
+EXPORT_SYMBOL(fpundefinstr);
+EXPORT_SYMBOL(ret_from_exception);
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(kd_mksound);
+#endif
+//EXPORT_SYMBOL(__do_softirq);
+        /* platform dependent support */
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(udelay);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(system_rev);
+EXPORT_SYMBOL(system_serial_low);
+EXPORT_SYMBOL(system_serial_high);
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+EXPORT_SYMBOL(__bug);
+#endif
+EXPORT_SYMBOL(__bad_xchg);
+EXPORT_SYMBOL(__readwrite_bug);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(set_irq_type);
+EXPORT_SYMBOL(pm_idle);
+EXPORT_SYMBOL(pm_power_off);
+        /* processor dependencies */
+EXPORT_SYMBOL(__machine_arch_type);
+        /* networking */
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+EXPORT_SYMBOL(__csum_ipv6_magic);
+        /* io */
+#ifndef __raw_readsb
+EXPORT_SYMBOL(__raw_readsb);
+#endif
+#ifndef __raw_readsw
+EXPORT_SYMBOL(__raw_readsw);
+#endif
+#ifndef __raw_readsl
+EXPORT_SYMBOL(__raw_readsl);
+#endif
+#ifndef __raw_writesb
+EXPORT_SYMBOL(__raw_writesb);
+#endif
+#ifndef __raw_writesw
+EXPORT_SYMBOL(__raw_writesw);
+#endif
+#ifndef __raw_writesl
+EXPORT_SYMBOL(__raw_writesl);
+#endif
+        /* string / mem functions */
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strncat);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+EXPORT_SYMBOL(strchr);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(strrchr);
+EXPORT_SYMBOL(strstr);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(memscan);
+EXPORT_SYMBOL(__memzero);
+        /* user mem (segment) */
+EXPORT_SYMBOL(uaccess_kernel);
+EXPORT_SYMBOL(uaccess_user);
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__get_user_8);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+        /* gcc lib functions */
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__divsi3);
+EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__modsi3);
+EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__ucmpdi2);
+EXPORT_SYMBOL(__udivdi3);
+EXPORT_SYMBOL(__umoddi3);
+EXPORT_SYMBOL(__udivmoddi4);
+EXPORT_SYMBOL(__udivsi3);
+EXPORT_SYMBOL(__umodsi3);
+        /* bitops */
+EXPORT_SYMBOL(_set_bit_le);
+EXPORT_SYMBOL(_test_and_set_bit_le);
+EXPORT_SYMBOL(_clear_bit_le);
+EXPORT_SYMBOL(_test_and_clear_bit_le);
+EXPORT_SYMBOL(_change_bit_le);
+EXPORT_SYMBOL(_test_and_change_bit_le);
+EXPORT_SYMBOL(_find_first_zero_bit_le);
+EXPORT_SYMBOL(_find_next_zero_bit_le);
+        /* elf */
+EXPORT_SYMBOL(elf_platform);
+EXPORT_SYMBOL(elf_hwcap);
+        /* syscalls */
+EXPORT_SYMBOL(sys_write);
+EXPORT_SYMBOL(sys_read);
+EXPORT_SYMBOL(sys_lseek);
+EXPORT_SYMBOL(sys_open);
+EXPORT_SYMBOL(sys_exit);
+EXPORT_SYMBOL(sys_wait4);
+EXPORT_SYMBOL(get_wchan);
+#ifdef CONFIG_PREEMPT
+EXPORT_SYMBOL(kernel_flag);
+#endif
diff --git a/arch/arm26/kernel/asm-offsets.c b/arch/arm26/kernel/asm-offsets.c
new file mode 100644
index 000000000000..4ccacaef94df
--- /dev/null
+++ b/arch/arm26/kernel/asm-offsets.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 1995-2001 Russell King
+ *               2001-2002 Keith Owens
+ *               2003      Ian Molton
+ *     
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+/*
+ * Make sure that the compiler and target are compatible.
+ */
+#if defined(__APCS_32__) && defined(CONFIG_CPU_26)
+#error Sorry, your compiler targets APCS-32 but this kernel requires APCS-26
+#endif
+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95)
+#error Sorry, your compiler is known to miscompile kernels.  Only use gcc 2.95.3 and later.
+#endif
+#if __GNUC__ == 2 && __GNUC_MINOR__ == 95
+/* shame we can't detect the .1 or .2 releases */
+#warning GCC 2.95.2 and earlier miscompiles kernels.
+#endif
+/*
+ * DEFINE(sym, val) emits a "->sym <operand for val> <text of val>" line
+ * into the asm output; BLANK() emits a bare "->" line.
+ * Use the BLANK marker if you need to separate the values later.
+ */
+#define DEFINE(sym, val) \
+        asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+#define BLANK() asm volatile("\n->" : : )
+int main(void) /* body consists only of DEFINE()/BLANK() asm markers, emitted in this order */
+{
+  DEFINE(TSK_ACTIVE_MM,         offsetof(struct task_struct, active_mm));
+  BLANK();
+  DEFINE(VMA_VM_MM,             offsetof(struct vm_area_struct, vm_mm));
+  DEFINE(VMA_VM_FLAGS,          offsetof(struct vm_area_struct, vm_flags));
+  BLANK();
+  DEFINE(VM_EXEC,               VM_EXEC); /* re-exports the C constant under the same name */
+  BLANK();
+  BLANK();
+  DEFINE(PAGE_PRESENT,          _PAGE_PRESENT); /* _PAGE_* constants are exported without the leading underscore */
+  DEFINE(PAGE_READONLY,         _PAGE_READONLY);
+  DEFINE(PAGE_NOT_USER,         _PAGE_NOT_USER);
+  DEFINE(PAGE_OLD,              _PAGE_OLD);
+  DEFINE(PAGE_CLEAN,            _PAGE_CLEAN);
+  BLANK();
+  DEFINE(PAGE_SZ,               PAGE_SIZE);
+  BLANK();
+  DEFINE(SYS_ERROR0,            0x9f0000);
+  return 0; 
+}
diff --git a/arch/arm26/kernel/calls.S b/arch/arm26/kernel/calls.S
new file mode 100644
index 000000000000..e3d276827c84
--- /dev/null
+++ b/arch/arm26/kernel/calls.S
@@ -0,0 +1,265 @@
+/*
+ *  linux/arch/arm26/kernel/calls.S
+ *
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  FIXME
+ *  This file is included twice in entry.S which may not be necessary
+ */
+//FIXME - clearly NR_syscalls is never defined here
+#ifndef NR_syscalls
+#define NR_syscalls 256
+#else
+__syscall_start:
+/* 0 */         .long   sys_ni_syscall
+                .long   sys_exit
+                .long   sys_fork_wrapper
+                .long   sys_read
+                .long   sys_write
+/* 5 */         .long   sys_open
+                .long   sys_close
+                .long   sys_ni_syscall          /* was sys_waitpid */
+                .long   sys_creat
+                .long   sys_link
+/* 10 */        .long   sys_unlink
+                .long   sys_execve_wrapper
+                .long   sys_chdir
+                .long   sys_time                /* used by libc4 */
+                .long   sys_mknod
+/* 15 */        .long   sys_chmod
+                .long   sys_lchown16
+                .long   sys_ni_syscall          /* was sys_break */
+                .long   sys_ni_syscall          /* was sys_stat */
+                .long   sys_lseek
+/* 20 */        .long   sys_getpid
+                .long   sys_mount
+                .long   sys_oldumount           /* used by libc4 */
+                .long   sys_setuid16
+                .long   sys_getuid16
+/* 25 */        .long   sys_stime
+                .long   sys_ptrace
+                .long   sys_alarm               /* used by libc4 */
+                .long   sys_ni_syscall          /* was sys_fstat */
+                .long   sys_pause
+/* 30 */        .long   sys_utime               /* used by libc4 */
+                .long   sys_ni_syscall          /* was sys_stty */
+                .long   sys_ni_syscall          /* was sys_getty */
+                .long   sys_access
+                .long   sys_nice
+/* 35 */        .long   sys_ni_syscall          /* was sys_ftime */
+                .long   sys_sync
+                .long   sys_kill
+                .long   sys_rename
+                .long   sys_mkdir
+/* 40 */        .long   sys_rmdir
+                .long   sys_dup
+                .long   sys_pipe
+                .long   sys_times
+                .long   sys_ni_syscall          /* was sys_prof */
+/* 45 */        .long   sys_brk
+                .long   sys_setgid16
+                .long   sys_getgid16
+                .long   sys_ni_syscall          /* was sys_signal */
+                .long   sys_geteuid16
+/* 50 */        .long   sys_getegid16
+                .long   sys_acct
+                .long   sys_umount
+                .long   sys_ni_syscall          /* was sys_lock */
+                .long   sys_ioctl
+/* 55 */        .long   sys_fcntl
+                .long   sys_ni_syscall          /* was sys_mpx */
+                .long   sys_setpgid
+                .long   sys_ni_syscall          /* was sys_ulimit */
+                .long   sys_ni_syscall          /* was sys_olduname */
+/* 60 */        .long   sys_umask
+                .long   sys_chroot
+                .long   sys_ustat
+                .long   sys_dup2
+                .long   sys_getppid
+/* 65 */        .long   sys_getpgrp
+                .long   sys_setsid
+                .long   sys_sigaction
+                .long   sys_ni_syscall          /* was sys_sgetmask */
+                .long   sys_ni_syscall          /* was sys_ssetmask */
+/* 70 */        .long   sys_setreuid16
+                .long   sys_setregid16
+                .long   sys_sigsuspend_wrapper
+                .long   sys_sigpending
+                .long   sys_sethostname
+/* 75 */        .long   sys_setrlimit
+                .long   sys_old_getrlimit       /* used by libc4 */
+                .long   sys_getrusage
+                .long   sys_gettimeofday
+                .long   sys_settimeofday
+/* 80 */        .long   sys_getgroups16
+                .long   sys_setgroups16
+                .long   old_select              /* used by libc4 */
+                .long   sys_symlink
+                .long   sys_ni_syscall          /* was sys_lstat */
+/* 85 */        .long   sys_readlink
+                .long   sys_uselib
+                .long   sys_swapon
+                .long   sys_reboot
+                .long   old_readdir             /* used by libc4 */
+/* 90 */        .long   old_mmap                /* used by libc4 */
+                .long   sys_munmap
+                .long   sys_truncate
+                .long   sys_ftruncate
+                .long   sys_fchmod
+/* 95 */        .long   sys_fchown16
+                .long   sys_getpriority
+                .long   sys_setpriority
+                .long   sys_ni_syscall          /* was sys_profil */
+                .long   sys_statfs
+/* 100 */       .long   sys_fstatfs
+                .long   sys_ni_syscall
+                .long   sys_socketcall
+                .long   sys_syslog
+                .long   sys_setitimer
+/* 105 */       .long   sys_getitimer
+                .long   sys_newstat
+                .long   sys_newlstat
+                .long   sys_newfstat
+                .long   sys_ni_syscall          /* was sys_uname */
+/* 110 */       .long   sys_ni_syscall          /* was sys_iopl */
+                .long   sys_vhangup
+                .long   sys_ni_syscall
+                .long   sys_syscall             /* call a syscall */
+                .long   sys_wait4
+/* 115 */       .long   sys_swapoff
+                .long   sys_sysinfo
+                .long   sys_ipc
+                .long   sys_fsync
+                .long   sys_sigreturn_wrapper
+/* 120 */       .long   sys_clone_wapper
+                .long   sys_setdomainname
+                .long   sys_newuname
+                .long   sys_ni_syscall
+                .long   sys_adjtimex
+/* 125 */       .long   sys_mprotect
+                .long   sys_sigprocmask
+                .long   sys_ni_syscall  /* WAS: sys_create_module */
+                .long   sys_init_module
+                .long   sys_delete_module
+/* 130 */       .long   sys_ni_syscall  /* WAS: sys_get_kernel_syms */
+                .long   sys_quotactl
+                .long   sys_getpgid
+                .long   sys_fchdir
+                .long   sys_bdflush
+/* 135 */       .long   sys_sysfs
+                .long   sys_personality
+                .long   sys_ni_syscall          /* .long        _sys_afs_syscall */
+                .long   sys_setfsuid16
+                .long   sys_setfsgid16
+/* 140 */       .long   sys_llseek
+                .long   sys_getdents
+                .long   sys_select
+                .long   sys_flock
+                .long   sys_msync
+/* 145 */       .long   sys_readv
+                .long   sys_writev
+                .long   sys_getsid
+                .long   sys_fdatasync
+                .long   sys_sysctl
+/* 150 */       .long   sys_mlock
+                .long   sys_munlock
+                .long   sys_mlockall
+                .long   sys_munlockall
+                .long   sys_sched_setparam
+/* 155 */       .long   sys_sched_getparam
+                .long   sys_sched_setscheduler
+                .long   sys_sched_getscheduler
+                .long   sys_sched_yield
+                .long   sys_sched_get_priority_max
+/* 160 */       .long   sys_sched_get_priority_min
+                .long   sys_sched_rr_get_interval
+                .long   sys_nanosleep
+                .long   sys_arm_mremap
+                .long   sys_setresuid16
+/* 165 */       .long   sys_getresuid16
+                .long   sys_ni_syscall
+                .long   sys_ni_syscall /* WAS: sys_query_module */
+                .long   sys_poll
+                .long   sys_nfsservctl
+/* 170 */       .long   sys_setresgid16
+                .long   sys_getresgid16
+                .long   sys_prctl
+                .long   sys_rt_sigreturn_wrapper
+                .long   sys_rt_sigaction
+/* 175 */       .long   sys_rt_sigprocmask
+                .long   sys_rt_sigpending
+                .long   sys_rt_sigtimedwait
+                .long   sys_rt_sigqueueinfo
+                .long   sys_rt_sigsuspend_wrapper
+/* 180 */       .long   sys_pread64
+                .long   sys_pwrite64
+                .long   sys_chown16
+                .long   sys_getcwd
+                .long   sys_capget
+/* 185 */       .long   sys_capset
+                .long   sys_sigaltstack_wrapper
+                .long   sys_sendfile
+                .long   sys_ni_syscall
+                .long   sys_ni_syscall
+/* 190 */       .long   sys_vfork_wrapper
+                .long   sys_getrlimit
+                .long   sys_mmap2
+                .long   sys_truncate64
+                .long   sys_ftruncate64
+/* 195 */       .long   sys_stat64
+                .long   sys_lstat64
+                .long   sys_fstat64
+                .long   sys_lchown
+                .long   sys_getuid
+/* 200 */       .long   sys_getgid
+                .long   sys_geteuid
+                .long   sys_getegid
+                .long   sys_setreuid
+                .long   sys_setregid
+/* 205 */       .long   sys_getgroups
+                .long   sys_setgroups
+                .long   sys_fchown
+                .long   sys_setresuid
+                .long   sys_getresuid
+/* 210 */       .long   sys_setresgid
+                .long   sys_getresgid
+                .long   sys_chown
+                .long   sys_setuid
+                .long   sys_setgid
+/* 215 */       .long   sys_setfsuid
+                .long   sys_setfsgid
+                .long   sys_getdents64
+                .long   sys_pivot_root
+                .long   sys_mincore
+/* 220 */       .long   sys_madvise
+                .long   sys_fcntl64
+                .long   sys_ni_syscall /* TUX */
+                .long   sys_ni_syscall /* WAS: sys_security */
+                .long   sys_gettid
+/* 225 */       .long   sys_readahead
+                .long   sys_setxattr
+                .long   sys_lsetxattr
+                .long   sys_fsetxattr
+                .long   sys_getxattr
+/* 230 */       .long   sys_lgetxattr
+                .long   sys_fgetxattr
+                .long   sys_listxattr
+                .long   sys_llistxattr
+                .long   sys_flistxattr
+/* 235 */       .long   sys_removexattr
+                .long   sys_lremovexattr
+                .long   sys_fremovexattr
+                .long   sys_tkill
+__syscall_end:
+                .rept   NR_syscalls - (__syscall_end - __syscall_start) / 4
+                        .long   sys_ni_syscall
+                .endr
+#endif
diff --git a/arch/arm26/kernel/compat.c b/arch/arm26/kernel/compat.c
new file mode 100644
index 000000000000..db0310db8998
--- /dev/null
+++ b/arch/arm26/kernel/compat.c
@@ -0,0 +1,174 @@
+/*
+ *  linux/arch/arm26/kernel/compat.c
+ *
+ *  Copyright (C) 2001 Russell King
+ *                2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * We keep the old params compatibility cruft in one place (here)
+ * so we don't end up with lots of mess around other places.
+ *
+ * NOTE:
+ *  The old struct param_struct is deprecated, but it will be kept in
+ *  the kernel for 5 years from now (2001). This will allow boot loaders
+ *  to convert to the new struct tag way.
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/page.h>
+//#include <asm/arch.h>
+//#include <asm/mach/irq.h>
+/*
+ * Usage:
+ *  - do not go blindly adding fields, add them at the end
+ *  - when adding fields, don't rely on the address until
+ *    a patch from me has been released
+ *  - unused fields should be zero (for future expansion)
+ *  - this structure is relatively short-lived - only
+ *    guaranteed to contain useful data in setup_arch()
+ *
+ * This is the old deprecated way to pass parameters to the kernel
+ */
+struct param_struct {
+    union {
+        struct { /* trailing numbers are presumably byte offsets (4-byte unsigned long) - confirm */
+            unsigned long page_size;            /*  0 */
+            unsigned long nr_pages;             /*  4 */
+            unsigned long ramdisk_size;         /*  8 */
+            unsigned long flags;                /* 12 */
+#define FLAG_READONLY   1 /* FLAG_* are bit masks tested against 'flags' in build_tag_list() */
+#define FLAG_RDLOAD     4
+#define FLAG_RDPROMPT   8
+            unsigned long rootdev;              /* 16 */
+            unsigned long video_num_cols;       /* 20 */
+            unsigned long video_num_rows;       /* 24 */
+            unsigned long video_x;              /* 28 */
+            unsigned long video_y;              /* 32 */
+            unsigned long memc_control_reg;     /* 36 */
+            unsigned char sounddefault;         /* 40 */
+            unsigned char adfsdrives;           /* 41 */
+            unsigned char bytes_per_char_h;     /* 42 */
+            unsigned char bytes_per_char_v;     /* 43 */
+            unsigned long pages_in_bank[4];     /* 44 */
+            unsigned long pages_in_vram;        /* 60 */
+            unsigned long initrd_start;         /* 64 */
+            unsigned long initrd_size;          /* 68 */
+            unsigned long rd_start;             /* 72 */
+            unsigned long system_rev;           /* 76 */
+            unsigned long system_serial_low;    /* 80 */
+            unsigned long system_serial_high;   /* 84 */
+            unsigned long mem_fclk_21285;       /* 88 */
+        } s;
+        char unused[256]; /* overlay: makes u1 at least 256 bytes */
+    } u1;
+    union {
+        char paths[8][128]; /* 1024 bytes, overlaid with 's' below */
+        struct {
+            unsigned long magic;
+            char n[1024 - sizeof(unsigned long)]; /* pads 'magic' out to 1024 bytes */
+        } s;
+    } u2; /* convert_to_tag_list() passes &u2 as the area the tag list is built in */
+    char commandline[COMMAND_LINE_SIZE];
+};
+/*
+ * Step to the tag that follows 'tag' (via tag_next()) and fill it in as
+ * an ATAG_MEM entry for the region given by 'start' and 'size'.
+ * Returns the tag that was written, so the caller can keep chaining.
+ */
+static struct tag * __init memtag(struct tag *tag, unsigned long start, unsigned long size)
+{
+        struct tag *mem = tag_next(tag);
+        mem->hdr.size    = tag_size(tag_mem32);
+        mem->hdr.tag     = ATAG_MEM;
+        mem->u.mem.start = start;
+        mem->u.mem.size  = size;
+        return mem;
+}
+/*
+ * Build a tag list at 'taglist' from the old-style 'params' block, then
+ * move the finished list down to the start of 'params'.
+ *
+ * If params->u1.s.page_size does not equal PAGE_SIZE a warning is printed
+ * and nothing is converted.
+ *
+ * Each tag has hdr.size filled in before tag_next() is applied to it
+ * (presumably tag_next() steps by hdr.size - confirm in <asm/setup.h>),
+ * so the order of the statements below matters.
+ */
+static void __init build_tag_list(struct param_struct *params, void *taglist)
+{
+        struct tag *tag = taglist;
+        if (params->u1.s.page_size != PAGE_SIZE) {
+                printk(KERN_WARNING "Warning: bad configuration page, "
+                       "trying to continue\n");
+                return;
+        }
+        printk(KERN_DEBUG "Converting old-style param struct to taglist\n");
+        /* core: read-only flag, page size and root device */
+        tag->hdr.tag  = ATAG_CORE;
+        tag->hdr.size = tag_size(tag_core);
+        tag->u.core.flags = params->u1.s.flags & FLAG_READONLY;
+        tag->u.core.pagesize = params->u1.s.page_size;
+        tag->u.core.rootdev = params->u1.s.rootdev;
+        /* ramdisk: FLAG_RDLOAD maps to bit 0, FLAG_RDPROMPT to bit 1 */
+        tag = tag_next(tag);
+        tag->hdr.tag = ATAG_RAMDISK;
+        tag->hdr.size = tag_size(tag_ramdisk);
+        tag->u.ramdisk.flags = (params->u1.s.flags & FLAG_RDLOAD ? 1 : 0) |
+                               (params->u1.s.flags & FLAG_RDPROMPT ? 2 : 0);
+        tag->u.ramdisk.size  = params->u1.s.ramdisk_size;
+        tag->u.ramdisk.start = params->u1.s.rd_start;
+        /* initrd location */
+        tag = tag_next(tag);
+        tag->hdr.tag = ATAG_INITRD;
+        tag->hdr.size = tag_size(tag_initrd);
+        tag->u.initrd.start = params->u1.s.initrd_start;
+        tag->u.initrd.size  = params->u1.s.initrd_size;
+        /* serial number */
+        tag = tag_next(tag);
+        tag->hdr.tag = ATAG_SERIAL;
+        tag->hdr.size = tag_size(tag_serialnr);
+        tag->u.serialnr.low = params->u1.s.system_serial_low;
+        tag->u.serialnr.high = params->u1.s.system_serial_high;
+        /* revision */
+        tag = tag_next(tag);
+        tag->hdr.tag = ATAG_REVISION;
+        tag->hdr.size = tag_size(tag_revision);
+        tag->u.revision.rev = params->u1.s.system_rev;
+        /* memtag() advances to the next tag itself, so no tag_next() here */
+        tag = memtag(tag, PHYS_OFFSET, params->u1.s.nr_pages * PAGE_SIZE);
+        /* Acorn-specific settings */
+        tag = tag_next(tag);
+        tag->hdr.tag = ATAG_ACORN;
+        tag->hdr.size = tag_size(tag_acorn);
+        tag->u.acorn.memc_control_reg = params->u1.s.memc_control_reg;
+        tag->u.acorn.vram_pages       = params->u1.s.pages_in_vram;
+        tag->u.acorn.sounddefault     = params->u1.s.sounddefault;
+        tag->u.acorn.adfsdrives       = params->u1.s.adfsdrives;
+        /* command line: size is (header + string length + 3) / 4 */
+        tag = tag_next(tag);
+        tag->hdr.tag = ATAG_CMDLINE;
+        tag->hdr.size = (strlen(params->commandline) + 3 +
+                         sizeof(struct tag_header)) >> 2;
+        strcpy(tag->u.cmdline.cmdline, params->commandline);
+        /* terminator: an ATAG_NONE header with size 0 */
+        tag = tag_next(tag);
+        tag->hdr.tag = ATAG_NONE;
+        tag->hdr.size = 0;
+        /*
+         * Move the list to the start of 'params'.  The two regions can
+         * overlap (convert_to_tag_list() passes an address inside
+         * 'params'), hence memmove.  The length is taken as a byte
+         * pointer difference rather than by casting pointers to int,
+         * plus one header for the terminating tag.
+         */
+        memmove(params, taglist, ((char *)tag - (char *)taglist) +
+                                 sizeof(struct tag_header));
+}
+/*
+ * Treat 'tags' as an old-style struct param_struct and hand it to
+ * build_tag_list(), using the block's own u2 member as the area in
+ * which the tag list is assembled.
+ */
+void __init convert_to_tag_list(struct tag *tags)
+{
+        struct param_struct *old = (struct param_struct *)tags;
+        void *scratch = &old->u2;
+        build_tag_list(old, scratch);
+}
+/*
+ * Walk the list starting at 'tag' until an entry with hdr.size == 0 is
+ * reached, rewriting the type of every ATAG_MEM entry to ATAG_NONE.
+ */
+void __init squash_mem_tags(struct tag *tag)
+{
+        while (tag->hdr.size) {
+                if (tag->hdr.tag == ATAG_MEM)
+                        tag->hdr.tag = ATAG_NONE;
+                tag = tag_next(tag);
+        }
+}
diff --git a/arch/arm26/kernel/dma.c b/arch/arm26/kernel/dma.c
new file mode 100644
index 000000000000..80b5a774d905
--- /dev/null
+++ b/arch/arm26/kernel/dma.c
@@ -0,0 +1,273 @@
+/*
+ *  linux/arch/arm26/kernel/dma.c
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *                2003      Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Front-end to the DMA handling.  This handles the allocation/freeing
+ *  of DMA channels, and provides a unified interface to the machine's
+ *  DMA facilities.
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/mman.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <asm/dma.h>
+DEFINE_SPINLOCK(dma_spin_lock);
+static dma_t dma_chan[MAX_DMA_CHANNELS];
+/*
+ * Get dma list for /proc/dma
+ *
+ * Writes one line into buf for every channel whose lock is set: the
+ * channel number, the type string of its ops and the owner's device id.
+ * Returns the number of characters written.
+ */
+int get_dma_list(char *buf)
+{
+        char *out = buf;
+        int chan;
+        for (chan = 0; chan < MAX_DMA_CHANNELS; chan++) {
+                dma_t *dma = &dma_chan[chan];
+                if (!dma->lock)
+                        continue;
+                out += sprintf(out, "%2d: %14s %s\n", chan,
+                               dma->d_ops->type, dma->device_id);
+        }
+        return out - buf;
+}
+/*
+ * Request DMA channel
+ *
+ * On certain platforms, we have to allocate an interrupt as well...
+ *
+ * Returns 0 on success, -EINVAL if the channel number is out of range
+ * or the channel has no ops, -EBUSY if the channel is already claimed,
+ * or whatever non-zero value the channel's ->request hook returns.
+ */
+int request_dma(dmach_t channel, const char *device_id)
+{
+        dma_t *dma = dma_chan + channel;
+        int ret = 0;
+        if (channel >= MAX_DMA_CHANNELS || !dma->d_ops) {
+                printk(KERN_ERR "dma: trying to allocate DMA%d\n", channel);
+                return -EINVAL;
+        }
+        /* A non-zero previous lock value means somebody else owns it. */
+        if (xchg(&dma->lock, 1) != 0)
+                return -EBUSY;
+        dma->device_id = device_id;
+        dma->active    = 0;
+        dma->invalid   = 1;
+        if (dma->d_ops->request)
+                ret = dma->d_ops->request(channel, dma);
+        /* The hook refused the request: give the channel back. */
+        if (ret)
+                xchg(&dma->lock, 0);
+        return ret;
+}
+/*
+ * Free DMA channel
+ *
+ * On certain platforms, we have to free interrupt as well...
+ *
+ * A channel that is still active is disabled first (with a complaint).
+ * Freeing a channel that was not claimed is reported and otherwise
+ * ignored.
+ */
+void free_dma(dmach_t channel)
+{
+        dma_t *dma = dma_chan + channel;
+        if (channel >= MAX_DMA_CHANNELS || !dma->d_ops) {
+                printk(KERN_ERR "dma: trying to free DMA%d\n", channel);
+                return;
+        }
+        if (dma->active) {
+                printk(KERN_ERR "dma%d: freeing active DMA\n", channel);
+                dma->d_ops->disable(channel, dma);
+                dma->active = 0;
+        }
+        /* The old lock value tells us whether the channel was claimed. */
+        if (xchg(&dma->lock, 0) == 0) {
+                printk(KERN_ERR "dma%d: trying to free free DMA\n", channel);
+                return;
+        }
+        if (dma->d_ops->free)
+                dma->d_ops->free(channel, dma);
+}
+/* Set DMA Scatter-Gather list
+ *
+ * Record the caller's scatterlist and entry count on the channel, flag
+ * the channel as using scatter-gather and set its invalid flag.
+ * Doing this while the channel is active is reported but not refused.
+ */
+void set_dma_sg (dmach_t channel, struct scatterlist *sg, int nr_sg)
+{
+        dma_t *chan = &dma_chan[channel];
+        if (chan->active)
+                printk(KERN_ERR "dma%d: altering DMA SG while "
+                       "DMA active\n", channel);
+        chan->using_sg = 1;
+        chan->sgcount = nr_sg;
+        chan->sg = sg;
+        chan->invalid = 1;
+}
+/* Set DMA address
+ *
+ * Store the address in the channel's own single-entry buffer, point
+ * the channel's sg list at that buffer, and set the invalid bit.
+ */
+void set_dma_addr (dmach_t channel, unsigned long physaddr)
+{
+        dma_t *chan = &dma_chan[channel];
+        if (chan->active)
+                printk(KERN_ERR "dma%d: altering DMA address while "
+                       "DMA active\n", channel);
+        chan->buf.__address = (char *)physaddr;//FIXME - not pretty
+        chan->sg = &chan->buf;
+        chan->sgcount = 1;
+        chan->using_sg = 0;
+        chan->invalid = 1;
+}
+/* Set DMA byte count
+ *
+ * Store the count as the length of the channel's own single-entry
+ * buffer, point the channel's sg list at that buffer, and set the
+ * invalid bit.
+ */
+void set_dma_count (dmach_t channel, unsigned long count)
+{
+        dma_t *chan = &dma_chan[channel];
+        if (chan->active)
+                printk(KERN_ERR "dma%d: altering DMA count while "
+                       "DMA active\n", channel);
+        chan->buf.length = count;
+        chan->sg = &chan->buf;
+        chan->sgcount = 1;
+        chan->using_sg = 0;
+        chan->invalid = 1;
+}
+/* Set DMA direction mode
+ *
+ * Remember the requested mode on the channel and set the invalid bit.
+ */
+void set_dma_mode (dmach_t channel, dmamode_t mode)
+{
+        dma_t *chan = &dma_chan[channel];
+        if (chan->active)
+                printk(KERN_ERR "dma%d: altering DMA mode while "
+                       "DMA active\n", channel);
+        chan->invalid = 1;
+        chan->dma_mode = mode;
+}
+/* Enable DMA channel
+ *
+ * Enabling a channel nobody has claimed is a BUG.  A channel that is
+ * already active is left alone.
+ */
+void enable_dma (dmach_t channel)
+{
+        dma_t *chan = &dma_chan[channel];
+        if (!chan->lock) {
+                printk(KERN_ERR "dma%d: trying to enable free DMA\n", channel);
+                BUG();
+                return;
+        }
+        if (chan->active != 0)
+                return;
+        chan->active = 1;
+        chan->d_ops->enable(channel, chan);
+}
+/* Disable DMA channel
+ *
+ * Disabling a channel nobody has claimed is a BUG.  Only a channel
+ * whose active flag is 1 is actually passed to the ->disable op.
+ */
+void disable_dma (dmach_t channel)
+{
+        dma_t *chan = &dma_chan[channel];
+        if (!chan->lock) {
+                printk(KERN_ERR "dma%d: trying to disable free DMA\n", channel);
+                BUG();
+                return;
+        }
+        if (chan->active != 1)
+                return;
+        chan->active = 0;
+        chan->d_ops->disable(channel, chan);
+}
+/*
+ * Report the active flag of the given DMA channel.  The channel
+ * number is not range-checked.
+ */
+int dma_channel_active(dmach_t channel)
+{
+        dma_t *chan = dma_chan + channel;
+        return chan->active;
+}
+void set_dma_page(dmach_t channel, char pagenr)
+{       /*
+         * Nothing is programmed here: the call is only reported as an
+         * error and pagenr is ignored.
+         */
+        printk(KERN_ERR "dma%d: trying to set_dma_page\n", channel);
+}
+/*
+ * Ask the channel's optional ->setspeed op for a cycle time and store
+ * whatever it returns in the channel's speed field.  Channels without
+ * the op get a speed of 0.
+ */
+void set_dma_speed(dmach_t channel, int cycle_ns)
+{
+        dma_t *chan = &dma_chan[channel];
+        int speed;
+        if (chan->d_ops->setspeed)
+                speed = chan->d_ops->setspeed(channel, chan, cycle_ns);
+        else
+                speed = 0;
+        chan->speed = speed;
+}
+/*
+ * Return the value of the channel's optional ->residue op, or 0 when
+ * the channel's ops do not provide one.
+ */
+int get_dma_residue(dmach_t channel)
+{
+        dma_t *chan = &dma_chan[channel];
+        if (!chan->d_ops->residue)
+                return 0;
+        return chan->d_ops->residue(channel, chan);
+}
+void __init init_dma(void)
+{
+        arch_dma_init(dma_chan);        /* hand the channel table to the architecture's DMA init hook */
+}
+EXPORT_SYMBOL(request_dma);
+EXPORT_SYMBOL(free_dma);
+EXPORT_SYMBOL(enable_dma);
+EXPORT_SYMBOL(disable_dma);
+EXPORT_SYMBOL(set_dma_addr);
+EXPORT_SYMBOL(set_dma_count);
+EXPORT_SYMBOL(set_dma_mode);
+EXPORT_SYMBOL(set_dma_page);
+EXPORT_SYMBOL(get_dma_residue);
+EXPORT_SYMBOL(set_dma_sg);
+EXPORT_SYMBOL(set_dma_speed);
+EXPORT_SYMBOL(dma_spin_lock);
diff --git a/arch/arm26/kernel/ecard.c b/arch/arm26/kernel/ecard.c
new file mode 100644
index 000000000000..824c6b571ad9
--- /dev/null
+++ b/arch/arm26/kernel/ecard.c
@@ -0,0 +1,850 @@
+/*
+ *  linux/arch/arm26/kernel/ecard.c
+ *
+ *  Copyright 1995-2001 Russell King
+ *  Copyright 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Find all installed expansion cards, and handle interrupts from them.
+ *
+ *  Created from information from Acorns RiscOS3 PRMs
+ *  15-Jun-2003 IM      Modified from ARM32 (RiscPC capable) version
+ *  10-Jan-1999 RMK     Run loaders in a simulated RISC OS environment.
+ *  06-May-1997 RMK     Added blacklist for cards whose loader doesn't work.
+ *  12-Sep-1997 RMK     Created new handling of interrupt enables/disables
+ *                      - cards can now register their own routine to control
+ *                      interrupts (recommended).
+ *  29-Sep-1997 RMK     Expansion card interrupt hardware not being re-enabled
+ *                      on reset from Linux. (Caused cards not to respond
+ *                      under RiscOS without hard reset).
+ *
+ */
+#define ECARD_C
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/reboot.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <asm/dma.h>
+#include <asm/ecard.h>
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/irqchip.h>
+#include <asm/tlbflush.h>
+enum req {
+        req_readbytes,          /* dispatched to ecard_task_readbytes() */
+        req_reset               /* dispatched to ecard_task_reset() */
+};
+struct ecard_request {
+        enum req        req;            /* which operation to perform */
+        ecard_t         *ec;            /* card the operation applies to */
+        unsigned int    address;        /* readbytes: starting offset on the card */
+        unsigned int    length;         /* readbytes: number of bytes to fetch */
+        unsigned int    use_loader;     /* readbytes: non-zero to read through the card's loader, if it has one */
+        void            *buffer;        /* readbytes: where the bytes are stored */
+};
+struct expcard_blacklist {
+        unsigned short   manufacturer;  /* compared with ec->cid.manufacturer in ecard_probe() */
+        unsigned short   product;       /* compared with ec->cid.product in ecard_probe() */
+        const char      *type;          /* description installed as ec->card_desc on a match */
+};
+static ecard_t *cards;
+static ecard_t *slot_to_expcard[MAX_ECARDS];
+static unsigned int ectcr;
+/* List of descriptions of cards which don't have an extended
+ * identification, or chunk directories containing a description.
+ */
+static struct expcard_blacklist __initdata blacklist[] = {
+        { MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1" }
+};
+asmlinkage extern int
+ecard_loader_reset(volatile unsigned char *pa, loader_t loader);
+asmlinkage extern int
+ecard_loader_read(int off, volatile unsigned char *pa, loader_t loader);
+static const struct ecard_id *
+ecard_match_device(const struct ecard_id *ids, struct expansion_card *ec);
+/* Assemble an unsigned 16-bit value from two bytes, low byte first. */
+static inline unsigned short
+ecard_getu16(unsigned char *v)
+{
+        unsigned int lo = v[0];
+        unsigned int hi = v[1];
+        return (hi << 8) | lo;
+}
+/*
+ * Assemble a signed 24-bit value from three bytes, low byte first, and
+ * sign-extend it to a signed long.
+ *
+ * The extension is done arithmetically rather than by OR-ing in
+ * 0xff000000, so the result does not depend on how an out-of-range
+ * unsigned value converts to signed long, nor on the width of long.
+ */
+static inline signed long
+ecard_gets24(unsigned char *v)
+{
+        signed long val = v[0] | v[1] << 8 | v[2] << 16;
+        /* bit 23 is the sign bit of the 24-bit quantity */
+        if (val & 0x800000)
+                val -= 0x1000000;
+        return val;
+}
+/* Look up the card recorded for a slot; NULL for out-of-range slots. */
+static inline ecard_t *
+slot_to_ecard(unsigned int slot)
+{
+        if (slot >= MAX_ECARDS)
+                return NULL;
+        return slot_to_expcard[slot];
+}
+/* ===================== Expansion card daemon ======================== */
+/*
+ * Since the loader programs on the expansion cards need to be run
+ * in a specific environment, create a separate task with this
+ * environment up, and pass requests to this task as and when we
+ * need to.
+ *
+ * This should allow 99% of loaders to be called from Linux.
+ *
+ * From a security standpoint, we trust the card vendors.  This
+ * may be a misplaced trust.
+ */
+#define BUS_ADDR(x) ((((unsigned long)(x)) << 2) + IO_BASE)     /* x scaled by 4, offset from IO_BASE */
+#define POD_INT_ADDR(x) ((volatile unsigned char *)\
+                         ((BUS_ADDR((x)) - IO_BASE) + IO_START))       /* same scaled offset rebased onto IO_START, as a volatile byte pointer */
+/* Run the card's loader reset entry point; cards without a loader are skipped. */
+static inline void ecard_task_reset(struct ecard_request *req)
+{
+        struct expansion_card *card = req->ec;
+        if (!card->loader)
+                return;
+        ecard_loader_reset(POD_INT_ADDR(card->podaddr), card->loader);
+}
+/*
+ * Copy req->length bytes, starting at offset req->address, from the
+ * card into req->buffer.
+ *
+ * When the request asks for the loader and the card has one, each byte
+ * is fetched through ecard_loader_read().  Otherwise the bytes are read
+ * directly, one from every fourth address of the card's space.
+ */
+static void
+ecard_task_readbytes(struct ecard_request *req)
+{
+        unsigned char *dst = (unsigned char *)req->buffer;
+        volatile unsigned char *pod =
+                (volatile unsigned char *)POD_INT_ADDR(req->ec->podaddr);
+        unsigned int remaining = req->length;
+        unsigned int pos = req->address;
+        if (req->use_loader && req->ec->loader) {
+                for (; remaining; remaining--, pos++) {
+                        /*
+                         * The following is required by some
+                         * expansion card loader programs.
+                         */
+                        *(unsigned long *)0x108 = 0;
+                        *dst++ = ecard_loader_read(pos, pod,
+                                                   req->ec->loader);
+                }
+                return;
+        }
+        for (pos *= 4; remaining; remaining--, pos += 4)
+                *dst++ = pod[pos];
+}
+/* Dispatch a request to its handler; unknown request codes are ignored. */
+static void ecard_do_request(struct ecard_request *req)
+{
+        if (req->req == req_readbytes)
+                ecard_task_readbytes(req);
+        else if (req->req == req_reset)
+                ecard_task_reset(req);
+}
+/*
+ * On 26-bit processors, we don't need the kcardd thread to access the
+ * expansion card loaders.  We do it directly.
+ */
+#define ecard_call(req) ecard_do_request(req)
+/* ======================= Mid-level card control ===================== */
+/*
+ * Read len bytes starting at offset off of card ec into addr, going
+ * through the card's loader when useld is non-zero.  This just packages
+ * the arguments into a readbytes request and issues it.
+ */
+static void
+ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld)
+{
+        struct ecard_request req = {
+                .req            = req_readbytes,
+                .ec             = ec,
+                .address        = off,
+                .length         = len,
+                .use_loader     = useld,
+                .buffer         = addr,
+        };
+        ecard_call(&req);
+}
+int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num)
+{       /*
+         * Search the card's chunk directory for the (num+1)th chunk
+         * whose id byte equals id.  On success the chunk's start offset
+         * and 256 bytes of string data are copied into cd and 1 is
+         * returned.  0 is returned when the card has no chunk directory,
+         * when the directory ends without a match, or when memory for a
+         * loader cannot be allocated.
+         */
+        struct ex_chunk_dir excd;
+        int index = 16;                 /* first directory entry is read from offset 16 */
+        int useld = 0;                  /* start by reading the card directly */
+        if (!ec->cid.cd)
+                return 0;
+        while(1) {
+                ecard_readbytes(&excd, ec, index, 8, useld);    /* fetch one 8-byte entry */
+                index += 8;
+                if (c_id(&excd) == 0) {         /* id 0 ends the current directory */
+                        if (!useld && ec->loader) {
+                                useld = 1;      /* restart from offset 0, this time through the loader */
+                                index = 0;
+                                continue;
+                        }
+                        return 0;
+                }
+                if (c_id(&excd) == 0xf0) { /* link */
+                        index = c_start(&excd);
+                        continue;
+                }
+                if (c_id(&excd) == 0x80) { /* loader */
+                        if (!ec->loader) {      /* only the first loader chunk found is kept */
+                                ec->loader = (loader_t)kmalloc(c_len(&excd),
+                                                               GFP_KERNEL);
+                                if (ec->loader)
+                                        ecard_readbytes(ec->loader, ec,
+                                                        (int)c_start(&excd),
+                                                        c_len(&excd), useld);
+                                else
+                                        return 0;
+                        }
+                        continue;
+                }
+                if (c_id(&excd) == id && num-- == 0)    /* num counts down the matches to skip */
+                        break;
+        }
+        if (c_id(&excd) & 0x80) {
+                switch (c_id(&excd) & 0x70) {
+                case 0x70:      /* fetch the chunk body into the string area */
+                        ecard_readbytes((unsigned char *)excd.d.string, ec,
+                                        (int)c_start(&excd), c_len(&excd),
+                                        useld);
+                        break;
+                case 0x00:
+                        break;
+                }
+        }
+        cd->start_offset = c_start(&excd);
+        memcpy(cd->d.string, excd.d.string, 256);       /* NOTE(review): always copies 256 bytes, even when nothing was read into excd.d.string above */
+        return 1;
+}
+/* ======================= Interrupt control ============================ */
+static void ecard_def_irq_enable(ecard_t *ec, int irqnr)
+{       /* default irqenable op: no action is taken */
+}
+static void ecard_def_irq_disable(ecard_t *ec, int irqnr)
+{       /* default irqdisable op: no action is taken */
+}
+/*
+ * Default irqpending op.  A card with no IRQ mask is always reported as
+ * pending; otherwise the byte at irqaddr is tested against the mask.
+ */
+static int ecard_def_irq_pending(ecard_t *ec)
+{
+        if (!ec->irqmask)
+                return 1;
+        return (ec->irqaddr[0] & ec->irqmask) != 0;
+}
+static void ecard_def_fiq_enable(ecard_t *ec, int fiqnr)
+{       /* default fiqenable op: reaching it is treated as fatal */
+        panic("ecard_def_fiq_enable called - impossible");
+}
+static void ecard_def_fiq_disable(ecard_t *ec, int fiqnr)
+{       /* default fiqdisable op: reaching it is treated as fatal */
+        panic("ecard_def_fiq_disable called - impossible");
+}
+/*
+ * Default FIQ pending test.  A card with no FIQ mask is always reported
+ * as pending; otherwise the byte at fiqaddr is tested against the mask.
+ */
+static int ecard_def_fiq_pending(ecard_t *ec)
+{
+        if (!ec->fiqmask)
+                return 1;
+        return (ec->fiqaddr[0] & ec->fiqmask) != 0;
+}
+static expansioncard_ops_t ecard_default_ops = {        /* positional: order must match expansioncard_ops_t */
+        ecard_def_irq_enable,           /* used as ->irqenable */
+        ecard_def_irq_disable,          /* used as ->irqdisable */
+        ecard_def_irq_pending,          /* used as ->irqpending */
+        ecard_def_fiq_enable,           /* used as ->fiqenable */
+        ecard_def_fiq_disable,          /* used as ->fiqdisable */
+        ecard_def_fiq_pending           /* presumably the FIQ pending op - confirm against asm/ecard.h */
+};
+/*
+ * Enable and disable interrupts from expansion cards.
+ * (interrupts are disabled for these functions).
+ *
+ * They are not meant to be called directly, but via enable/disable_irq.
+ */
+/*
+ * irqchip unmask hook.  IRQ numbers map to slots as irqnr - 32.  Cards
+ * without ops get the default ops installed; the enable is rejected
+ * (and logged) unless the card has been claimed and has an irqenable op.
+ */
+static void ecard_irq_unmask(unsigned int irqnr)
+{
+        ecard_t *card = slot_to_ecard(irqnr - 32);
+        if (!card)
+                return;
+        if (!card->ops)
+                card->ops = &ecard_default_ops;
+        if (!card->claimed || !card->ops->irqenable) {
+                printk(KERN_ERR "ecard: rejecting request to "
+                        "enable IRQs for %d\n", irqnr);
+                return;
+        }
+        card->ops->irqenable(card, irqnr);
+}
+/*
+ * irqchip mask/ack hook.  IRQ numbers map to slots as irqnr - 32.
+ * Cards without ops get the default ops installed before the card's
+ * irqdisable op, if any, is called.
+ */
+static void ecard_irq_mask(unsigned int irqnr)
+{
+        ecard_t *card = slot_to_ecard(irqnr - 32);
+        if (!card)
+                return;
+        if (!card->ops)
+                card->ops = &ecard_default_ops;
+        if (card->ops->irqdisable)
+                card->ops->irqdisable(card, irqnr);
+}
+static struct irqchip ecard_chip = {
+        .ack    = ecard_irq_mask,       /* ack uses the same routine as mask */
+        .mask   = ecard_irq_mask,
+        .unmask = ecard_irq_unmask,
+};
+/*
+ * Enable FIQs from the card in slot fiqnr.  Cards without ops get the
+ * default ops installed; the request is rejected (and logged) unless
+ * the card has been claimed and has a fiqenable op.
+ */
+void ecard_enablefiq(unsigned int fiqnr)
+{
+        ecard_t *card = slot_to_ecard(fiqnr);
+        if (!card)
+                return;
+        if (!card->ops)
+                card->ops = &ecard_default_ops;
+        if (!card->claimed || !card->ops->fiqenable) {
+                printk(KERN_ERR "ecard: rejecting request to "
+                        "enable FIQs for %d\n", fiqnr);
+                return;
+        }
+        card->ops->fiqenable(card, fiqnr);
+}
+/*
+ * Disable FIQs from the card in slot fiqnr.  Cards without ops get the
+ * default ops installed before the fiqdisable op, if any, is called.
+ */
+void ecard_disablefiq(unsigned int fiqnr)
+{
+        ecard_t *card = slot_to_ecard(fiqnr);
+        if (!card)
+                return;
+        if (!card->ops)
+                card->ops = &ecard_default_ops;
+        if (card->ops->fiqdisable)
+                card->ops->fiqdisable(card, fiqnr);
+}
+/*
+ * Print one line describing a card's interrupt state: whether it is
+ * claimed, then either the verdict of its own irqpending op or, for
+ * cards on the default ops, the raw IRQ address, mask and status byte.
+ */
+static void
+ecard_dump_irq_state(ecard_t *ec)
+{
+        const char *claimed = ec->claimed ? "" : "not ";
+        printk("  %d: %sclaimed, ", ec->slot_no, claimed);
+        if (!ec->ops || !ec->ops->irqpending ||
+            ec->ops == &ecard_default_ops) {
+                printk("irqaddr %p, mask = %02X, status = %02X\n",
+                       ec->irqaddr, ec->irqmask, *ec->irqaddr);
+                return;
+        }
+        printk("irq %spending\n",
+               ec->ops->irqpending(ec) ? "" : "not ");
+}
+/*
+ * Called when an expansion card interrupt arrived that no card owned
+ * up to.  Counts such interrupts within a single jiffy and masks the
+ * expansion card IRQ if the count runs away; also emits a rate-limited
+ * warning.
+ */
+static void ecard_check_lockup(struct irqdesc *desc)
+{
+        static int last, lockup;
+        ecard_t *ec;
+        /*
+         * More than a million unrecognised interrupts without jiffies
+         * moving on means the timer interrupt is not getting a look in
+         * and something is seriously wrong.  Mask the expansion card
+         * interrupt so the machine can at least continue.
+         *
+         * Maybe we ought to start a timer to re-enable them some time
+         * later?
+         */
+        if (last != jiffies) {
+                lockup = 0;
+        } else {
+                lockup += 1;
+                if (lockup > 1000000) {
+                        printk(KERN_ERR "\nInterrupt lockup detected - "
+                               "disabling all expansion card interrupts\n");
+                        desc->chip->mask(IRQ_EXPANSIONCARD);
+                        printk("Expansion card IRQ state:\n");
+                        for (ec = cards; ec; ec = ec->next)
+                                ecard_dump_irq_state(ec);
+                }
+        }
+        /*
+         * Tell the user about the unrecognised interrupt, but at most
+         * once every five seconds so the log is not flooded.
+         */
+        if (!last || time_after(jiffies, (unsigned long)(last + 5*HZ))) {
+                last = jiffies;
+                printk(KERN_WARNING "Unrecognised interrupt from backplane\n");
+        }
+}
+/*
+ * Chained handler for the shared expansion card interrupt.  With the
+ * interrupt masked, every claimed card that has an IRQ is asked whether
+ * it is pending (through its own irqpending op, or the default one) and
+ * the handler of each pending card's IRQ is run.  If nobody was
+ * pending, the lockup check is invoked.
+ */
+static void
+ecard_irq_handler(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+        ecard_t *card;
+        int handled = 0;
+        desc->chip->mask(irq);
+        for (card = cards; card; card = card->next) {
+                struct irqdesc *d;
+                int pending;
+                if (!card->claimed || card->irq == NO_IRQ)
+                        continue;
+                pending = (card->ops && card->ops->irqpending)
+                        ? card->ops->irqpending(card)
+                        : ecard_default_ops.irqpending(card);
+                if (!pending)
+                        continue;
+                d = irq_desc + card->irq;
+                d->handle(card->irq, d, regs);
+                handled++;
+        }
+        desc->chip->unmask(irq);
+        if (handled == 0)
+                ecard_check_lockup(desc);
+}
+#define ecard_irqexp_handler NULL
+#define ecard_probeirqhw() (0)
+/*
+ * Work out the address of a card's space for the given access type and
+ * speed.  Returns 0 for types other than ECARD_MEMC and ECARD_IOC.
+ * The slot's bit in ectcr is cleared as a side effect.
+ */
+unsigned int ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed)
+{
+        int slot = ec->slot_no;
+        unsigned long address;
+        ectcr &= ~(1 << slot);
+        if (type == ECARD_MEMC)
+                address = IO_EC_MEMC_BASE + (slot << 12);
+        else if (type == ECARD_IOC)
+                address = IO_EC_IOC_BASE + (slot << 12) + (speed << 17);
+        else
+                address = 0;
+        return address;
+}
+/*
+ * Format a one-line description of a card into buffer and return the
+ * number of characters written.  Cards with a non-zero simple id are
+ * reported by that id.  Others are reported by manufacturer and product
+ * plus a description, which is read from chunk 0xf5 and cached in
+ * ec->card_desc the first time it is needed.
+ */
+static int ecard_prints(char *buffer, ecard_t *ec)
+{
+        struct in_chunk_dir incd;
+        char *out = buffer;
+        out += sprintf(out, "  %d: ", ec->slot_no);
+        if (ec->cid.id != 0) {
+                out += sprintf(out, "Simple card %d\n", ec->cid.id);
+                return out - buffer;
+        }
+        out += sprintf(out, "[%04X:%04X] ",
+                       ec->cid.manufacturer, ec->cid.product);
+        if (!ec->card_desc && ec->cid.cd &&
+            ecard_readchunk(&incd, ec, 0xf5, 0)) {
+                ec->card_desc = kmalloc(strlen(incd.d.string)+1, GFP_KERNEL);
+                if (ec->card_desc)
+                        strcpy((char *)ec->card_desc, incd.d.string);
+        }
+        out += sprintf(out, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*");
+        return out - buffer;
+}
+/*
+ * proc read handler for the card list.  Each card's line is formatted
+ * in turn; lines that end before file position pos are overwritten by
+ * the next one, and *start is pointed at the first byte that belongs at
+ * pos.  Returns the number of bytes available, capped at count.
+ */
+static int get_ecard_dev_info(char *buf, char **start, off_t pos, int count)
+{
+        ecard_t *ec;
+        off_t at = 0;
+        int len, cnt = 0;
+        for (ec = cards; ec && count > cnt; ec = ec->next) {
+                len = ecard_prints(buf, ec);
+                at += len;
+                /* this line lies wholly before pos: do not keep it */
+                if (at < pos)
+                        continue;
+                if (*start) {
+                        cnt += len;
+                } else {
+                        *start = buf + (pos - (at - len));
+                        cnt = at - pos;
+                }
+                buf += len;
+        }
+        return (count > cnt) ? cnt : count;
+}
+static struct proc_dir_entry *proc_bus_ecard_dir = NULL;
+/*
+ * Create the "ecard" directory under proc_bus and, inside it, the
+ * "devices" entry served by get_ecard_dev_info().
+ */
+static void ecard_proc_init(void)
+{
+        proc_bus_ecard_dir = proc_mkdir("ecard", proc_bus);
+        /*
+         * If the directory could not be created, do not go on to
+         * create "devices" with a NULL parent: the entry would not end
+         * up where it belongs.
+         */
+        if (!proc_bus_ecard_dir) {
+                printk(KERN_ERR "ecard: unable to create proc directory\n");
+                return;
+        }
+        create_proc_info_entry("devices", 0, proc_bus_ecard_dir,
+                get_ecard_dev_info);
+}
+/*
+ * Fill in resource number nr of card ec: named after the card's bus id,
+ * covering sz bytes from st, with flags flg.  Every argument is
+ * parenthesised in the expansion so that expressions may be passed;
+ * note that ec and st are evaluated more than once.
+ */
+#define ec_set_resource(ec,nr,st,sz,flg)                        \
+        do {                                                    \
+                (ec)->resource[nr].name = (ec)->dev.bus_id;     \
+                (ec)->resource[nr].start = (st);                \
+                (ec)->resource[nr].end = (st) + (sz) - 1;       \
+                (ec)->resource[nr].flags = (flg);               \
+        } while (0)
+/*
+ * Describe a card's MEMC window and its IOC windows as resources, then
+ * try to claim each populated resource from iomem_resource.  A resource
+ * that cannot be claimed is logged and turned into a zero-based range
+ * of the same length.
+ */
+static void __init ecard_init_resources(struct expansion_card *ec)
+{
+        unsigned int slot = ec->slot_no;
+        unsigned long base = PODSLOT_IOC0_BASE;
+        int i;
+        ec_set_resource(ec, ECARD_RES_MEMC,
+                        PODSLOT_MEMC_BASE + (slot << 14),
+                        PODSLOT_MEMC_SIZE, IORESOURCE_MEM);
+        i = 0;
+        while (i < ECARD_RES_IOCSYNC - ECARD_RES_IOCSLOW) {
+                ec_set_resource(ec, i + ECARD_RES_IOCSLOW,
+                                base + (slot << 14) + (i << 19),
+                                PODSLOT_IOC_SIZE, IORESOURCE_MEM);
+                i++;
+        }
+        for (i = 0; i < ECARD_NUM_RESOURCES; i++) {
+                if (!ec->resource[i].start)
+                        continue;
+                if (request_resource(&iomem_resource, &ec->resource[i]) == 0)
+                        continue;
+                printk(KERN_ERR "%s: resource(s) not available\n",
+                        ec->dev.bus_id);
+                ec->resource[i].end -= ec->resource[i].start;
+                ec->resource[i].start = 0;
+        }
+}
+/* Device attribute "irq": the card's IRQ number in decimal. */
+static ssize_t ecard_show_irq(struct device *dev, char *buf)
+{
+        struct expansion_card *card;
+        card = ECARD_DEV(dev);
+        return sprintf(buf, "%u\n", card->irq);
+}
+/* Device attribute "vendor": the card's manufacturer code in decimal. */
+static ssize_t ecard_show_vendor(struct device *dev, char *buf)
+{
+        struct expansion_card *card;
+        card = ECARD_DEV(dev);
+        return sprintf(buf, "%u\n", card->cid.manufacturer);
+}
+/* Device attribute "device": the card's product code in decimal. */
+static ssize_t ecard_show_device(struct device *dev, char *buf)
+{
+        struct expansion_card *card;
+        card = ECARD_DEV(dev);
+        return sprintf(buf, "%u\n", card->cid.product);
+}
+/* Device attribute "dma": the card's DMA channel in decimal. */
+static ssize_t ecard_show_dma(struct device *dev, char *buf)
+{
+        struct expansion_card *card;
+        card = ECARD_DEV(dev);
+        return sprintf(buf, "%u\n", card->dma);
+}
+/*
+ * Device attribute "resource": one line per resource slot giving its
+ * start, end and flags as 8-digit hex numbers.
+ */
+static ssize_t ecard_show_resources(struct device *dev, char *buf)
+{
+        struct expansion_card *card = ECARD_DEV(dev);
+        char *out = buf;
+        int nr = 0;
+        while (nr < ECARD_NUM_RESOURCES) {
+                out += sprintf(out, "%08lx %08lx %08lx\n",
+                               card->resource[nr].start,
+                               card->resource[nr].end,
+                               card->resource[nr].flags);
+                nr++;
+        }
+        return out - buf;
+}
+static DEVICE_ATTR(irq, S_IRUGO, ecard_show_irq, NULL);
+static DEVICE_ATTR(vendor, S_IRUGO, ecard_show_vendor, NULL);
+static DEVICE_ATTR(device, S_IRUGO, ecard_show_device, NULL);
+static DEVICE_ATTR(dma, S_IRUGO, ecard_show_dma, NULL);
+static DEVICE_ATTR(resource, S_IRUGO, ecard_show_resources, NULL);
+/*
+ * Probe for an expansion card.
+ *
+ * If bit 1 of the first byte of the card is set, then the
+ * card does not exist.
+ */
+static int __init
+ecard_probe(int slot, card_type_t type)
+{       /*
+         * Allocate and zero a descriptor for the slot, read the 16-byte
+         * ID block from the card and, if a card answers, fill in the
+         * descriptor, claim its resources, set up its IRQ, append it to
+         * the cards list and slot table and register it as a device.
+         *
+         * Returns 0 on success, -ENOMEM if the descriptor cannot be
+         * allocated, or -ENODEV if the slot has no address or the ID
+         * block's r_zero field is non-zero after the read.
+         */
+        ecard_t **ecp;
+        ecard_t *ec;
+        struct ex_ecid cid;
+        int i, rc = -ENOMEM;
+        ec = kmalloc(sizeof(ecard_t), GFP_KERNEL);
+        if (!ec)
+                goto nomem;
+        memset(ec, 0, sizeof(ecard_t));
+        ec->slot_no     = slot;
+        ec->type        = type;
+        ec->irq         = NO_IRQ;
+        ec->fiq         = NO_IRQ;
+        ec->dma         = NO_DMA;
+        ec->card_desc   = NULL;
+        ec->ops         = &ecard_default_ops;
+        rc = -ENODEV;
+        if ((ec->podaddr = ecard_address(ec, type, ECARD_SYNC)) == 0)
+                goto nodev;
+        cid.r_zero = 1;                 /* preset to the "no card" value before reading */
+        ecard_readbytes(&cid, ec, 0, 16, 0);    /* read the ID block directly, without a loader */
+        if (cid.r_zero)
+                goto nodev;
+        ec->cid.id      = cid.r_id;
+        ec->cid.cd      = cid.r_cd;
+        ec->cid.is      = cid.r_is;
+        ec->cid.w       = cid.r_w;
+        ec->cid.manufacturer = ecard_getu16(cid.r_manu);
+        ec->cid.product = ecard_getu16(cid.r_prod);
+        ec->cid.country = cid.r_country;
+        ec->cid.irqmask = cid.r_irqmask;
+        ec->cid.irqoff  = ecard_gets24(cid.r_irqoff);
+        ec->cid.fiqmask = cid.r_fiqmask;
+        ec->cid.fiqoff  = ecard_gets24(cid.r_fiqoff);
+        ec->fiqaddr     =               /* both status pointers start at the card's base */
+        ec->irqaddr     = (unsigned char *)ioaddr(ec->podaddr);
+        if (ec->cid.is) {               /* card supplies its own status offsets and masks */
+                ec->irqmask = ec->cid.irqmask;
+                ec->irqaddr += ec->cid.irqoff;
+                ec->fiqmask = ec->cid.fiqmask;
+                ec->fiqaddr += ec->cid.fiqoff;
+        } else {                        /* otherwise fixed masks at the base address */
+                ec->irqmask = 1;
+                ec->fiqmask = 4;
+        }
+        for (i = 0; i < sizeof(blacklist) / sizeof(*blacklist); i++)    /* use the built-in description for listed cards */
+                if (blacklist[i].manufacturer == ec->cid.manufacturer &&
+                    blacklist[i].product == ec->cid.product) {
+                        ec->card_desc = blacklist[i].type;
+                        break;
+                }
+        snprintf(ec->dev.bus_id, sizeof(ec->dev.bus_id), "ecard%d", slot);
+        ec->dev.parent = NULL;
+        ec->dev.bus    = &ecard_bus_type;
+        ec->dev.dma_mask = &ec->dma_mask;
+        ec->dma_mask = (u64)0xffffffff;
+        ecard_init_resources(ec);
+        /*
+         * hook the interrupt handlers
+         */
+        ec->irq = 32 + slot;            /* ecard_irq_mask/unmask recover the slot as irqnr - 32 */
+        set_irq_chip(ec->irq, &ecard_chip);
+        set_irq_handler(ec->irq, do_level_IRQ);
+        set_irq_flags(ec->irq, IRQF_VALID);
+        for (ecp = &cards; *ecp; ecp = &(*ecp)->next);  /* empty body: walk to the tail of the list */
+        *ecp = ec;
+        slot_to_expcard[slot] = ec;
+        device_register(&ec->dev);      /* NOTE(review): return values of the registration calls are not checked */
+        device_create_file(&ec->dev, &dev_attr_dma);
+        device_create_file(&ec->dev, &dev_attr_irq);
+        device_create_file(&ec->dev, &dev_attr_resource);
+        device_create_file(&ec->dev, &dev_attr_vendor);
+        device_create_file(&ec->dev, &dev_attr_device); 
+        return 0;
+nodev:
+        kfree(ec);
+nomem:
+        return rc;
+}
+/*
+ * Initialise the expansion card system: probe every slot for an IOC
+ * card, install the chained handler for the expansion card interrupt
+ * and create the proc entries.  Probe failures for individual slots
+ * are ignored.
+ */
+static int __init ecard_init(void)
+{
+        int slot = 0;
+        int irqhw;
+        printk("Probing expansion cards\n");
+        while (slot < MAX_ECARDS) {
+                ecard_probe(slot, ECARD_IOC);
+                slot++;
+        }
+        irqhw = ecard_probeirqhw();
+        set_irq_chained_handler(IRQ_EXPANSIONCARD,
+                                irqhw ? ecard_irqexp_handler : ecard_irq_handler);
+        ecard_proc_init();
+        return 0;
+}
+subsys_initcall(ecard_init);
+/*
+ *      ECARD "bus"
+ */
+/*
+ * Search an id table, terminated by an entry whose manufacturer is
+ * 65535, for the entry matching the card's manufacturer and product.
+ * Returns the matching entry or NULL.
+ */
+static const struct ecard_id *
+ecard_match_device(const struct ecard_id *ids, struct expansion_card *ec)
+{
+        const struct ecard_id *id;
+        for (id = ids; id->manufacturer != 65535; id++) {
+                if (ec->cid.manufacturer != id->manufacturer)
+                        continue;
+                if (ec->cid.product == id->product)
+                        return id;
+        }
+        return NULL;
+}
+/*
+ * Driver-model probe hook for expansion card drivers.
+ *
+ * Claims the card and calls the driver's probe routine with the
+ * matching id table entry (NULL when the driver has no id table or no
+ * entry matches).  The card is released again if the driver's probe
+ * fails, and the driver's return value is passed back.
+ */
+static int ecard_drv_probe(struct device *dev)
+{
+        struct expansion_card *ec = ECARD_DEV(dev);
+        struct ecard_driver *drv = ECARD_DRV(dev->driver);
+        const struct ecard_id *id = NULL;
+        int ret;
+        /*
+         * ecard_match() lets a driver bind on the simple card id with
+         * no id_table at all, and ecard_match_device() dereferences the
+         * table unconditionally, so only consult it when it exists.
+         */
+        if (drv->id_table)
+                id = ecard_match_device(drv->id_table, ec);
+        ecard_claim(ec);
+        ret = drv->probe(ec, id);
+        if (ret)
+                ecard_release(ec);
+        return ret;
+}
+/*
+ * Driver-model remove hook: let the driver detach from the card, then
+ * release the card.  Always returns 0.
+ */
+static int ecard_drv_remove(struct device *dev)
+{
+        struct expansion_card *card = ECARD_DEV(dev);
+        struct ecard_driver *driver = ECARD_DRV(dev->driver);
+        driver->remove(card);
+        ecard_release(card);
+        return 0;
+}
+/*
+ * Before rebooting, the expansion card must be left in a sensible
+ * state so that it can be detected again, which means the first page
+ * of its ROM must be visible.  After the driver's own shutdown hook (if
+ * any) has run and the card has been released, a reset request is
+ * issued, which runs the card's loader reset handler when it has one.
+ */
+static void ecard_drv_shutdown(struct device *dev)
+{
+        struct expansion_card *card = ECARD_DEV(dev);
+        struct ecard_driver *driver = ECARD_DRV(dev->driver);
+        struct ecard_request reset;
+        if (driver->shutdown)
+                driver->shutdown(card);
+        ecard_release(card);
+        reset.ec = card;
+        reset.req = req_reset;
+        ecard_call(&reset);
+}
+/*
+ * Register an expansion card driver: attach it to the ecard bus, point
+ * its driver-model hooks at the ecard wrappers and register it.
+ * Returns the result of driver_register().
+ */
+int ecard_register_driver(struct ecard_driver *drv)
+{
+        struct device_driver *core = &drv->drv;
+        core->bus = &ecard_bus_type;
+        core->probe = ecard_drv_probe;
+        core->remove = ecard_drv_remove;
+        core->shutdown = ecard_drv_shutdown;
+        return driver_register(core);
+}
+void ecard_remove_driver(struct ecard_driver *drv)
+{
+        driver_unregister(&drv->drv);   /* counterpart of the driver_register() call in ecard_register_driver() */
+}
+/*
+ * Bus match hook.  A driver with an id table matches when the card's
+ * manufacturer/product pair is in the table; a driver without one
+ * matches on the simple card id.
+ */
+static int ecard_match(struct device *_dev, struct device_driver *_drv)
+{
+        struct expansion_card *card = ECARD_DEV(_dev);
+        struct ecard_driver *driver = ECARD_DRV(_drv);
+        if (driver->id_table)
+                return ecard_match_device(driver->id_table, card) != NULL;
+        return card->cid.id == driver->id;
+}
+struct bus_type ecard_bus_type = {
+        .name   = "ecard",
+        .match  = ecard_match,          /* decides which drivers may bind to which cards */
+};
+static int ecard_bus_init(void)
+{
+        return bus_register(&ecard_bus_type);   /* result is returned to the initcall machinery */
+}
+postcore_initcall(ecard_bus_init);
+EXPORT_SYMBOL(ecard_readchunk);
+EXPORT_SYMBOL(ecard_address);
+EXPORT_SYMBOL(ecard_register_driver);
+EXPORT_SYMBOL(ecard_remove_driver);
+EXPORT_SYMBOL(ecard_bus_type);
diff --git a/arch/arm26/kernel/entry.S b/arch/arm26/kernel/entry.S
new file mode 100644
index 000000000000..a231dd88d0e1
--- /dev/null
+++ b/arch/arm26/kernel/entry.S
@@ -0,0 +1,961 @@
+/* arch/arm26/kernel/entry.S
+ * 
+ * Assembled from chunks of code in arch/arm
+ *
+ * Copyright (C) 2003 Ian Molton
+ * Based on the work of RMK.
+ *
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm_offsets.h>
+#include <asm/errno.h>
+#include <asm/hardware.h>
+#include <asm/sysirq.h>
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+        .macro  zero_fp                  @ clear the frame pointer register
+#ifndef CONFIG_NO_FRAME_POINTER
+        mov     fp, #0                   @ emitted only when frame pointers are in use
+#endif
+        .endm
+        .text
+@ Bad Abort numbers
+@ -----------------
+@
+#define BAD_PREFETCH    0
+#define BAD_DATA        1
+#define BAD_ADDREXCPTN  2
+#define BAD_IRQ         3
+#define BAD_UNDEFINSTR  4
+@ OS version number used in SWIs
+@  RISC OS is 0
+@  RISC iX is 8
+@
+#define OS_NUMBER       9
+#define ARMSWI_OFFSET   0x000f0000
+@
+@ Stack format (ensured by USER_* and SVC_*)
+@ PSR and PC are combined on arm26
+@
+#define S_OFF           8
+#define S_OLD_R0        64
+#define S_PC            60
+#define S_LR            56
+#define S_SP            52
+#define S_IP            48
+#define S_FP            44
+#define S_R10           40
+#define S_R9            36
+#define S_R8            32
+#define S_R7            28
+#define S_R6            24
+#define S_R5            20
+#define S_R4            16
+#define S_R3            12
+#define S_R2            8
+#define S_R1            4
+#define S_R0            0
+        .macro  save_user_regs   @ build the register frame for an entry from user mode
+        str     r0, [sp, #-4]!   @ push r0; this word ends up at offset S_OLD_R0
+        str     lr, [sp, #-4]!   @ Store user mode PC (ends up at offset S_PC)
+        sub     sp, sp, #15*4    @ make room for the 15 words r0 - lr
+        stmia   sp, {r0 - lr}^   @ Store the other user-mode regs
+        mov     r0, r0           @ nop following the user-bank store
+        .endm
+        .macro  slow_restore_user_regs   @ return to user mode reloading every register, r0 included
+        ldmia   sp, {r0 - lr}^   @ restore the user regs not including PC
+        mov     r0, r0           @ nop following the user-bank load
+        ldr     lr, [sp, #15*4]  @ get user PC (the word at offset S_PC)
+        add     sp, sp, #15*4+8  @ free stack: 15 registers plus the PC and old-r0 words
+        movs    pc, lr           @ return
+        .endm
+        .macro  fast_restore_user_regs   @ return to user mode leaving the live r0 untouched
+        add     sp, sp, #S_OFF           @ step over the S_OFF bytes below the saved registers
+        ldmib   sp, {r1 - lr}^           @ reload user r1 - lr; the saved r0 word is skipped
+        mov     r0, r0                   @ nop following the user-bank load
+        ldr     lr, [sp, #15*4]          @ get user PC (the word at offset S_PC)
+        add     sp, sp, #15*4+8          @ free stack: 15 registers plus the PC and old-r0 words
+        movs    pc, lr                   @ return
+        .endm
+        .macro  save_svc_regs            @ build the register frame for an entry from a non-user mode
+        str     sp, [sp, #-16]!          @ reserve the sp/lr/pc/old-r0 words, storing sp in the first
+        str     lr, [sp, #8]             @ lr goes into the word that becomes S_PC ...
+        str     lr, [sp, #4]             @ ... and into the word that becomes S_LR
+        stmfd   sp!, {r0 - r12}          @ push r0 - r12 below them
+        mov     r0, #-1
+        str     r0, [sp, #S_OLD_R0]      @ old-r0 word is set to -1
+        zero_fp
+        .endm
+        .macro  save_svc_regs_irq        @ as save_svc_regs, but the PC word is taken from __temp_irq
+        str     sp, [sp, #-16]!          @ reserve the sp/lr/pc/old-r0 words, storing sp in the first
+        str     lr, [sp, #4]             @ current lr goes into the word that becomes S_LR
+        ldr     lr, .LCirq               @ lr = address of __temp_irq
+        ldr     lr, [lr]                 @ lr = return address stored there by vector_IRQ
+        str     lr, [sp, #8]             @ ... which goes into the word that becomes S_PC
+        stmfd   sp!, {r0 - r12}          @ push r0 - r12 below them
+        mov     r0, #-1
+        str     r0, [sp, #S_OLD_R0]      @ old-r0 word is set to -1
+        zero_fp
+        .endm
+        .macro  restore_svc_regs         @ leave the exception by reloading the saved frame
+                ldmfd   sp, {r0 - pc}^   @ r0 - pc in one go; loading pc performs the return
+        .endm
+        .macro  mask_pc, rd, rm          @ rd = rm with the PCMASK bits cleared
+        bic     \rd, \rm, #PCMASK        @ NOTE(review): PCMASK is defined elsewhere, presumably the PSR bits sharing the PC word
+        .endm
+        .macro  disable_irqs, temp       @ mask IRQs; clobbers the temp register
+        mov     \temp, pc                @ read the combined PC and PSR word
+        orr     \temp, \temp, #PSR_I_BIT @ set the I bit
+        teqp    \temp, #0                @ write the result back to the PSR
+        .endm
+        .macro  enable_irqs, temp        @ unmask IRQs; clobbers the temp register
+        mov     \temp, pc                @ read the combined PC and PSR word
+        and     \temp, \temp, #~PSR_I_BIT @ clear the I bit
+        teqp    \temp, #0                @ write the result back to the PSR
+        .endm
+        .macro  initialise_traps_extra   @ expands to nothing
+        .endm
+        .macro  get_thread_info, rd      @ rd = sp rounded down to a multiple of 8192
+        mov     \rd, sp, lsr #13         @ drop the low 13 bits ...
+        mov     \rd, \rd, lsl #13        @ ... and shift back into place
+        .endm
+/*
+ * These are the registers used in the syscall handler, and allow us to
+ * have in theory up to 7 arguments to a function - r0 to r6.
+ *
+ * Note that tbl == why is intentional.
+ *
+ * We must set at least "tsk" and "why" when calling ret_with_reschedule.
+ */
+scno    .req    r7              @ syscall number
+tbl     .req    r8              @ syscall table pointer
+why     .req    r8              @ Linux syscall (!= 0)
+tsk     .req    r9              @ current thread_info
+/*
+ * Get the system call number.
+ *
+ * lr must hold the SWI return address.  Its PCMASK bits are cleared in
+ * place and the word just before that address - the SWI instruction
+ * itself - is loaded into scno.  Stripping the opcode bits from scno is
+ * left to the caller.
+ */
+        .macro  get_scno
+        mask_pc lr, lr
+        ldr     scno, [lr, #-4]         @ get SWI instruction
+        .endm
+/*
+ *  -----------------------------------------------------------------------
+ */
+/* 
+ * We rely on the fact that R0 is at the bottom of the stack (due to
+ * slow/fast restore user regs).
+ */
+#if S_R0 != 0
+#error "Please fix"
+#endif
+/*
+ * This is the fast syscall return path.  We do as little as possible
+ * here: r0 (the value returned by the sys_* routine) stays in its
+ * register and is only written back to the saved frame on the SVC
+ * stack if we have to leave the fast path (see fast_work_pending).
+ * tsk must hold the thread_info pointer.
+ */
+ret_fast_syscall:
+        disable_irqs r1                         @ disable interrupts
+        ldr     r1, [tsk, #TI_FLAGS]            @ r1 = thread flags
+        tst     r1, #_TIF_WORK_MASK             @ any work to do before returning?
+        bne     fast_work_pending
+        fast_restore_user_regs                  @ no: straight back to user mode
+/*
+ * Ok, we need to do extra processing, enter the slow path.
+ *
+ * fast_work_pending first stores r0 into the saved frame and pops the
+ * S_OFF bytes, so that from work_pending onwards sp points at the
+ * saved registers.  r1 holds the thread flags on entry to work_pending.
+ */
+fast_work_pending:
+        str     r0, [sp, #S_R0+S_OFF]!          @ returned r0
+work_pending:
+        tst     r1, #_TIF_NEED_RESCHED
+        bne     work_resched
+        tst     r1, #_TIF_NOTIFY_RESUME | _TIF_SIGPENDING
+        beq     no_work_pending
+        mov     r0, sp                          @ 'regs'
+        mov     r2, why                         @ 'syscall'
+        bl      do_notify_resume                @ r1 still holds the thread flags
+        disable_irqs r1                         @ disable interrupts
+        b       no_work_pending
+work_resched:
+        bl      schedule                        @ then fall through and check the flags again
+/*
+ * "slow" syscall return path.  "why" tells us if this was a real syscall.
+ *
+ * Expects sp to point at the saved registers and tsk at the thread_info.
+ * Loops through work_pending until no _TIF_WORK_MASK flag is left, then
+ * reloads the complete user register set.
+ */
+ENTRY(ret_to_user)
+ret_slow_syscall:
+        disable_irqs r1                         @ disable interrupts
+        ldr     r1, [tsk, #TI_FLAGS]            @ r1 = thread flags
+        tst     r1, #_TIF_WORK_MASK             @ any work to do before returning?
+        bne     work_pending
+no_work_pending:
+        slow_restore_user_regs                  @ back to user mode
+/*
+ * This is how we return from a fork.
+ *
+ * Calls schedule_tail, sets up tsk and why for the slow return path and,
+ * when syscall tracing is active, reports the syscall exit first.
+ */
+ENTRY(ret_from_fork)
+        bl      schedule_tail
+        get_thread_info tsk
+        ldr     r1, [tsk, #TI_FLAGS]            @ check for syscall tracing
+        mov     why, #1                         @ non-zero: treat as a real syscall
+        tst     r1, #_TIF_SYSCALL_TRACE         @ are we tracing syscalls?
+        beq     ret_slow_syscall
+        mov     r1, sp                          @ r1 = saved registers
+        mov     r0, #1                          @ trace exit [IP = 1]
+        bl      syscall_trace
+        b       ret_slow_syscall
+        
+// FIXME - is this strictly necessary?
+#include "calls.S"
+/*=============================================================================
+ * SWI handler
+ *-----------------------------------------------------------------------------
+ */
+        .align  5
+ENTRY(vector_swi)                               @ SWI (system call) entry point
+        save_user_regs                          @ frame of user registers on the SVC stack
+        zero_fp
+        get_scno                                @ scno = the SWI instruction word
+#ifdef CONFIG_ALIGNMENT_TRAP
+        ldr     ip, __cr_alignment              @ ip = address of cr_alignment
+        ldr     ip, [ip]                        @ ip = its current value
+        mcr     p15, 0, ip, c1, c0              @ update control register
+#endif
+        enable_irqs ip
+        str     r4, [sp, #-S_OFF]!              @ push fifth arg
+        get_thread_info tsk
+        ldr     ip, [tsk, #TI_FLAGS]            @ check for syscall tracing
+        bic     scno, scno, #0xff000000         @ mask off SWI op-code
+        eor     scno, scno, #OS_NUMBER << 20    @ check OS number (a match cancels those bits)
+        adr     tbl, sys_call_table             @ load syscall table pointer
+        tst     ip, #_TIF_SYSCALL_TRACE         @ are we tracing syscalls?
+        bne     __sys_trace
+        adral   lr, ret_fast_syscall            @ set return address
+        orral   lr, lr, #PSR_I_BIT | MODE_SVC26 @ Force SVC mode on return
+        cmp     scno, #NR_syscalls              @ check upper syscall limit
+        ldrcc   pc, [tbl, scno, lsl #2]         @ call sys_* routine
+        add     r1, sp, #S_OFF                  @ out of range: r1 = saved registers
+2:      mov     why, #0                         @ no longer a real syscall
+        cmp     scno, #ARMSWI_OFFSET            @ at or above the ARM private range?
+        eor     r0, scno, #OS_NUMBER << 20      @ put OS number back
+        bcs     arm_syscall                     @ yes: ARM private call
+        b       sys_ni_syscall                  @ not private func
+        /*
+         * This is the really slow path.  We're going to be doing
+         * context switches, and waiting for our parent to respond.
+         */
+__sys_trace:                                    @ traced variant of the syscall dispatch in vector_swi
+        add     r1, sp, #S_OFF                  @ r1 = saved registers
+        mov     r0, #0                          @ trace entry [IP = 0]
+        bl      syscall_trace
+        adral   lr, __sys_trace_return          @ set return address
+        orral   lr, lr, #PSR_I_BIT | MODE_SVC26 @ Force SVC mode on return
+        add     r1, sp, #S_R0 + S_OFF           @ pointer to regs
+        cmp     scno, #NR_syscalls              @ check upper syscall limit
+        ldmccia r1, {r0 - r3}                   @ have to reload r0 - r3
+        ldrcc   pc, [tbl, scno, lsl #2]         @ call sys_* routine
+        b       2b                              @ out of range: shared path in vector_swi
+__sys_trace_return:
+        str     r0, [sp, #S_R0 + S_OFF]!        @ save returned r0 and pop the S_OFF bytes
+        mov     r1, sp                          @ r1 = saved registers
+        mov     r0, #1                          @ trace exit [IP = 1]
+        bl      syscall_trace
+        b       ret_slow_syscall
+        .align  5
+#ifdef CONFIG_ALIGNMENT_TRAP
+        .type   __cr_alignment, #object
+__cr_alignment:
+        .word   cr_alignment                    @ literal: address of cr_alignment, read by vector_swi
+#endif
+        .type   sys_call_table, #object
+ENTRY(sys_call_table)                           @ table indexed by syscall number; entries come from calls.S
+#include "calls.S"
+/*============================================================================
+ * Special system call wrappers
+ */
+@ sys_syscall: indirect system call.
+@ r0        = syscall number, OS number bits still included
+@ r1 - r4   = first four arguments of the target call
+@ r5, r6    = remaining arguments, stored to the two stack words at sp
+@ tbl (r8)  = syscall table pointer, as loaded by vector_swi
+@ The range check is unsigned and exclusive (cc), matching vector_swi, so
+@ a number equal to NR_syscalls or with the top bit set is rejected
+@ rather than used as a table index.
+                .type   sys_syscall, #function
+sys_syscall:
+                eor     scno, r0, #OS_NUMBER << 20
+                cmp     scno, #NR_syscalls      @ check range (unsigned)
+                stmccia sp, {r5, r6}            @ shuffle args
+                movcc   r0, r1
+                movcc   r1, r2
+                movcc   r2, r3
+                movcc   r3, r4
+                ldrcc   pc, [tbl, scno, lsl #2] @ in range: jump to the target routine
+                b       sys_ni_syscall          @ out of range
+sys_fork_wrapper:
+                add     r0, sp, #S_OFF          @ r0 = address of the saved registers
+                b       sys_fork
+sys_vfork_wrapper:
+                add     r0, sp, #S_OFF          @ r0 = address of the saved registers
+                b       sys_vfork
+sys_execve_wrapper:
+                add     r3, sp, #S_OFF          @ r3 = address of the saved registers
+                b       sys_execve
+sys_clone_wapper:                               @ (sic) NOTE(review): spelling presumably matches the reference in calls.S - confirm before renaming
+                add     r2, sp, #S_OFF          @ r2 = address of the saved registers
+                b       sys_clone
+sys_sigsuspend_wrapper:
+                add     r3, sp, #S_OFF          @ r3 = address of the saved registers
+                b       sys_sigsuspend
+sys_rt_sigsuspend_wrapper:
+                add     r2, sp, #S_OFF          @ r2 = address of the saved registers
+                b       sys_rt_sigsuspend
+sys_sigreturn_wrapper:
+                add     r0, sp, #S_OFF          @ r0 = address of the saved registers
+                b       sys_sigreturn
+sys_rt_sigreturn_wrapper:
+                add     r0, sp, #S_OFF          @ r0 = address of the saved registers
+                b       sys_rt_sigreturn
+sys_sigaltstack_wrapper:
+                ldr     r2, [sp, #S_OFF + S_SP] @ r2 = sp value from the saved registers
+                b       do_sigaltstack
+/*
+ * Note: off_4k (r5) is always units of 4K.  If we can't do the requested
+ * offset, we return EINVAL.  FIXME - this lost some stuff from arm32 to
+ * ifdefs. check it out.
+ *
+ * An offset that is a whole number of pages is converted from 4K units
+ * to pages, written to the stack word at sp + 4 and passed on to
+ * do_mmap2; anything else returns -EINVAL directly.
+ */
+sys_mmap2:
+                tst     r5, #((1 << (PAGE_SHIFT - 12)) - 1) @ offset a whole number of pages?
+                moveq   r5, r5, lsr #PAGE_SHIFT - 12    @ yes: convert 4K units to pages
+                streq   r5, [sp, #4]                    @ store it for do_mmap2
+                beq     do_mmap2
+                mov     r0, #-EINVAL
+                RETINSTR(mov,pc, lr)
+/*
+ *  Design issues:
+ *   - We have several modes that each vector can be called from,
+ *     each with its own set of registers.  On entry to any vector,
+ *     we *must* save the registers used in *that* mode.
+ *
+ *   - This code must be as fast as possible.
+ *
+ *  There are a few restrictions on the vectors:
+ *   - the SWI vector cannot be called from *any* non-user mode
+ *
+ *   - the FP emulator is *never* called from *any* non-user mode undefined
+ *     instruction.
+ *
+ */
+                .text
+                .macro handle_irq                  @ dispatch pending IRQs until none is left; sp = saved registers
+1:              mov     r4, #IOC_BASE
+                ldrb    r6, [r4, #0x24]            @ get high priority first
+                adr     r5, irq_prio_h             @ ... with its lookup table
+                teq     r6, #0                     @ nothing pending there?
+                ldreqb  r6, [r4, #0x14]            @ get low priority
+                adreq   r5, irq_prio_l             @ ... with its lookup table
+                teq     r6, #0                     @ If an IRQ happened...
+                ldrneb  r0, [r5, r6]               @ get IRQ number
+                movne   r1, sp                     @ get struct pt_regs
+                adrne   lr, 1b                     @ Set return address to 1b
+                orrne   lr, lr, #PSR_I_BIT | MODE_SVC26  @ (and force SVC mode)
+                bne     asm_do_IRQ                 @ process IRQ (if asserted)
+                .endm
+/*
+ * Interrupt table (incorporates priority)
+ *
+ * Two 256-entry byte tables used by handle_irq: the byte read from the
+ * IOC is the index, the entry is the IRQ number to service.  irq_prio_l
+ * yields numbers 0 - 7 and irq_prio_h numbers 8 - 15.  handle_irq never
+ * looks up index 0, as a zero byte means nothing is pending.
+ */
+                .macro  irq_prio_table
+irq_prio_l:     .byte    0, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3
+                .byte    4, 0, 1, 0, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3
+                .byte    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+                .byte    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+                .byte    6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3
+                .byte    6, 6, 6, 6, 6, 6, 6, 6, 3, 3, 3, 3, 3, 3, 3, 3
+                .byte    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+                .byte    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+                .byte    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+irq_prio_h:     .byte    0, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   12, 8, 9, 8,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   14,14,14,14,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   15,15,15,15,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .byte   13,13,13,13,10,10,10,10,11,11,11,11,10,10,10,10
+                .endm
+#if 1
+/*
+ * These give more debugging info about data aborts (which kind of
+ * instruction faulted); they are currently enabled by the "#if 1" above.
+ * FIXME - I bet we can find a way to encode these and keep performance.
+ */
+#define FAULT_CODE_LDRSTRPOST   0x80
+#define FAULT_CODE_LDRSTRPRE    0x40
+#define FAULT_CODE_LDRSTRREG    0x20
+#define FAULT_CODE_LDMSTM       0x10
+#define FAULT_CODE_LDCSTC       0x08
+#endif
+#define FAULT_CODE_PREFETCH     0x04
+#define FAULT_CODE_WRITE        0x02
+#define FAULT_CODE_FORCECOW     0x01
+/*=============================================================================
+ * Undefined FIQs
+ *-----------------------------------------------------------------------------
+ */
+_unexp_fiq:     ldr     sp, .LCfiq              @ sp = address of __temp_fiq
+                mov     r12, #IOC_BASE
+                strb    r12, [r12, #0x38]       @ Disable FIQ register
+                teqp    pc, #PSR_I_BIT | PSR_F_BIT | MODE_SVC26 @ to SVC mode, IRQ and FIQ masked
+                mov     r0, r0                  @ nop following the mode change
+                stmfd   sp!, {r0 - r3, ip, lr}  @ keep the registers around the call
+                adr     r0, Lfiqmsg
+                bl      printk                  @ report the unexpected FIQ
+                ldmfd   sp!, {r0 - r3, ip, lr}
+                teqp    pc, #PSR_I_BIT | PSR_F_BIT | MODE_FIQ26 @ back to FIQ mode
+                mov     r0, r0                  @ nop following the mode change
+                movs    pc, lr                  @ return from the FIQ
+Lfiqmsg:        .ascii  "*** Unexpected FIQ\n\0"
+                .align
+.LCfiq:         .word   __temp_fiq              @ literal: address of the FIQ scratch area
+.LCirq:         .word   __temp_irq              @ literal: address of the saved IRQ return address
+/*=============================================================================
+ * Undefined instruction handler
+ *-----------------------------------------------------------------------------
+ * Handles floating point instructions
+ */
+vector_undefinstr:
+                tst     lr, #MODE_SVC26          @ did we come from a non-user mode?
+                bne     __und_svc                @ yes - deal with it.
+/* Otherwise, fall through for the user-space (common) case. */
+                save_user_regs
+                zero_fp                                 @ zero frame pointer
+                teqp    pc, #PSR_I_BIT | MODE_SVC26     @ disable IRQs
+.Lbug_undef:
+                ldr     r4, .LC2         @ r4 = address of the fp_enter pointer
+                ldr     pc, [r4]         @ Call FP module entry point
+/* FIXME - should we trap for a null pointer here? */
+/* The SVC mode case */
+__und_svc:      save_svc_regs                           @ Non-user mode
+                mask_pc r0, lr                          @ r0 = return address without the PCMASK bits
+                and     r2, lr, #3                      @ r2 = mode bits of the faulting context
+                sub     r0, r0, #4                      @ r0 = address of the undefined instruction
+                mov     r1, sp                          @ r1 = saved registers
+                bl      do_undefinstr
+                restore_svc_regs
+/* We get here if the FP emulator doesnt handle the undef instr.
+ * If the insn WAS handled, the emulator jumps to ret_from_exception by itself.
+ */
+                .globl  fpundefinstr 
+fpundefinstr:
+                mov     r0, lr                          @ r0 = lr as left by the emulator
+                mov     r1, sp                          @ r1 = saved registers
+                teqp    pc, #MODE_SVC26                 @ SVC mode with the I bit clear
+                bl      do_undefinstr
+                b       ret_from_exception              @ Normal FP exit
+#if defined CONFIG_FPE_NWFPE || defined CONFIG_FPE_FASTFPE
+                /* The FPE is always present */
+                .equ    fpe_not_present, 0
+#else
+/* We get here if an undefined instruction happens and the floating
+ * point emulator is not present.  If the offending instruction was
+ * a WFS, we just perform a normal return as if we had emulated the
+ * operation.  This is a hack to allow some basic userland binaries
+ * to run so that the emulator module proper can be loaded. --philb
+ * FIXME - probably a broken useless hack...
+ *
+ * LDF/STF based on sp or fp are also skipped; when such an instruction
+ * has writeback set, the base register in the saved frame is adjusted
+ * by the instruction's offset first.  Anything else is passed on to
+ * fpundefinstr.
+ */
+fpe_not_present:
+                adr     r10, wfs_mask_data
+                ldmia   r10, {r4, r5, r6, r7, r8}       @ r4/r5 = WFS pattern/mask, r6/r7 = LDF/STF patterns, r8 = their mask
+                ldr     r10, [sp, #S_PC]                @ Load PC
+                sub     r10, r10, #4                    @ step back to the undefined instruction
+                mask_pc r10, r10
+                ldrt    r10, [r10]                      @ get instruction
+                and     r5, r10, r5
+                teq     r5, r4                          @ Is it WFS?
+                beq     ret_from_exception              @ yes: pretend it was emulated
+                and     r5, r10, r8
+                teq     r5, r6                          @ Is it LDF/STF on sp or fp?
+                teqne   r5, r7
+                bne     fpundefinstr                    @ neither: a genuine undefined instruction
+                tst     r10, #0x00200000                @ Does it have WB
+                beq     ret_from_exception              @ no: nothing to fix up
+                and     r4, r10, #255                   @ get offset
+                and     r6, r10, #0x000f0000            @ base register field (bits 16 - 19)
+                tst     r10, #0x00800000                @ +/-
+                ldr     r5, [sp, r6, lsr #14]           @ Load reg
+                rsbeq   r4, r4, #0                      @ negate the offset when the bit is clear
+                add     r5, r5, r4, lsl #2              @ offset is in words
+                str     r5, [sp, r6, lsr #14]           @ Save reg
+                b       ret_from_exception
+wfs_mask_data:  .word   0x0e200110                      @ WFS/RFS
+                .word   0x0fef0fff                      @ mask applied before the WFS/RFS compare
+                .word   0x0d0d0100                      @ LDF [sp]/STF [sp]
+                .word   0x0d0b0100                      @ LDF [fp]/STF [fp]
+                .word   0x0f0f0f00                      @ mask applied before the LDF/STF compares
+#endif
+.LC2:           .word   fp_enter                        @ literal: address of the fp_enter pointer
+/*=============================================================================
+ * Prefetch abort handler
+ *-----------------------------------------------------------------------------
+ */
+#define DEBUG_UNDEF
+/* remember: lr = USR pc */
+vector_prefetch:
+                sub     lr, lr, #4              @ correct lr to the aborted instruction
+                tst     lr, #MODE_SVC26         @ did we come from a non-user mode?
+                bne     __pabt_invalid
+                save_user_regs
+                teqp    pc, #MODE_SVC26         @ Enable IRQs...
+                mask_pc r0, lr                  @ Address of abort
+                mov     r1, sp                  @ Tasks registers
+                bl      do_PrefetchAbort
+                teq     r0, #0                  @ If non-zero, we believe this abort..
+                bne     ret_from_exception
+#ifdef DEBUG_UNDEF
+                adr     r0, t
+                bl      printk                  @ otherwise log it ...
+#endif
+                ldr     lr, [sp,#S_PC]          @ ... and hand over to the undefined instruction path.
+                b       .Lbug_undef             @ FIXME: needs a test program; thought to be broken at the moment.
+__pabt_invalid: save_svc_regs
+                mov     r0, sp                  @ Prefetch aborts are definitely *not*
+                mov     r1, #BAD_PREFETCH       @ allowed in non-user modes.  We cant
+                and     r2, lr, #3              @ recover from this problem.
+                b       bad_mode
+#ifdef DEBUG_UNDEF
+t:              .ascii "*** undef ***\r\n\0"
+                .align
+#endif
+/*=============================================================================
+ * Address exception handler
+ *-----------------------------------------------------------------------------
+ * These aren't too critical.
+ * (they're not supposed to happen).
+ * In order to debug the reason for address exceptions in non-user modes,
+ * we have to obtain all the registers so that we can see what's going on.
+ */
+vector_addrexcptn:
+                sub     lr, lr, #8              @ correct lr to the faulting instruction
+                tst     lr, #3                  @ mode bits of the faulting context
+                bne     Laddrexcptn_not_user
+                save_user_regs
+                teq     pc, #MODE_SVC26         @ NOTE(review): teq, not teqp - this only sets flags; vector_data uses teqp at this point
+                mask_pc r0, lr                  @ Point to instruction
+                mov     r1, sp                  @ Point to registers
+                mov     r2, #0x400
+                mov     lr, pc                  @ NOTE(review): looks redundant, the bl below overwrites lr
+                bl      do_excpt
+                b       ret_from_exception
+Laddrexcptn_not_user:
+                save_svc_regs
+                and     r2, lr, #3
+                teq     r2, #3                  @ only SVC mode is handled here
+                bne     Laddrexcptn_illegal_mode
+                teqp    pc, #MODE_SVC26         @ SVC mode with the I bit clear
+                mask_pc r0, lr                  @ Point to instruction
+                mov     r1, sp                  @ Point to registers
+                orr     r2, r2, #0x400
+                bl      do_excpt
+                ldmia   sp, {r0 - lr}           @ I cant remember the reason I changed this...
+                add     sp, sp, #15*4
+                movs    pc, lr
+Laddrexcptn_illegal_mode:
+                mov     r0, sp                  @ r0 = frame built by save_svc_regs
+                str     lr, [sp, #-4]!          @ push the faulting PC
+                orr     r1, r2, #PSR_I_BIT | PSR_F_BIT
+                teqp    r1, #0                  @ change into mode (wont be user mode)
+                mov     r0, r0
+                mov     r1, r8                  @ Any register from r8 - r14 can be banked
+                mov     r2, r9
+                mov     r3, r10
+                mov     r4, r11
+                mov     r5, r12
+                mov     r6, r13
+                mov     r7, r14
+                teqp    pc, #PSR_F_BIT | MODE_SVC26 @ back to svc
+                mov     r0, r0
+                stmfd   sp!, {r1-r7}            @ push that modes r8 - r14
+                ldmia   r0, {r0-r7}             @ fetch the saved r0 - r7 from the frame
+                stmfd   sp!, {r0-r7}            @ ... and push them below
+                mov     r0, sp                  @ r0 = the register dump just built
+                mov     r1, #BAD_ADDREXCPTN
+                b       bad_mode
+/*=============================================================================
+ * Interrupt (IRQ) handler
+ *-----------------------------------------------------------------------------
+ * Note: if the IRQ was taken whilst in user mode, then *no* kernel routine
+ * is running, so do not have to save svc lr.
+ *
+ * Entered in IRQ mode.
+ */
+vector_IRQ:     ldr     sp, .LCirq         @ Setup some temporary stack
+                sub     lr, lr, #4         @ correct lr to the address to resume at
+                str     lr, [sp]           @ push return address
+                tst     lr, #3             @ mode bits of the interrupted context
+                bne     __irq_non_usr      @ non-zero: not user mode
+__irq_usr:      teqp    pc, #PSR_I_BIT | MODE_SVC26     @ Enter SVC mode
+                mov     r0, r0
+                ldr     lr, .LCirq
+                ldr     lr, [lr]           @ Restore lr for jump back to USR
+                save_user_regs
+                handle_irq                 @ service everything that is pending
+                mov     why, #0            @ not a syscall
+                get_thread_info tsk
+                b       ret_to_user
+@ Place the IRQ priority table here so that the handle_irq macros above
+@ and below here can access it.
+                irq_prio_table
+__irq_non_usr:  teqp    pc, #PSR_I_BIT | MODE_SVC26     @ Enter SVC mode
+                mov     r0, r0
+                save_svc_regs_irq
+                and     r2, lr, #3                   @ lr = interrupted PC, as reloaded by the macro
+                teq     r2, #3
+                bne     __irq_invalid                @ IRQ not from SVC mode
+                handle_irq                           @ service everything that is pending
+                restore_svc_regs
+__irq_invalid:  mov     r0, sp
+                mov     r1, #BAD_IRQ
+                b       bad_mode
+/*=============================================================================
+ * Data abort handler code
+ *-----------------------------------------------------------------------------
+ *
+ * This handles both exceptions from user and SVC modes, computes the address
+ *  range of the problem, and does any correction that is required.  It then
+ *  calls the kernel data abort routine.
+ *
+ * This is where I wish that the ARM would tell you which address aborted.
+ */
+vector_data:    sub     lr, lr, #8              @ Correct lr
+                tst     lr, #3                  @ mode bits of the aborted context
+                bne     Ldata_not_user
+                save_user_regs
+                teqp    pc, #MODE_SVC26         @ SVC mode with the I bit clear
+                mask_pc r0, lr                  @ r0 = address of the aborted instruction
+                bl      Ldata_do
+                b       ret_from_exception
+Ldata_not_user:
+                save_svc_regs
+                and     r2, lr, #3
+                teq     r2, #3                  @ only SVC mode is handled here
+                bne     Ldata_illegal_mode
+                tst     lr, #PSR_I_BIT          @ was the I bit clear in the aborted context?
+                teqeqp  pc, #MODE_SVC26         @ then clear it again here
+                mask_pc r0, lr                  @ r0 = address of the aborted instruction
+                bl      Ldata_do
+                restore_svc_regs
+Ldata_illegal_mode:
+                mov     r0, sp
+                mov     r1, #BAD_DATA
+                b       bad_mode
+Ldata_do:       mov     r3, sp                  @ r3 = saved registers; r0 = address of the aborted instruction
+                ldr     r4, [r0]                @ Get instruction
+                mov     r2, #0                  @ r2 collects the FAULT_CODE_* bits
+                tst     r4, #1 << 20            @ Check to see if it is a write instruction
+                orreq   r2, r2, #FAULT_CODE_WRITE @ Indicate write instruction
+                mov     r1, r4, lsr #22         @ Now branch to the relevant processing routine
+                and     r1, r1, #15 << 2        @ r1 = instruction bits 24 - 27, times 4
+                add     pc, pc, r1              @ pc reads two words ahead, i.e. the first table entry
+                movs    pc, lr                  @ filler word between the add and the table
+                b       Ldata_unknown
+                b       Ldata_unknown
+                b       Ldata_unknown
+                b       Ldata_unknown
+                b       Ldata_ldrstr_post       @ ldr   rd, [rn], #m
+                b       Ldata_ldrstr_numindex   @ ldr   rd, [rn, #m]    @ RegVal
+                b       Ldata_ldrstr_post       @ ldr   rd, [rn], rm
+                b       Ldata_ldrstr_regindex   @ ldr   rd, [rn, rm]
+                b       Ldata_ldmstm            @ ldm*a rn, <rlist>
+                b       Ldata_ldmstm            @ ldm*b rn, <rlist>
+                b       Ldata_unknown
+                b       Ldata_unknown
+                b       Ldata_ldrstr_post       @ ldc   rd, [rn], #m    @ Same as ldr   rd, [rn], #m
+                b       Ldata_ldcstc_pre        @ ldc   rd, [rn, #m]
+                b       Ldata_unknown
+Ldata_unknown:  @ Part of jumptable
+                mov     r0, r1                  @ r0 = table offset that was used
+                mov     r1, r4                  @ r1 = the instruction
+                mov     r2, r3                  @ r2 = saved registers
+                b       baddataabort
+Ldata_ldrstr_post:                              @ post-indexed: the address is Rn itself
+                mov     r0, r4, lsr #14         @ Get Rn
+                and     r0, r0, #15 << 2        @ Mask out reg.
+                teq     r0, #15 << 2            @ is Rn the PC?
+                ldr     r0, [r3, r0]            @ Get register
+                biceq   r0, r0, #PCMASK         @ PC: strip the PCMASK bits
+                mov     r1, r0                  @ r0 and r1 both hold the address
+#ifdef FAULT_CODE_LDRSTRPOST
+                orr     r2, r2, #FAULT_CODE_LDRSTRPOST
+#endif
+                b       do_DataAbort
+Ldata_ldrstr_numindex:                          @ pre-indexed with immediate: address is Rn +/- the low 12 bits
+                mov     r0, r4, lsr #14         @ Get Rn
+                and     r0, r0, #15 << 2        @ Mask out reg.
+                teq     r0, #15 << 2            @ is Rn the PC?
+                ldr     r0, [r3, r0]            @ Get register
+                mov     r1, r4, lsl #20         @ low 12 bits of the instruction, moved to the top
+                biceq   r0, r0, #PCMASK         @ PC: strip the PCMASK bits
+                tst     r4, #1 << 23            @ bit 23 selects add or subtract
+                addne   r0, r0, r1, lsr #20
+                subeq   r0, r0, r1, lsr #20
+                mov     r1, r0                  @ r0 and r1 both hold the address
+#ifdef FAULT_CODE_LDRSTRPRE
+                orr     r2, r2, #FAULT_CODE_LDRSTRPRE
+#endif
+                b       do_DataAbort
+Ldata_ldrstr_regindex:                          @ pre-indexed with register: address is Rn +/- shifted Rm
+                mov     r0, r4, lsr #14         @ Get Rn
+                and     r0, r0, #15 << 2        @ Mask out reg.
+                teq     r0, #15 << 2            @ is Rn the PC?
+                ldr     r0, [r3, r0]            @ Get register
+                and     r7, r4, #15             @ r7 = Rm number
+                biceq   r0, r0, #PCMASK         @ Rn is the PC: strip the PCMASK bits
+                teq     r7, #15                 @ Check for PC
+                ldr     r7, [r3, r7, lsl #2]    @ Get Rm
+                and     r8, r4, #0x60           @ Get shift types
+                biceq   r7, r7, #PCMASK         @ Rm is the PC: strip the PCMASK bits
+                mov     r9, r4, lsr #7          @ Get shift amount
+                and     r9, r9, #31
+                teq     r8, #0                  @ LSL shift
+                moveq   r7, r7, lsl r9
+                teq     r8, #0x20               @ LSR shift
+                moveq   r7, r7, lsr r9
+                teq     r8, #0x40               @ ASR shift
+                moveq   r7, r7, asr r9
+                teq     r8, #0x60               @ ROR shift
+                moveq   r7, r7, ror r9
+                tst     r4, #1 << 23            @ bit 23 selects add or subtract
+                addne   r0, r0, r7
+                subeq   r0, r0, r7              @ Apply correction
+                mov     r1, r0                  @ r0 and r1 both hold the address
+#ifdef FAULT_CODE_LDRSTRREG
+                orr     r2, r2, #FAULT_CODE_LDRSTRREG
+#endif
+                b       do_DataAbort
+Ldata_ldmstm:                                   @ block transfer: work out the address range from the register list
+                mov     r7, #0x11
+                orr     r7, r7, r7, lsl #8      @ r7 = 0x1111, one bit per nibble
+                and     r0, r4, r7              @ bit 0 of every nibble of the register list
+                and     r1, r4, r7, lsl #1
+                add     r0, r0, r1, lsr #1      @ ... plus bit 1
+                and     r1, r4, r7, lsl #2
+                add     r0, r0, r1, lsr #2      @ ... plus bit 2
+                and     r1, r4, r7, lsl #3
+                add     r0, r0, r1, lsr #3      @ ... plus bit 3: per-nibble counts
+                add     r0, r0, r0, lsr #8      @ fold the four nibble counts together
+                add     r0, r0, r0, lsr #4
+                and     r7, r0, #15             @ r7 = no. of registers to transfer.
+                mov     r5, r4, lsr #14         @ Get Rn
+                and     r5, r5, #15 << 2        @ r5 = Rn number times 4
+                ldr     r0, [r3, r5]            @ Get reg
+                eor     r6, r4, r4, lsl #2      @ bit 23 of r6 = instruction bit 23 xor bit 21
+                tst     r6, #1 << 23            @ Check inc/dec ^ writeback
+                rsbeq   r7, r7, #0
+                add     r7, r0, r7, lsl #2      @ Do correction (signed)
+                subne   r1, r7, #1
+                subeq   r1, r0, #1
+                moveq   r0, r7
+                tst     r4, #1 << 21            @ Check writeback
+                strne   r7, [r3, r5]            @ store the corrected base into the saved frame
+                eor     r6, r4, r4, lsl #1      @ bit 24 of r6 = instruction bit 24 xor bit 23
+                tst     r6, #1 << 24            @ Check Pre/Post ^ inc/dec
+                addeq   r0, r0, #4
+                addeq   r1, r1, #4
+                teq     r5, #15*4               @ CHECK FOR PC
+                biceq   r1, r1, #PCMASK
+                biceq   r0, r0, #PCMASK
+#ifdef FAULT_CODE_LDMSTM
+                orr     r2, r2, #FAULT_CODE_LDMSTM
+#endif
+                b       do_DataAbort            @ r0, r1 = the address range that was worked out
+Ldata_ldcstc_pre:                               @ pre-indexed coprocessor transfer: address is Rn +/- the low 8 bits
+                mov     r0, r4, lsr #14         @ Get Rn
+                and     r0, r0, #15 << 2        @ Mask out reg.
+                teq     r0, #15 << 2            @ is Rn the PC?
+                ldr     r0, [r3, r0]            @ Get register
+                mov     r1, r4, lsl #24         @ Get offset
+                biceq   r0, r0, #PCMASK         @ PC: strip the PCMASK bits
+                tst     r4, #1 << 23            @ bit 23 selects add or subtract
+                addne   r0, r0, r1, lsr #24
+                subeq   r0, r0, r1, lsr #24
+                mov     r1, r0                  @ r0 and r1 both hold the address
+#ifdef FAULT_CODE_LDCSTC
+                orr     r2, r2, #FAULT_CODE_LDCSTC
+#endif
+                b       do_DataAbort
+/*
+ * This is the return code to user mode for abort handlers
+ */
+ENTRY(ret_from_exception)                       @ common exit: set up tsk and why, then take the slow return path
+                get_thread_info tsk
+                mov     why, #0                 @ not a syscall
+                b       ret_to_user
+                .data
+ENTRY(fp_enter)                                 @ pointer jumped through at .Lbug_undef
+                .word   fpe_not_present         @ initial value
+                .text
+/*
+ * Register switch for older 26-bit only ARMs
+ */
+ENTRY(__switch_to)                              @ NOTE(review): r0/r1 presumably the outgoing/incoming thread_info - confirm against the caller
+                add     r0, r0, #TI_CPU_SAVE    @ r0 = save area of the first structure
+                stmia   r0, {r4 - sl, fp, sp, lr} @ store the current r4 - sl, fp, sp and lr there
+                add     r1, r1, #TI_CPU_SAVE    @ r1 = save area of the second structure
+                ldmia   r1, {r4 - sl, fp, sp, pc}^ @ load the other set; loading pc resumes it
+/*
+ *=============================================================================
+ *              Low-level interface code
+ *-----------------------------------------------------------------------------
+ *              Trap initialisation
+ *-----------------------------------------------------------------------------
+ *
+ * Note - FIQ code has changed.  The default is a couple of words in 0x1c, 0x20
+ * that call _unexp_fiq.  However, we now copy the FIQ routine to 0x1c (removes
+ * some excess cycles).
+ *
+ * What we need to put into 0-0x1c are branches to branch to the kernel.
+ */
+                .section ".init.text",#alloc,#execinstr
+.Ljump_addresses:                                       @ nine-word template read by __trap_init
+                swi     SYS_ERROR0                      @ stored unchanged at address 0
+                .word   vector_undefinstr       - 12    @ each entry: handler address minus (vector address + 8)
+                .word   vector_swi              - 16
+                .word   vector_prefetch         - 20
+                .word   vector_data             - 24
+                .word   vector_addrexcptn       - 28
+                .word   vector_IRQ              - 32
+                .word   _unexp_fiq              - 36
+                b       . + 8                           @ branch opcode that the offsets above are merged into
+/*
+ * initialise the trap system
+ *
+ * Loads the nine words at .Ljump_addresses, turns each of the seven
+ * offsets into a branch instruction by shifting it right two bits and
+ * merging it with the branch opcode word, and stores the resulting
+ * eight words at addresses 0 - 0x1c.
+ */
+ENTRY(__trap_init)
+                stmfd   sp!, {r4 - r7, lr}
+                adr     r1, .Ljump_addresses
+                ldmia   r1, {r1 - r7, ip, lr}           @ r1 = swi word, r2 - r7 and ip = offsets, lr = branch opcode
+                orr     r2, lr, r2, lsr #2              @ undefined instruction vector
+                orr     r3, lr, r3, lsr #2              @ SWI vector
+                orr     r4, lr, r4, lsr #2              @ prefetch abort vector
+                orr     r5, lr, r5, lsr #2              @ data abort vector
+                orr     r6, lr, r6, lsr #2              @ address exception vector
+                orr     r7, lr, r7, lsr #2              @ IRQ vector
+                orr     ip, lr, ip, lsr #2              @ FIQ vector
+                mov     r0, #0
+                stmia   r0, {r1 - r7, ip}               @ write all eight vectors, starting at address 0
+                ldmfd   sp!, {r4 - r7, pc}^             @ restore and return
+                .bss
+__temp_irq:     .space  4                               @ saved lr_irq
+__temp_fiq:     .space  128                             @ area that _unexp_fiq points sp at
diff --git a/arch/arm26/kernel/fiq.c b/arch/arm26/kernel/fiq.c
new file mode 100644
index 000000000000..08a97c9498ff
--- /dev/null
+++ b/arch/arm26/kernel/fiq.c
@@ -0,0 +1,202 @@
+/*
+ *  linux/arch/arm26/kernel/fiq.c
+ *
+ *  Copyright (C) 1998 Russell King
+ *  Copyright (C) 1998, 1999 Phil Blundell
+ *  Copyright (C) 2003 Ian Molton
+ *
+ *  FIQ support written by Philip Blundell <philb@gnu.org>, 1998.
+ *
+ *  FIQ support re-written by Russell King to be more generic
+ *
+ * We now properly support a method by which the FIQ handlers can
+ * be stacked onto the vector.  We still do not support sharing
+ * the FIQ vector itself.
+ *
+ * Operation is as follows:
+ *  1. Owner A claims FIQ:
+ *     - default_fiq relinquishes control.
+ *  2. Owner A:
+ *     - inserts code.
+ *     - sets any registers,
+ *     - enables FIQ.
+ *  3. Owner B claims FIQ:
+ *     - if owner A has a relinquish function.
+ *       - disable FIQs.
+ *       - saves any registers.
+ *       - returns zero.
+ *  4. Owner B:
+ *     - inserts code.
+ *     - sets any registers,
+ *     - enables FIQ.
+ *  5. Owner B releases FIQ:
+ *     - Owner A is asked to reacquire FIQ:
+ *       - inserts code.
+ *       - restores saved registers.
+ *       - enables FIQ.
+ *  6. Goto 3
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <asm/fiq.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/pgalloc.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#define FIQ_VECTOR (vectors_base() + 0x1c)
+static unsigned long no_fiq_insn;
+#define unprotect_page_0()
+#define protect_page_0()
+/*
+ * fiq_op callback of the default owner.
+ *
+ * A relinquish request (relinquish != 0) is always granted and needs no
+ * work.  A reacquire request (relinquish == 0) writes the instruction
+ * saved in no_fiq_insn back to the FIQ vector.  Returns 0 in both cases;
+ * ref is not used.
+ */
+static int fiq_def_op(void *ref, int relinquish)
+{
+        if (relinquish)
+                return 0;
+        unprotect_page_0();
+        *(unsigned long *)FIQ_VECTOR = no_fiq_insn;
+        protect_page_0();
+        return 0;
+}
+static struct fiq_handler default_owner = {
+        .name   = "default",
+        .fiq_op = fiq_def_op,
+};
+static struct fiq_handler *current_fiq = &default_owner;
+/*
+ * Print the name of the current FIQ owner to the seq_file, unless the
+ * FIQ is still held by the default owner.  Always returns 0; v is unused.
+ */
+int show_fiq_list(struct seq_file *p, void *v)
+{
+        if (current_fiq == &default_owner)
+                return 0;
+        seq_printf(p, "FIQ:              %s\n", current_fiq->name);
+        return 0;
+}
+void set_fiq_handler(void *start, unsigned int length)
+{       /*
+         * Copy `length` bytes of handler code from `start` to FIQ_VECTOR
+         * (vectors_base() + 0x1c).  No bound is applied to `length` here.
+         * unprotect_page_0()/protect_page_0() expand to nothing in this file.
+         */
+        unprotect_page_0();
+        memcpy((void *)FIQ_VECTOR, start, length);
+        protect_page_0();
+}
+/*
+ * Taking an interrupt in FIQ mode is death, so both these functions
+ * disable irqs for the duration.
+ *
+ * set_fiq_regs - load r8-r14 from regs while the CPU is in FIQ mode.
+ *
+ * The sequence copies pc (which carries the PSR bits in 26-bit mode) into
+ * a temporary, clears the two low bits, ORs in
+ * PSR_I_BIT | PSR_F_BIT | MODE_FIQ26 and applies the result with teqp.
+ * r8-r14 are then loaded from &regs->ARM_r8 and the saved word is
+ * applied again to go back to the previous mode.  The "mov r0, r0" after
+ * each teqp is a no-op instruction.
+ *
+ * The template is written as adjacent string literals with explicit
+ * newlines; a single literal spanning several source lines is not
+ * valid C.
+ */
+void set_fiq_regs(struct pt_regs *regs)
+{
+        register unsigned long tmp, tmp2;
+        __asm__ volatile (
+        "mov    %0, pc\n\t"
+        "bic    %1, %0, #0x3\n\t"
+        "orr    %1, %1, %3\n\t"
+        "teqp   %1, #0          @ select FIQ mode\n\t"
+        "mov    r0, r0\n\t"
+        "ldmia  %2, {r8 - r14}\n\t"
+        "teqp   %0, #0          @ return to SVC mode\n\t"
+        "mov    r0, r0"
+        : "=&r" (tmp), "=&r" (tmp2)
+        : "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | MODE_FIQ26)
+        /* These registers aren't modified by the above code in a way
+           visible to the compiler, but we mark them as clobbers anyway
+           so that GCC won't put any of the input or output operands in
+           them.  */
+        : "r8", "r9", "r10", "r11", "r12", "r13", "r14");
+}
+/*
+ * get_fiq_regs - store r8-r14 into regs while the CPU is in FIQ mode.
+ *
+ * The sequence copies pc (which carries the PSR bits in 26-bit mode) into
+ * a temporary, clears the two low bits, ORs in
+ * PSR_I_BIT | PSR_F_BIT | MODE_FIQ26 and applies the result with teqp.
+ * r8-r14 are then stored at &regs->ARM_r8 and the saved word is applied
+ * again to go back to the previous mode.
+ *
+ * The template is written as adjacent string literals with explicit
+ * newlines; a single literal spanning several source lines is not
+ * valid C.
+ */
+void get_fiq_regs(struct pt_regs *regs)
+{
+        register unsigned long tmp, tmp2;
+        __asm__ volatile (
+        "mov    %0, pc\n\t"
+        "bic    %1, %0, #0x3\n\t"
+        "orr    %1, %1, %3\n\t"
+        "teqp   %1, #0          @ select FIQ mode\n\t"
+        "mov    r0, r0\n\t"
+        "stmia  %2, {r8 - r14}\n\t"
+        "teqp   %0, #0          @ return to SVC mode\n\t"
+        "mov    r0, r0"
+        : "=&r" (tmp), "=&r" (tmp2)
+        : "r" (&regs->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | MODE_FIQ26)
+        /* These registers aren't modified by the above code in a way
+           visible to the compiler, but we mark them as clobbers anyway
+           so that GCC won't put any of the input or output operands in
+           them.  */
+        : "r8", "r9", "r10", "r11", "r12", "r13", "r14");
+}
+/*
+ * Try to take over the FIQ for handler f.
+ *
+ * The current owner, if any, is asked to relinquish through its fiq_op
+ * callback; an owner without a callback cannot be displaced (-EBUSY).
+ * On success f is pushed on top of the owner stack and 0 is returned,
+ * otherwise the callback's non-zero result is passed back.
+ */
+int claim_fiq(struct fiq_handler *f)
+{
+        int ret;
+        if (!current_fiq)
+                ret = 0;
+        else if (current_fiq->fiq_op == NULL)
+                ret = -EBUSY;
+        else
+                ret = current_fiq->fiq_op(current_fiq->dev_id, 1);
+        if (ret)
+                return ret;
+        f->next = current_fiq;
+        current_fiq = f;
+        return 0;
+}
+/*
+ * Give up the FIQ held by f.
+ *
+ * Only the current owner may release.  Ownership then walks down the
+ * stack of previous owners, asking each to reacquire (fiq_op with 0),
+ * and stops at the first one whose callback returns 0.
+ */
+void release_fiq(struct fiq_handler *f)
+{
+        if (current_fiq == f) {
+                do {
+                        current_fiq = current_fiq->next;
+                } while (current_fiq->fiq_op(current_fiq->dev_id, 0));
+                return;
+        }
+        printk(KERN_ERR "%s FIQ trying to release %s FIQ\n",
+               f->name, current_fiq->name);
+#ifdef CONFIG_DEBUG_ERRORS
+        __backtrace();
+#endif
+}
+void enable_fiq(int fiq)
+{       /* FIQ number `fiq` is handled as interrupt number fiq + FIQ_START. */
+        enable_irq(fiq + FIQ_START);
+}
+void disable_fiq(int fiq)
+{       /* Counterpart of enable_fiq(); same fiq + FIQ_START numbering. */
+        disable_irq(fiq + FIQ_START);
+}
+EXPORT_SYMBOL(set_fiq_handler);
+EXPORT_SYMBOL(set_fiq_regs);
+EXPORT_SYMBOL(get_fiq_regs);
+EXPORT_SYMBOL(claim_fiq);
+EXPORT_SYMBOL(release_fiq);
+EXPORT_SYMBOL(enable_fiq);
+EXPORT_SYMBOL(disable_fiq);
+void __init init_FIQ(void)
+{       /* Remember the word currently at the FIQ vector so fiq_def_op() can restore it. */
+        no_fiq_insn = *(unsigned long *)FIQ_VECTOR;
+        set_fs(get_fs());       /* NOTE(review): re-applies the current fs value; purpose not evident here */
+}
diff --git a/arch/arm26/kernel/head.S b/arch/arm26/kernel/head.S
new file mode 100644
index 000000000000..8bfc62539ba6
--- /dev/null
+++ b/arch/arm26/kernel/head.S
@@ -0,0 +1,113 @@
+/*
+ *  linux/arch/arm26/kernel/head.S
+ *
+ *  Copyright (C) 1994-2000 Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  26-bit kernel startup code
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/mach-types.h>
+                .globl  swapper_pg_dir
+                .equ    swapper_pg_dir, 0x0207d000
+/*
+ * Entry point.
+ *
+ * Sequence: jump to _stext if running below 0x02000000, relocate the
+ * parameter page when r0 is non-zero, load the LC0 table, zero the range
+ * from __bss_start up to _end, store the detected processor and machine
+ * IDs, copy the data section when built for XIP, then branch to
+ * start_kernel with fp cleared.
+ */
+                .section ".init.text",#alloc,#execinstr
+ENTRY(stext)
+__entry:
+                cmp     pc, #0x02000000
+                ldrlt   pc, LC0                 @ if 0x01800000, call at 0x02080000
+                teq     r0, #0                  @ Check for old calling method
+                blne    oldparams               @ Move page if old
+                adr     r0, LC0
+                ldmib   r0, {r2-r5, sp}         @ Setup stack (and fetch other values)
+                mov     r0, #0                  @ Clear BSS
+1:              cmp     r2, r3
+                strcc   r0, [r2], #4
+                bcc     1b
+                bl      detect_proc_type
+                str     r0, [r4]                /* r4 = address of processor_id (from LC0) */
+                bl      detect_arch_type
+                str     r0, [r5]                /* r5 = address of __machine_arch_type (from LC0) */
+#ifdef CONFIG_XIP_KERNEL
+               ldr     r3, ETEXT                       @ data section copy
+               ldr     r4, SDATA
+               ldr     r5, EDATA
+1:
+               ldr     r6, [r3], #4
+               str     r6, [r4], #4
+               cmp     r4, r5
+               blt     1b
+#endif
+                mov     fp, #0
+                b       start_kernel
+LC0:            .word   _stext                  /* skipped by the ldmib above; used by ldrlt pc, LC0 */
+                .word   __bss_start             @ r2
+                .word   _end                    @ r3
+                .word   processor_id            @ r4
+                .word   __machine_arch_type     @ r5
+                .word   init_thread_union+8192  @ sp
+#ifdef CONFIG_XIP_KERNEL
+ETEXT:          .word   _endtext
+SDATA:          .word   _sdata
+EDATA:          .word   __bss_start
+#endif
+arm2_id:        .long   0x41560200  @ ARM2 and 250 dont have a CPUID
+arm250_id:      .long   0x41560250  @ So we create some after probing for them
+                .align
+oldparams:      mov     r4, #0x02000000         /* copy the block at r0 to 0x0207c000 */
+                add     r3, r4, #0x00080000     /* r3 = 0x02080000, end of the destination */
+                add     r4, r4, #0x0007c000     /* r4 = 0x0207c000, start of the destination */
+1:              ldmia   r0!, {r5 - r12}         /* eight words (32 bytes) per pass */
+                stmia   r4!, {r5 - r12}
+                cmp     r4, r3
+                blt     1b
+                mov     pc, lr
+/*
+ * We need some way to automatically detect the difference between
+ * these two machines.  Unfortunately, it is not possible to detect
+ * the presence of the SuperIO chip, because that will hang the old
+ * Archimedes machines solid.
+ *
+ * As written, the machine type returned in r0 is selected at build time
+ * by CONFIG_ARCH_ARC / CONFIG_ARCH_A5K; nothing is probed at run time.
+ */
+/* DAG: Outdated, these have been combined !!!!!!! */
+detect_arch_type:
+#if defined(CONFIG_ARCH_ARC)
+                mov     r0, #MACH_TYPE_ARCHIMEDES
+#elif defined(CONFIG_ARCH_A5K)
+                mov     r0, #MACH_TYPE_A5K
+#endif
+                mov     pc, lr                  /* r0 is left unchanged when neither option is set */
+detect_proc_type:
+                mov     ip, lr                  /* return through ip; NOTE(review): presumably because a trap overwrites lr - confirm */
+                mov     r2, #0xea000000         @ Point undef instr to continuation
+                adr     r0, continue - 12
+                orr     r0, r2, r0, lsr #2      /* branch opcode targeting continue from the slot at address 4 */
+                mov     r1, #0
+                str     r0, [r1, #4]
+                ldr     r0, arm2_id             /* value left in r0 if the swp below traps */
+                swp     r2, r2, [r1]            @ check for swp (ARM2 cant)
+                ldr     r0, arm250_id           /* value left in r0 if the mrc below traps */
+                mrc     15, 0, r3, c0, c0       @ check for CP#15 (ARM250 cant)
+                mov     r0, r3                  /* neither trapped: return the ID read by the mrc */
+continue:       mov     r2, #0xeb000000         @ Make undef vector loop
+                sub     r2, r2, #2              /* r2 = 0xeafffffe, a branch to itself */
+                str     r2, [r1, #4]
+                mov     pc, ip
diff --git a/arch/arm26/kernel/init_task.c b/arch/arm26/kernel/init_task.c
new file mode 100644
index 000000000000..4191565b889b
--- /dev/null
+++ b/arch/arm26/kernel/init_task.c
@@ -0,0 +1,49 @@
+/*
+ *  linux/arch/arm26/kernel/init_task.c
+ *
+ * Copyright (C) 2003 Ian Molton
+ *
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/mqueue.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+EXPORT_SYMBOL(init_mm);
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by making sure
+ * the linker maps this in the .text segment right after head.S,
+ * and making the linker scripts ensure the proper alignment.
+ *
+ * FIXME - should this be 32K alignment on arm26?
+ *
+ * The things we do for performance...
+ */
+union thread_union init_thread_union
+        __attribute__((__section__(".init.task"))) =
+                { INIT_THREAD_INFO(init_task) };
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+EXPORT_SYMBOL(init_task);
diff --git a/arch/arm26/kernel/irq.c b/arch/arm26/kernel/irq.c
new file mode 100644
index 000000000000..f3cc1036e5bc
--- /dev/null
+++ b/arch/arm26/kernel/irq.c
@@ -0,0 +1,716 @@
+/*
+ *  linux/arch/arm26/kernel/irq.c
+ *
+ *  Copyright (C) 1992 Linus Torvalds
+ *  Modifications for ARM processor Copyright (C) 1995-2000 Russell King.
+ *  'Borrowed' for ARM26 and (C) 2003 Ian Molton.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains the code used by various IRQ handling routines:
+ *  asking for different IRQ's should be done through these routines
+ *  instead of just grabbing them. Thus setups with different IRQ numbers
+ *  shouldn't result in any weird surprises, and installing new handlers
+ *  should be easier.
+ *
+ *  IRQ's are in fact implemented a bit like signal handlers for the kernel.
+ *  Naturally it's not a 1:1 relation, but there are similarities.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/seq_file.h>
+#include <linux/errno.h>
+#include <asm/irq.h>
+#include <asm/system.h>
+#include <asm/irqchip.h>
+//FIXME - this ought to be in a header IMO
+void __init arc_init_irq(void);
+/*
+ * Maximum IRQ count.  Currently, this is arbitrary.  However, it should
+ * not be set too low to prevent false triggering.  Conversely, if it
+ * is set too high, then you could miss a stuck IRQ.
+ *
+ * FIXME Maybe we ought to set a timer and re-enable the IRQ at a later time?
+ */
+#define MAX_IRQ_CNT     100000
+static volatile unsigned long irq_err_count;
+static DEFINE_SPINLOCK(irq_controller_lock);
+struct irqdesc irq_desc[NR_IRQS];
+/*
+ * Dummy mask/unmask handler
+ *
+ * Does nothing; used for the ack, mask and unmask hooks of bad_chip.
+ */
+void dummy_mask_unmask_irq(unsigned int irq)
+{
+}
+void do_bad_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{       /* Fallback handler: count the event in irq_err_count and log it. */
+        irq_err_count += 1;
+        printk(KERN_ERR "IRQ: spurious interrupt %d\n", irq);
+}
+static struct irqchip bad_chip = {
+        .ack    = dummy_mask_unmask_irq,
+        .mask   = dummy_mask_unmask_irq,
+        .unmask = dummy_mask_unmask_irq,
+};
+static struct irqdesc bad_irq_desc = {
+        .chip   = &bad_chip,
+        .handle = do_bad_IRQ,
+        .depth  = 1,
+};
+/**
+ *      disable_irq - disable an irq (lazily, without waiting)
+ *      @irq: Interrupt to disable
+ *
+ *      Disable the selected interrupt line.  We do this lazily: only the
+ *      descriptor is updated (depth is incremented and, on the first
+ *      nested call, enabled is cleared).  The chip is not masked here and
+ *      this function does not wait for a running handler to finish.
+ *
+ *      @irq indexes irq_desc without a range check.
+ *
+ *      This function may be called from IRQ context.
+ */
+void disable_irq(unsigned int irq)
+{
+        struct irqdesc *desc = irq_desc + irq;
+        unsigned long flags;
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        if (!desc->depth++)
+                desc->enabled = 0;
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+/**
+ *      enable_irq - enable interrupt handling on an irq
+ *      @irq: Interrupt to enable
+ *
+ *      Re-enables the processing of interrupts on this IRQ line.
+ *      Note that this may call the interrupt handler, so you may
+ *      get unexpected results if you hold IRQs disabled.
+ *
+ *      Calls nest with disable_irq(): the line is only unmasked when the
+ *      depth count drops back to zero.  An enable without a matching
+ *      disable is reported and otherwise ignored.
+ *
+ *      This function may be called from IRQ context.
+ */
+void enable_irq(unsigned int irq)
+{
+        struct irqdesc *desc = irq_desc + irq;
+        unsigned long flags;
+        int pending = 0;
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        if (unlikely(!desc->depth)) {
+                printk("enable_irq(%u) unbalanced from %p\n", irq,
+                        __builtin_return_address(0)); //FIXME bum addresses reported - why?
+        } else if (!--desc->depth) {
+                desc->probing = 0;
+                desc->enabled = 1;
+                desc->chip->unmask(irq);
+                pending = desc->pending;
+                desc->pending = 0;
+                /*
+                 * If the interrupt was waiting to be processed,
+                 * retrigger it.  A chip may leave the rerun hook
+                 * unset (bad_chip does), so check before calling.
+                 */
+                if (pending && desc->chip->rerun)
+                        desc->chip->rerun(irq);
+        }
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+/*
+ * seq_file show callback for the interrupt list.
+ *
+ * v points at the loff_t position.  Positions below NR_IRQS print one
+ * line for that IRQ (count plus the names of all registered actions);
+ * IRQs with no action print nothing.  Position NR_IRQS prints the FIQ
+ * owner and the error count.  Always returns 0.
+ */
+int show_interrupts(struct seq_file *p, void *v)
+{
+        int i = *(loff_t *) v;
+        struct irqaction * action;
+        if (i < NR_IRQS) {
+                action = irq_desc[i].action;
+                /* nothing registered on this line: emit no output */
+                if (!action)
+                        return 0;
+                seq_printf(p, "%3d: %10u ", i, kstat_irqs(i));
+                seq_printf(p, "  %s", action->name);
+                for (action = action->next; action; action = action->next) {
+                        seq_printf(p, ", %s", action->name);
+                }
+                seq_putc(p, '\n');
+        } else if (i == NR_IRQS) {
+                show_fiq_list(p, v);
+                seq_printf(p, "Err: %10lu\n", irq_err_count);
+        }
+        return 0;
+}
+/*
+ * IRQ lock detection.
+ *
+ * Counts how often this IRQ fires within the same jiffy while the
+ * interrupted instruction pointer stays at, or up to 8 bytes below, the
+ * recorded one.  Once the count exceeds MAX_IRQ_CNT the IRQ is reported
+ * and 1 is returned; otherwise 0.  Any interrupt outside that window
+ * restarts the count and records the new position and jiffy.
+ */
+static int check_irq_lock(struct irqdesc *desc, int irq, struct pt_regs *regs)
+{
+        unsigned long instr_ptr = instruction_pointer(regs);
+        int same_spot = desc->lck_jif == jiffies &&
+                        desc->lck_pc >= instr_ptr &&
+                        desc->lck_pc < instr_ptr + 8;
+        if (!same_spot) {
+                desc->lck_cnt = 0;
+                desc->lck_pc  = instruction_pointer(regs);
+                desc->lck_jif = jiffies;
+                return 0;
+        }
+        desc->lck_cnt += 1;
+        if (desc->lck_cnt <= MAX_IRQ_CNT)
+                return 0;
+        printk(KERN_ERR "IRQ LOCK: IRQ%d is locking the system, disabled\n", irq);
+        return 1;
+}
+static void
+__do_irq(unsigned int irq, struct irqaction *action, struct pt_regs *regs)
+{       /*
+         * Run every handler on the action chain starting at `action`.
+         * The irq_controller_lock is dropped while the handlers run and
+         * retaken with spin_lock_irq() before returning.
+         */
+        unsigned int status;
+        int ret;
+        spin_unlock(&irq_controller_lock);
+        if (!(action->flags & SA_INTERRUPT))    /* only the first action's flags are consulted */
+                local_irq_enable();
+        status = 0;
+        do {
+                ret = action->handler(irq, action->dev_id, regs);
+                if (ret == IRQ_HANDLED)
+                        status |= action->flags;        /* collect flags of handlers that claimed the IRQ */
+                action = action->next;
+        } while (action);
+        if (status & SA_SAMPLE_RANDOM)
+                add_interrupt_randomness(irq);
+        spin_lock_irq(&irq_controller_lock);
+}
+/*
+ * This is for software-decoded IRQs.  The caller is expected to
+ * handle the ack, clear, mask and unmask issues.
+ *
+ * Marks the descriptor as triggered, bumps the per-CPU count for this
+ * IRQ and runs the action chain if one is registered.  No chip hook is
+ * called here.
+ */
+void
+do_simple_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+        struct irqaction *action;
+        const int cpu = smp_processor_id();
+        desc->triggered = 1;
+        kstat_cpu(cpu).irqs[irq]++;
+        action = desc->action;
+        if (action)
+                __do_irq(irq, desc->action, regs);
+}
+/*
+ * Most edge-triggered IRQ implementations seem to take a broken
+ * approach to this.  Hence the complexity.
+ *
+ * Flow: if the IRQ is already being handled or is disabled, it is only
+ * recorded as pending, masked and acked.  Otherwise it is acked, marked
+ * running, and the action chain is run repeatedly for as long as the
+ * pending flag is set again in the meantime.  Afterwards, if the action
+ * has gone away or check_irq_lock() reports a lock-up, control falls
+ * through to the same pending/mask/ack path.
+ */
+void
+do_edge_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+        const int cpu = smp_processor_id();
+        desc->triggered = 1;
+        /*
+         * If we're currently running this IRQ, or its disabled,
+         * we shouldn't process the IRQ.  Instead, turn on the
+         * hardware masks.
+         */
+        if (unlikely(desc->running || !desc->enabled))
+                goto running;
+        /*
+         * Acknowledge and clear the IRQ, but don't mask it.
+         */
+        desc->chip->ack(irq);
+        /*
+         * Mark the IRQ currently in progress.
+         */
+        desc->running = 1;
+        kstat_cpu(cpu).irqs[irq]++;
+        do {
+                struct irqaction *action;
+                action = desc->action;
+                if (!action)
+                        break;
+                if (desc->pending && desc->enabled) {
+                        desc->pending = 0;
+                        desc->chip->unmask(irq);
+                }
+                __do_irq(irq, action, regs);
+        } while (desc->pending);
+        desc->running = 0;
+        /*
+         * If we were disabled or freed, shut down the handler.
+         */
+        if (likely(desc->action && !check_irq_lock(desc, irq, regs)))
+                return;
+ running:
+        /*
+         * We got another IRQ while this one was masked or
+         * currently running.  Delay it.
+         */
+        desc->pending = 1;
+        desc->chip->mask(irq);
+        desc->chip->ack(irq);
+}
+/*
+ * Level-based IRQ handler.  Nice and simple.
+ *
+ * The chip ack hook is called first.  If the IRQ is enabled and has an
+ * action, the chain is run and the line is unmasked again, unless it was
+ * disabled in the meantime or check_irq_lock() reports a lock-up.
+ */
+void
+do_level_IRQ(unsigned int irq, struct irqdesc *desc, struct pt_regs *regs)
+{
+        struct irqaction *action;
+        const int cpu = smp_processor_id();
+        desc->triggered = 1;
+        /*
+         * Acknowledge, clear _AND_ disable the interrupt.
+         */
+        desc->chip->ack(irq);
+        if (likely(desc->enabled)) {
+                kstat_cpu(cpu).irqs[irq]++;
+                /*
+                 * Return with this interrupt masked if no action
+                 */
+                action = desc->action;
+                if (action) {
+                        __do_irq(irq, desc->action, regs);
+                        if (likely(desc->enabled &&
+                                   !check_irq_lock(desc, irq, regs)))
+                                desc->chip->unmask(irq);
+                }
+        }
+}
+/*
+ * Entry point for every hardware IRQ.  Decoded IRQs should not come
+ * through here; they are expected to provide their own 'handler'.
+ *
+ * The descriptor's handle hook is called with irq_controller_lock held,
+ * bracketed by irq_enter()/irq_exit().
+ */
+asmlinkage void asm_do_IRQ(int irq, struct pt_regs *regs)
+{
+        struct irqdesc *desc;
+        /*
+         * Some hardware gives randomly wrong interrupts.  Rather
+         * than crashing, route them to the bad-IRQ descriptor.
+         */
+        if (irq >= NR_IRQS)
+                desc = &bad_irq_desc;
+        else
+                desc = irq_desc + irq;
+        irq_enter();
+        spin_lock(&irq_controller_lock);
+        desc->handle(irq, desc, regs);
+        spin_unlock(&irq_controller_lock);
+        irq_exit();
+}
+void __set_irq_handler(unsigned int irq, irq_handler_t handle, int is_chained)
+{       /*
+         * Install `handle` as the flow handler of `irq`.  A NULL handle
+         * means do_bad_IRQ.  Installing do_bad_IRQ masks and acks the
+         * line and marks it disabled (depth 1).  Installing a chained
+         * handler clears valid and probe_ok, resets depth and unmasks
+         * the line.  Out-of-range irq numbers are logged and ignored.
+         */
+        struct irqdesc *desc;
+        unsigned long flags;
+        if (irq >= NR_IRQS) {
+                printk(KERN_ERR "Trying to install handler for IRQ%d\n", irq);
+                return;
+        }
+        if (handle == NULL)
+                handle = do_bad_IRQ;
+        desc = irq_desc + irq;
+        if (is_chained && desc->chip == &bad_chip)      /* warning only; installation still proceeds */
+                printk(KERN_WARNING "Trying to install chained handler for IRQ%d\n", irq);
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        if (handle == do_bad_IRQ) {
+                desc->chip->mask(irq);
+                desc->chip->ack(irq);
+                desc->depth = 1;
+                desc->enabled = 0;
+        }
+        desc->handle = handle;
+        if (handle != do_bad_IRQ && is_chained) {
+                desc->valid = 0;
+                desc->probe_ok = 0;
+                desc->depth = 0;
+                desc->chip->unmask(irq);
+        }
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+/*
+ * Attach chip to interrupt irq.  Passing NULL installs bad_chip.
+ * Out-of-range irq numbers are logged and ignored.
+ */
+void set_irq_chip(unsigned int irq, struct irqchip *chip)
+{
+        unsigned long flags;
+        if (irq >= NR_IRQS) {
+                printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+                return;
+        }
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        irq_desc[irq].chip = chip ? chip : &bad_chip;
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+/*
+ * Ask the chip behind irq to switch to trigger type `type`.
+ *
+ * Returns -ENODEV for an out-of-range irq, -ENXIO when the chip has no
+ * type hook, otherwise whatever the hook returns.
+ */
+int set_irq_type(unsigned int irq, unsigned int type)
+{
+        struct irqdesc *desc;
+        unsigned long flags;
+        int ret;
+        if (irq >= NR_IRQS) {
+                printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
+                return -ENODEV;
+        }
+        desc = &irq_desc[irq];
+        if (!desc->chip->type)
+                return -ENXIO;
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        ret = desc->chip->type(irq, type);
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+        return ret;
+}
+/*
+ * Update the valid, probe_ok and noautoenable bits of irq from the
+ * IRQF_VALID, IRQF_PROBE and IRQF_NOAUTOEN bits of iflags.
+ * Out-of-range irq numbers are logged and ignored.
+ */
+void set_irq_flags(unsigned int irq, unsigned int iflags)
+{
+        struct irqdesc *desc;
+        unsigned long flags;
+        if (irq >= NR_IRQS) {
+                printk(KERN_ERR "Trying to set irq flags for IRQ%d\n", irq);
+                return;
+        }
+        desc = &irq_desc[irq];
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        desc->valid = !!(iflags & IRQF_VALID);
+        desc->probe_ok = !!(iflags & IRQF_PROBE);
+        desc->noautoenable = !!(iflags & IRQF_NOAUTOEN);
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+int setup_irq(unsigned int irq, struct irqaction *new)
+{       /*
+         * Append `new` to the action chain of `irq`.  Returns -EBUSY if
+         * the line is already in use and either side lacks SA_SHIRQ,
+         * otherwise 0.  The first action on a line resets the descriptor
+         * state and, unless noautoenable is set, enables and unmasks it.
+         * `irq` indexes irq_desc without a range check in this function.
+         */
+        int shared = 0;
+        struct irqaction *old, **p;
+        unsigned long flags;
+        struct irqdesc *desc;
+        /*
+         * Some drivers like serial.c use request_irq() heavily,
+         * so we have to be careful not to interfere with a
+         * running system.
+         */
+        if (new->flags & SA_SAMPLE_RANDOM) {
+                /*
+                 * This function might sleep, we want to call it first,
+                 * outside of the atomic block.
+                 * Yes, this might clear the entropy pool if the wrong
+                 * driver is attempted to be loaded, without actually
+                 * installing a new handler, but is this really a problem,
+                 * only the sysadmin is able to do this.
+                 */
+                rand_initialize_irq(irq);
+        }
+        /*
+         * The following block of code has to be executed atomically
+         */
+        desc = irq_desc + irq;
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        p = &desc->action;
+        if ((old = *p) != NULL) {
+                /* Can't share interrupts unless both agree to */
+                if (!(old->flags & new->flags & SA_SHIRQ)) {
+                        spin_unlock_irqrestore(&irq_controller_lock, flags);
+                        return -EBUSY;
+                }
+                /* add new interrupt at end of irq queue */
+                do {
+                        p = &old->next;
+                        old = *p;
+                } while (old);
+                shared = 1;
+        }
+        *p = new;
+        if (!shared) {
+                desc->probing = 0;
+                desc->running = 0;
+                desc->pending = 0;
+                desc->depth = 1;        /* stays disabled unless auto-enable applies below */
+                if (!desc->noautoenable) {
+                        desc->depth = 0;
+                        desc->enabled = 1;
+                        desc->chip->unmask(irq);
+                }
+        }
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+        return 0;
+}
+/**
+ *      request_irq - allocate an interrupt line
+ *      @irq: Interrupt line to allocate
+ *      @handler: Function to be called when the IRQ occurs
+ *      @irq_flags: Interrupt type flags
+ *      @devname: An ascii name for the claiming device
+ *      @dev_id: A cookie passed back to the handler function
+ *
+ *      This call allocates interrupt resources and enables the
+ *      interrupt line and IRQ handling. From the point this
+ *      call is made your handler function may be invoked. Since
+ *      your handler function must clear any interrupt the board
+ *      raises, you must take care both to initialise your hardware
+ *      and to set up the interrupt handler in the right order.
+ *
+ *      Dev_id must be globally unique. Normally the address of the
+ *      device data structure is used as the cookie. Since the handler
+ *      receives this value it makes sense to use it.
+ *
+ *      If your interrupt is shared you must pass a non NULL dev_id
+ *      as this is required when freeing the interrupt.
+ *
+ *      Flags:
+ *
+ *      SA_SHIRQ                Interrupt is shared
+ *
+ *      SA_INTERRUPT            Disable local interrupts while processing
+ *
+ *      SA_SAMPLE_RANDOM        The interrupt can be used for entropy
+ *
+ *      Returns 0 on success, -EINVAL for an out-of-range or invalid irq,
+ *      a NULL handler or a shared request without dev_id, -ENOMEM if the
+ *      action cannot be allocated, or the error from setup_irq().
+ */
+//FIXME - handler used to return void - whats the significance of the change?
+int request_irq(unsigned int irq, irqreturn_t (*handler)(int, void *, struct pt_regs *),
+                 unsigned long irq_flags, const char * devname, void *dev_id)
+{
+        int retval;     /* holds negative errno values, so it must be signed */
+        struct irqaction *action;
+        if (irq >= NR_IRQS || !irq_desc[irq].valid || !handler ||
+            ((irq_flags & SA_SHIRQ) && !dev_id))
+                return -EINVAL;
+        action = kmalloc(sizeof(*action), GFP_KERNEL);
+        if (!action)
+                return -ENOMEM;
+        action->handler = handler;
+        action->flags = irq_flags;
+        cpus_clear(action->mask);
+        action->name = devname;
+        action->next = NULL;
+        action->dev_id = dev_id;
+        retval = setup_irq(irq, action);
+        /* setup_irq() did not link the action in on failure: release it */
+        if (retval)
+                kfree(action);
+        return retval;
+}
+EXPORT_SYMBOL(request_irq);
+/**
+ *      free_irq - free an interrupt
+ *      @irq: Interrupt line to free
+ *      @dev_id: Device identity to free
+ *
+ *      Remove an interrupt handler. The first action on the line whose
+ *      dev_id matches is unlinked and freed.
+ *      On a shared IRQ the caller must ensure the interrupt is disabled
+ *      on the card it drives before calling this function.
+ *
+ *      NOTE(review): this function only unlinks and frees the action;
+ *      nothing here masks or disables the line when the last action
+ *      is removed.
+ *
+ *      This function may be called from interrupt context.
+ */
+void free_irq(unsigned int irq, void *dev_id)
+{
+        struct irqaction * action, **p;
+        unsigned long flags;
+        if (irq >= NR_IRQS || !irq_desc[irq].valid) {
+                printk(KERN_ERR "Trying to free IRQ%d\n",irq);
+#ifdef CONFIG_DEBUG_ERRORS
+                __backtrace();
+#endif
+                return;
+        }
+        spin_lock_irqsave(&irq_controller_lock, flags);
+        for (p = &irq_desc[irq].action; (action = *p) != NULL; p = &action->next) {
+                if (action->dev_id != dev_id)
+                        continue;
+                /* Found it - now free it */
+                *p = action->next;
+                kfree(action);
+                goto out;
+        }
+        printk(KERN_ERR "Trying to free free IRQ%d\n",irq);     /* no action with this dev_id was found */
+#ifdef CONFIG_DEBUG_ERRORS
+        __backtrace();
+#endif
+out:
+        spin_unlock_irqrestore(&irq_controller_lock, flags);
+}
+EXPORT_SYMBOL(free_irq);
+/*
+ * Begin interrupt probing.
+ *
+ * Unlike other architectures this does not return a mask of interrupts;
+ * the return value is the number of lines left armed for the probe.
+ * Which lines those are is recorded in the probing flag of each irq_desc
+ * entry.
+ */
+unsigned long probe_irq_on(void)
+{
+        unsigned int i, irqs = 0;
+        unsigned long delay;
+        /*
+         * Arm every probe-able line that has no action yet.
+         */
+        spin_lock_irq(&irq_controller_lock);
+        for (i = 0; i < NR_IRQS; i++) {
+                struct irqdesc *desc = &irq_desc[i];
+                if (desc->probe_ok && !desc->action) {
+                        desc->probing = 1;
+                        desc->triggered = 0;
+                        if (desc->chip->type)
+                                desc->chip->type(i, IRQT_PROBE);
+                        desc->chip->unmask(i);
+                        irqs += 1;
+                }
+        }
+        spin_unlock_irq(&irq_controller_lock);
+        /*
+         * Busy-wait at least 100ms so spurious interrupts show up.
+         */
+        delay = jiffies + HZ/10;
+        while (time_before(jiffies, delay))
+                /* spin */;
+        /*
+         * Anything that fired during the wait is treated as spurious
+         * and dropped from the probe.
+         */
+        spin_lock_irq(&irq_controller_lock);
+        for (i = 0; i < NR_IRQS; i++) {
+                struct irqdesc *desc = &irq_desc[i];
+                if (!desc->probing || !desc->triggered)
+                        continue;
+                desc->probing = 0;
+                irqs -= 1;
+        }
+        spin_unlock_irq(&irq_controller_lock);
+        return irqs;
+}
+EXPORT_SYMBOL(probe_irq_on);
+/*
+ * Finish an interrupt probe.
+ *
+ * Returns the number of the single interrupt that is both marked as
+ * probing and has triggered, or NO_IRQ when there is none or more than
+ * one.  The irqs argument is not used.
+ */
+int probe_irq_off(unsigned long irqs)
+{
+        unsigned int i;
+        int irq_found = NO_IRQ;
+        spin_lock_irq(&irq_controller_lock);
+        for (i = 0; i < NR_IRQS; i++) {
+                if (!irq_desc[i].probing || !irq_desc[i].triggered)
+                        continue;
+                if (irq_found != NO_IRQ) {
+                        /* a second candidate makes the result ambiguous */
+                        irq_found = NO_IRQ;
+                        break;
+                }
+                irq_found = i;
+        }
+        if (irq_found == -1)
+                irq_found = NO_IRQ;
+        spin_unlock_irq(&irq_controller_lock);
+        return irq_found;
+}
+EXPORT_SYMBOL(probe_irq_off);
+/* Empty stub: nothing is set up here. */
+void __init init_irq_proc(void)
+{
+}
+/*
+ * Reset every irq_desc entry to a copy of bad_irq_desc, then call
+ * arc_init_irq() and init_dma().
+ */
+void __init init_IRQ(void)
+{
+        extern void init_dma(void);
+        int irq;
+        for (irq = 0; irq < NR_IRQS; irq++)
+                irq_desc[irq] = bad_irq_desc;
+        arc_init_irq();
+        init_dma();
+}
diff --git a/arch/arm26/kernel/process.c b/arch/arm26/kernel/process.c
new file mode 100644
index 000000000000..46aea6ac194d
--- /dev/null
+++ b/arch/arm26/kernel/process.c
@@ -0,0 +1,401 @@
+/*
+ *  linux/arch/arm26/kernel/process.c
+ *
+ *  Copyright (C) 2003 Ian Molton - adapted for ARM26
+ *  Copyright (C) 1996-2000 Russell King - Converted to ARM.
+ *  Original Copyright (C) 1995  Linus Torvalds
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <stdarg.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/leds.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+extern const char *processor_modes[];
+extern void setup_mm_for_reboot(char mode);
+static volatile int hlt_counter;
+/* Take one reference on the "halt disabled" counter. */
+void disable_hlt(void)
+{
+        hlt_counter += 1;
+}
+EXPORT_SYMBOL(disable_hlt);
+/* Drop one reference on the "halt disabled" counter. */
+void enable_hlt(void)
+{
+        hlt_counter -= 1;
+}
+EXPORT_SYMBOL(enable_hlt);
+/* "nohlt" boot option: force hlt_counter to 1; returns 1 (argument ignored). */
+static int __init nohlt_setup(char *__unused)
+{
+        hlt_counter = 1;
+        return 1;
+}
+/* "hlt" boot option: reset hlt_counter to 0; returns 1 (argument ignored). */
+static int __init hlt_setup(char *__unused)
+{
+        hlt_counter = 0;
+        return 1;
+}
+__setup("nohlt", nohlt_setup);
+__setup("hlt", hlt_setup);
+/*
+ * This is our default idle handler.  It spins until a reschedule is
+ * requested, calls schedule(), and then goes back to idling.  It never
+ * returns.
+ */
+void cpu_idle(void)
+{
+        /* endless idle loop with no priority at all */
+        preempt_disable();
+        while (1) {
+                while (!need_resched()) {
+                        local_irq_disable();
+                        /*
+                         * No halt/wait operation is issued here, so
+                         * hlt_counter has nothing to gate.  Interrupts
+                         * must be unmasked again on every pass: leaving
+                         * them off would stop the wakeup that sets
+                         * need_resched from ever being delivered.
+                         */
+                        local_irq_enable();
+                }
+                /*
+                 * schedule() has to be inside the endless loop to be
+                 * reachable at all; drop the preempt count around it
+                 * and take it again before idling once more.
+                 */
+                preempt_enable_no_resched();
+                schedule();
+                preempt_disable();
+        }
+}
+static char reboot_mode = 'h';
+/*
+ * "reboot=" boot option: remember the first character of the argument
+ * as the reboot mode later passed to setup_mm_for_reboot().  Returns 1.
+ */
+int __init reboot_setup(char *str)
+{
+        reboot_mode = str[0];
+        return 1;
+}
+__setup("reboot=", reboot_setup);
+/* ARM26 can't do these, but we still need to define them. */
+void machine_halt(void)
+{
+        /* no-op: defined only so the symbol exists */
+}
+void machine_power_off(void)
+{
+        /* no-op: defined only so the symbol exists */
+}
+EXPORT_SYMBOL(machine_halt);
+EXPORT_SYMBOL(machine_power_off);
+/*
+ * Restart the machine by jumping through address 0.  The argument is
+ * ignored.  If the jump returns, report the failure and spin forever.
+ */
+void machine_restart(char * __unused)
+{
+        /*
+         * Clean and disable cache, and turn off interrupts
+         */
+        cpu_proc_fin();
+        /*
+         * Tell the mm system that we are going to reboot -
+         * we may need it to insert some 1:1 mappings so that
+         * soft boot works.
+         */
+        setup_mm_for_reboot(reboot_mode);
+        /*
+         * copy branch instruction to reset location and call it
+         */
+        /* NOTE(review): 0x03800000 is presumably the ROM base - confirm */
+        *(unsigned long *)0 = *(unsigned long *)0x03800000;
+        ((void(*)(void))0)();
+        /*
+         * Whoops - the architecture was unable to reboot.
+         * Tell the user! Should never happen...
+         */
+        mdelay(1000);
+        printk("Reboot failed -- System halted\n");
+        while (1);
+}
+EXPORT_SYMBOL(machine_restart);
+/*
+ * Print the saved register set: pc/lr/sp/ip/fp, r10 down to r0, the
+ * N/Z/C/V condition flags (upper case when set), the IRQ/FIQ enable
+ * state, the processor mode name and the current address-limit segment.
+ */
+void show_regs(struct pt_regs * regs)
+{
+        unsigned long flags = condition_codes(regs);
+        const char *irq_suffix = interrupts_enabled(regs) ? "n" : "ff";
+        const char *fiq_suffix = fast_interrupts_enabled(regs) ? "n" : "ff";
+        const char *segment = get_fs() == get_ds() ? "kernel" : "user";
+
+        printk("pc : [<%08lx>]    lr : [<%08lx>]    %s\n"
+               "sp : %08lx  ip : %08lx  fp : %08lx\n",
+               instruction_pointer(regs), regs->ARM_lr, print_tainted(),
+               regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
+        printk("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
+               regs->ARM_r10, regs->ARM_r9, regs->ARM_r8);
+        printk("r7 : %08lx  r6 : %08lx  r5 : %08lx  r4 : %08lx\n",
+               regs->ARM_r7, regs->ARM_r6, regs->ARM_r5, regs->ARM_r4);
+        printk("r3 : %08lx  r2 : %08lx  r1 : %08lx  r0 : %08lx\n",
+               regs->ARM_r3, regs->ARM_r2, regs->ARM_r1, regs->ARM_r0);
+        printk("Flags: %c%c%c%c",
+               (flags & PSR_N_BIT) ? 'N' : 'n',
+               (flags & PSR_Z_BIT) ? 'Z' : 'z',
+               (flags & PSR_C_BIT) ? 'C' : 'c',
+               (flags & PSR_V_BIT) ? 'V' : 'v');
+        printk("  IRQs o%s  FIQs o%s  Mode %s  Segment %s\n",
+               irq_suffix, fiq_suffix,
+               processor_modes[processor_mode(regs)], segment);
+}
+/*
+ * Print the eight floating point registers, two per line, as three
+ * raw words each with a type letter (f, d, e, or ? when unknown or
+ * when init_flag is set), followed by FPSR and FPCR.
+ */
+void show_fpregs(struct user_fp *regs)
+{
+        int n;
+
+        for (n = 0; n < 8; n++) {
+                unsigned long *words = (unsigned long *)(regs->fpregs + n);
+                char kind = '?';
+
+                if (!regs->init_flag) {
+                        switch (regs->ftype[n]) {
+                        case 1: kind = 'f'; break;
+                        case 2: kind = 'd'; break;
+                        case 3: kind = 'e'; break;
+                        default: break;
+                        }
+                }
+                printk("  f%d(%c): %08lx %08lx %08lx%c",
+                        n, kind, words[0], words[1], words[2],
+                        (n & 1) ? '\n' : ' ');
+        }
+
+        printk("FPSR: %08lx FPCR: %08lx\n",
+                (unsigned long)regs->fpsr,
+                (unsigned long)regs->fpcr);
+}
+/*
+ * Task structure and kernel stack allocation.
+ */
+static unsigned long *thread_info_head;
+static unsigned int nr_thread_info;
+extern unsigned long get_page_8k(int priority);
+extern void free_page_8k(unsigned long page);
+// FIXME - is this valid?
+#define EXTRA_TASK_STRUCT       0
+#define ll_alloc_task_struct()  ((struct thread_info *)get_page_8k(GFP_KERNEL))
+#define ll_free_task_struct(p)  free_page_8k((unsigned long)(p))
+//FIXME - the *task parameter below appears to be unused; is that OK?
+//FIXME - if EXTRA_TASK_STRUCT is supposed to be zero, the free-list code below can be optimised away permanently.
+/*
+ * Allocate a thread_info (with its kernel stack) for a new task.  A
+ * cached block from the free list is reused when EXTRA_TASK_STRUCT is
+ * non-zero and one is available; otherwise a fresh block is obtained
+ * from ll_alloc_task_struct().  Returns NULL if that allocation fails.
+ * The task argument is not used.
+ */
+struct thread_info *alloc_thread_info(struct task_struct *task)
+{
+        struct thread_info *thread = NULL;
+
+        if (EXTRA_TASK_STRUCT && thread_info_head) {
+                unsigned long *head = thread_info_head;
+
+                /* the first word of a cached block links to the next one */
+                thread_info_head = (unsigned long *)head[0];
+                nr_thread_info -= 1;
+                thread = (struct thread_info *)head;
+        }
+        if (!thread)
+                thread = ll_alloc_task_struct();
+#ifdef CONFIG_MAGIC_SYSRQ
+        /*
+         * The stack must be cleared if you want SYSRQ-T to
+         * give sensible stack usage information
+         */
+        if (thread)
+                memzero((char *)thread + KERNEL_STACK_SIZE, KERNEL_STACK_SIZE);
+#endif
+        return thread;
+}
+/*
+ * Release a thread_info: hand it back to the low-level allocator unless
+ * the free-list cache is enabled and still has room, in which case the
+ * block is pushed onto the cache instead.
+ */
+void free_thread_info(struct thread_info *thread)
+{
+        unsigned long *link;
+
+        if (!EXTRA_TASK_STRUCT || nr_thread_info >= EXTRA_TASK_STRUCT) {
+                ll_free_task_struct(thread);
+                return;
+        }
+        link = (unsigned long *)thread;
+        link[0] = (unsigned long)thread_info_head;
+        thread_info_head = link;
+        nr_thread_info += 1;
+}
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+        /* nothing to release in this port */
+}
+/*
+ * Reset the current task's debug (breakpoint) state and floating point
+ * state to zero and clear its used-math flag.
+ */
+void flush_thread(void)
+{
+        struct thread_info *ti = current_thread_info();
+
+        memset(&current->thread.debug, 0, sizeof(struct debug_info));
+        memset(&ti->fpstate, 0, sizeof(union fp_state));
+        clear_used_math();
+}
+void release_thread(struct task_struct *dead_task)
+{
+        /* nothing to release in this port */
+}
+asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+/*
+ * Set up the register state of a newly forked task p.
+ *
+ * The child's user registers are a copy of the parent's regs with r0
+ * forced to 0 and sp replaced by stack_start.  Its saved CPU context is
+ * cleared, then pointed at those registers (sp) and at ret_from_fork
+ * (pc, with the SVC26 mode bits and the I bit set).  The nr,
+ * clone_flags and unused arguments are not consulted.  Always returns 0.
+ */
+int
+copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start,
+            unsigned long unused, struct task_struct *p, struct pt_regs *regs)
+{
+        struct thread_info *thread = p->thread_info;
+        struct pt_regs *childregs;
+        childregs = __get_user_regs(thread);
+        *childregs = *regs;
+        childregs->ARM_r0 = 0;
+        childregs->ARM_sp = stack_start;
+        memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
+        thread->cpu_context.sp = (unsigned long)childregs;
+        thread->cpu_context.pc = (unsigned long)ret_from_fork | MODE_SVC26 | PSR_I_BIT;
+        return 0;
+}
+/*
+ * fill in the fpe structure for a core dump...
+ */
+/*
+ * Copy the current thread's soft floating point state into fp when the
+ * task has used math.  Returns 1 if the state was copied, 0 otherwise.
+ * The regs argument is not used.
+ */
+int dump_fpu (struct pt_regs *regs, struct user_fp *fp)
+{
+        int have_fp = !!used_math();
+
+        if (!have_fp)
+                return have_fp;
+        memcpy(fp, &current_thread_info()->fpstate.soft, sizeof (*fp));
+        return have_fp;
+}
+/*
+ * fill in the user structure for a core dump..
+ */
+/*
+ * Populate the a.out core dump header from the current task: text,
+ * data and stack sizes in pages, the single-step breakpoint state,
+ * the integer registers and, when present, the FP state.
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+        struct task_struct *tsk = current;
+
+        dump->magic = CMAGIC;
+        dump->start_code = tsk->mm->start_code;
+        dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1);
+
+        dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT;
+        dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
+        /* stack size is counted only when the stack starts below 0x04000000 */
+        if (dump->start_stack < 0x04000000)
+                dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
+        else
+                dump->u_ssize = 0;
+
+        dump->u_debugreg[0] = tsk->thread.debug.bp[0].address;
+        dump->u_debugreg[1] = tsk->thread.debug.bp[1].address;
+        dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn;
+        dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn;
+        dump->u_debugreg[4] = tsk->thread.debug.nsaved;
+
+        dump->regs = *regs;
+        dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
+}
+/*
+ * Shuffle the argument into the correct register before calling the
+ * thread function.  r1 is the thread argument, r2 is the pointer to
+ * the thread function, and r3 points to the exit function.
+ * The helper moves r1 into r0, r3 into lr and then jumps to r2, so the
+ * thread function receives its argument in r0 and returns into the
+ * exit function.
+ * FIXME - make sure this is right - the older code used to zero fp
+ * and cause the parent to call sys_exit (do_exit in this version)
+ */
+extern void kernel_thread_helper(void);
+asm(    ".section .text\n"
+"       .align\n"
+"       .type   kernel_thread_helper, #function\n"
+"kernel_thread_helper:\n"
+"       mov     r0, r1\n"
+"       mov     lr, r3\n"
+"       mov     pc, r2\n"
+"       .size   kernel_thread_helper, . - kernel_thread_helper\n"
+"       .previous");
+/*
+ * Create a kernel thread.
+ */
+/*
+ * Builds a zeroed register set with r1 = arg, r2 = fn, r3 = do_exit and
+ * pc = kernel_thread_helper (SVC26 mode), then forks with CLONE_VM and
+ * CLONE_UNTRACED added to the caller's flags.  Returns do_fork()'s result.
+ */
+pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+        struct pt_regs regs;
+        memset(&regs, 0, sizeof(regs));
+        regs.ARM_r1 = (unsigned long)arg;
+        regs.ARM_r2 = (unsigned long)fn;
+        regs.ARM_r3 = (unsigned long)do_exit;
+        regs.ARM_pc = (unsigned long)kernel_thread_helper | MODE_SVC26;
+        return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+/*
+ * Find where a sleeping task is waiting: walk its saved frame pointer
+ * chain (at most 17 frames) and return the first return address that
+ * is not inside the scheduler.  Returns 0 for a NULL, current or
+ * running task, or when the chain leaves the task's kernel stack.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+        unsigned long fp, lr;
+        unsigned long stack_page;
+        int count = 0;
+        if (!p || p == current || p->state == TASK_RUNNING)
+                return 0;
+        /*
+         * The kernel stack is allocated together with the thread_info,
+         * not with the task_struct, so the bounds used to validate each
+         * frame pointer must be derived from p->thread_info.
+         */
+        stack_page = 4096 + (unsigned long)p->thread_info;
+        fp = thread_saved_fp(p);
+        do {
+                if (fp < stack_page || fp > 4092+stack_page)
+                        return 0;
+                lr = pc_pointer (((unsigned long *)fp)[-1]);
+                if (!in_sched_functions(lr))
+                        return lr;
+                fp = *(unsigned long *) (fp - 12);
+        } while (count ++ < 16);
+        return 0;
+}
diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c
new file mode 100644
index 000000000000..2a137146a77c
--- /dev/null
+++ b/arch/arm26/kernel/ptrace.c
@@ -0,0 +1,744 @@
+/*
+ *  linux/arch/arm26/kernel/ptrace.c
+ *
+ *  By Ross Biro 1/23/92
+ * edited by Linus Torvalds
+ * ARM modifications Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/security.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+//#include <asm/processor.h>
+#include "ptrace.h"
+#define REG_PC  15
+#define REG_PSR 15
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+/*
+ * Breakpoint SWI instruction: SWI &9F0001
+ */
+#define BREAKINST_ARM   0xef9f0001
+/*
+ * Get the address of the live pt_regs for the specified task.
+ * These are saved onto the top kernel stack when the process
+ * is not running.
+ *
+ * Note: if a user thread is execve'd from kernel space, the
+ * kernel stack will not be empty on entry to the kernel, so
+ * ptracing these tasks will fail.
+ */
+static inline struct pt_regs *
+get_user_regs(struct task_struct *task)
+{
+        /* thin wrapper: look the registers up via the task's thread_info */
+        return __get_user_regs(task->thread_info);
+}
+/*
+ * this routine will get a word off of the process's privileged stack.
+ * the offset is how far from the base addr as stored in the THREAD.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline long get_user_reg(struct task_struct *task, int offset)
+{
+        /* offset is an index into uregs[]; it is not range-checked here */
+        return get_user_regs(task)->uregs[offset];
+}
+/*
+ * this routine will put a word on the process's privileged stack.
+ * the offset is how far from the base addr as stored in the THREAD.
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+/*
+ * Store data into saved register `offset' of the task, but only if the
+ * resulting register set passes valid_user_regs().  Returns 0 on
+ * success and -EINVAL (leaving the registers untouched) otherwise.
+ */
+static inline int
+put_user_reg(struct task_struct *task, int offset, long data)
+{
+        struct pt_regs *live = get_user_regs(task);
+        struct pt_regs trial = *live;
+
+        /* validate on a scratch copy before touching the live registers */
+        trial.uregs[offset] = data;
+        if (!valid_user_regs(&trial))
+                return -EINVAL;
+
+        live->uregs[offset] = data;
+        return 0;
+}
+/*
+ * Read one 32-bit word from the task's address space into *res.
+ * Returns 0 when the whole word was read, -EIO otherwise.
+ */
+static inline int
+read_u32(struct task_struct *task, unsigned long addr, u32 *res)
+{
+        if (access_process_vm(task, addr, res, sizeof(*res), 0) != sizeof(*res))
+                return -EIO;
+        return 0;
+}
+/*
+ * Read the instruction word containing addr (addr is rounded down to a
+ * word boundary) from the task's address space into *res.
+ * Returns 0 when the whole word was read, -EIO otherwise.  On failure
+ * *res receives 0, or whatever partial data was copied, rather than
+ * uninitialised stack contents.
+ */
+static inline int
+read_instr(struct task_struct *task, unsigned long addr, u32 *res)
+{
+        int ret;
+        u32 val = 0;
+        ret = access_process_vm(task, addr & ~3, &val, sizeof(val), 0);
+        ret = ret == sizeof(val) ? 0 : -EIO;
+        *res = val;
+        return ret;
+}
+/*
+ * Get value of register `rn' (in the instruction)
+ */
+static unsigned long
+ptrace_getrn(struct task_struct *child, unsigned long insn)
+{
+        /* Rn is encoded in bits 19:16 of the instruction */
+        unsigned int rn = (insn >> 16) & 15;
+        unsigned long value = get_user_reg(child, rn);
+
+        if (rn != 15)
+                return value;
+        /* r15 is read as the address bits of (saved value + 8) */
+        return pc_pointer(value + 8); //FIXME - correct for arm26?
+}
+/*
+ * Get value of operand 2 (in an ALU instruction)
+ */
+/*
+ * Evaluates the shifter operand of a data processing instruction:
+ *  - bit 25 set:   8-bit immediate rotated right by twice the 4-bit
+ *                  rotate field (bits 11:8)
+ *  - bit 4 set:    register Rm shifted by the bottom byte of Rs
+ *  - otherwise:    register Rm shifted by the 5-bit immediate
+ *
+ * Shift counts that C leaves undefined for a 32-bit value (32 or more,
+ * or a rotate by 0) are handled explicitly.
+ * NOTE(review): the special immediate encodings LSR #0 / ASR #0 (which
+ * mean a shift by 32) and ROR #0 (RRX) are still treated as "no shift",
+ * as before - confirm whether they need emulating.
+ */
+static unsigned long
+ptrace_getaluop2(struct task_struct *child, unsigned long insn)
+{
+        unsigned long val;
+        unsigned int shift;
+        int type;
+        if (insn & 1 << 25) {
+                val = insn & 255;
+                /* the rotate field counts in units of two bits */
+                shift = ((insn >> 8) & 15) << 1;
+                type = 3;
+        } else {
+                val = get_user_reg (child, insn & 15);
+                if (insn & (1 << 4))
+                        /* only the least significant byte of Rs is used */
+                        shift = get_user_reg (child, (insn >> 8) & 15) & 255;
+                else
+                        shift = (insn >> 7) & 31;
+                type = (insn >> 5) & 3;
+        }
+        switch (type) {
+        case 0: val = shift < 32 ? val << shift : 0;    break;
+        case 1: val = shift < 32 ? val >> shift : 0;    break;
+        case 2:
+                /* counts of 32 or more just replicate the sign bit */
+                val = (((signed long)val) >> (shift < 32 ? shift : 31));
+                break;
+        case 3:
+                shift &= 31;
+                /* rotating by 0 must not shift left by 32 */
+                if (shift)
+                        val = (val >> shift) | (val << (32 - shift));
+                break;
+        }
+        return val;
+}
+/*
+ * Get value of operand 2 (in a LDR instruction)
+ */
+/*
+ * Evaluates the register offset of a load: Rm (bits 3:0) shifted by the
+ * 5-bit immediate in bits 11:7, using the shift type in bits 6:5.
+ */
+static unsigned long
+ptrace_getldrop2(struct task_struct *child, unsigned long insn)
+{
+        unsigned long val;
+        int shift;
+        int type;
+        val = get_user_reg(child, insn & 15);
+        shift = (insn >> 7) & 31;
+        type = (insn >> 5) & 3;
+        switch (type) {
+        case 0: val <<= shift;  break;
+        case 1: val >>= shift;  break;
+        case 2:
+                val = (((signed long)val) >> shift);
+                break;
+        case 3:
+                /*
+                 * A rotate by 0 would shift left by 32, which is
+                 * undefined for a 32-bit value; leave val unchanged.
+                 */
+                if (shift)
+                        val = (val >> shift) | (val << (32 - shift));
+                break;
+        }
+        return val;
+}
+#define OP_MASK 0x01e00000
+#define OP_AND  0x00000000
+#define OP_EOR  0x00200000
+#define OP_SUB  0x00400000
+#define OP_RSB  0x00600000
+#define OP_ADD  0x00800000
+#define OP_ADC  0x00a00000
+#define OP_SBC  0x00c00000
+#define OP_RSC  0x00e00000
+#define OP_ORR  0x01800000
+#define OP_MOV  0x01a00000
+#define OP_BIC  0x01c00000
+#define OP_MVN  0x01e00000
+/*
+ * Work out where the instruction `insn' at `pc' may transfer control
+ * to, other than the next sequential instruction.
+ *
+ * Handles data processing instructions and loads that write r15,
+ * load-multiples that include r15, and b/bl.  Returns the alternative
+ * address, or 0 when the instruction cannot change the PC (or when the
+ * target could not be read from the child's memory).
+ */
+static unsigned long
+get_branch_address(struct task_struct *child, unsigned long pc, unsigned long insn)
+{
+        u32 alt = 0;
+        switch (insn & 0x0e000000) {
+        case 0x00000000:
+        case 0x02000000: {
+                /*
+                 * data processing
+                 */
+                long aluop1, aluop2, ccbit;
+                /* only interesting when the destination register is r15 */
+                if ((insn & 0xf000) != 0xf000)
+                        break;
+                aluop1 = ptrace_getrn(child, insn);
+                aluop2 = ptrace_getaluop2(child, insn);
+                ccbit  = get_user_reg(child, REG_PSR) & PSR_C_BIT ? 1 : 0;
+                switch (insn & OP_MASK) {
+                case OP_AND: alt = aluop1 & aluop2;             break;
+                case OP_EOR: alt = aluop1 ^ aluop2;             break;
+                case OP_SUB: alt = aluop1 - aluop2;             break;
+                case OP_RSB: alt = aluop2 - aluop1;             break;
+                case OP_ADD: alt = aluop1 + aluop2;             break;
+                case OP_ADC: alt = aluop1 + aluop2 + ccbit;     break;
+                /*
+                 * Subtract-with-carry computes a - b - NOT(C), i.e.
+                 * a - b + C - 1: a clear carry flag means "borrow".
+                 */
+                case OP_SBC: alt = aluop1 - aluop2 + ccbit - 1; break;
+                case OP_RSC: alt = aluop2 - aluop1 + ccbit - 1; break;
+                case OP_ORR: alt = aluop1 | aluop2;             break;
+                case OP_MOV: alt = aluop2;                      break;
+                case OP_BIC: alt = aluop1 & ~aluop2;            break;
+                case OP_MVN: alt = ~aluop2;                     break;
+                }
+                break;
+        }
+        case 0x04000000:
+        case 0x06000000:
+                /*
+                 * ldr
+                 */
+                if ((insn & 0x0010f000) == 0x0010f000) {
+                        unsigned long base;
+                        base = ptrace_getrn(child, insn);
+                        /* pre-indexed: apply the offset before loading */
+                        if (insn & 1 << 24) {
+                                long aluop2;
+                                if (insn & 0x02000000)
+                                        aluop2 = ptrace_getldrop2(child, insn);
+                                else
+                                        aluop2 = insn & 0xfff;
+                                if (insn & 1 << 23)
+                                        base += aluop2;
+                                else
+                                        base -= aluop2;
+                        }
+                        if (read_u32(child, base, &alt) == 0)
+                                alt = pc_pointer(alt);
+                }
+                break;
+        case 0x08000000:
+                /*
+                 * ldm
+                 */
+                if ((insn & 0x00108000) == 0x00108000) {
+                        unsigned long base;
+                        unsigned int nr_regs;
+                        /*
+                         * nr_regs is the byte offset from the base
+                         * register to the word that is loaded into r15.
+                         */
+                        if (insn & (1 << 23)) {
+                                nr_regs = hweight16(insn & 65535) << 2;
+                                if (!(insn & (1 << 24)))
+                                        nr_regs -= 4;
+                        } else {
+                                if (insn & (1 << 24))
+                                        nr_regs = -4;
+                                else
+                                        nr_regs = 0;
+                        }
+                        base = ptrace_getrn(child, insn);
+                        if (read_u32(child, base + nr_regs, &alt) == 0)
+                                alt = pc_pointer(alt);
+                        break;
+                }
+                break;
+        case 0x0a000000: {
+                /*
+                 * bl or b
+                 */
+                signed long displ;
+                /* It's a branch/branch link: instead of trying to
+                 * figure out whether the branch will be taken or not,
+                 * we'll put a breakpoint at both locations.  This is
+                 * simpler, more reliable, and probably not a whole lot
+                 * slower than the alternative approach of emulating the
+                 * branch.
+                 */
+                /* sign-extend the 24-bit word offset and scale to bytes */
+                displ = (insn & 0x00ffffff) << 8;
+                displ = (displ >> 6) + 8;
+                if (displ != 0 && displ != 4)
+                        alt = pc + displ;
+            }
+            break;
+        }
+        return alt;
+}
+/*
+ * Save `size' bytes found at addr in the task into old_insn, then
+ * overwrite them with new_insn.  Returns the byte count reported by
+ * the write, or by the read if the read came up short (in which case
+ * nothing is written).
+ */
+static int
+swap_insn(struct task_struct *task, unsigned long addr,
+          void *old_insn, void *new_insn, int size)
+{
+        int got = access_process_vm(task, addr, old_insn, size, 0);
+
+        if (got != size)
+                return got;
+        return access_process_vm(task, addr, new_insn, size, 1);
+}
+/*
+ * Plant a breakpoint instruction at addr in the task and record the
+ * displaced instruction in the next free slot of dbg.  At most two
+ * breakpoints may be active; a failed memory access leaves dbg as is.
+ */
+static void
+add_breakpoint(struct task_struct *task, struct debug_info *dbg, unsigned long addr)
+{
+        u32 brk = BREAKINST_ARM;
+        int slot = dbg->nsaved;
+
+        if (slot >= 2) {
+                printk(KERN_ERR "ptrace: too many breakpoints\n");
+                return;
+        }
+        if (swap_insn(task, addr, &dbg->bp[slot].insn, &brk, 4) != 4)
+                return;
+
+        dbg->bp[slot].address = addr;
+        dbg->nsaved += 1;
+}
+/*
+ * Clear one breakpoint in the user program: write the saved original
+ * instruction back over the word containing bp->address, and complain
+ * if what was there was not our breakpoint instruction (or if the
+ * memory could not be accessed).
+ */
+static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp)
+{
+        unsigned long addr = bp->address;
+        /* initialised so a failed read does not print stack garbage */
+        u32 old_insn = 0;
+        int ret;
+        ret = swap_insn(task, addr & ~3, &old_insn,
+                        &bp->insn, 4);
+        if (ret != 4 || old_insn != BREAKINST_ARM)
+                printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at "
+                        "0x%08lx (0x%08x)\n", task->comm, task->pid,
+                        addr, old_insn);
+}
+/*
+ * Arm single-stepping for the child: plant a breakpoint at every
+ * address the instruction at the child's current PC can continue to.
+ * Does nothing if that instruction cannot be read.
+ */
+void ptrace_set_bpt(struct task_struct *child)
+{
+        struct debug_info *dbg = &child->thread.debug;
+        struct pt_regs *regs = get_user_regs(child);
+        unsigned long pc = instruction_pointer(regs);
+        unsigned long alt;
+        u32 insn;
+
+        if (read_instr(child, pc, &insn))
+                return;
+
+        dbg->nsaved = 0;
+        alt = get_branch_address(child, pc, insn);
+        /*
+         * Note that we ignore the result of setting this
+         * breakpoint since it may fail.  When it does, this is
+         * not so much an error, but a forewarning that we may
+         * be receiving a prefetch abort shortly.
+         */
+        if (alt)
+                add_breakpoint(child, dbg, alt);
+        /*
+         * If we don't set this breakpoint here, then we can
+         * lose control of the thread during single stepping.
+         */
+        if (!alt || predicate(insn) != PREDICATE_ALWAYS)
+                add_breakpoint(child, dbg, pc + 4);
+}
+/*
+ * Ensure no single-step breakpoint is pending: remove every recorded
+ * breakpoint from the child and reset the saved count to zero.
+ * Nothing is returned.
+ */
+void ptrace_cancel_bpt(struct task_struct *child)
+{
+        int i, nsaved = child->thread.debug.nsaved;
+        child->thread.debug.nsaved = 0;
+        /* there are only two slots; clamp a corrupted count */
+        if (nsaved > 2) {
+                printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved);
+                nsaved = 2;
+        }
+        for (i = 0; i < nsaved; i++)
+                clear_breakpoint(child, &child->thread.debug.bp[i]);
+}
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+        /* drop the single-step flag and remove any planted breakpoints */
+        child->ptrace &= ~PT_SINGLESTEP;
+        ptrace_cancel_bpt(child);
+}
+/*
+ * Handle hitting a breakpoint.
+ */
+void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
+{
+        siginfo_t info;
+        /*
+         * The PC is always left pointing at the next instruction.  Fix this.
+         */
+        regs->ARM_pc -= 4;
+        if (tsk->thread.debug.nsaved == 0)
+                printk(KERN_ERR "ptrace: bogus breakpoint trap\n");
+        /* restore the original instructions before signalling the task */
+        ptrace_cancel_bpt(tsk);
+        /* only the four fields below are set; the rest of info is not cleared */
+        info.si_signo = SIGTRAP;
+        info.si_errno = 0;
+        info.si_code  = TRAP_BRKPT;
+        /*
+         * NOTE(review): the PC was already moved back by 4 above, so
+         * this reports the address one instruction before the
+         * breakpoint - confirm whether the extra "- 4" is intended.
+         */
+        info.si_addr  = (void *)instruction_pointer(regs) - 4;
+        force_sig_info(SIGTRAP, &info, tsk);
+}
+/*
+ * Read the word at offset "off" into the "struct user".  We
+ * actually access the pt_regs stored on the kernel stack.
+ */
+static int ptrace_read_user(struct task_struct *tsk, unsigned long off,
+                            unsigned long *ret)
+{
+        unsigned long word = 0;
+
+        /* the offset must be word aligned and inside struct user */
+        if ((off & 3) || off >= sizeof(struct user))
+                return -EIO;
+
+        /* only the leading pt_regs part is backed by data; the rest reads as 0 */
+        if (off < sizeof(struct pt_regs))
+                word = get_user_reg(tsk, off >> 2);
+
+        return put_user(word, ret);
+}
+/*
+ * Write the word at offset "off" into "struct user".  We
+ * actually access the pt_regs stored on the kernel stack.
+ */
+static int ptrace_write_user(struct task_struct *tsk, unsigned long off,
+                             unsigned long val)
+{
+        /* the offset must be word aligned and inside struct user */
+        if (off & 3 || off >= sizeof(struct user))
+                return -EIO;
+        /* writes beyond the pt_regs part are accepted but ignored */
+        if (off >= sizeof(struct pt_regs))
+                return 0;
+        return put_user_reg(tsk, off >> 2, val);
+}
+/*
+ * Get all user integer registers.
+ */
+static int ptrace_getregs(struct task_struct *tsk, void *uregs)
+{
+        struct pt_regs *regs = get_user_regs(tsk);
+        /* returns 0 on success, -EFAULT if the user buffer is not writable */
+        return copy_to_user(uregs, regs, sizeof(struct pt_regs)) ? -EFAULT : 0;
+}
+/*
+ * Set all user integer registers.
+ */
+static int ptrace_setregs(struct task_struct *tsk, void *uregs)
+{
+        struct pt_regs incoming;
+
+        if (copy_from_user(&incoming, uregs, sizeof(struct pt_regs)) != 0)
+                return -EFAULT;
+
+        /* reject register sets that fail validation; nothing is stored */
+        if (!valid_user_regs(&incoming))
+                return -EINVAL;
+
+        *get_user_regs(tsk) = incoming;
+        return 0;
+}
+/*
+ * Get the child FPU state.
+ */
+static int ptrace_getfpregs(struct task_struct *tsk, void *ufp)
+{
+        /* copies sizeof(struct user_fp) bytes; 0 on success, -EFAULT on fault */
+        return copy_to_user(ufp, &tsk->thread_info->fpstate,
+                            sizeof(struct user_fp)) ? -EFAULT : 0;
+}
+/*
+ * Set the child FPU state.
+ */
+static int ptrace_setfpregs(struct task_struct *tsk, void *ufp)
+{
+        /* the child is marked as having used math before the copy is attempted */
+        set_stopped_child_used_math(tsk);
+        return copy_from_user(&tsk->thread_info->fpstate, ufp,
+                              sizeof(struct user_fp)) ? -EFAULT : 0;
+}
+/*
+ * Carry out one ptrace request on a child that has already been
+ * located and checked by sys_ptrace().
+ *
+ * addr and data are interpreted per request.  Requests not handled
+ * here are passed on to ptrace_request().  Returns 0 or a negative
+ * errno value.
+ */
+static int do_ptrace(int request, struct task_struct *child, long addr, long data)
+{
+        unsigned long tmp;
+        int ret;
+        switch (request) {
+                /*
+                 * read word at location "addr" in the child process.
+                 */
+                case PTRACE_PEEKTEXT:
+                case PTRACE_PEEKDATA:
+                        ret = access_process_vm(child, addr, &tmp,
+                                                sizeof(unsigned long), 0);
+                        if (ret == sizeof(unsigned long))
+                                ret = put_user(tmp, (unsigned long *) data);
+                        else
+                                ret = -EIO;
+                        break;
+                case PTRACE_PEEKUSR:
+                        ret = ptrace_read_user(child, addr, (unsigned long *)data);
+                        break;
+                /*
+                 * write the word at location addr.
+                 */
+                case PTRACE_POKETEXT:
+                case PTRACE_POKEDATA:
+                        ret = access_process_vm(child, addr, &data,
+                                                sizeof(unsigned long), 1);
+                        if (ret == sizeof(unsigned long))
+                                ret = 0;
+                        else
+                                ret = -EIO;
+                        break;
+                case PTRACE_POKEUSR:
+                        ret = ptrace_write_user(child, addr, data);
+                        break;
+                /*
+                 * continue/restart and stop at next (return from) syscall
+                 */
+                case PTRACE_SYSCALL:
+                case PTRACE_CONT:
+                        ret = -EIO;
+                        /* data is the signal number to resume with */
+                        if ((unsigned long) data > _NSIG)
+                                break;
+                        if (request == PTRACE_SYSCALL)
+                                set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+                        else
+                                clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+                        child->exit_code = data;
+                        /* make sure single-step breakpoint is gone. */
+                        child->ptrace &= ~PT_SINGLESTEP;
+                        ptrace_cancel_bpt(child);
+                        wake_up_process(child);
+                        ret = 0;
+                        break;
+                /*
+                 * make the child exit.  Best I can do is send it a sigkill.
+                 * perhaps it should be put in the status that it wants to
+                 * exit.
+                 */
+                case PTRACE_KILL:
+                        /* make sure single-step breakpoint is gone. */
+                        child->ptrace &= ~PT_SINGLESTEP;
+                        ptrace_cancel_bpt(child);
+                        if (child->exit_state != EXIT_ZOMBIE) {
+                                child->exit_code = SIGKILL;
+                                wake_up_process(child);
+                        }
+                        ret = 0;
+                        break;
+                /*
+                 * execute single instruction.
+                 */
+                case PTRACE_SINGLESTEP:
+                        ret = -EIO;
+                        if ((unsigned long) data > _NSIG)
+                                break;
+                        child->ptrace |= PT_SINGLESTEP;
+                        clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+                        child->exit_code = data;
+                        /* give it a chance to run. */
+                        wake_up_process(child);
+                        ret = 0;
+                        break;
+                case PTRACE_DETACH:
+                        ret = ptrace_detach(child, data);
+                        break;
+                case PTRACE_GETREGS:
+                        ret = ptrace_getregs(child, (void *)data);
+                        break;
+                case PTRACE_SETREGS:
+                        ret = ptrace_setregs(child, (void *)data);
+                        break;
+                case PTRACE_GETFPREGS:
+                        ret = ptrace_getfpregs(child, (void *)data);
+                        break;
+                
+                case PTRACE_SETFPREGS:
+                        ret = ptrace_setfpregs(child, (void *)data);
+                        break;
+                default:
+                        ret = ptrace_request(child, request, addr, data);
+                        break;
+        }
+        return ret;
+}
+/*
+ * ptrace system call entry point.
+ *
+ * PTRACE_TRACEME is handled directly on the calling task.  Every other
+ * request looks up the target by pid (pid 1 is refused), handles
+ * PTRACE_ATTACH, and otherwise checks that the target is attached
+ * before handing the request to do_ptrace().  The whole call runs
+ * under the big kernel lock.  Returns 0 or a negative errno value.
+ */
+asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
+{
+        struct task_struct *child;
+        int ret;
+        lock_kernel();
+        ret = -EPERM;
+        if (request == PTRACE_TRACEME) {
+                /* are we already being traced? */
+                if (current->ptrace & PT_PTRACED)
+                        goto out;
+                ret = security_ptrace(current->parent, current);
+                if (ret)
+                        goto out;
+                /* set the ptrace bit in the process flags. */
+                current->ptrace |= PT_PTRACED;
+                ret = 0;
+                goto out;
+        }
+        ret = -ESRCH;
+        /* take a reference so the task cannot go away once the lock is dropped */
+        read_lock(&tasklist_lock);
+        child = find_task_by_pid(pid);
+        if (child)
+                get_task_struct(child);
+        read_unlock(&tasklist_lock);
+        if (!child)
+                goto out;
+        ret = -EPERM;
+        if (pid == 1)           /* you may not mess with init */
+                goto out_tsk;
+        if (request == PTRACE_ATTACH) {
+                ret = ptrace_attach(child);
+                goto out_tsk;
+        }
+        ret = ptrace_check_attach(child, request == PTRACE_KILL);
+        if (ret == 0)
+                ret = do_ptrace(request, child, addr, data);
+out_tsk:
+        put_task_struct(child);
+out:
+        unlock_kernel();
+        return ret;
+}
+/*
+ * Report a syscall entry or exit stop to the tracer.  Does nothing
+ * unless the current task has TIF_SYSCALL_TRACE set and is being
+ * ptraced.
+ */
+asmlinkage void syscall_trace(int why, struct pt_regs *regs)
+{
+        unsigned long saved_ip;
+        int stop_code;
+
+        if (!test_thread_flag(TIF_SYSCALL_TRACE) ||
+            !(current->ptrace & PT_PTRACED))
+                return;
+
+        /*
+         * Save IP.  IP is used to denote syscall entry/exit:
+         *  IP = 0 -> entry, = 1 -> exit
+         */
+        saved_ip = regs->ARM_ip;
+        regs->ARM_ip = why;
+
+        /* the 0x80 provides a way for the tracing parent to distinguish
+           between a syscall stop and SIGTRAP delivery */
+        stop_code = SIGTRAP;
+        if (current->ptrace & PT_TRACESYSGOOD)
+                stop_code |= 0x80;
+        ptrace_notify(stop_code);
+
+        /*
+         * this isn't the same as continuing with a signal, but it will do
+         * for normal use.  strace only continues with a signal if the
+         * stopping signal is not SIGTRAP.  -brl
+         */
+        if (current->exit_code) {
+                send_sig(current->exit_code, current, 1);
+                current->exit_code = 0;
+        }
+
+        regs->ARM_ip = saved_ip;
+}
diff --git a/arch/arm26/kernel/ptrace.h b/arch/arm26/kernel/ptrace.h
new file mode 100644
index 000000000000..846c9d8d36ed
--- /dev/null
+++ b/arch/arm26/kernel/ptrace.h
@@ -0,0 +1,13 @@
+/*
+ *  linux/arch/arm26/kernel/ptrace.h
+ *
+ *  Copyright (C) 2000-2003 Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+extern void ptrace_cancel_bpt(struct task_struct *);
+extern void ptrace_set_bpt(struct task_struct *);
+extern void ptrace_break(struct task_struct *, struct pt_regs *);
diff --git a/arch/arm26/kernel/semaphore.c b/arch/arm26/kernel/semaphore.c
new file mode 100644
index 000000000000..3023a53431ff
--- /dev/null
+++ b/arch/arm26/kernel/semaphore.c
@@ -0,0 +1,223 @@
+/*
+ *  ARM semaphore implementation, taken from
+ *
+ *  i386 semaphore implementation.
+ *
+ *  (C) Copyright 1999 Linus Torvalds
+ *  (C) Copyright 2003 Ian Molton (ARM26 mods)
+ *
+ *  Modified for ARM by Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <asm/semaphore.h>
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative do we
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+/*
+ * Slow path of up(): call wake_up() on the semaphore's wait queue.
+ * Reached from the __up_wakeup assembly stub in this file.
+ *
+ * sem: semaphore that has just been released.
+ */
+void __up(struct semaphore *sem)
+{
+        wake_up(&sem->wait);
+}
+/* Held by the contention routines in this file while they update sem->sleepers and sem->count. */
+static DEFINE_SPINLOCK(semaphore_lock);
+/*
+ * Contended slow path of down(): sleep in TASK_UNINTERRUPTIBLE until
+ * the count stops being negative.  Reached from the __down_failed
+ * assembly stub in this file.
+ *
+ * sem: semaphore being acquired.
+ */
+void __sched __down(struct semaphore * sem)
+{
+        struct task_struct *tsk = current;
+        DECLARE_WAITQUEUE(wait, tsk);
+        /* Set the sleep state and queue up before touching the counters. */
+        tsk->state = TASK_UNINTERRUPTIBLE;
+        add_wait_queue_exclusive(&sem->wait, &wait);
+        spin_lock_irq(&semaphore_lock);
+        sem->sleepers++;
+        for (;;) {
+                int sleepers = sem->sleepers;
+                /*
+                 * Add "everybody else" into it. They aren't
+                 * playing, because we own the spinlock.
+                 */
+                if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                        /* result is not negative: stop waiting */
+                        sem->sleepers = 0;
+                        break;
+                }
+                sem->sleepers = 1;      /* us - see -1 above */
+                spin_unlock_irq(&semaphore_lock);
+                schedule();
+                /* re-arm the sleep state before re-checking under the lock */
+                tsk->state = TASK_UNINTERRUPTIBLE;
+                spin_lock_irq(&semaphore_lock);
+        }
+        spin_unlock_irq(&semaphore_lock);
+        remove_wait_queue(&sem->wait, &wait);
+        tsk->state = TASK_RUNNING;
+        /* pass a wakeup on to whoever is still on the wait queue */
+        wake_up(&sem->wait);
+}
+/*
+ * Contended slow path of down_interruptible(): like __down(), but
+ * sleeps in TASK_INTERRUPTIBLE and gives up when a signal is pending.
+ * Reached from the __down_interruptible_failed assembly stub in this
+ * file, which hands the return value back in ip.
+ *
+ * sem: semaphore being acquired.
+ *
+ * Returns 0 once the wait loop ends normally, or -EINTR if a signal
+ * was pending.
+ */
+int __sched __down_interruptible(struct semaphore * sem)
+{
+        int retval = 0;
+        struct task_struct *tsk = current;
+        DECLARE_WAITQUEUE(wait, tsk);
+        /* Set the sleep state and queue up before touching the counters. */
+        tsk->state = TASK_INTERRUPTIBLE;
+        add_wait_queue_exclusive(&sem->wait, &wait);
+        spin_lock_irq(&semaphore_lock);
+        sem->sleepers ++;
+        for (;;) {
+                int sleepers = sem->sleepers;
+                /*
+                 * With signals pending, this turns into
+                 * the trylock failure case - we won't be
+                 * sleeping, and we can't get the lock as
+                 * it has contention. Just correct the count
+                 * and exit.
+                 */
+                if (signal_pending(current)) {
+                        retval = -EINTR;
+                        sem->sleepers = 0;
+                        atomic_add(sleepers, &sem->count);
+                        break;
+                }
+                /*
+                 * Add "everybody else" into it. They aren't
+                 * playing, because we own the spinlock. The
+                 * "-1" is because we're still hoping to get
+                 * the lock.
+                 */
+                if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                        /* result is not negative: stop waiting */
+                        sem->sleepers = 0;
+                        break;
+                }
+                sem->sleepers = 1;      /* us - see -1 above */
+                spin_unlock_irq(&semaphore_lock);
+                schedule();
+                /* re-arm the sleep state before re-checking under the lock */
+                tsk->state = TASK_INTERRUPTIBLE;
+                spin_lock_irq(&semaphore_lock);
+        }
+        spin_unlock_irq(&semaphore_lock);
+        tsk->state = TASK_RUNNING;
+        remove_wait_queue(&sem->wait, &wait);
+        /* pass a wakeup on to whoever is still on the wait queue */
+        wake_up(&sem->wait);
+        return retval;
+}
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ *
+ * sem: semaphore whose inline trylock attempt failed.
+ *
+ * Always returns 1.  Reached from the __down_trylock_failed assembly
+ * stub in this file, which hands the return value back in ip.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+        int sleepers;
+        unsigned long flags;
+        /* irqsave variant: the interrupt state is restored, not forced on */
+        spin_lock_irqsave(&semaphore_lock, flags);
+        sleepers = sem->sleepers + 1;
+        sem->sleepers = 0;
+        /*
+         * Add "everybody else" and us into it. They aren't
+         * playing, because we own the spinlock.
+         */
+        if (!atomic_add_negative(sleepers, &sem->count))
+                wake_up(&sem->wait);
+        spin_unlock_irqrestore(&semaphore_lock, flags);
+        return 1;
+}
+/*
+ * The semaphore operations have a special calling sequence that
+ * allows us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * ip contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (r0 to r3 and lr), but not ip, as we use it as a return
+ * value in some cases.
+ *
+ * Each stub pushes r0-r3 and lr, moves the semaphore pointer from ip
+ * into r0, calls the matching C routine and returns by popping the
+ * saved lr into pc.  __down_interruptible_failed and
+ * __down_trylock_failed copy the C return value from r0 into ip
+ * before r0 is restored.  The stubs live in the .sched.text section
+ * and are exported below.
+ *
+ * NOTE(review): the trailing "^" on the ldmfd presumably restores the
+ * PSR bits kept in the saved 26-bit pc - confirm against the ARM
+ * architecture reference.
+ */
+asm("   .section .sched.text , #alloc, #execinstr       \n\
+        .align  5                               \n\
+        .globl  __down_failed                   \n\
+__down_failed:                                  \n\
+        stmfd   sp!, {r0 - r3, lr}              \n\
+        mov     r0, ip                          \n\
+        bl      __down                          \n\
+        ldmfd   sp!, {r0 - r3, pc}^             \n\
+                                                \n\
+        .align  5                               \n\
+        .globl  __down_interruptible_failed     \n\
+__down_interruptible_failed:                    \n\
+        stmfd   sp!, {r0 - r3, lr}              \n\
+        mov     r0, ip                          \n\
+        bl      __down_interruptible            \n\
+        mov     ip, r0                          \n\
+        ldmfd   sp!, {r0 - r3, pc}^             \n\
+                                                \n\
+        .align  5                               \n\
+        .globl  __down_trylock_failed           \n\
+__down_trylock_failed:                          \n\
+        stmfd   sp!, {r0 - r3, lr}              \n\
+        mov     r0, ip                          \n\
+        bl      __down_trylock                  \n\
+        mov     ip, r0                          \n\
+        ldmfd   sp!, {r0 - r3, pc}^             \n\
+                                                \n\
+        .align  5                               \n\
+        .globl  __up_wakeup                     \n\
+__up_wakeup:                                    \n\
+        stmfd   sp!, {r0 - r3, lr}              \n\
+        mov     r0, ip                          \n\
+        bl      __up                            \n\
+        ldmfd   sp!, {r0 - r3, pc}^             \n\
+        ");
+/* Export the assembly entry points defined in the asm block above. */
+EXPORT_SYMBOL(__down_failed);
+EXPORT_SYMBOL(__down_interruptible_failed);
+EXPORT_SYMBOL(__down_trylock_failed);
+EXPORT_SYMBOL(__up_wakeup);
diff --git a/arch/arm26/kernel/setup.c b/arch/arm26/kernel/setup.c
new file mode 100644
index 000000000000..4eb329e3828a
--- /dev/null
+++ b/arch/arm26/kernel/setup.c
@@ -0,0 +1,573 @@
+/*
+ *  linux/arch/arm26/kernel/setup.c
+ *
+ *  Copyright (C) 1995-2001 Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/utsname.h>
+#include <linux/blkdev.h>
+#include <linux/console.h>
+#include <linux/bootmem.h>
+#include <linux/seq_file.h>
+#include <linux/tty.h>
+#include <linux/init.h>
+#include <linux/root_dev.h>
+#include <asm/elf.h>
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/procinfo.h>
+#include <asm/setup.h>
+#include <asm/mach-types.h>
+#include <asm/tlbflush.h>
+#include <asm/irqchip.h>
+/* Fallback RAM size (16MB); used by the built-in init_tags memory tag. */
+#ifndef MEM_SIZE
+#define MEM_SIZE        (16*1024*1024)
+#endif
+/*
+ * NOTE(review): kernel_flag is only defined here for CONFIG_PREEMPT
+ * builds; presumably the lock behind lock_kernel() - confirm.
+ */
+#ifdef CONFIG_PREEMPT
+DEFINE_SPINLOCK(kernel_flag);
+#endif
+#if defined(CONFIG_FPE_NWFPE)
+/* Raw value of the "fpe=" boot option, filled in by fpe_setup(). */
+char fpe_type[8];
+/*
+ * Handler for the "fpe=" boot option: copy 8 bytes starting at the
+ * option value into fpe_type.  Returns 1.
+ *
+ * NOTE(review): the copy is a fixed 8 bytes whatever the length of
+ * 'line', so fpe_type is left without a NUL terminator when the value
+ * is 8 or more characters long, and bytes following a shorter value's
+ * terminator are copied too - confirm the consumers cope with both.
+ */
+static int __init fpe_setup(char *line)
+{
+        memcpy(fpe_type, line, 8);
+        return 1;
+}
+__setup("fpe=", fpe_setup);
+#endif
+extern void paging_init(struct meminfo *);
+extern void convert_to_tag_list(struct tag *tags);
+extern void squash_mem_tags(struct tag *tag);
+extern void bootmem_init(struct meminfo *);
+extern int root_mountflags;
+extern int _stext, _text, _etext, _edata, _end;
+#ifdef CONFIG_XIP_KERNEL
+extern int _endtext, _sdata;
+#endif
+/* CPU id; matched against the proc_info list, low 4 bits are reported as the revision */
+unsigned int processor_id;
+/* NOTE(review): presumably what machine_arch_type expands to - confirm in asm/mach-types.h */
+unsigned int __machine_arch_type;
+/* board revision from ATAG_REVISION, shown in /proc/cpuinfo output */
+unsigned int system_rev;
+/* serial number halves from ATAG_SERIAL */
+unsigned int system_serial_low;
+unsigned int system_serial_high;
+/* capability bits copied from the matched proc_info entry; decoded through hwcap_str[] */
+unsigned int elf_hwcap;
+/* values handed over by the ATAG_ACORN tag */
+unsigned int memc_ctrl_reg;
+unsigned int number_mfm_drives;
+/* copy of the matched entry's processor structure (list->proc) */
+struct processor processor;
+/* ELF platform string, copied from the matched entry's elf_name */
+char elf_platform[ELF_PLATFORM_SIZE];
+/* initrd location and size as given by ATAG_INITRD / ATAG_INITRD2 */
+unsigned long phys_initrd_start __initdata = 0;
+unsigned long phys_initrd_size __initdata = 0;
+/* memory bank table, filled from ATAG_MEM tags and "mem=" options */
+static struct meminfo meminfo __initdata = { 0, };
+/* copy of the matched entry's info structure (manufacturer, cpu_name) */
+static struct proc_info_item proc_info;
+/* human readable machine name chosen in setup_arch() */
+static const char *machine_name;
+/* command line produced by parse_cmdline() */
+static char command_line[COMMAND_LINE_SIZE];
+/* starts as CONFIG_CMDLINE; replaced by ATAG_CMDLINE when the bootloader passes one */
+static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE;
+/*
+ * Standard memory resources
+ *
+ * The start/end fields of the kernel code and data entries are filled
+ * in by request_standard_resources(); the video RAM entry is only
+ * referenced from code that is currently commented out there.
+ */
+static struct resource mem_res[] = {
+        { "Video RAM",   0,     0,     IORESOURCE_MEM                   },
+        { "Kernel code", 0,     0,     IORESOURCE_MEM                   },
+        { "Kernel data", 0,     0,     IORESOURCE_MEM                   }
+};
+#define video_ram   mem_res[0]
+#define kernel_code mem_res[1]
+#define kernel_data mem_res[2]
+/*
+ * I/O port ranges marked busy.  NOTE(review): the lp0..lp2 names
+ * suggest parallel port ranges - confirm.  Only lp0 is referenced in
+ * this file, and that request is currently disabled with "if (0)".
+ */
+static struct resource io_res[] = {
+        { "reserved",    0x3bc, 0x3be, IORESOURCE_IO | IORESOURCE_BUSY },
+        { "reserved",    0x378, 0x37f, IORESOURCE_IO | IORESOURCE_BUSY },
+        { "reserved",    0x278, 0x27f, IORESOURCE_IO | IORESOURCE_BUSY }
+};
+#define lp0 io_res[0]
+#define lp1 io_res[1]
+#define lp2 io_res[2]
+/* No extra CPU information is dumped on this architecture: expands to nothing. */
+#define dump_cpu_info() do { } while (0)
+/*
+ * Identify the CPU from processor_id and install its support data:
+ * proc_info, processor, the utsname machine string, elf_platform and
+ * elf_hwcap.  Spins forever if the CPU is not in the table.
+ */
+static void __init setup_processor(void)
+{
+        extern struct proc_info_list __proc_info_begin, __proc_info_end;
+        struct proc_info_list *entry = &__proc_info_begin;
+        /*
+         * Walk the table of supported processor types.  The linker
+         * builds it from the entries in arch/arm26/mm/proc-*.S
+         */
+        while (entry < &__proc_info_end &&
+               (processor_id & entry->cpu_mask) != entry->cpu_val)
+                entry++;
+        /* Ran off the end of the table: unknown CPU, nothing we can do. */
+        if (entry >= &__proc_info_end) {
+                printk("CPU configuration botched (ID %08x), unable "
+                       "to continue.\n", processor_id);
+                for (;;)
+                        ;
+        }
+        proc_info = *entry->info;
+        processor = *entry->proc;
+        printk("CPU: %s %s revision %d\n",
+               proc_info.manufacturer, proc_info.cpu_name,
+               (int)processor_id & 15);
+        dump_cpu_info();
+        sprintf(system_utsname.machine, "%s", entry->arch_name);
+        sprintf(elf_platform, "%s", entry->elf_name);
+        elf_hwcap = entry->elf_hwcap;
+        cpu_proc_init();
+}
+/*
+ * Initial parsing of the command line.  We need to pick out the
+ * memory size.  We look for mem=size@start, where start and size
+ * are "size[KkMm]".
+ *
+ * mi:        memory bank table; the first "mem=" seen discards the
+ *            banks recorded so far, every "mem=" then appends a bank
+ * cmdline_p: set to point at command_line, which receives a copy of
+ *            'from' with the "mem=" options left out
+ * from:      NUL-terminated boot command line to scan
+ *
+ * At most COMMAND_LINE_SIZE - 1 characters are copied.  "mem=" options
+ * that do not fit in the NR_BANKS entries of the bank table are
+ * reported and ignored rather than written past the end of mi->bank[].
+ */
+static void __init
+parse_cmdline(struct meminfo *mi, char **cmdline_p, char *from)
+{
+        char c = ' ', *to = command_line;
+        int usermem = 0, len = 0;
+        for (;;) {
+                if (c == ' ' && !memcmp(from, "mem=", 4)) {
+                        unsigned long size, start;
+                        /* drop the space already copied in front of "mem=" */
+                        if (to != command_line)
+                                to -= 1;
+                        /*
+                         * If the user specifies memory size, we
+                         * blow away any automatically generated
+                         * size.
+                         */
+                        if (usermem == 0) {
+                                usermem = 1;
+                                mi->nr_banks = 0;
+                        }
+                        /* memparse() also moves 'from' past the number it read */
+                        start = PHYS_OFFSET;
+                        size  = memparse(from + 4, &from);
+                        if (*from == '@')
+                                start = memparse(from + 1, &from);
+                        if (mi->nr_banks >= NR_BANKS) {
+                                /* table full: skip the bank, keep parsing */
+                                printk(KERN_WARNING
+                                       "Ignoring memory bank 0x%08lx size %luKB\n",
+                                       start, size / 1024);
+                        } else {
+                                mi->bank[mi->nr_banks].start = start;
+                                mi->bank[mi->nr_banks].size  = size;
+                                mi->bank[mi->nr_banks].node  = PHYS_TO_NID(start);
+                                mi->nr_banks += 1;
+                        }
+                }
+                c = *from++;
+                if (!c)
+                        break;
+                /* leave room for the terminating NUL */
+                if (COMMAND_LINE_SIZE <= ++len)
+                        break;
+                *to++ = c;
+        }
+        *to = '\0';
+        *cmdline_p = command_line;
+}
+/*
+ * Hand the ramdisk parameters over to the ramdisk driver's variables.
+ * Compiles to an empty function without CONFIG_BLK_DEV_RAM.
+ *
+ * doload:      stored in rd_doload
+ * prompt:      stored in rd_prompt
+ * image_start: stored in rd_image_start
+ * rd_sz:       stored in rd_size, unless it is zero, in which case
+ *              rd_size is left alone
+ */
+static void __init
+setup_ramdisk(int doload, int prompt, int image_start, unsigned int rd_sz)
+{
+#ifdef CONFIG_BLK_DEV_RAM
+        extern int rd_size, rd_image_start, rd_prompt, rd_doload;
+        if (rd_sz != 0)
+                rd_size = rd_sz;
+        rd_doload = doload;
+        rd_prompt = prompt;
+        rd_image_start = image_start;
+#endif
+}
+/*
+ * Register a "System RAM" resource for every non-empty memory bank
+ * and nest the kernel code/data resources inside the bank that
+ * contains them.
+ *
+ * mi: memory bank table to walk.
+ */
+static void __init
+request_standard_resources(struct meminfo *mi)
+{
+        struct resource *ram;
+        int bank;
+        /* Kernel text: start_code up to, but not including, end_code. */
+        kernel_code.start  = init_mm.start_code;
+        kernel_code.end    = init_mm.end_code - 1;
+        /* Kernel data runs up to brk; its start depends on XIP. */
+#ifdef CONFIG_XIP_KERNEL
+        kernel_data.start  = init_mm.start_data;
+#else
+        kernel_data.start  = init_mm.end_code;
+#endif
+        kernel_data.end    = init_mm.brk - 1;
+        for (bank = 0; bank < mi->nr_banks; bank++) {
+                unsigned long first, last;
+                if (!mi->bank[bank].size)
+                        continue;
+                first = mi->bank[bank].start;
+                last  = first + mi->bank[bank].size - 1;
+                ram = alloc_bootmem_low(sizeof(*ram));
+                ram->name  = "System RAM";
+                ram->start = first;
+                ram->end   = last;
+                ram->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+                request_resource(&iomem_resource, ram);
+                /* Nest the kernel ranges under the bank that holds them. */
+                if (kernel_code.start >= ram->start &&
+                    kernel_code.end <= ram->end)
+                        request_resource(ram, &kernel_code);
+                if (kernel_data.start >= ram->start &&
+                    kernel_data.end <= ram->end)
+                        request_resource(ram, &kernel_data);
+        }
+/*      FIXME - needed? if (mdesc->video_start) {
+                video_ram.start = mdesc->video_start;
+                video_ram.end   = mdesc->video_end;
+                request_resource(&iomem_resource, &video_ram);
+        }*/
+        /*
+         * Some machines don't have the possibility of ever
+         * possessing lp1 or lp2
+         */
+        if (0)  /* FIXME - need to do this for A5k at least */
+                request_resource(&ioport_resource, &lp0);
+}
+/*
+ *  Tag parsing.
+ *
+ * This is the new way of passing data to the kernel at boot time.  Rather
+ * than passing a fixed inflexible structure to the kernel, we pass a list
+ * of variable-sized tags to the kernel.  The first tag must be an ATAG_CORE
+ * tag for the list to be recognised (to distinguish the tagged list from
+ * a param_struct).  The list is terminated with a zero-length tag (this tag
+ * is not parsed in any way).
+ */
+/*
+ * ATAG_CORE handler.  When the tag is longer than 2 words, take the
+ * root device from it and clear MS_RDONLY from root_mountflags if
+ * bit 0 of the flags is clear.  Always returns 0.
+ */
+static int __init parse_tag_core(const struct tag *tag)
+{
+        /* Nothing to read from a tag of 2 words or fewer. */
+        if (tag->hdr.size <= 2)
+                return 0;
+        if (!(tag->u.core.flags & 1))
+                root_mountflags &= ~MS_RDONLY;
+        ROOT_DEV = old_decode_dev(tag->u.core.rootdev);
+        return 0;
+}
+__tagtable(ATAG_CORE, parse_tag_core);
+/*
+ * ATAG_MEM handler: append the described bank to meminfo.
+ *
+ * Returns 0 on success, or -EINVAL (after a warning) when all
+ * NR_BANKS slots are already in use.
+ */
+static int __init parse_tag_mem32(const struct tag *tag)
+{
+        if (meminfo.nr_banks < NR_BANKS) {
+                meminfo.bank[meminfo.nr_banks].start = tag->u.mem.start;
+                meminfo.bank[meminfo.nr_banks].size  = tag->u.mem.size;
+                meminfo.bank[meminfo.nr_banks].node  = PHYS_TO_NID(tag->u.mem.start);
+                meminfo.nr_banks += 1;
+                return 0;
+        }
+        /* Bank table is full: report the bank and drop it. */
+        printk(KERN_WARNING
+               "Ignoring memory bank 0x%08x size %dKB\n",
+                tag->u.mem.start, tag->u.mem.size / 1024);
+        return -EINVAL;
+}
+__tagtable(ATAG_MEM, parse_tag_mem32);
+#if defined(CONFIG_DUMMY_CONSOLE)
+/*
+ * Default text console geometry (80 columns by 30 lines, 8 point
+ * font); overwritten by parse_tag_videotext() when the bootloader
+ * passes an ATAG_VIDEOTEXT tag.
+ */
+struct screen_info screen_info = {
+ .orig_video_lines      = 30,
+ .orig_video_cols       = 80,
+ .orig_video_mode       = 0,
+ .orig_video_ega_bx     = 0,
+ .orig_video_isVGA      = 1,
+ .orig_video_points     = 8
+};
+/*
+ * ATAG_VIDEOTEXT handler: copy every field of the tag into
+ * screen_info.  Always returns 0.
+ */
+static int __init parse_tag_videotext(const struct tag *tag)
+{
+        screen_info.orig_x            = tag->u.videotext.x;
+        screen_info.orig_y            = tag->u.videotext.y;
+        screen_info.orig_video_page   = tag->u.videotext.video_page;
+        screen_info.orig_video_mode   = tag->u.videotext.video_mode;
+        screen_info.orig_video_cols   = tag->u.videotext.video_cols;
+        screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx;
+        screen_info.orig_video_lines  = tag->u.videotext.video_lines;
+        screen_info.orig_video_isVGA  = tag->u.videotext.video_isvga;
+        screen_info.orig_video_points = tag->u.videotext.video_points;
+        return 0;
+}
+__tagtable(ATAG_VIDEOTEXT, parse_tag_videotext);
+#endif
+/*
+ * ATAG_ACORN handler: record the MEMC control register value and the
+ * ADFS drive count from the tag.  Always returns 0.
+ */
+static int __init parse_tag_acorn(const struct tag *tag)
+{
+        memc_ctrl_reg = tag->u.acorn.memc_control_reg;
+        number_mfm_drives = tag->u.acorn.adfsdrives;
+        return 0;
+}
+__tagtable(ATAG_ACORN, parse_tag_acorn);
+/*
+ * ATAG_RAMDISK handler: decode the tag and pass it to setup_ramdisk().
+ * Flag bit 0 clear means "load", bit 1 clear means "prompt".
+ * Always returns 0.
+ */
+static int __init parse_tag_ramdisk(const struct tag *tag)
+{
+        int load_it   = !(tag->u.ramdisk.flags & 1);
+        int prompt_it = !(tag->u.ramdisk.flags & 2);
+        setup_ramdisk(load_it, prompt_it,
+                      tag->u.ramdisk.start, tag->u.ramdisk.size);
+        return 0;
+}
+__tagtable(ATAG_RAMDISK, parse_tag_ramdisk);
+/*
+ * ATAG_INITRD handler: warn that the tag is deprecated, then record
+ * the initrd start and size from it.  Always returns 0.
+ */
+static int __init parse_tag_initrd(const struct tag *tag)
+{
+        printk(KERN_WARNING "ATAG_INITRD is deprecated; please update your bootloader. \n");
+        phys_initrd_start = (unsigned long)tag->u.initrd.start;
+        phys_initrd_size = (unsigned long)tag->u.initrd.size;
+        return 0;
+}
+__tagtable(ATAG_INITRD, parse_tag_initrd);
+/*
+ * ATAG_INITRD2 handler: record the initrd start and size from the
+ * tag.  Always returns 0.
+ *
+ * This is the replacement for the deprecated ATAG_INITRD, so unlike
+ * parse_tag_initrd() it must not print the "ATAG_INITRD is
+ * deprecated" warning - a bootloader that already uses the new tag
+ * would otherwise be told to update.
+ */
+static int __init parse_tag_initrd2(const struct tag *tag)
+{
+        phys_initrd_start = (unsigned long)tag->u.initrd.start;
+        phys_initrd_size = (unsigned long)tag->u.initrd.size;
+        return 0;
+}
+__tagtable(ATAG_INITRD2, parse_tag_initrd2);
+/* ATAG_SERIAL handler: record both halves of the serial number.  Always returns 0. */
+static int __init parse_tag_serialnr(const struct tag *tag)
+{
+        system_serial_low = tag->u.serialnr.low;
+        system_serial_high = tag->u.serialnr.high;
+        return 0;
+}
+__tagtable(ATAG_SERIAL, parse_tag_serialnr);
+/* ATAG_REVISION handler: record the board revision.  Always returns 0. */
+static int __init parse_tag_revision(const struct tag *tag)
+{
+        system_rev = tag->u.revision.rev;
+        return 0;
+}
+__tagtable(ATAG_REVISION, parse_tag_revision);
+/*
+ * ATAG_CMDLINE handler: replace default_command_line with the command
+ * line carried in the tag, truncated to fit.  Always returns 0.
+ */
+static int __init parse_tag_cmdline(const struct tag *tag)
+{
+        strncpy(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE);
+        /* strncpy() does not terminate on truncation, so do it by hand */
+        default_command_line[COMMAND_LINE_SIZE - 1] = '\0';
+        return 0;
+}
+__tagtable(ATAG_CMDLINE, parse_tag_cmdline);
+/*
+ * Look this tag up in the tag table and run its parse function.
+ * The linker assembles the table from all the __tagtable
+ * declarations.
+ *
+ * Returns 1 if a handler was found (whatever the handler itself
+ * returned), 0 if the tag is unknown.
+ */
+static int __init parse_tag(const struct tag *tag)
+{
+        extern struct tagtable __tagtable_begin, __tagtable_end;
+        struct tagtable *entry;
+        for (entry = &__tagtable_begin; entry < &__tagtable_end; entry++) {
+                if (tag->hdr.tag != entry->tag)
+                        continue;
+                entry->parse(tag);
+                return 1;
+        }
+        return 0;
+}
+/*
+ * Walk the tag list up to the zero-sized terminator, handing each tag
+ * to parse_tag() and warning about those without a handler.
+ */
+static void __init parse_tags(const struct tag *t)
+{
+        while (t->hdr.size) {
+                if (!parse_tag(t))
+                        printk(KERN_WARNING
+                                "Ignoring unrecognised tag 0x%08x\n",
+                                t->hdr.tag);
+                t = tag_next(t);
+        }
+}
+/*
+ * This holds our defaults.
+ *
+ * A minimal built-in tag list (core tag, one memory tag, terminator)
+ * that setup_arch() falls back to when the bootloader did not leave a
+ * usable list behind.
+ */
+static struct init_tags {
+        struct tag_header hdr1;
+        struct tag_core   core;
+        struct tag_header hdr2;
+        struct tag_mem32  mem;
+        struct tag_header hdr3;
+} init_tags __initdata = {
+        { tag_size(tag_core), ATAG_CORE },
+        /* NOTE(review): presumably flags, pagesize, rootdev - confirm against struct tag_core */
+        { 1, PAGE_SIZE, 0xff },
+        { tag_size(tag_mem32), ATAG_MEM },
+        /* one bank of MEM_SIZE bytes starting at PHYS_OFFSET */
+        { MEM_SIZE, PHYS_OFFSET },
+        /* zero-sized tag terminates the list */
+        { 0, ATAG_NONE }
+};
+/*
+ * Architecture specific boot-time setup: identify the CPU and machine,
+ * parse the bootloader's tag list (or the built-in defaults), record
+ * the kernel image layout in init_mm, process the command line and
+ * bring up bootmem, paging and the standard resources.
+ *
+ * cmdline_p: set by parse_cmdline() to the processed command line.
+ */
+void __init setup_arch(char **cmdline_p)
+{
+        /* this initial value is replaced by the fixed address below */
+        struct tag *tags = (struct tag *)&init_tags;
+        char *from = default_command_line;
+        setup_processor();
+        if(machine_arch_type == MACH_TYPE_A5K)
+                machine_name = "A5000";
+        else if(machine_arch_type == MACH_TYPE_ARCHIMEDES)
+                machine_name = "Archimedes";
+        else
+                machine_name = "UNKNOWN";
+        //FIXME - the tag struct is always copied here but this is a block
+        // of RAM that is accidentally reserved along with video RAM. perhaps
+        // it would be a good idea to explicitly reserve this?
+        tags = (struct tag *)0x0207c000;
+        /*
+         * If we have the old style parameters, convert them to
+         * a tag list.
+         */
+        if (tags->hdr.tag != ATAG_CORE)
+                convert_to_tag_list(tags);
+        /* still no valid list after conversion: use the built-in defaults */
+        if (tags->hdr.tag != ATAG_CORE)
+                tags = (struct tag *)&init_tags;
+        if (tags->hdr.tag == ATAG_CORE) {
+                /* banks are already known, so the list's memory tags get squashed */
+                if (meminfo.nr_banks != 0)
+                        squash_mem_tags(tags);
+                parse_tags(tags);
+        }
+        /* record the kernel image layout from the linker symbols */
+        init_mm.start_code = (unsigned long) &_text;
+#ifndef CONFIG_XIP_KERNEL
+        init_mm.end_code   = (unsigned long) &_etext;
+#else
+        init_mm.end_code   = (unsigned long) &_endtext;
+        init_mm.start_data   = (unsigned long) &_sdata;
+#endif
+        init_mm.end_data   = (unsigned long) &_edata;
+        init_mm.brk        = (unsigned long) &_end;
+        /* keep an unmodified copy before parse_cmdline() filters the line */
+        memcpy(saved_command_line, from, COMMAND_LINE_SIZE);
+        saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+        parse_cmdline(&meminfo, cmdline_p, from);
+        bootmem_init(&meminfo);
+        paging_init(&meminfo);
+        request_standard_resources(&meminfo);
+#ifdef CONFIG_VT
+#if defined(CONFIG_DUMMY_CONSOLE)
+        conswitchp = &dummy_con;
+#endif
+#endif
+}
+/*
+ * Names of the elf_hwcap feature bits, indexed by bit number and
+ * NULL terminated; c_show() prints the name of every bit that is set.
+ */
+static const char *hwcap_str[] = {
+        "swp",
+        "half",
+        "thumb",
+        "26bit",
+        "fastmult",
+        "fpa",
+        "vfp",
+        "edsp",
+        NULL
+};
+/*
+ * seq_file show callback for cpuinfo_op: print the processor name and
+ * revision, BogoMIPS, feature names, CPU part/revision, machine name,
+ * board revision and serial number.  Always returns 0.
+ */
+static int c_show(struct seq_file *m, void *v)
+{
+        const char **feature;
+        int bit;
+        int rev = (int)processor_id & 15;
+        seq_printf(m, "Processor\t: %s %s rev %d (%s)\n",
+                   proc_info.manufacturer, proc_info.cpu_name,
+                   rev, elf_platform);
+        seq_printf(m, "BogoMIPS\t: %lu.%02lu\n",
+                   loops_per_jiffy / (500000/HZ),
+                   (loops_per_jiffy / (5000/HZ)) % 100);
+        /* one name per capability bit that is set */
+        seq_puts(m, "Features\t: ");
+        for (feature = hwcap_str, bit = 0; *feature; feature++, bit++)
+                if (elf_hwcap & (1 << bit))
+                        seq_printf(m, "%s ", *feature);
+        seq_puts(m, "\n");
+        seq_printf(m, "CPU part\t\t: %07x\n", processor_id >> 4);
+        seq_printf(m, "CPU revision\t: %d\n\n", rev);
+        seq_printf(m, "Hardware\t: %s\n", machine_name);
+        seq_printf(m, "Revision\t: %04x\n", system_rev);
+        seq_printf(m, "Serial\t\t: %08x%08x\n",
+                   system_serial_high, system_serial_low);
+        return 0;
+}
+/*
+ * seq_file start callback: there is exactly one record, so return a
+ * dummy non-NULL token for position 0 and NULL for anything later.
+ */
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+        if (*pos >= 1)
+                return NULL;
+        return (void *)1;
+}
+/* seq_file next callback: advance the position and end the sequence. */
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+        *pos += 1;
+        return NULL;
+}
+/* seq_file stop callback: nothing to release. */
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+/*
+ * seq_file operations built from the c_* callbacks above.
+ * NOTE(review): presumably what backs /proc/cpuinfo - confirm at the user.
+ */
+struct seq_operations cpuinfo_op = {
+        .start  = c_start,
+        .next   = c_next,
+        .stop   = c_stop,
+        .show   = c_show
+};
diff --git a/arch/arm26/kernel/signal.c b/arch/arm26/kernel/signal.c
new file mode 100644
index 000000000000..356d9809cc0b
--- /dev/null
+++ b/arch/arm26/kernel/signal.c
@@ -0,0 +1,540 @@
+/*
+ *  linux/arch/arm26/kernel/signal.c
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *  Copyright (C) 2003 Ian Molton (ARM26)
+ *
+ * FIXME!!! This is probably very broken (13/05/2003)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/personality.h>
+#include <linux/tty.h>
+#include <linux/binfmts.h>
+#include <linux/elf.h>
+#include <asm/pgalloc.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include "ptrace.h"
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))    /* mask of every signal except SIGKILL and SIGSTOP */
+/*
+ * For ARM syscalls, we encode the syscall number into the instruction.
+ * The two words below are the SWI instructions written into a signal
+ * frame's retcode slot by setup_return() when no SA_RESTORER is given.
+ */
+#define SWI_SYS_SIGRETURN       (0xef000000|(__NR_sigreturn))
+#define SWI_SYS_RT_SIGRETURN    (0xef000000|(__NR_rt_sigreturn))
+static int do_signal(sigset_t *oldset, struct pt_regs * regs, int syscall);    /* defined at the end of this file */
+/*
+ * Atomically swap in the new signal mask, and wait for a signal.
+ *
+ * SIGKILL and SIGSTOP are stripped from @mask before it is installed.
+ * The previously blocked set is passed to do_signal() as the mask to be
+ * saved in the signal frame.  The task sleeps interruptibly until
+ * do_signal() reports that a handler was set up, then returns whatever
+ * is in r0 at that point (preset to -EINTR).  @restart and @oldmask are
+ * not used here.
+ */
+asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t mask, struct pt_regs *regs)
+{
+        sigset_t prev_blocked;
+        mask &= _BLOCKABLE;
+        spin_lock_irq(&current->sighand->siglock);
+        prev_blocked = current->blocked;
+        siginitset(&current->blocked, mask);
+        recalc_sigpending();
+        spin_unlock_irq(&current->sighand->siglock);
+        regs->ARM_r0 = -EINTR;
+        for (;;) {
+                current->state = TASK_INTERRUPTIBLE;
+                schedule();
+                if (!do_signal(&prev_blocked, regs, 0))
+                        continue;
+                return regs->ARM_r0;
+        }
+}
+/*
+ * RT variant of sigsuspend: the new mask is a full sigset_t copied from
+ * user space.  Returns -EINVAL if @sigsetsize is not sizeof(sigset_t),
+ * -EFAULT if the mask cannot be read, otherwise sleeps until do_signal()
+ * has set up a handler and returns the value left in r0 (preset to
+ * -EINTR).
+ */
+asmlinkage int
+sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs *regs)
+{
+        sigset_t prev_blocked, wanted;
+        /* XXX: Don't preclude handling different sized sigset_t's. */
+        if (sigsetsize != sizeof(sigset_t))
+                return -EINVAL;
+        if (copy_from_user(&wanted, unewset, sizeof(wanted)))
+                return -EFAULT;
+        /* SIGKILL and SIGSTOP can never be blocked */
+        sigdelsetmask(&wanted, ~_BLOCKABLE);
+        spin_lock_irq(&current->sighand->siglock);
+        prev_blocked = current->blocked;
+        current->blocked = wanted;
+        recalc_sigpending();
+        spin_unlock_irq(&current->sighand->siglock);
+        regs->ARM_r0 = -EINTR;
+        for (;;) {
+                current->state = TASK_INTERRUPTIBLE;
+                schedule();
+                if (!do_signal(&prev_blocked, regs, 0))
+                        continue;
+                return regs->ARM_r0;
+        }
+}
+/*
+ * Old-style sigaction(): converts between the user-space
+ * struct old_sigaction and the kernel k_sigaction around do_sigaction().
+ *
+ * Returns -EFAULT if @act or @oact cannot be accessed (only the handler
+ * and restorer transfers are checked individually; the flags and mask
+ * transfers rely on the preceding access_ok()), otherwise the result of
+ * do_sigaction().
+ */
+asmlinkage int
+sys_sigaction(int sig, const struct old_sigaction *act,
+              struct old_sigaction *oact)
+{
+        struct k_sigaction new_ka, old_ka;
+        struct k_sigaction *new_p = NULL;
+        struct k_sigaction *old_p = NULL;
+        int ret;
+        if (act) {
+                old_sigset_t mask;
+                if (!access_ok(VERIFY_READ, act, sizeof(*act)))
+                        return -EFAULT;
+                if (__get_user(new_ka.sa.sa_handler, &act->sa_handler))
+                        return -EFAULT;
+                if (__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+                        return -EFAULT;
+                __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+                __get_user(mask, &act->sa_mask);
+                siginitset(&new_ka.sa.sa_mask, mask);
+                new_p = &new_ka;
+        }
+        if (oact)
+                old_p = &old_ka;
+        ret = do_sigaction(sig, new_p, old_p);
+        if (ret || !oact)
+                return ret;
+        /* success and the caller wants the previous action back */
+        if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
+                return -EFAULT;
+        if (__put_user(old_ka.sa.sa_handler, &oact->sa_handler))
+                return -EFAULT;
+        if (__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+                return -EFAULT;
+        __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+        /* the old ABI only carries the first word of the mask */
+        __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+        return ret;
+}
+/*
+ * Do a signal return; undo the signal stack.
+ *
+ * Layout of the non-RT frame that setup_frame() writes to the user
+ * stack and sys_sigreturn() reads back.
+ */
+struct sigframe
+{
+        struct sigcontext sc;   /* saved registers and sig[0] of the old mask (setup_sigcontext) */
+        unsigned long extramask[_NSIG_WORDS-1]; /* remaining words of the old mask, sig[1..] */
+        unsigned long retcode;  /* sigreturn SWI written by setup_return() when no restorer is set */
+};
+struct rt_sigframe     /* RT frame written by setup_rt_frame(), read back by sys_rt_sigreturn() */
+{
+        struct siginfo *pinfo;  /* set to the user address of 'info' below */
+        void *puc;              /* set to the user address of 'uc' below */
+        struct siginfo info;    /* filled by copy_siginfo_to_user() */
+        struct ucontext uc;     /* uc_mcontext holds the registers, uc_sigmask the old mask */
+        unsigned long retcode;  /* rt_sigreturn SWI written by setup_return() when no restorer is set */
+};
+/*
+ * Reload r0-r10, fp, ip, sp, lr and pc in @regs from the user-space
+ * sigcontext @sc.  Returns non-zero if any __get_user_error() flagged a
+ * failure or if valid_user_regs() rejects the restored register set.
+ */
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
+{
+        int fault = 0;
+        __get_user_error(regs->ARM_r0, &sc->arm_r0, fault);
+        __get_user_error(regs->ARM_r1, &sc->arm_r1, fault);
+        __get_user_error(regs->ARM_r2, &sc->arm_r2, fault);
+        __get_user_error(regs->ARM_r3, &sc->arm_r3, fault);
+        __get_user_error(regs->ARM_r4, &sc->arm_r4, fault);
+        __get_user_error(regs->ARM_r5, &sc->arm_r5, fault);
+        __get_user_error(regs->ARM_r6, &sc->arm_r6, fault);
+        __get_user_error(regs->ARM_r7, &sc->arm_r7, fault);
+        __get_user_error(regs->ARM_r8, &sc->arm_r8, fault);
+        __get_user_error(regs->ARM_r9, &sc->arm_r9, fault);
+        __get_user_error(regs->ARM_r10, &sc->arm_r10, fault);
+        __get_user_error(regs->ARM_fp, &sc->arm_fp, fault);
+        __get_user_error(regs->ARM_ip, &sc->arm_ip, fault);
+        __get_user_error(regs->ARM_sp, &sc->arm_sp, fault);
+        __get_user_error(regs->ARM_lr, &sc->arm_lr, fault);
+        __get_user_error(regs->ARM_pc, &sc->arm_pc, fault);
+        /* the values came from user space: make sure they are acceptable */
+        fault |= !valid_user_regs(regs);
+        return fault;
+}
+asmlinkage int sys_sigreturn(struct pt_regs *regs)
+{
+        struct sigframe *frame;
+        sigset_t set;
+        /*
+         * Return from a non-RT signal handler: the struct sigframe at
+         * the user 'sp' supplies the signal mask to reinstate and the
+         * registers to restore.  Any problem with the frame ends in a
+         * forced SIGSEGV.
+         *
+         * Since we stacked the signal on a 64-bit boundary,
+         * 'sp' should be 8-byte aligned here.  If it's
+         * not, then the user is trying to mess with us.
+         */
+        if (regs->ARM_sp & 7)
+                goto badframe;
+        frame = (struct sigframe *)regs->ARM_sp;
+        if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+                goto badframe;
+        if (__get_user(set.sig[0], &frame->sc.oldmask)
+            || (_NSIG_WORDS > 1
+                && __copy_from_user(&set.sig[1], &frame->extramask,
+                                    sizeof(frame->extramask))))
+                goto badframe;
+        sigdelsetmask(&set, ~_BLOCKABLE);      /* never block SIGKILL/SIGSTOP */
+        spin_lock_irq(&current->sighand->siglock);
+        current->blocked = set;
+        recalc_sigpending();
+        spin_unlock_irq(&current->sighand->siglock);
+        if (restore_sigcontext(regs, &frame->sc))
+                goto badframe;
+        /* Send SIGTRAP if we're single-stepping */
+        if (current->ptrace & PT_SINGLESTEP) {
+                ptrace_cancel_bpt(current);
+                send_sig(SIGTRAP, current, 1);
+        }
+        return regs->ARM_r0;    /* the restored r0, so the return path does not clobber it */
+badframe:
+        force_sig(SIGSEGV, current);
+        return 0;
+}
+asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
+{
+        struct rt_sigframe *frame;
+        sigset_t set;
+        /*
+         * Return from an RT signal handler: the struct rt_sigframe at
+         * the user 'sp' supplies the signal mask (uc_sigmask) and the
+         * registers (uc_mcontext) to restore.  Any problem with the
+         * frame ends in a forced SIGSEGV.
+         *
+         * Since we stacked the signal on a 64-bit boundary,
+         * 'sp' should be 8-byte aligned here.  If it's
+         * not, then the user is trying to mess with us.
+         */
+        if (regs->ARM_sp & 7)
+                goto badframe;
+        frame = (struct rt_sigframe *)regs->ARM_sp;
+        if (!access_ok(VERIFY_READ, frame, sizeof (*frame)))
+                goto badframe;
+        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+                goto badframe;
+        sigdelsetmask(&set, ~_BLOCKABLE);      /* never block SIGKILL/SIGSTOP */
+        spin_lock_irq(&current->sighand->siglock);
+        current->blocked = set;
+        recalc_sigpending();
+        spin_unlock_irq(&current->sighand->siglock);
+        if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+                goto badframe;
+        /* Send SIGTRAP if we're single-stepping */
+        if (current->ptrace & PT_SINGLESTEP) {
+                ptrace_cancel_bpt(current);
+                send_sig(SIGTRAP, current, 1);
+        }
+        return regs->ARM_r0;    /* the restored r0, so the return path does not clobber it */
+badframe:
+        force_sig(SIGSEGV, current);
+        return 0;
+}
+/*
+ * Store the interrupted register state from @regs, the current
+ * thread's trap number, error code and fault address, and @mask (as
+ * oldmask) into the user-space sigcontext @sc.  Returns the error
+ * accumulated by the __put_user_error() calls; 0 means every store
+ * succeeded.
+ */
+static int
+setup_sigcontext(struct sigcontext *sc, /*struct _fpstate *fpstate,*/
+                 struct pt_regs *regs, unsigned long mask)
+{
+        int fault = 0;
+        __put_user_error(regs->ARM_r0, &sc->arm_r0, fault);
+        __put_user_error(regs->ARM_r1, &sc->arm_r1, fault);
+        __put_user_error(regs->ARM_r2, &sc->arm_r2, fault);
+        __put_user_error(regs->ARM_r3, &sc->arm_r3, fault);
+        __put_user_error(regs->ARM_r4, &sc->arm_r4, fault);
+        __put_user_error(regs->ARM_r5, &sc->arm_r5, fault);
+        __put_user_error(regs->ARM_r6, &sc->arm_r6, fault);
+        __put_user_error(regs->ARM_r7, &sc->arm_r7, fault);
+        __put_user_error(regs->ARM_r8, &sc->arm_r8, fault);
+        __put_user_error(regs->ARM_r9, &sc->arm_r9, fault);
+        __put_user_error(regs->ARM_r10, &sc->arm_r10, fault);
+        __put_user_error(regs->ARM_fp, &sc->arm_fp, fault);
+        __put_user_error(regs->ARM_ip, &sc->arm_ip, fault);
+        __put_user_error(regs->ARM_sp, &sc->arm_sp, fault);
+        __put_user_error(regs->ARM_lr, &sc->arm_lr, fault);
+        __put_user_error(regs->ARM_pc, &sc->arm_pc, fault);
+        /* fault details recorded for the current thread */
+        __put_user_error(current->thread.trap_no, &sc->trap_no, fault);
+        __put_user_error(current->thread.error_code, &sc->error_code, fault);
+        __put_user_error(current->thread.address, &sc->fault_address, fault);
+        __put_user_error(mask, &sc->oldmask, fault);
+        return fault;
+}
+/*
+ * Pick the user-space address for a new signal frame of @framesize
+ * bytes.  Starts from the interrupted sp, or from the top of the
+ * alternate signal stack when the handler asked for SA_ONSTACK and
+ * sas_ss_flags(sp) is zero, then moves down by @framesize and rounds
+ * down to an 8-byte boundary.
+ */
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize)
+{
+        unsigned long top = regs->ARM_sp;
+        /* This is the X/Open sanctioned signal stack switching. */
+        if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(top))
+                top = current->sas_ss_sp + current->sas_ss_size;
+        top -= framesize;
+        /* ATPCS B01 mandates 8-byte alignment */
+        top &= ~7;
+        return (void *)top;
+}
+/*
+ * Point @regs at the signal handler: r0 = @usig, sp = @frame,
+ * pc = handler address with the low two bits cleared, and lr = the
+ * return path.  The return path is the user's sa_restorer when
+ * SA_RESTORER is set; otherwise the matching sigreturn SWI is written
+ * to @rc (inside the frame) and lr points at it.  Returns 1 if that
+ * write faults, 0 on success.
+ */
+static int
+setup_return(struct pt_regs *regs, struct k_sigaction *ka,
+             unsigned long *rc, void *frame, int usig)
+{
+        unsigned long handler = (unsigned long)ka->sa.sa_handler;
+        unsigned long retcode;
+        if (!(ka->sa.sa_flags & SA_RESTORER)) {
+                unsigned long swi = SWI_SYS_SIGRETURN;
+                if (ka->sa.sa_flags & SA_SIGINFO)
+                        swi = SWI_SYS_RT_SIGRETURN;
+                if (__put_user(swi, rc))
+                        return 1;
+                retcode = (unsigned long)rc;
+        } else {
+                retcode = (unsigned long)ka->sa.sa_restorer;
+        }
+        regs->ARM_r0 = usig;
+        regs->ARM_sp = (unsigned long)frame;
+        regs->ARM_lr = retcode;
+        regs->ARM_pc = handler & ~3;
+        return 0;
+}
+/*
+ * Build a non-RT signal frame on the user stack: saved registers plus
+ * the old signal mask @set, then redirect @regs to the handler via
+ * setup_return().  Returns 0 on success, non-zero if the frame is not
+ * writable or any store failed.
+ */
+static int
+setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs)
+{
+        struct sigframe *frame = get_sigframe(ka, regs, sizeof(*frame));
+        int err;
+        if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+                return 1;
+        /* sig[0] travels inside the sigcontext, the rest in extramask */
+        err = setup_sigcontext(&frame->sc, /*&frame->fpstate,*/ regs, set->sig[0]);
+        if (_NSIG_WORDS > 1)
+                err |= __copy_to_user(frame->extramask, &set->sig[1],
+                                      sizeof(frame->extramask));
+        if (err)
+                return err;
+        return setup_return(regs, ka, &frame->retcode, frame, usig);
+}
+/*
+ * Build an RT signal frame on the user stack: the pinfo/puc pointers,
+ * the siginfo, and a ucontext holding the saved registers and the old
+ * signal mask @set.  Then redirect @regs to the handler via
+ * setup_return() and load r1/r2 with the handler's second and third
+ * arguments.  Returns 0 on success, non-zero if the frame is not
+ * writable or any store failed.
+ */
+static int
+setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
+               sigset_t *set, struct pt_regs *regs)
+{
+        struct rt_sigframe *frame = get_sigframe(ka, regs, sizeof(*frame));
+        int err = 0;
+        if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame)))
+                return 1;
+        __put_user_error(&frame->info, &frame->pinfo, err);
+        __put_user_error(&frame->uc, &frame->puc, err);
+        err |= copy_siginfo_to_user(&frame->info, info);
+        /* Clear all the bits of the ucontext we don't use.  */
+        err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext));
+        err |= setup_sigcontext(&frame->uc.uc_mcontext, /*&frame->fpstate,*/
+                                regs, set->sig[0]);
+        err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+        if (err == 0)
+                err = setup_return(regs, ka, &frame->retcode, frame, usig);
+        if (err == 0) {
+                /*
+                 * For realtime signals we must also set the second and third
+                 * arguments for the signal handler.
+                 *   -- Peter Maydell <pmaydell@chiark.greenend.org.uk> 2000-12-06
+                 *
+                 * 'frame' is a user-space pointer, so use the addresses
+                 * computed here instead of reading frame->pinfo and
+                 * frame->puc back from user memory without a uaccess
+                 * helper.  These are the same values stored into those
+                 * fields above.
+                 */
+                regs->ARM_r1 = (unsigned long)&frame->info;
+                regs->ARM_r2 = (unsigned long)&frame->uc;
+        }
+        return err;
+}
+static inline void restart_syscall(struct pt_regs *regs)       /* rewind @regs so the syscall is issued again */
+{
+        regs->ARM_r0 = regs->ARM_ORIG_r0;      /* presumably the r0 saved at syscall entry - TODO confirm */
+        regs->ARM_pc -= 4;      /* step pc back by one 4-byte instruction */
+}
+/*
+ * OK, we're invoking a handler
+ *
+ * Deliver signal @sig to its user handler.  When @syscall is non-zero
+ * the interrupted system call's restart code in r0 is resolved first:
+ * either r0 becomes -EINTR or the call is rewound with
+ * restart_syscall().  The signal number is then translated through the
+ * exec domain's signal_invmap (numbers below 32 only), the frame is
+ * built, and on success the handler's sa_mask and the signal itself
+ * are added to the blocked set unless SA_NODEFER is set.  If the frame
+ * cannot be built or the resulting registers are invalid,
+ * force_sigsegv() is called.
+ */
+static void
+handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
+              struct pt_regs * regs, int syscall)
+{
+        struct thread_info *thread = current_thread_info();
+        struct task_struct *tsk = current;
+        struct k_sigaction *ka = &tsk->sighand->action[sig-1];
+        int usig = sig;
+        int ret;
+        /*
+         * If we were from a system call, check for system call restarting...
+         */
+        if (syscall) {
+                switch (regs->ARM_r0) {
+                case -ERESTART_RESTARTBLOCK:
+                        current_thread_info()->restart_block.fn =
+                                do_no_restart_syscall;  /* fallthrough to the -EINTR case */
+                case -ERESTARTNOHAND:
+                        regs->ARM_r0 = -EINTR;
+                        break;
+                case -ERESTARTSYS:
+                        if (!(ka->sa.sa_flags & SA_RESTART)) {
+                                regs->ARM_r0 = -EINTR;
+                                break;
+                        }
+                        /* fallthrough */
+                case -ERESTARTNOINTR:
+                        restart_syscall(regs);
+                }
+        }
+        /*
+         * translate the signal
+         */
+        if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap)
+                usig = thread->exec_domain->signal_invmap[usig];
+        /*
+         * Set up the stack frame
+         */
+        if (ka->sa.sa_flags & SA_SIGINFO)
+                ret = setup_rt_frame(usig, ka, info, oldset, regs);
+        else
+                ret = setup_frame(usig, ka, oldset, regs);
+        /*
+         * Check that the resulting registers are actually sane.
+         */
+        ret |= !valid_user_regs(regs);
+        if (ret == 0) {
+                if (ka->sa.sa_flags & SA_ONESHOT)
+                        ka->sa.sa_handler = SIG_DFL;    /* one-shot handlers revert to the default action */
+                if (!(ka->sa.sa_flags & SA_NODEFER)) {
+                        spin_lock_irq(&tsk->sighand->siglock);
+                        sigorsets(&tsk->blocked, &tsk->blocked,
+                                  &ka->sa.sa_mask);
+                        sigaddset(&tsk->blocked, sig);
+                        recalc_sigpending();
+                        spin_unlock_irq(&tsk->sighand->siglock);
+                }
+                return;
+        }
+        force_sigsegv(sig, tsk);
+}
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ *
+ * Note that we go through the signals twice: once to check the signals that
+ * the kernel can handle, and then we build all the user-level signal handling
+ * stack-frames in one go after that.
+ *
+ * NOTE(review): the two notes above are not reflected in the code visible
+ * here, which fetches one signal with get_signal_to_deliver() and hands it
+ * to handle_signal(); confirm whether they are still accurate.
+ *
+ * @oldset is the mask to save in the signal frame; @syscall is non-zero
+ * when we interrupted a system call.  Returns 1 if a handler was set up,
+ * 0 otherwise (including when @regs is not user mode).  With no handler
+ * to run, an interrupted system call is restarted according to the
+ * restart code left in r0.
+ */
+static int do_signal(sigset_t *oldset, struct pt_regs *regs, int syscall)
+{
+        siginfo_t info;
+        int signr;
+        /*
+         * We want the common case to go fast, which
+         * is why we may in certain cases get here from
+         * kernel mode. Just return without doing anything
+         * if so.
+         */
+        if (!user_mode(regs))
+                return 0;
+        if (current->ptrace & PT_SINGLESTEP)
+                ptrace_cancel_bpt(current);     /* re-armed with ptrace_set_bpt() on both exit paths below */
+        
+        signr = get_signal_to_deliver(&info, regs, NULL);
+        if (signr > 0) {
+                handle_signal(signr, &info, oldset, regs, syscall);
+                if (current->ptrace & PT_SINGLESTEP)
+                        ptrace_set_bpt(current);
+                return 1;
+        }
+        /*
+         * No signal to deliver to the process - restart the syscall.
+         */
+        if (syscall) {
+                if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) {
+                        /*
+                         * Push a three-word trampoline onto the user stack:
+                         * the original pc, a restart_syscall SWI, and a load
+                         * that pops that pc again; then resume at the SWI.
+                         */
+                        u32 *usp;
+                        regs->ARM_sp -= 12;
+                        usp = (u32 *)regs->ARM_sp;
+                        put_user(regs->ARM_pc, &usp[0]);
+                        /* swi __NR_restart_syscall */
+                        put_user(0xef000000 | __NR_restart_syscall, &usp[1]);
+                        /* ldr  pc, [sp], #12 */
+// FIXME!!! is #12 correct there?
+                        put_user(0xe49df00c, &usp[2]);
+                        regs->ARM_pc = regs->ARM_sp + 4;        /* address of usp[1], the SWI */
+                }
+                if (regs->ARM_r0 == -ERESTARTNOHAND ||
+                    regs->ARM_r0 == -ERESTARTSYS ||
+                    regs->ARM_r0 == -ERESTARTNOINTR) {
+                        restart_syscall(regs);
+                }
+        }
+        if (current->ptrace & PT_SINGLESTEP)
+                ptrace_set_bpt(current);
+        return 0;
+}
+/*
+ * Called with the thread flags on the way back to user space: runs
+ * signal delivery when _TIF_SIGPENDING is set, using the currently
+ * blocked set as the mask to save.
+ */
+asmlinkage void
+do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall)
+{
+        if (!(thread_flags & _TIF_SIGPENDING))
+                return;
+        do_signal(&current->blocked, regs, syscall);
+}
diff --git a/arch/arm26/kernel/sys_arm.c b/arch/arm26/kernel/sys_arm.c
new file mode 100644
index 000000000000..e7edd201579a
--- /dev/null
+++ b/arch/arm26/kernel/sys_arm.c
@@ -0,0 +1,324 @@
+/*
+ *  linux/arch/arm26/kernel/sys_arm.c
+ *
+ *  Copyright (C) People who wrote linux/arch/i386/kernel/sys_i386.c
+ *  Copyright (C) 1995, 1996 Russell King.
+ *  Copyright (C) 2003 Ian Molton.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains various random system calls that
+ *  have a non-standard calling sequence on the Linux/arm
+ *  platform.
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+#include <asm/uaccess.h>
+#include <asm/ipc.h>
+extern unsigned long do_mremap(unsigned long addr, unsigned long old_len,
+                               unsigned long new_len, unsigned long flags,
+                               unsigned long new_addr);
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way unix traditionally does this, though.
+ *
+ * Creates the pipe with do_pipe() and copies the two descriptors to
+ * the user buffer @fildes.  Returns do_pipe()'s error, -EFAULT if the
+ * copy fails, or 0 on success.
+ */
+asmlinkage int sys_pipe(unsigned long * fildes)
+{
+        int fd[2];
+        int error = do_pipe(fd);
+        if (error)
+                return error;
+        if (copy_to_user(fildes, fd, 2*sizeof(int)))
+                return -EFAULT;
+        return 0;
+}
+/*
+ * common code for old and new mmaps
+ *
+ * Strips MAP_EXECUTABLE and MAP_DENYWRITE from @flags, resolves @fd to a
+ * file unless MAP_ANONYMOUS is set, and calls do_mmap_pgoff() with
+ * mmap_sem held for writing.  Returns the do_mmap_pgoff() result,
+ * -EINVAL for a rejected fixed mapping, or -EBADF if @fd is not open.
+ */
+inline long do_mmap2(
+        unsigned long addr, unsigned long len,
+        unsigned long prot, unsigned long flags,
+        unsigned long fd, unsigned long pgoff)
+{
+        int error = -EINVAL;
+        struct file * file = NULL;
+        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+        /*
+         * If we are doing a fixed mapping, and address < PAGE_SIZE,
+         * then deny it (only when vectors_base() is 0).
+         */
+        if (flags & MAP_FIXED && addr < PAGE_SIZE && vectors_base() == 0)
+                goto out;
+        error = -EBADF;
+        if (!(flags & MAP_ANONYMOUS)) {
+                file = fget(fd);
+                if (!file)
+                        goto out;
+        }
+        down_write(&current->mm->mmap_sem);
+        error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+        up_write(&current->mm->mmap_sem);
+        if (file)
+                fput(file);     /* drop the reference taken by fget() */
+out:
+        return error;
+}
+struct mmap_arg_struct {       /* argument block that old_mmap() copies in from user space */
+        unsigned long addr;
+        unsigned long len;
+        unsigned long prot;
+        unsigned long flags;
+        unsigned long fd;
+        unsigned long offset;   /* byte offset; old_mmap() requires it to be page aligned */
+};
+/*
+ * Old mmap entry point: all arguments arrive in one user-space
+ * struct mmap_arg_struct.  Returns -EFAULT if the block cannot be read,
+ * -EINVAL if the byte offset is not page aligned, otherwise the result
+ * of do_mmap2() with the offset converted to pages.
+ */
+asmlinkage int old_mmap(struct mmap_arg_struct *arg)
+{
+        struct mmap_arg_struct a;
+        if (copy_from_user(&a, arg, sizeof(a)))
+                return -EFAULT;
+        if (a.offset & ~PAGE_MASK)
+                return -EINVAL;
+        return do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+}
+/*
+ * mremap() wrapper: refuses (-EINVAL) an MREMAP_FIXED request whose
+ * new address lies below PAGE_SIZE while vectors_base() is 0, and
+ * otherwise calls do_mremap() with mmap_sem held for writing.
+ */
+asmlinkage unsigned long
+sys_arm_mremap(unsigned long addr, unsigned long old_len,
+               unsigned long new_len, unsigned long flags,
+               unsigned long new_addr)
+{
+        unsigned long ret;
+        if ((flags & MREMAP_FIXED) && new_addr < PAGE_SIZE &&
+            vectors_base() == 0)
+                return -EINVAL;
+        down_write(&current->mm->mmap_sem);
+        ret = do_mremap(addr, old_len, new_len, flags, new_addr);
+        up_write(&current->mm->mmap_sem);
+        return ret;
+}
+/*
+ * Perform the select(nd, in, out, ex, tv) system call with its
+ * arguments packed into a single user-space structure, which
+ * old_select() copies in and unpacks.
+ */
+struct sel_arg_struct {
+        unsigned long n;
+        fd_set *inp, *outp, *exp;
+        struct timeval *tvp;
+};
+/*
+ * Old select entry point: copy the packed argument block from user
+ * space and forward its fields to sys_select().  Returns -EFAULT if
+ * the block cannot be read.
+ */
+asmlinkage int old_select(struct sel_arg_struct *arg)
+{
+        struct sel_arg_struct args;
+        if (copy_from_user(&args, arg, sizeof(args)) != 0)
+                return -EFAULT;
+        /* sys_select() does the appropriate kernel locking */
+        return sys_select(args.n, args.inp, args.outp, args.exp, args.tvp);
+}
+/*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+ * This is really horribly ugly.
+ *
+ * The low 16 bits of @call select the operation and the high 16 bits
+ * carry a version number that changes how MSGRCV and SHMAT interpret
+ * their arguments.  Unknown operations return -EINVAL.
+ */
+asmlinkage int sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth)
+{
+        int version, ret;
+        version = call >> 16; /* hack for backward compatibility */
+        call &= 0xffff;
+        switch (call) {
+        case SEMOP:
+                return sys_semop (first, (struct sembuf *)ptr, second);
+        case SEMGET:
+                return sys_semget (first, second, third);
+        case SEMCTL: {
+                union semun fourth;
+                if (!ptr)
+                        return -EINVAL;
+                if (get_user(fourth.__pad, (void **) ptr))      /* ptr points at the semun value in user space */
+                        return -EFAULT;
+                return sys_semctl (first, second, third, fourth);
+        }
+        case MSGSND:
+                return sys_msgsnd (first, (struct msgbuf *) ptr, 
+                                   second, third);
+        case MSGRCV:
+                switch (version) {
+                case 0: {       /* version 0: msgp and msgtyp are packed in a struct ipc_kludge */
+                        struct ipc_kludge tmp;
+                        if (!ptr)
+                                return -EINVAL;
+                        if (copy_from_user(&tmp,(struct ipc_kludge *) ptr,
+                                           sizeof (tmp)))
+                                return -EFAULT;
+                        return sys_msgrcv (first, tmp.msgp, second,
+                                           tmp.msgtyp, third);
+                }
+                default:        /* later versions pass msgtyp directly in 'fifth' */
+                        return sys_msgrcv (first,
+                                           (struct msgbuf *) ptr,
+                                           second, fifth, third);
+                }
+        case MSGGET:
+                return sys_msgget ((key_t) first, second);
+        case MSGCTL:
+                return sys_msgctl (first, second, (struct msqid_ds *) ptr);
+        case SHMAT:
+                switch (version) {
+                default: {      /* attach address is returned through the user pointer in 'third' */
+                        ulong raddr;
+                        ret = do_shmat (first, (char *) ptr, second, &raddr);
+                        if (ret)
+                                return ret;
+                        return put_user (raddr, (ulong *) third);
+                }
+                case 1: /* iBCS2 emulator entry point */
+                        if (!segment_eq(get_fs(), get_ds()))
+                                return -EINVAL;
+                        return do_shmat (first, (char *) ptr,
+                                          second, (ulong *) third);
+                }
+        case SHMDT: 
+                return sys_shmdt ((char *)ptr);
+        case SHMGET:
+                return sys_shmget (first, second, third);
+        case SHMCTL:
+                return sys_shmctl (first, second,
+                                   (struct shmid_ds *) ptr);
+        default:
+                return -EINVAL;
+        }
+}
+/* Fork a new task - this creates a new program thread.
+ * This is called indirectly via a small wrapper
+ *
+ * The child starts with the parent's current user stack pointer and
+ * SIGCHLD is passed as the only flag to do_fork().
+ */
+asmlinkage int sys_fork(struct pt_regs *regs)
+{
+        return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
+}
+/* Clone a task - this clones the calling program thread.
+ * This is called indirectly via a small wrapper
+ *
+ * Returns -EINVAL if CLONE_PARENT_SETTID or CLONE_CHILD_CLEARTID is
+ * requested.  A zero @newsp means "reuse the caller's stack pointer".
+ */
+asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs *regs)
+{
+        /*
+         * We don't support SETTID / CLEARTID  (FIXME!!! (nicked from arm32))
+         */
+        if (clone_flags & (CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID))
+                return -EINVAL;
+        return do_fork(clone_flags, newsp ? newsp : regs->ARM_sp,
+                       regs, 0, NULL, NULL);
+}
+asmlinkage int sys_vfork(struct pt_regs *regs) /* do_fork() with CLONE_VFORK | CLONE_VM on the caller's stack pointer */
+{
+        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
+}
+/* sys_execve() executes a new program.
+ * This is called indirectly via a small wrapper
+ *
+ * Copies the path name in with getname(), runs do_execve() and
+ * releases the name again.  Returns the getname() error or the
+ * do_execve() result.
+ */
+asmlinkage int sys_execve(char *filenamei, char **argv, char **envp, struct pt_regs *regs)
+{
+        char *filename = getname(filenamei);
+        int error;
+        if (IS_ERR(filename))
+                return PTR_ERR(filename);
+        error = do_execve(filename, argv, envp, regs);
+        putname(filename);
+        return error;
+}
+/* FIXME - see if this is correct for arm26
+ *
+ * execve() for callers inside the kernel.  Runs do_execve() on a
+ * zeroed pt_regs; on failure the negative result is returned.  On
+ * success the function does not return: the asm block copies the
+ * register frame to the top of the thread's stack, points sp at it and
+ * branches to ret_to_user.
+ */
+long execve(const char *filename, char **argv, char **envp)
+{
+        struct pt_regs regs;
+        int ret;
+         memset(&regs, 0, sizeof(struct pt_regs));
+        ret = do_execve((char *)filename, (char __user * __user *)argv,                         (char __user * __user *)envp, &regs);
+        if (ret < 0)
+                goto out;
+        /*
+         * Save argc to the register structure for userspace.
+         */
+        regs.ARM_r0 = ret;
+        /*
+         * We were successful.  We won't be returning to our caller, but
+         * instead to user space by manipulating the kernel stack.
+         */
+        asm(    "add    r0, %0, %1\n\t"         /* r0 = thread_info + THREAD_SIZE - 8 - sizeof(regs) */
+                "mov    r1, %2\n\t"             /* r1 = &regs */
+                "mov    r2, %3\n\t"             /* r2 = sizeof(regs) */
+                "bl     memmove\n\t"    /* copy regs to top of stack */
+                "mov    r8, #0\n\t"     /* not a syscall */
+                "mov    r9, %0\n\t"     /* thread structure */
+                "mov    sp, r0\n\t"     /* reposition stack pointer */
+                "b      ret_to_user"
+                :
+                : "r" (current_thread_info()),
+                  "Ir" (THREAD_SIZE - 8 - sizeof(regs)),
+                  "r" (&regs),
+                  "Ir" (sizeof(regs))
+                : "r0", "r1", "r2", "r3", "ip", "memory");
+ out:
+        return ret;
+}
+EXPORT_SYMBOL(execve);
diff --git a/arch/arm26/kernel/time.c b/arch/arm26/kernel/time.c
new file mode 100644
index 000000000000..549a6b2e177e
--- /dev/null
+++ b/arch/arm26/kernel/time.c
@@ -0,0 +1,234 @@
+/*
+ *  linux/arch/arm26/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *  Modifications for ARM (C) 1994-2001 Russell King
+ *  Mods for ARM26 (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This file contains the ARM-specific time handling details:
+ *  reading the RTC at bootup, etc...
+ *
+ *  1994-07-02  Alan Modra
+ *              fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ *  1998-12-20  Updated NTP code according to technical memorandum Jan '96
+ *              "A Kernel Model for Precision Timekeeping" by Dave Mills
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/timex.h>
+#include <linux/errno.h>
+#include <linux/profile.h>
+#include <asm/hardware.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/ioc.h>
+u64 jiffies_64 = INITIAL_JIFFIES;      /* 64-bit tick counter, defined here for this architecture */
+EXPORT_SYMBOL(jiffies_64);
+extern unsigned long wall_jiffies;     /* used below to compute ticks not yet folded into xtime */
+/* this needs a better home */
+DEFINE_SPINLOCK(rtc_lock);
+/* change this if you have some constant time drift */
+#define USECS_PER_JIFFY (1000000/HZ)
+static int dummy_set_rtc(void) /* default hook: does nothing and reports success */
+{
+        return 0;
+}
+/*
+ * hook for setting the RTC's idea of the current time.
+ * do_set_rtc() treats a non-zero return as failure and retries sooner.
+ */
+int (*set_rtc)(void) = dummy_set_rtc;
+/*
+ * Get time offset based on IOCs timer.
+ * FIXME - if this is called with interrupts off, why the shenanigans
+ * below ?
+ *
+ * Timer 0 is latched and read twice, with IRQREQA sampled in between,
+ * so that a counter reload between the reads can be detected.  The
+ * result is (LATCH - count) scaled by tick_nsec / 1000 and divided by
+ * LATCH with rounding to nearest, i.e. microseconds assuming one tick
+ * equals LATCH counts.
+ */
+static unsigned long gettimeoffset(void)
+{
+        unsigned int count1, count2, status;
+        long offset;
+        ioc_writeb (0, IOC_T0LATCH);
+        barrier ();
+        count1 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8);
+        barrier ();
+        status = ioc_readb(IOC_IRQREQA);
+        barrier ();
+        ioc_writeb (0, IOC_T0LATCH);
+        barrier ();
+        count2 = ioc_readb(IOC_T0CNTL) | (ioc_readb(IOC_T0CNTH) << 8);
+        offset = count2;
+        if (count2 < count1) {
+                /*
+                 * We have not had an interrupt between reading count1
+                 * and count2.
+                 */
+                if (status & (1 << 5))  /* presumably the timer 0 request bit - TODO confirm */
+                        offset -= LATCH;
+        } else if (count2 > count1) {
+                /*
+                 * We have just had another interrupt between reading
+                 * count1 and count2.
+                 */
+                offset -= LATCH;
+        }
+        offset = (LATCH - offset) * (tick_nsec / 1000);
+        return (offset + LATCH/2) / LATCH;
+}
+/*
+ * Scheduler clock - returns current time in nanosec units.
+ * Resolution is one tick: the value is jiffies times the tick length.
+ */
+unsigned long long sched_clock(void)
+{
+        unsigned long long ticks = jiffies;
+        return ticks * (1000000000 / HZ);
+}
+/* xtime.tv_sec value before which do_set_rtc() will not try again; 0 = no limit yet */
+static unsigned long next_rtc_update;
+/*
+ * If we have an externally synchronized linux clock, then update
+ * CMOS clock accordingly every ~11 minutes.  set_rtc() has to be
+ * called as close as possible to 500 ms before the new second
+ * starts.
+ *
+ * Does nothing while the clock is unsynchronised (STA_UNSYNC), when no
+ * set_rtc hook is installed, or before next_rtc_update is reached.  A
+ * failed set_rtc() is retried after 60 s, a successful one repeats
+ * after 660 s.
+ */
+static inline void do_set_rtc(void)
+{
+        if (time_status & STA_UNSYNC || set_rtc == NULL)
+                return;
+//FIXME - timespec.tv_sec is a time_t not unsigned long
+        if (next_rtc_update &&
+            time_before((unsigned long)xtime.tv_sec, next_rtc_update))
+                return;
+        /*
+         * Only proceed within half a tick of the 500 ms point.  The two
+         * bounds must be joined with ||: tv_nsec cannot be below the
+         * lower bound and above the upper bound at once, so the former
+         * && form never returned and the RTC was set at an arbitrary
+         * point in the second.
+         */
+        if (xtime.tv_nsec < 500000000 - ((unsigned) tick_nsec >> 1) ||
+            xtime.tv_nsec >= 500000000 + ((unsigned) tick_nsec >> 1))
+                return;
+        if (set_rtc())
+                /*
+                 * rtc update failed.  Try again in 60s
+                 */
+                next_rtc_update = xtime.tv_sec + 60;
+        else
+                next_rtc_update = xtime.tv_sec + 660;
+}
+#define do_leds()
+/*
+ * Fill @tv with the current wall time: xtime plus the sub-tick offset
+ * from gettimeoffset() plus any ticks not yet folded into xtime
+ * (jiffies - wall_jiffies).  The snapshot is retried until it was taken
+ * without a concurrent xtime_lock writer.
+ */
+void do_gettimeofday(struct timeval *tv)
+{
+        unsigned long flags;
+        unsigned long seq;
+        unsigned long micros, secs, missed;
+        do {
+                seq = read_seqbegin_irqsave(&xtime_lock, flags);
+                micros = gettimeoffset();
+                missed = jiffies - wall_jiffies;
+                /* adds nothing when no tick is outstanding */
+                micros += missed * USECS_PER_JIFFY;
+                secs = xtime.tv_sec;
+                micros += xtime.tv_nsec / 1000;
+        } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
+        /* usec may have gone up a lot: be safe */
+        for (; micros >= 1000000; micros -= 1000000)
+                secs++;
+        tv->tv_sec = secs;
+        tv->tv_usec = micros;
+}
+EXPORT_SYMBOL(do_gettimeofday);
+int do_settimeofday(struct timespec *tv)
+{
+        if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)        /* also rejects negative tv_nsec via the unsigned cast */
+                return -EINVAL;
+        write_seqlock_irq(&xtime_lock);
+        /*
+         * This is revolting. We need to set "xtime" correctly. However, the
+         * value in this location is the value at the most recent update of
+         * wall time.  Discover what correction gettimeofday() would have
+         * done, and then undo it!
+         *
+         * Note that the caller's *tv is modified in place by this
+         * adjustment.  Returns -EINVAL (above) for an out-of-range
+         * tv_nsec, otherwise 0.
+         */
+        tv->tv_nsec -= 1000 * (gettimeoffset() +
+                        (jiffies - wall_jiffies) * USECS_PER_JIFFY);
+        while (tv->tv_nsec < 0) {
+                tv->tv_nsec += NSEC_PER_SEC;
+                tv->tv_sec--;
+        }
+        xtime.tv_sec = tv->tv_sec;
+        xtime.tv_nsec = tv->tv_nsec;
+        time_adjust = 0;                /* stop active adjtime() */
+        time_status |= STA_UNSYNC;
+        time_maxerror = NTP_PHASE_LIMIT;
+        time_esterror = NTP_PHASE_LIMIT;
+        write_sequnlock_irq(&xtime_lock);
+        clock_was_set();
+        return 0;
+}
+EXPORT_SYMBOL(do_settimeofday);
+static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)        /* handler installed through timer_irq */
+{
+        do_timer(regs);
+#ifndef CONFIG_SMP
+        update_process_times(user_mode(regs));
+#endif
+        do_set_rtc(); //FIXME - EVERY timer IRQ?
+        profile_tick(CPU_PROFILING, regs);
+        return IRQ_HANDLED; //FIXME - is this right?
+}
+static struct irqaction timer_irq = {  /* registered for IRQ_TIMER by time_init() */
+        .name   = "timer",
+        .flags  = SA_INTERRUPT,
+        .handler = timer_interrupt,
+};
+extern void ioctime_init(void);
+/*
+ * Set up timer interrupt.
+ * Loads LATCH into the IOC timer 0 latch registers, starts the timer
+ * and installs timer_irq on IRQ_TIMER.
+ */
+void __init time_init(void)
+{
+        ioc_writeb(LATCH & 255, IOC_T0LTCHL);  /* low byte of the reload value */
+        ioc_writeb(LATCH >> 8, IOC_T0LTCHH);   /* high byte of the reload value */
+        ioc_writeb(0, IOC_T0GO);
+        setup_irq(IRQ_TIMER, &timer_irq);
+}
diff --git a/arch/arm26/kernel/traps.c b/arch/arm26/kernel/traps.c
new file mode 100644
index 000000000000..f64f59022392
--- /dev/null
+++ b/arch/arm26/kernel/traps.c
@@ -0,0 +1,548 @@
+/*
+ *  linux/arch/arm26/kernel/traps.c
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *  Fragments that appear the same as linux/arch/i386/kernel/traps.c (C) Linus Torvalds
+ *  Copyright (C) 2003 Ian Molton (ARM26)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  'traps.c' handles hardware exceptions after we have saved some state in
+ *  'linux/arch/arm26/lib/traps.S'.  Mostly a debugging aid, but will probably
+ *  kill the offending process.
+ */
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/personality.h>
+#include <linux/ptrace.h>
+#include <linux/elf.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
+#include <asm/semaphore.h>
+#include "ptrace.h"
+extern void c_backtrace (unsigned long fp, int pmode);
+extern void show_pte(struct mm_struct *mm, unsigned long addr);
+/* Printable processor mode names, indexed by bad_mode()'s proc_mode. */
+const char *processor_modes[] = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" };
+/*
+ * Printable names for bad_mode()'s 'reason' argument.  The final entry is
+ * the catch-all for out-of-range reasons and must stay at index 4; each
+ * entry needs its own comma, otherwise adjacent literals are concatenated
+ * and the table silently loses an element.
+ */
+static const char *handler[] = {
+        "prefetch abort",
+        "data abort",
+        "address exception",
+        "interrupt",
+        "*bad reason*",
+};
+/*
+ * A usable stack pointer has to lie inside the kernel's view of physical
+ * memory; anything outside cannot be dumped safely.
+ *
+ * Returns 0 if 'sp' is acceptable, -EFAULT otherwise.  The upper bound is
+ * only enforced once high_memory is non-zero.
+ */
+static int verify_stack(unsigned long sp)
+{
+        if (sp < PAGE_OFFSET)
+                return -EFAULT;
+        if (high_memory != 0 && sp > (unsigned long)high_memory)
+                return -EFAULT;
+        return 0;
+}
+/*
+ * Dump out the contents of some memory nicely...
+ *
+ * @str:    prefix printed before the address range
+ * @bottom: first address to dump
+ * @top:    end of the range (exclusive)
+ *
+ * Output is eight 32-bit words per line, starting at 'bottom' rounded down
+ * to a 32-byte boundary; slots outside [bottom, top) are left blank and
+ * words that cannot be read are shown as "????????".
+ */
+static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
+{
+        unsigned long p;
+        mm_segment_t fs;
+        int i;
+        /*
+         * We need to switch to kernel mode so that we can use __get_user
+         * to safely read from kernel space.
+         */
+        fs = get_fs();
+        set_fs(KERNEL_DS);
+        printk("%s", str);
+        printk("(0x%08lx to 0x%08lx)\n", bottom, top);
+        for (p = bottom & ~31; p < top;) {
+                printk("%04lx: ", p & 0xffff);
+                for (i = 0; i < 8; i++, p += 4) {
+                        unsigned int val;
+                        if (p < bottom || p >= top)
+                                printk("         ");
+                        else if (__get_user(val, (unsigned long *)p))
+                                /* read faulted: do not print a bogus value */
+                                printk("???????? ");
+                        else
+                                printk("%08x ", val);
+                }
+                printk ("\n");
+        }
+        set_fs(fs);
+}
+/*
+ * Print the four instruction words before the PC and, in parentheses, the
+ * word at the PC.  Stops with "bad PC value." at the first unreadable word.
+ */
+static void dump_instr(struct pt_regs *regs)
+{
+        unsigned long pc = instruction_pointer(regs);
+        u32 *insn = (u32 *)pc;
+        const int digits = 8;
+        mm_segment_t old_fs;
+        int idx;
+
+        /* Use the kernel segment so __get_user may read kernel addresses. */
+        old_fs = get_fs();
+        set_fs(KERNEL_DS);
+        printk("Code: ");
+        for (idx = -4; idx <= 0; idx++) {
+                unsigned int word;
+
+                if (__get_user(word, &insn[idx])) {
+                        printk("bad PC value.");
+                        break;
+                }
+                if (idx == 0)
+                        printk("(%0*x) ", digits, word);
+                else
+                        printk("%0*x ", digits, word);
+        }
+        printk("\n");
+        set_fs(old_fs);
+}
+/* Dump memory from 'sp' up to 8192 bytes past tsk->thread_info. */
+/*static*/ void __dump_stack(struct task_struct *tsk, unsigned long sp)
+{
+        unsigned long stack_top = (unsigned long)tsk->thread_info + 8192;
+
+        dump_mem("Stack: ", sp, stack_top);
+}
+/*
+ * Print a backtrace of the current call chain via __backtrace().
+ * Does nothing unless CONFIG_DEBUG_ERRORS is set.
+ */
+void dump_stack(void)
+{
+#ifdef CONFIG_DEBUG_ERRORS
+        __backtrace();
+#endif
+}
+EXPORT_SYMBOL(dump_stack);
+//FIXME - was a static fn
+/*
+ * Print a backtrace starting from the frame pointer saved in 'regs'.
+ * A null or invalid frame pointer is reported and no backtrace is made;
+ * a frame pointer below the end of tsk's thread_info is only reported.
+ */
+void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
+{
+        unsigned int fp = regs->ARM_fp;
+        int walk = 0;
+
+        printk("Backtrace: ");
+        if (fp == 0) {
+                printk("no frame pointer");
+        } else if (verify_stack(fp) != 0) {
+                printk("invalid frame pointer 0x%08x", fp);
+        } else {
+                walk = 1;
+                /* Underflow is a warning only; the walk still happens. */
+                if (fp < (unsigned long)(tsk->thread_info + 1))
+                        printk("frame pointer underflow");
+        }
+        printk("\n");
+        if (walk)
+                c_backtrace(fp, processor_mode(regs));
+}
+/*
+ * Dump memory from 'sp' up to 8192 bytes past task->thread_info.
+ * FIXME - this is probably wrong..
+ * NOTE(review): neither 'task' nor 'sp' is checked for NULL - confirm
+ * that no caller passes NULL here.
+ */
+void show_stack(struct task_struct *task, unsigned long *sp)
+{
+        unsigned long from = (unsigned long)sp;
+        unsigned long to = (unsigned long)task->thread_info + 8192;
+
+        dump_mem("Stack: ", from, to);
+}
+DEFINE_SPINLOCK(die_lock);
+/*
+ * This function is protected against re-entrancy.
+ *
+ * Prints the error string and code, the CPU number, the registers and the
+ * current process details.  Unless the fault came from user mode outside
+ * interrupt context, it also dumps the stack, a backtrace and the code
+ * around the PC.
+ *
+ * NOTE(review): the bare "while(1);" below spins forever with die_lock
+ * held, so the spin_unlock_irq() and do_exit() after it are never reached.
+ * It looks like a debugging leftover - confirm before removing.
+ */
+NORET_TYPE void die(const char *str, struct pt_regs *regs, int err)
+{
+        struct task_struct *tsk = current;
+        console_verbose();
+        spin_lock_irq(&die_lock);
+        printk("Internal error: %s: %x\n", str, err);
+        printk("CPU: %d\n", smp_processor_id());
+        show_regs(regs);
+        printk("Process %s (pid: %d, stack limit = 0x%p)\n",
+                current->comm, current->pid, tsk->thread_info + 1);
+        if (!user_mode(regs) || in_interrupt()) {
+                /* regs + 1 is the first address past the saved registers */
+                __dump_stack(tsk, (unsigned long)(regs + 1));
+                dump_backtrace(regs, tsk);
+                dump_instr(regs);
+        }
+        /* hang here; everything below is currently unreachable */
+while(1);
+        spin_unlock_irq(&die_lock);
+        do_exit(SIGSEGV);
+}
+/* Call die() only when the fault did not come from user mode. */
+void die_if_kernel(const char *str, struct pt_regs *regs, int err)
+{
+        if (!user_mode(regs))
+                die(str, regs, err);
+}
+/* undef_sem serialises updates to the single undef_hook slot. */
+static DECLARE_MUTEX(undef_sem);
+static int (*undef_hook)(struct pt_regs *);
+/*
+ * Install 'fn' as the undefined-instruction hook.
+ * Returns 0 on success, -EBUSY if a hook is already installed.
+ */
+int request_undef_hook(int (*fn)(struct pt_regs *))
+{
+        int ret;
+
+        down(&undef_sem);
+        if (undef_hook != NULL) {
+                ret = -EBUSY;
+        } else {
+                undef_hook = fn;
+                ret = 0;
+        }
+        up(&undef_sem);
+        return ret;
+}
+/*
+ * Remove the undefined-instruction hook, provided 'fn' is the hook that is
+ * currently installed.  Returns 0 on success, -EINVAL otherwise.
+ */
+int release_undef_hook(int (*fn)(struct pt_regs *))
+{
+        int ret;
+
+        down(&undef_sem);
+        if (undef_hook != fn) {
+                ret = -EINVAL;
+        } else {
+                undef_hook = NULL;
+                ret = 0;
+        }
+        up(&undef_sem);
+        return ret;
+}
+/*
+ * Handle the extension opcodes carried in an undefined instruction.
+ * Returns 0 if 'op' was recognised and handled, 1 otherwise.
+ */
+static int undefined_extension(struct pt_regs *regs, unsigned int op)
+{
+        /* op 1: 0xde01 / 0x?7f001f0 */
+        if (op == 1) {
+                ptrace_break(current, regs);
+                return 0;
+        }
+        return 1;
+}
+/*
+ * Undefined instruction trap.
+ *
+ * ARM_pc is moved back by 4 on entry and forward again by 4 if the trap is
+ * handled.  From user mode the instruction is fetched and matching opcodes
+ * are passed to undefined_extension(); otherwise the registered undef_hook,
+ * if any, gets a chance.  Anything left unhandled raises SIGILL and, when
+ * not from user mode, ends in die().
+ */
+asmlinkage void do_undefinstr(struct pt_regs *regs)
+{
+        siginfo_t info;
+        void *pc;
+        int handled = 0;
+
+        regs->ARM_pc -= 4;
+        pc = (unsigned long *)instruction_pointer(regs); /* strip PSR */
+
+        if (!user_mode(regs)) {
+                if (undef_hook && undef_hook(regs) == 0)
+                        handled = 1;
+        } else {
+                u32 instr;
+
+                get_user(instr, (u32 *)pc);
+                if ((instr & 0x0fff00ff) == 0x07f000f0 &&
+                    undefined_extension(regs, (instr >> 8) & 255) == 0)
+                        handled = 1;
+        }
+        if (handled) {
+                regs->ARM_pc += 4;
+                return;
+        }
+
+#ifdef CONFIG_DEBUG_USER
+        printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n",
+                current->comm, current->pid, pc);
+        dump_instr(regs);
+#endif
+        info.si_signo = SIGILL;
+        info.si_errno = 0;
+        info.si_code  = ILL_ILLOPC;
+        info.si_addr  = pc;
+
+        current->thread.error_code = 0;
+        current->thread.trap_no = 6;
+
+        force_sig_info(SIGILL, &info, current);
+        die_if_kernel("Oops - undefined instruction", regs, 0);
+}
+/*
+ * Address exception: raise SIGBUS (BUS_ADRERR) for 'address' on the
+ * current task, then die() if the fault did not come from user mode.
+ */
+asmlinkage void do_excpt(unsigned long address, struct pt_regs *regs, int mode)
+{
+        siginfo_t info;
+
+#ifdef CONFIG_DEBUG_USER
+        printk(KERN_INFO "%s (%d): address exception: pc=%08lx\n",
+                current->comm, current->pid, instruction_pointer(regs));
+        dump_instr(regs);
+#endif
+        info.si_signo = SIGBUS;
+        info.si_errno = 0;
+        info.si_code  = BUS_ADRERR;
+        info.si_addr  = (void *)address;
+
+        current->thread.trap_no = 11;
+        current->thread.error_code = 0;
+
+        force_sig_info(SIGBUS, &info, current);
+        die_if_kernel("Oops - address exception", regs, mode);
+}
+/*
+ * Called for an FIQ nobody expected.  Only logs a warning, and logs
+ * nothing at all when CONFIG_IGNORE_FIQ is set.
+ */
+asmlinkage void do_unexp_fiq (struct pt_regs *regs)
+{
+#ifndef CONFIG_IGNORE_FIQ
+        printk("Hmm.  Unexpected FIQ received, but trying to continue\n");
+        printk("You may have a hardware problem...\n");
+#endif
+}
+/*
+ * bad_mode handles the impossible case in the vectors.  If you see one of
+ * these, then it's extremely serious, and could mean you have buggy hardware.
+ * It never returns, and never tries to sync.  We hope that we can at least
+ * dump out some state information...
+ *
+ * @regs:      register state at the time of the exception
+ * @reason:    index into handler[]; out-of-range values (including
+ *             negative ones) use the last entry of the table
+ * @proc_mode: index into processor_modes[]; out-of-range values are
+ *             reported as "*bad mode*"
+ */
+asmlinkage void bad_mode(struct pt_regs *regs, int reason, int proc_mode)
+{
+        unsigned int vectors = vectors_base();
+        const char *reason_name = handler[ARRAY_SIZE(handler) - 1];
+        const char *mode_name = "*bad mode*";
+
+        /* The unsigned compare rejects negative indices as well. */
+        if ((unsigned int)reason < ARRAY_SIZE(handler))
+                reason_name = handler[reason];
+        if ((unsigned int)proc_mode < ARRAY_SIZE(processor_modes))
+                mode_name = processor_modes[proc_mode];
+
+        console_verbose();
+        printk(KERN_CRIT "Bad mode in %s handler detected: mode %s\n",
+                reason_name, mode_name);
+        /*
+         * Dump out the vectors and stub routines.  Maybe a better solution
+         * would be to dump them out only if we detect that they are corrupted.
+         */
+        dump_mem(KERN_CRIT "Vectors: ", vectors, vectors + 0x40);
+        dump_mem(KERN_CRIT "Stubs: ", vectors + 0x200, vectors + 0x4b8);
+        die("Oops", regs, 0);
+        local_irq_disable();
+        panic("bad mode");
+}
+/*
+ * Handle a system call number that arm_syscall() does not own.
+ *
+ * A task with a non-Linux personality whose exec domain provides a handler
+ * gets that handler called.  Otherwise SIGILL (ILL_ILLTRP) is raised at the
+ * instruction preceding the saved PC.  Returns the saved r0 either way.
+ */
+static int bad_syscall(int n, struct pt_regs *regs)
+{
+        struct thread_info *thread = current_thread_info();
+        int foreign = current->personality != PER_LINUX;
+        siginfo_t info;
+
+        if (foreign && thread->exec_domain->handler) {
+                thread->exec_domain->handler(n, regs);
+                return regs->ARM_r0;
+        }
+
+#ifdef CONFIG_DEBUG_USER
+        printk(KERN_ERR "[%d] %s: obsolete system call %08x.\n",
+                current->pid, current->comm, n);
+        dump_instr(regs);
+#endif
+        info.si_addr  = (void *)instruction_pointer(regs) - 4;
+        info.si_code  = ILL_ILLTRP;
+        info.si_errno = 0;
+        info.si_signo = SIGILL;
+        force_sig_info(SIGILL, &info, current);
+
+        die_if_kernel("Oops", regs, n);
+        return regs->ARM_r0;
+}
+/*
+ * Clamp the range [start, end) to the VMA that find_vma() returns for
+ * 'start' in current->active_mm.  Returns immediately if end < start.
+ *
+ * NOTE(review): the clamped values are never used - no cache operation is
+ * performed here, and the cacheflush case in arm_syscall() returns 0
+ * without calling this function.
+ */
+static inline void
+do_cache_op(unsigned long start, unsigned long end, int flags)
+{
+        struct vm_area_struct *vma;
+        if (end < start)
+                return;
+        vma = find_vma(current->active_mm, start);
+        if (vma && vma->vm_start < end) {
+                if (start < vma->vm_start)
+                        start = vma->vm_start;
+                if (end > vma->vm_end)
+                        end = vma->vm_end;
+        }
+}
+/*
+ * Handle all unrecognised system calls.
+ *  0x9f0000 - 0x9fffff are some more esoteric system calls
+ *
+ * @no:   full system call number
+ * @regs: register state at the time of the call
+ *
+ * Numbers outside 0x9fxxxx are passed to bad_syscall().  Unimplemented
+ * calls in 0x9f0000..0x9f07ff return -ENOSYS; anything else that is not
+ * handled raises SIGILL (ILL_ILLTRP) and returns 0.
+ */
+#define NR(x) ((__ARM_NR_##x) - __ARM_NR_BASE)
+asmlinkage int arm_syscall(int no, struct pt_regs *regs)
+{
+        siginfo_t info;
+        if ((no >> 16) != 0x9f)
+                return bad_syscall(no, regs);
+        switch (no & 0xffff) {
+        case 0: /* branch through 0 */
+                info.si_signo = SIGSEGV;
+                info.si_errno = 0;
+                info.si_code  = SEGV_MAPERR;
+                info.si_addr  = NULL;
+                force_sig_info(SIGSEGV, &info, current);
+                die_if_kernel("branch through zero", regs, 0);
+                return 0;
+        case NR(breakpoint): /* SWI BREAK_POINT */
+                ptrace_break(current, regs);
+                return regs->ARM_r0;
+        case NR(cacheflush):
+                return 0;
+        case NR(usr26):
+                break;
+        default:
+                /* Calls 9f00xx..9f07ff are defined to return -ENOSYS
+                   if not implemented, rather than raising SIGILL.  This
+                   way the calling program can gracefully determine whether
+                   a feature is supported.  */
+                /*
+                 * 'no' still carries the 0x9f prefix here, so only its low
+                 * 16 bits can be compared against the 0x7ff limit.
+                 */
+                if ((no & 0xffff) <= 0x7ff)
+                        return -ENOSYS;
+                break;
+        }
+#ifdef CONFIG_DEBUG_USER
+        /*
+         * experience shows that these seem to indicate that
+         * something catastrophic has happened
+         */
+        printk("[%d] %s: arm syscall %d\n", current->pid, current->comm, no);
+        dump_instr(regs);
+        if (user_mode(regs)) {
+                show_regs(regs);
+                c_backtrace(regs->ARM_fp, processor_mode(regs));
+        }
+#endif
+        info.si_signo = SIGILL;
+        info.si_errno = 0;
+        info.si_code  = ILL_ILLTRP;
+        info.si_addr  = (void *)instruction_pointer(regs) - 4;
+        force_sig_info(SIGILL, &info, current);
+        die_if_kernel("Oops", regs, no);
+        return 0;
+}
+/*
+ * Report an xchg with an unsupported operand size: logs the caller's
+ * return address, the pointer and the size, then hits BUG().
+ */
+void __bad_xchg(volatile void *ptr, int size)
+{
+        printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
+                __builtin_return_address(0), ptr, size);
+        BUG();
+}
+/*
+ * A data abort trap was taken, but we did not handle the instruction.
+ * Raise SIGILL (ILL_ILLOPC) at the faulting PC for the current task, and
+ * die() if the abort did not come from user mode.
+ */
+asmlinkage void
+baddataabort(int code, unsigned long instr, struct pt_regs *regs)
+{
+        siginfo_t info;
+        unsigned long pc = instruction_pointer(regs);
+
+#ifdef CONFIG_DEBUG_USER
+        printk(KERN_ERR "[%d] %s: bad data abort: code %d instr 0x%08lx\n",
+                current->pid, current->comm, code, instr);
+        dump_instr(regs);
+        show_pte(current->mm, pc);
+#endif
+        info.si_addr  = (void *)pc;
+        info.si_code  = ILL_ILLOPC;
+        info.si_errno = 0;
+        info.si_signo = SIGILL;
+        force_sig_info(SIGILL, &info, current);
+
+        die_if_kernel("unknown data abort code", regs, instr);
+}
+/*
+ * Report a kernel BUG at file:line (plus 'data' when non-NULL) and then
+ * deliberately write through a null pointer to force a fault.
+ */
+volatile void __bug(const char *file, int line, void *data)
+{
+        printk(KERN_CRIT"kernel BUG at %s:%d!", file, line);
+        if (data)
+                printk(KERN_CRIT" - extra data = %p", data);
+        printk("\n");
+        *(int *)0 = 0;
+}
+/*
+ * Called for an I/O accessor that has no implementation: logs the name of
+ * the function on a line of its own and then hits BUG().
+ */
+void __readwrite_bug(const char *fn)
+{
+        /* Terminate the line so the BUG() output does not run into it. */
+        printk("%s called, but not implemented\n", fn);
+        BUG();
+}
+/* Log a bad page table entry value together with its file:line origin. */
+void __pte_error(const char *file, int line, unsigned long val)
+{
+        printk("%s:%d: bad pte %08lx.\n", file, line, val);
+}
+/* Log a bad pmd value together with its file:line origin. */
+void __pmd_error(const char *file, int line, unsigned long val)
+{
+        printk("%s:%d: bad pmd %08lx.\n", file, line, val);
+}
+/* Log a bad pgd value together with its file:line origin. */
+void __pgd_error(const char *file, int line, unsigned long val)
+{
+        printk("%s:%d: bad pgd %08lx.\n", file, line, val);
+}
+/* Division-by-zero hook: logs a message and calls dump_stack(). */
+asmlinkage void __div0(void)
+{
+        printk("Division by zero in kernel.\n");
+        dump_stack();
+}
+/* abort(): hit BUG(), and panic if execution somehow continues. */
+void abort(void)
+{
+        BUG();
+        /* if that doesn't kill us, halt */
+        panic("Oops failed to kill thread");
+}
+/*
+ * Hand the vector base address to __trap_init() and log that address
+ * whenever it is non-zero.
+ */
+void __init trap_init(void)
+{
+        extern void __trap_init(unsigned long);
+        unsigned long vec_base = vectors_base();
+
+        __trap_init(vec_base);
+        if (vec_base == 0)
+                return;
+        printk(KERN_DEBUG "Relocating machine vectors to 0x%08lx\n",
+                vec_base);
+}
diff --git a/arch/arm26/kernel/vmlinux-arm26-xip.lds.in b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in
new file mode 100644
index 000000000000..ca61ec8218fe
--- /dev/null
+++ b/arch/arm26/kernel/vmlinux-arm26-xip.lds.in
@@ -0,0 +1,134 @@
+/* ld script to make ARM Linux kernel
+ * taken from the i386 version by Russell King
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * borrowed from Russell's ARM port by Ian Molton
+ */
+#include <asm-generic/vmlinux.lds.h>
+OUTPUT_ARCH(arm)
+ENTRY(stext)
+jiffies = jiffies_64;
+SECTIONS
+{
+        . = TEXTADDR;
+        /*
+         * Everything between __init_begin and __init_end: init text, the
+         * proc/arch info and tag tables, the setup and early-param tables,
+         * the initcall lists and the initramfs image.  The end of the
+         * section is padded to a 32768-byte boundary.
+         */
+        .init : {                       /* Init code and data           */
+                _stext = .;
+                __init_begin = .;
+                        _sinittext = .;
+                        *(.init.text)
+                        _einittext = .;
+                __proc_info_begin = .;
+                        *(.proc.info)
+                __proc_info_end = .;
+                __arch_info_begin = .;
+                        *(.arch.info)
+                __arch_info_end = .;
+                __tagtable_begin = .;
+                        *(.taglist)
+                __tagtable_end = .;
+                . = ALIGN(16);
+                __setup_start = .;
+                        *(.init.setup)
+                __setup_end = .;
+                __early_begin = .;
+                        *(__early_param)
+                __early_end = .;
+                __initcall_start = .;
+                        *(.initcall1.init)
+                        *(.initcall2.init)
+                        *(.initcall3.init)
+                        *(.initcall4.init)
+                        *(.initcall5.init)
+                        *(.initcall6.init)
+                        *(.initcall7.init)
+                __initcall_end = .;
+                __con_initcall_start = .;
+                        *(.con_initcall.init)
+                __con_initcall_end = .;
+                . = ALIGN(32);
+                __initramfs_start = .;
+                        usr/built-in.o(.init.ramfs)
+                __initramfs_end = .;
+                . = ALIGN(32768);
+                __init_end = .;
+        }
+        /* Input sections listed here are dropped from the output image. */
+        /DISCARD/ : {                   /* Exit code and data           */
+                *(.exit.text)
+                *(.exit.data)
+                *(.exitcall.exit)
+        }
+        /*
+         * Code, fixups, read-only data, glue sections and the GOT, bracketed
+         * by the _text and _etext symbols.
+         */
+        .text : {                       /* Real text segment            */
+                _text = .;              /* Text and read-only data      */
+                        *(.text)
+                        SCHED_TEXT
+                        LOCK_TEXT       /* FIXME - borrowed from arm32 - check*/
+                        *(.fixup)
+                        *(.gnu.warning)
+                        *(.rodata)
+                        *(.rodata.*)
+                        *(.glue_7)
+                        *(.glue_7t)
+                *(.got)                 /* Global offset table          */
+                _etext = .;             /* End of text section          */
+        }
+        . = ALIGN(16);
+        /* __start___ex_table / __stop___ex_table delimit the table. */
+        __ex_table : {                  /* Exception table              */
+                __start___ex_table = .;
+                        *(__ex_table)
+                __stop___ex_table = .;
+        }
+        RODATA
+        _endtext = .;
+        . = DATAADDR;
+        _sdata = .;
+        /*
+         * Writable data, placed at DATAADDR.  In this XIP layout .init.data
+         * is collected here rather than in the .init section.
+         */
+        .data : {
+                . = ALIGN(8192);
+                /*
+                 * first, the init thread union, aligned
+                 * to an 8192 byte boundary. (see arm26/kernel/init_task.c)
+                 * FIXME - should this be 32K aligned on arm26?
+                 */
+                *(.init.task)
+                /*
+                 * The cacheline aligned data
+                 */
+                . = ALIGN(32);
+                *(.data.cacheline_aligned)
+                /*
+                 * and the usual data section
+                 */
+                *(.data)
+                CONSTRUCTORS
+                *(.init.data)
+                _edata = .;
+        }
+        /* Uninitialised data; __bss_start and _end mark its extent. */
+        .bss : {
+                __bss_start = .;        /* BSS                          */
+                *(.bss)
+                *(COMMON)
+                _end = . ;
+        }
+                                        /* Stabs debugging sections.    */
+        .stab 0 : { *(.stab) }
+        .stabstr 0 : { *(.stabstr) }
+        .stab.excl 0 : { *(.stab.excl) }
+        .stab.exclstr 0 : { *(.stab.exclstr) }
+        .stab.index 0 : { *(.stab.index) }
+        .stab.indexstr 0 : { *(.stab.indexstr) }
+        .comment 0 : { *(.comment) }
+}
diff --git a/arch/arm26/kernel/vmlinux-arm26.lds.in b/arch/arm26/kernel/vmlinux-arm26.lds.in
new file mode 100644
index 000000000000..d1d3418d7eb6
--- /dev/null
+++ b/arch/arm26/kernel/vmlinux-arm26.lds.in
@@ -0,0 +1,127 @@
+/* ld script to make ARM Linux kernel
+ * taken from the i386 version by Russell King
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * borrowed from Russell's ARM port by Ian Molton and subsequently modified.
+ */
+#include <asm-generic/vmlinux.lds.h>
+OUTPUT_ARCH(arm)
+ENTRY(stext)
+jiffies = jiffies_64;
+SECTIONS
+{
+        . = TEXTADDR;
+        /*
+         * Everything between __init_begin and __init_end: init text, the
+         * proc/arch info and tag tables, init data, the setup and
+         * early-param tables, the initcall lists and the initramfs image.
+         * The end of the section is padded to a 32768-byte boundary.
+         */
+        .init : {                       /* Init code and data           */
+                _stext = .;
+                __init_begin = .;
+                        _sinittext = .;
+                        *(.init.text)
+                        _einittext = .;
+                __proc_info_begin = .;
+                        *(.proc.info)
+                __proc_info_end = .;
+                __arch_info_begin = .;
+                        *(.arch.info)
+                __arch_info_end = .;
+                __tagtable_begin = .;
+                        *(.taglist)
+                __tagtable_end = .;
+                        *(.init.data)
+                . = ALIGN(16);
+                __setup_start = .;
+                        *(.init.setup)
+                __setup_end = .;
+                __early_begin = .;
+                        *(__early_param)
+                __early_end = .;
+                __initcall_start = .;
+                        *(.initcall1.init)
+                        *(.initcall2.init)
+                        *(.initcall3.init)
+                        *(.initcall4.init)
+                        *(.initcall5.init)
+                        *(.initcall6.init)
+                        *(.initcall7.init)
+                __initcall_end = .;
+                __con_initcall_start = .;
+                        *(.con_initcall.init)
+                __con_initcall_end = .;
+                . = ALIGN(32);
+                __initramfs_start = .;
+                        usr/built-in.o(.init.ramfs)
+                __initramfs_end = .;
+                . = ALIGN(32768);
+                __init_end = .;
+        }
+        /* Input sections listed here are dropped from the output image. */
+        /DISCARD/ : {                   /* Exit code and data           */
+                *(.exit.text)
+                *(.exit.data)
+                *(.exitcall.exit)
+        }
+        /*
+         * Code, fixups, read-only data, glue sections and the GOT, bracketed
+         * by the _text and _etext symbols.
+         */
+        .text : {                       /* Real text segment            */
+                _text = .;              /* Text and read-only data      */
+                        *(.text)
+                        SCHED_TEXT
+                        LOCK_TEXT
+                        *(.fixup)
+                        *(.gnu.warning)
+                        *(.rodata)
+                        *(.rodata.*)
+                        *(.glue_7)
+                        *(.glue_7t)
+                *(.got)                 /* Global offset table          */
+                _etext = .;             /* End of text section          */
+        }
+        . = ALIGN(16);
+        /* __start___ex_table / __stop___ex_table delimit the table. */
+        __ex_table : {                  /* Exception table              */
+                __start___ex_table = .;
+                        *(__ex_table)
+                __stop___ex_table = .;
+        }
+        RODATA
+        . = ALIGN(8192);
+        /*
+         * Writable data.  The section starts on the 8192-byte boundary set
+         * up just before it, so the init task union comes first.
+         */
+        .data : {
+                /*
+                 * first, the init task union, aligned
+                 * to an 8192 byte boundary. (see arm26/kernel/init_task.c)
+                 */
+                *(.init.task)
+                /*
+                 * The cacheline aligned data
+                 */
+                . = ALIGN(32);
+                *(.data.cacheline_aligned)
+                /*
+                 * and the usual data section
+                 */
+                *(.data)
+                CONSTRUCTORS
+                _edata = .;
+        }
+        /* Uninitialised data; __bss_start and _end mark its extent. */
+        .bss : {
+                __bss_start = .;        /* BSS                          */
+                *(.bss)
+                *(COMMON)
+                _end = . ;
+        }
+                                        /* Stabs debugging sections.    */
+        .stab 0 : { *(.stab) }
+        .stabstr 0 : { *(.stabstr) }
+        .stab.excl 0 : { *(.stab.excl) }
+        .stab.exclstr 0 : { *(.stab.exclstr) }
+        .stab.index 0 : { *(.stab.index) }
+        .stab.indexstr 0 : { *(.stab.indexstr) }
+        .comment 0 : { *(.comment) }
+}
diff --git a/arch/arm26/kernel/vmlinux.lds.S b/arch/arm26/kernel/vmlinux.lds.S
new file mode 100644
index 000000000000..811a69048010
--- /dev/null
+++ b/arch/arm26/kernel/vmlinux.lds.S
@@ -0,0 +1,12 @@
+#include <linux/config.h>
+#ifdef CONFIG_XIP_KERNEL
+#include "vmlinux-arm26-xip.lds.in"
+#else
+#include "vmlinux-arm26.lds.in"
+#endif
diff --git a/arch/arm26/lib/Makefile b/arch/arm26/lib/Makefile
new file mode 100644
index 000000000000..6df2b793d367
--- /dev/null
+++ b/arch/arm26/lib/Makefile
@@ -0,0 +1,26 @@
+#
+# linux/arch/arm26/lib/Makefile
+#
+# Copyright (C) 1995-2000 Russell King
+#
+# Objects always built into the arch library; each object is listed once.
+lib-y           := backtrace.o changebit.o csumipv6.o csumpartial.o   \
+                   csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
+                   copy_page.o delay.o findbit.o memchr.o memcpy.o    \
+                   memset.o memzero.o setbit.o                        \
+                   strchr.o strrchr.o testchangebit.o                 \
+                   testclearbit.o testsetbit.o getuser.o              \
+                   putuser.o ashldi3.o ashrdi3.o lshrdi3.o muldi3.o   \
+                   ucmpdi2.o udivdi3.o lib1funcs.o ecard.o io-acorn.o \
+                   floppydma.o io-readsb.o io-writesb.o io-writesl.o  \
+                   uaccess-kernel.o uaccess-user.o io-readsw.o        \
+                   io-writesw.o io-readsl.o
+lib-n           :=
+lib-$(CONFIG_VT)+= kbd.o
+csumpartialcopy.o: csumpartialcopygeneric.S
+csumpartialcopyuser.o: csumpartialcopygeneric.S
diff --git a/arch/arm26/lib/ashldi3.c b/arch/arm26/lib/ashldi3.c
new file mode 100644
index 000000000000..130f5a839669
--- /dev/null
+++ b/arch/arm26/lib/ashldi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/*
+ * 64-bit left shift built from two 32-bit halves: returns u << b.
+ * A shift count of 0 returns 'u' unchanged.
+ */
+DItype
+__ashldi3 (DItype u, word_type b)
+{
+  DIunion in;
+  DIunion out;
+  word_type spare;
+
+  if (b == 0)
+    return u;
+
+  in.ll = u;
+  /* Bits of the low word that stay in the low word after the shift. */
+  spare = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (spare > 0)
+    {
+      /* Shift within both words; the top bits of low spill into high. */
+      USItype spill = (USItype)in.s.low >> spare;
+      out.s.low = (USItype)in.s.low << b;
+      out.s.high = ((USItype)in.s.high << b) | spill;
+    }
+  else
+    {
+      /* Shift of a whole word or more: low empties entirely into high. */
+      out.s.low = 0;
+      out.s.high = (USItype)in.s.low << -spare;
+    }
+  return out.ll;
+}
diff --git a/arch/arm26/lib/ashrdi3.c b/arch/arm26/lib/ashrdi3.c
new file mode 100644
index 000000000000..71625d218f8d
--- /dev/null
+++ b/arch/arm26/lib/ashrdi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/*
+ * 64-bit arithmetic right shift built from two 32-bit halves: returns
+ * u >> b.  A shift count of 0 returns 'u' unchanged.
+ */
+DItype
+__ashrdi3 (DItype u, word_type b)
+{
+  DIunion in;
+  DIunion out;
+  word_type spare;
+
+  if (b == 0)
+    return u;
+
+  in.ll = u;
+  /* Bits of the high word that stay in the high word after the shift. */
+  spare = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (spare > 0)
+    {
+      /* Shift within both words; the low bits of high spill into low. */
+      USItype spill = (USItype)in.s.high << spare;
+      out.s.high = in.s.high >> b;
+      out.s.low = ((USItype)in.s.low >> b) | spill;
+    }
+  else
+    {
+      /* out.s.high = 1..1 or 0..0 */
+      out.s.high = in.s.high >> (sizeof (SItype) * BITS_PER_UNIT - 1);
+      out.s.low = in.s.high >> -spare;
+    }
+  return out.ll;
+}
diff --git a/arch/arm26/lib/backtrace.S b/arch/arm26/lib/backtrace.S
new file mode 100644
index 000000000000..d793fe4339fc
--- /dev/null
+++ b/arch/arm26/lib/backtrace.S
@@ -0,0 +1,145 @@
+/*
+ *  linux/arch/arm26/lib/backtrace.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+@ fp is 0 or stack frame
+#define frame   r4
+#define next    r5
+#define save    r6
+#define mask    r7
+#define offset  r8
+/*
+ * __backtrace: walk the frame chain starting at the current fp.
+ * c_backtrace: walk the frame chain starting at the frame pointer in r0.
+ *
+ * A frame pointer that is zero, or that has any bit of 0xfc000003 set,
+ * ends the walk: r0 is set to -2 and the routine returns.  For every
+ * frame visited the function address and return address are printed with
+ * .Lfe, and .Ldumpstm prints the registers named by the store-multiple
+ * instructions found at the start of the function.
+ *
+ * The loads labelled 1001-1006 are referenced from the exception table
+ * that follows this routine.
+ */
+ENTRY(__backtrace)
+                mov     r1, #0x10               @ r1 is not read by the code below
+                mov     r0, fp
+ENTRY(c_backtrace)
+#ifdef CONFIG_NO_FRAME_POINTER
+                mov     pc, lr
+#else
+                stmfd   sp!, {r4 - r8, lr}      @ Save an extra register so we have a location...
+                mov     mask, #0xfc000003
+                tst     mask, r0
+                movne   r0, #0
+                movs    frame, r0
+                moveq   r0, #-2                 @ no usable frame: return straight away
+                LOADREGS(eqfd, sp!, {r4 - r8, pc})
+2:              stmfd   sp!, {pc}               @ calculate offset of PC in STMIA instruction
+                ldr     r0, [sp], #4
+                adr     r1, 2b - 4
+                sub     offset, r0, r1
+3:              tst     frame, mask             @ Check for address exceptions...
+                @ Return here when the check fails.  Branching back to the
+                @ entry check with Z clear skipped its conditional return
+                @ and came straight back to this test, looping forever.
+                movne   r0, #-2
+                LOADREGS(nefd, sp!, {r4 - r8, pc})
+1001:           ldr     next, [frame, #-12]     @ get fp
+1002:           ldr     r2, [frame, #-4]        @ get lr
+1003:           ldr     r3, [frame, #0]         @ get pc
+                sub     save, r3, offset        @ Correct PC for prefetching
+                bic     save, save, mask
+1004:           ldr     r1, [save, #0]          @ get instruction at function
+                mov     r1, r1, lsr #10
+                ldr     r3, .Ldsi+4
+                teq     r1, r3
+                subeq   save, save, #4
+                adr     r0, .Lfe
+                mov     r1, save
+                bic     r2, r2, mask
+                bl      printk                  @ print pc and link register
+                ldr     r0, [frame, #-8]        @ get sp
+                sub     r0, r0, #4
+1005:           ldr     r1, [save, #4]          @ get instruction at function+4
+                mov     r3, r1, lsr #10
+                ldr     r2, .Ldsi+4
+                teq     r3, r2                  @ Check for stmia sp!, {args}
+                addeq   save, save, #4          @ next instruction
+                bleq    .Ldumpstm
+                sub     r0, frame, #16
+1006:           ldr     r1, [save, #4]          @ Get 'stmia sp!, {rlist, fp, ip, lr, pc}' instruction
+                mov     r3, r1, lsr #10
+                ldr     r2, .Ldsi
+                teq     r3, r2
+                bleq    .Ldumpstm
+                teq     frame, next             @ stop if the chain points back at itself
+                movne   frame, next
+                teqne   frame, #0               @ ... or if the next frame pointer is zero
+                bne     3b
+                LOADREGS(fd, sp!, {r4 - r8, pc})
+/*
+ * Fault fixup for the frame walk: the exception table below pairs each of
+ * the loads labelled 1001-1006 with 1007, which prints .Lbad together with
+ * the current frame pointer and then returns through the registers saved
+ * on entry to c_backtrace.
+ */
+                .section .fixup,"ax"
+                .align  0
+1007:           ldr     r0, =.Lbad
+                mov     r1, frame
+                bl      printk
+                LOADREGS(fd, sp!, {r4 - r8, pc})
+                .ltorg
+                .previous
+                
+                .section __ex_table,"a"
+                .align  3
+                .long   1001b, 1007b
+                .long   1002b, 1007b
+                .long   1003b, 1007b
+                .long   1004b, 1007b
+                .long   1005b, 1007b
+                .long   1006b, 1007b
+                .previous
+/*
+ * .Ldumpstm: print the registers named in a store-multiple instruction.
+ *
+ *  r0 = address of the first saved value to read
+ *  r1 = the instruction word; bits 9..0 are scanned from bit 9 downwards
+ *
+ * For every set bit one word is read from the address in 'stack', which is
+ * then moved down by four bytes, and the pair (register number, value) is
+ * printed with .Lfp.  r7 counts entries on the current output line: the
+ * fourth entry is terminated with a newline, the others with a space, and
+ * a final newline is printed if the last line was left incomplete.
+ *
+ * Returns r0 = the read address after the last value consumed.
+ */
+#define instr r4
+#define reg   r5
+#define stack r6
+.Ldumpstm:      stmfd   sp!, {instr, reg, stack, r7, lr}
+                mov     stack, r0
+                mov     instr, r1
+                mov     reg, #9
+                mov     r7, #0
+1:              mov     r3, #1
+                tst     instr, r3, lsl reg      @ is this register in the list?
+                beq     2f
+                add     r7, r7, #1
+                teq     r7, #4                  @ fourth entry on this line?
+                moveq   r7, #0
+                moveq   r3, #'\n'
+                movne   r3, #' '
+                ldr     r2, [stack], #-4
+                mov     r1, reg
+                adr     r0, .Lfp
+                bl      printk
+2:              subs    reg, reg, #1
+                bpl     1b
+                teq     r7, #0                  @ finish a partly filled line
+                adrne   r0, .Lcr
+                blne    printk
+                mov     r0, stack
+                LOADREGS(fd, sp!, {instr, reg, stack, r7, pc})
+.Lfe:           .asciz  "Function entered at [<%p>] from [<%p>]\n"
+.Lfp:           .asciz  " r%d = %08X%c"
+.Lcr:           .asciz  "\n"
+.Lbad:          .asciz  "Backtrace aborted due to bad frame pointer <%p>\n"
+                .align
+/*
+ * Instruction patterns, pre-shifted so that they compare equal to an
+ * instruction word shifted right by 10 bits (as done in c_backtrace).
+ */
+.Ldsi:          .word   0x00e92dd8 >> 2
+                .word   0x00e92d00 >> 2
+#endif
diff --git a/arch/arm26/lib/changebit.S b/arch/arm26/lib/changebit.S
new file mode 100644
index 000000000000..1b6a077be5a6
--- /dev/null
+++ b/arch/arm26/lib/changebit.S
@@ -0,0 +1,28 @@
+/*
+ *  linux/arch/arm26/lib/changebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/* Purpose  : Function to change a bit
+ * Prototype: int change_bit(int bit, void *addr)
+ *
+ * r0 = bit number, r1 = base address.  The byte at addr + (bit >> 3) is
+ * loaded, bit (bit & 7) of it is toggled and the byte is stored back; the
+ * load/modify/store sequence sits between save_and_disable_irqs and
+ * restore_irqs.  The _be entry XORs the bit number with 0x18 first and
+ * then falls through into the _le code.
+ *
+ * NOTE(review): r0 is not given a result value before returning, so the
+ * "int" in the prototype above does not describe a computed return value;
+ * confirm against the C declaration.
+ */
+ENTRY(_change_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_change_bit_le)
+                and     r2, r0, #7              @ bit position within the byte
+                mov     r3, #1
+                mov     r3, r3, lsl r2          @ r3 = single-bit mask
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1, r0, lsr #3]
+                eor     r2, r2, r3
+                strb    r2, [r1, r0, lsr #3]
+                restore_irqs ip
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/clearbit.S b/arch/arm26/lib/clearbit.S
new file mode 100644
index 000000000000..0a895b0c759f
--- /dev/null
+++ b/arch/arm26/lib/clearbit.S
@@ -0,0 +1,31 @@
+/*
+ *  linux/arch/arm26/lib/clearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Purpose  : Function to clear a bit
+ * Prototype: int clear_bit(int bit, void *addr)
+ *
+ * r0 = bit number, r1 = base address.  The byte at addr + (bit >> 3) is
+ * loaded, bit (bit & 7) of it is cleared and the byte is stored back; the
+ * load/modify/store sequence sits between save_and_disable_irqs and
+ * restore_irqs.  The _be entry XORs the bit number with 0x18 first and
+ * then falls through into the _le code.
+ *
+ * NOTE(review): r0 is not given a result value before returning, so the
+ * "int" in the prototype above does not describe a computed return value;
+ * confirm against the C declaration.
+ */
+ENTRY(_clear_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering
+ENTRY(_clear_bit_le)
+                and     r2, r0, #7              @ bit position within the byte
+                mov     r3, #1
+                mov     r3, r3, lsl r2          @ r3 = single-bit mask
+                save_and_disable_irqs ip, r2
+                ldrb    r2, [r1, r0, lsr #3]
+                bic     r2, r2, r3
+                strb    r2, [r1, r0, lsr #3]
+                restore_irqs ip
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/copy_page.S b/arch/arm26/lib/copy_page.S
new file mode 100644
index 000000000000..2d79ee12ea1f
--- /dev/null
+++ b/arch/arm26/lib/copy_page.S
@@ -0,0 +1,62 @@
+/*
+ *  linux/arch/arm26/lib/copy_page.S
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm_offsets.h>
+                .text
+                .align  5
+/*
+ * ARMv3 optimised copy_user_page
+ *
+ * r0 = destination (written with stmia r0!), r1 = source (read with
+ * ldmia r1!).  The loop runs PAGE_SZ/64 times and stores four 16-byte
+ * groups per pass.  The first group is loaded ahead of the loop and the
+ * load at the bottom is conditional, so no load is issued once the counter
+ * has reached zero.
+ *
+ * FIXME: rmk do we need to handle cache stuff...
+ * FIXME: im is this right on ARM26?
+ */
+ENTRY(__copy_user_page)
+        stmfd   sp!, {r4, lr}                   @       2
+        mov     r2, #PAGE_SZ/64                 @       1
+        ldmia   r1!, {r3, r4, ip, lr}           @       4+1
+1:      stmia   r0!, {r3, r4, ip, lr}           @       4
+        ldmia   r1!, {r3, r4, ip, lr}           @       4+1
+        stmia   r0!, {r3, r4, ip, lr}           @       4
+        ldmia   r1!, {r3, r4, ip, lr}           @       4+1
+        stmia   r0!, {r3, r4, ip, lr}           @       4
+        ldmia   r1!, {r3, r4, ip, lr}           @       4
+        subs    r2, r2, #1                      @       1
+        stmia   r0!, {r3, r4, ip, lr}           @       4
+        ldmneia r1!, {r3, r4, ip, lr}           @       4
+        bne     1b                              @       1
+        LOADREGS(fd, sp!, {r4, pc})             @       3
+        .align  5
+/*
+ * ARMv3 optimised clear_user_page
+ *
+ * r0 = address of the page to clear.  Four registers are zeroed once and
+ * stored four times per pass (64 bytes), for PAGE_SZ/64 passes.  lr is
+ * used as one of the zero registers, so the return address is kept on the
+ * stack and loaded back into pc at the end.
+ *
+ * FIXME: rmk do we need to handle cache stuff...
+ */
+ENTRY(__clear_user_page)
+        str     lr, [sp, #-4]!
+        mov     r1, #PAGE_SZ/64                 @ 1
+        mov     r2, #0                          @ 1
+        mov     r3, #0                          @ 1
+        mov     ip, #0                          @ 1
+        mov     lr, #0                          @ 1
+1:      stmia   r0!, {r2, r3, ip, lr}           @ 4
+        stmia   r0!, {r2, r3, ip, lr}           @ 4
+        stmia   r0!, {r2, r3, ip, lr}           @ 4
+        stmia   r0!, {r2, r3, ip, lr}           @ 4
+        subs    r1, r1, #1                      @ 1
+        bne     1b                              @ 1
+        ldr     pc, [sp], #4
+/* NOTE(review): this section switch is the last line of the file as shown
+   here, so nothing visible is placed in .init.text - confirm it is wanted. */
+        .section ".init.text", #alloc, #execinstr
diff --git a/arch/arm26/lib/csumipv6.S b/arch/arm26/lib/csumipv6.S
new file mode 100644
index 000000000000..62831155acde
--- /dev/null
+++ b/arch/arm26/lib/csumipv6.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/arm26/lib/csumipv6.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * __csum_ipv6_magic
+ *
+ * Adds together, with carry propagated from one addition to the next:
+ * r2 and r3, the four words at the address in r1, the four words at the
+ * address in r0, and the word that was on top of the stack at entry.
+ * The final "adcs #0" folds the last carry back in.  Result is in r0.
+ *
+ * Presumably the arguments are (saddr, daddr, len, proto, sum) - TODO
+ * confirm against the C prototype, which is not part of this file.
+ */
+ENTRY(__csum_ipv6_magic)
+                str     lr, [sp, #-4]!
+                adds    ip, r2, r3
+                ldmia   r1, {r1 - r3, lr}
+                adcs    ip, ip, r1
+                adcs    ip, ip, r2
+                adcs    ip, ip, r3
+                adcs    ip, ip, lr
+                ldmia   r0, {r0 - r3}
+                adcs    r0, ip, r0
+                adcs    r0, r0, r1
+                adcs    r0, r0, r2
+                ldr     r2, [sp, #4]            @ stack argument (above the saved lr)
+                adcs    r0, r0, r3
+                adcs    r0, r0, r2
+                adcs    r0, r0, #0
+                LOADREGS(fd, sp!, {pc})
diff --git a/arch/arm26/lib/csumpartial.S b/arch/arm26/lib/csumpartial.S
new file mode 100644
index 000000000000..e53e7109e623
--- /dev/null
+++ b/arch/arm26/lib/csumpartial.S
@@ -0,0 +1,130 @@
+/*
+ *  linux/arch/arm26/lib/csumpartial.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Function: __u32 csum_partial(const char *src, int len, __u32 sum)
+ * Params  : r0 = buffer, r1 = len, r2 = checksum
+ * Returns : r0 = new checksum
+ *
+ * The running sum is accumulated with adcs, so the carry flag is live
+ * between the helper fragments below; the tst/teq/bics instructions used
+ * between additions are relied on to leave it alone.  The original buffer
+ * pointer is pushed on entry so that .done can test its bit 0 and rotate
+ * the result by 8 bits when the buffer started on an odd address.
+ */
+buf     .req    r0
+len     .req    r1
+sum     .req    r2
+td0     .req    r3
+td1     .req    r4      @ save before use
+td2     .req    r5      @ save before use
+td3     .req    lr
+                /* Zero length: return the incoming sum unchanged. */
+.zero:          mov     r0, sum
+                add     sp, sp, #4              @ drop the saved buffer pointer
+                ldr     pc, [sp], #4
+                /*
+                 * Handle 0 to 7 bytes, with any alignment of source and
+                 * destination pointers.  Note that when we get here, C = 0
+                 */
+.less8:         teq     len, #0                 @ check for zero count
+                beq     .zero
+                /* we must have at least one byte. */
+                tst     buf, #1                 @ odd address?
+                ldrneb  td0, [buf], #1
+                subne   len, len, #1
+                adcnes  sum, sum, td0, lsl #byte(1)
+.less4:         tst     len, #6
+                beq     .less8_byte
+                /* we are now half-word aligned */
+.less8_wordlp:
+#if __LINUX_ARM_ARCH__ >= 4
+                ldrh    td0, [buf], #2
+                sub     len, len, #2
+#else
+                ldrb    td0, [buf], #1
+                ldrb    td3, [buf], #1
+                sub     len, len, #2
+                orr     td0, td0, td3, lsl #8
+#endif
+                adcs    sum, sum, td0
+                tst     len, #6
+                bne     .less8_wordlp
+.less8_byte:    tst     len, #1                 @ odd number of bytes
+                ldrneb  td0, [buf], #1          @ include last byte
+                adcnes  sum, sum, td0, lsl #byte(0)     @ update checksum
+.done:          adc     r0, sum, #0             @ collect up the last carry
+                ldr     td0, [sp], #4           @ original buffer pointer
+                tst     td0, #1                 @ check buffer alignment
+                movne   td0, r0, lsl #8         @ rotate checksum by 8 bits
+                orrne   r0, td0, r0, lsr #24
+                ldr     pc, [sp], #4            @ return
+                /*
+                 * Called with bl from csum_partial: consume up to three
+                 * leading bytes so that buf becomes 32-bit aligned, then
+                 * return with the carry from the additions still in C.
+                 */
+.not_aligned:   tst     buf, #1                 @ odd address
+                ldrneb  td0, [buf], #1          @ make even
+                subne   len, len, #1
+                adcnes  sum, sum, td0, lsl #byte(1)     @ update checksum
+                tst     buf, #2                 @ 32-bit aligned?
+#if __LINUX_ARM_ARCH__ >= 4
+                ldrneh  td0, [buf], #2          @ make 32-bit aligned
+                subne   len, len, #2
+#else
+                ldrneb  td0, [buf], #1
+                ldrneb  ip, [buf], #1
+                subne   len, len, #2
+                orrne   td0, td0, ip, lsl #8
+#endif
+                adcnes  sum, sum, td0           @ update checksum
+                mov     pc, lr
+ENTRY(csum_partial)
+                stmfd   sp!, {buf, lr}
+                cmp     len, #8                 @ Ensure that we have at least
+                blo     .less8                  @ 8 bytes to sum.
+                adds    sum, sum, #0            @ C = 0
+                tst     buf, #3                 @ Test buffer alignment
+                blne    .not_aligned            @ align buffer, return here
+1:              bics    ip, len, #31            @ ip = bytes covered by 32-byte passes
+                beq     3f
+                stmfd   sp!, {r4 - r5}
+2:              ldmia   buf!, {td0, td1, td2, td3}
+                adcs    sum, sum, td0
+                adcs    sum, sum, td1
+                adcs    sum, sum, td2
+                adcs    sum, sum, td3
+                ldmia   buf!, {td0, td1, td2, td3}
+                adcs    sum, sum, td0
+                adcs    sum, sum, td1
+                adcs    sum, sum, td2
+                adcs    sum, sum, td3
+                sub     ip, ip, #32
+                teq     ip, #0
+                bne     2b
+                ldmfd   sp!, {r4 - r5}
+3:              tst     len, #0x1c              @ should not change C
+                beq     .less4
+4:              ldr     td0, [buf], #4          @ remaining whole words
+                sub     len, len, #4
+                adcs    sum, sum, td0
+                tst     len, #0x1c
+                bne     4b
+                b       .less4
diff --git a/arch/arm26/lib/csumpartialcopy.S b/arch/arm26/lib/csumpartialcopy.S
new file mode 100644
index 000000000000..a1c4b5fdd498
--- /dev/null
+++ b/arch/arm26/lib/csumpartialcopy.S
@@ -0,0 +1,52 @@
+/*
+ *  linux/arch/arm26/lib/csumpartialcopy.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum)
+ * Params  : r0 = src, r1 = dst, r2 = len, r3 = checksum
+ * Returns : r0 = new checksum
+ *
+ * This file only supplies the register save/restore and source-load
+ * macros plus the entry-point name; the routine body comes from
+ * csumpartialcopygeneric.S, included at the bottom.  Every load macro
+ * reads from r0 with post-increment.
+ */
+                /* Push the frame: dst (r1), r4-r8, fp, ip, lr, pc. */
+                .macro  save_regs
+                stmfd   sp!, {r1, r4 - r8, fp, ip, lr, pc}
+                .endm
+                /* Restore the saved registers relative to fp and return. */
+                .macro  load_regs,flags
+                LOADREGS(\flags,fp,{r1, r4 - r8, fp, sp, pc})
+                .endm
+                /* One byte from the source. */
+                .macro  load1b, reg1
+                ldrb    \reg1, [r0], #1
+                .endm
+                /* Two consecutive bytes from the source. */
+                .macro  load2b, reg1, reg2
+                ldrb    \reg1, [r0], #1
+                ldrb    \reg2, [r0], #1
+                .endm
+                /* One word from the source. */
+                .macro  load1l, reg1
+                ldr     \reg1, [r0], #4
+                .endm
+                /* Two consecutive words from the source. */
+                .macro  load2l, reg1, reg2
+                ldr     \reg1, [r0], #4
+                ldr     \reg2, [r0], #4
+                .endm
+                /* Four consecutive words from the source. */
+                .macro  load4l, reg1, reg2, reg3, reg4
+                ldmia   r0!, {\reg1, \reg2, \reg3, \reg4}
+                .endm
+#define FN_ENTRY        ENTRY(csum_partial_copy_nocheck)
+#include "csumpartialcopygeneric.S"
diff --git a/arch/arm26/lib/csumpartialcopygeneric.S b/arch/arm26/lib/csumpartialcopygeneric.S
new file mode 100644
index 000000000000..5249c3ad11db
--- /dev/null
+++ b/arch/arm26/lib/csumpartialcopygeneric.S
@@ -0,0 +1,352 @@
+/*
+ *  linux/arch/arm26/lib/csumpartialcopygeneric.S
+ *
+ *  Copyright (C) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * JMA 01/06/03 Commented out some shl0s; probably irrelevant to arm26
+ *
+ */
+/*
+ * unsigned int
+ * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
+ *  r0 = src, r1 = dst, r2 = len, r3 = sum
+ *  Returns : r0 = checksum
+ *
+ * Note that 'tst' and 'teq' preserve the carry flag.
+ */
+/* Quick hack */
+/*
+ * NOTE(review): csumpartialcopy.S defines a save_regs macro with this same
+ * body before including this file, so the macro is defined twice in that
+ * translation unit - confirm how the assembler in use treats that.
+ */
+                .macro  save_regs
+                stmfd   sp!, {r1, r4 - r8, fp, ip, lr, pc}
+                .endm
+/* end Quick Hack */
+src     .req    r0
+dst     .req    r1
+len     .req    r2
+sum     .req    r3
+                /* Zero length: return the incoming sum unchanged. */
+.zero:          mov     r0, sum
+                load_regs       ea
+                /*
+                 * Align an unaligned destination pointer.  We know that
+                 * we have >= 8 bytes here, so we don't need to check
+                 * the length.  Note that the source pointer hasn't been
+                 * aligned yet.
+                 *
+                 * Called with bl; copies one and/or two bytes, adding
+                 * them to the sum, and returns with mov pc, lr.
+                 */
+.dst_unaligned: tst     dst, #1
+                beq     .dst_16bit
+                load1b  ip
+                sub     len, len, #1
+                adcs    sum, sum, ip, lsl #byte(1)      @ update checksum
+                strb    ip, [dst], #1
+                tst     dst, #2
+                moveq   pc, lr                  @ dst is now 32bit aligned
+.dst_16bit:     load2b  r8, ip
+                sub     len, len, #2
+                adcs    sum, sum, r8, lsl #byte(0)
+                strb    r8, [dst], #1
+                adcs    sum, sum, ip, lsl #byte(1)
+                strb    ip, [dst], #1
+                mov     pc, lr                  @ dst is now 32bit aligned
+                /*
+                 * Handle 0 to 7 bytes, with any alignment of source and
+                 * destination pointers.  Note that when we get here, C = 0
+                 */
+.less8:         teq     len, #0                 @ check for zero count
+                beq     .zero
+                /* we must have at least one byte. */
+                tst     dst, #1                 @ dst 16-bit aligned
+                beq     .less8_aligned
+                /* Align dst */
+                load1b  ip
+                sub     len, len, #1
+                adcs    sum, sum, ip, lsl #byte(1)      @ update checksum
+                strb    ip, [dst], #1
+                tst     len, #6
+                beq     .less8_byteonly
+                /* Copy and sum two bytes at a time while len has bit 1 or 2 set. */
+1:              load2b  r8, ip
+                sub     len, len, #2
+                adcs    sum, sum, r8, lsl #byte(0)
+                strb    r8, [dst], #1
+                adcs    sum, sum, ip, lsl #byte(1)
+                strb    ip, [dst], #1
+.less8_aligned: tst     len, #6
+                bne     1b
+.less8_byteonly:
+                tst     len, #1                 @ one odd byte left?
+                beq     .done
+                load1b  r8
+                adcs    sum, sum, r8, lsl #byte(0)      @ update checksum
+                strb    r8, [dst], #1
+                b       .done
+/*
+ * Entry point; the symbol name is supplied by the including file through
+ * FN_ENTRY.  r0 = src, r1 = dst, r2 = len, r3 = sum; returns r0 = checksum.
+ *
+ * Lengths below 8 go to .less8.  Otherwise dst is brought to 32-bit
+ * alignment by .dst_unaligned and the copy continues either here (src
+ * also word aligned) or in .src_not_aligned.  The sum is accumulated with
+ * adcs, so the carry flag is live across the whole routine.
+ */
+FN_ENTRY
+                mov     ip, sp
+                save_regs
+                sub     fp, ip, #4
+                cmp     len, #8                 @ Ensure that we have at least
+                blo     .less8                  @ 8 bytes to copy.
+                adds    sum, sum, #0            @ C = 0
+                tst     dst, #3                 @ Test destination alignment
+                blne    .dst_unaligned          @ align destination, return here
+                /*
+                 * Ok, the dst pointer is now 32bit aligned, and we know
+                 * that we must have more than 4 bytes to copy.  Note
+                 * that C contains the carry from the dst alignment above.
+                 */
+                tst     src, #3                 @ Test source alignment
+                bne     .src_not_aligned
+                /* Routine for src & dst aligned */
+                bics    ip, len, #15
+                beq     2f
+1:              load4l  r4, r5, r6, r7
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r4, r5
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                tst     ip, #4
+                beq     4f
+3:              load1l  r4
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+4:              ands    len, len, #3
+                beq     .done
+                load1l  r4
+                tst     len, #2
+                /*
+                 * r5 must hold byte 0 of the tail word before it is stored
+                 * below or at .exit.  The former "mov r5, r4, lsr #byte(0)"
+                 * was a zero shift, so a plain register move does the same
+                 * job.  It leaves the flags from the tst above intact.
+                 */
+                mov     r5, r4
+                beq     .exit
+                adcs    sum, sum, r4, push #16
+                strb    r5, [dst], #1
+                mov     r5, r4, lsr #byte(1)
+                strb    r5, [dst], #1
+                mov     r5, r4, lsr #byte(2)
+                /* Store and add the final odd byte (held in r5), if any. */
+.exit:          tst     len, #1
+                strneb  r5, [dst], #1
+                andne   r5, r5, #255
+                adcnes  sum, sum, r5, lsl #byte(0)
+                /*
+                 * If the dst pointer was not 16-bit aligned, we
+                 * need to rotate the checksum here to get around
+                 * the inefficient byte manipulations in the
+                 * architecture independent code.
+                 */
+.done:          adc     r0, sum, #0
+                ldr     sum, [sp, #0]           @ dst
+                tst     sum, #1
+                movne   sum, r0, lsl #8
+                orrne   r0, sum, r0, lsr #24
+                load_regs       ea
+/*
+ * Source not word aligned (dst already is).  src is rounded down to a
+ * word boundary and the first word loaded; the code then dispatches on
+ * the original low two bits of src.  The fragment that follows directly
+ * handles an offset of one byte: each output word is assembled from the
+ * remainder of the previous source word (pull #8) and the start of the
+ * next one (push #24).
+ */
+.src_not_aligned:
+                adc     sum, sum, #0            @ include C from dst alignment
+                and     ip, src, #3
+                bic     src, src, #3
+                load1l  r5
+                cmp     ip, #2
+                beq     .src2_aligned
+                bhi     .src3_aligned
+                mov     r4, r5, pull #8         @ C = 0
+                bics    ip, len, #15
+                beq     2f
+1:              load4l  r5, r6, r7, r8
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                mov     r7, r7, pull #8
+                orr     r7, r7, r8, push #24
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                mov     r4, r8, pull #8
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r5, r6
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                mov     r4, r6, pull #8
+                tst     ip, #4
+                beq     4f
+3:              load1l  r5
+                orr     r4, r4, r5, push #24
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+                mov     r4, r5, pull #8
+4:              ands    len, len, #3
+                beq     .done
+                /*
+                 * r5 must hold byte 0 of the leftover word before it is
+                 * stored below or at .exit.  The former
+                 * "mov r5, r4, lsr #byte(0)" was a zero shift, so a plain
+                 * register move does the same job without touching flags.
+                 */
+                mov     r5, r4
+                tst     len, #2
+                beq     .exit
+                adcs    sum, sum, r4, push #16
+                strb    r5, [dst], #1
+                mov     r5, r4, lsr #byte(1)
+                strb    r5, [dst], #1
+                mov     r5, r4, lsr #byte(2)
+                b       .exit
+/*
+ * Source offset of two bytes within a word: each output word is assembled
+ * from the remainder of the previous source word (pull #16) and the start
+ * of the next one (push #16).  On entry r5 holds the first source word.
+ */
+.src2_aligned:  mov     r4, r5, pull #16
+                adds    sum, sum, #0            @ C = 0
+                bics    ip, len, #15
+                beq     2f
+1:              load4l  r5, r6, r7, r8
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                mov     r7, r7, pull #16
+                orr     r7, r7, r8, push #16
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                mov     r4, r8, pull #16
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r5, r6
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                mov     r4, r6, pull #16
+                tst     ip, #4
+                beq     4f
+3:              load1l  r5
+                orr     r4, r4, r5, push #16
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+                mov     r4, r5, pull #16
+4:              ands    len, len, #3
+                beq     .done
+                /*
+                 * r5 must hold byte 0 of the leftover word before it is
+                 * stored below or at .exit.  The former
+                 * "mov r5, r4, lsr #byte(0)" was a zero shift, so a plain
+                 * register move does the same job without touching flags.
+                 */
+                mov     r5, r4
+                tst     len, #2
+                beq     .exit
+                adcs    sum, sum, r4
+                strb    r5, [dst], #1
+                mov     r5, r4, lsr #byte(1)
+                strb    r5, [dst], #1
+                tst     len, #1
+                beq     .done
+                load1b  r5                      @ third tail byte comes from the next word
+                b       .exit
+/*
+ * Source offset of three bytes within a word: each output word is
+ * assembled from the remainder of the previous source word (pull #24) and
+ * the start of the next one (push #8).  On entry r5 holds the first source
+ * word.
+ */
+.src3_aligned:  mov     r4, r5, pull #24
+                adds    sum, sum, #0            @ C = 0
+                bics    ip, len, #15
+                beq     2f
+1:              load4l  r5, r6, r7, r8
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                mov     r7, r7, pull #24
+                orr     r7, r7, r8, push #8
+                stmia   dst!, {r4, r5, r6, r7}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                adcs    sum, sum, r6
+                adcs    sum, sum, r7
+                mov     r4, r8, pull #24
+                sub     ip, ip, #16
+                teq     ip, #0
+                bne     1b
+2:              ands    ip, len, #12
+                beq     4f
+                tst     ip, #8
+                beq     3f
+                load2l  r5, r6
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                stmia   dst!, {r4, r5}
+                adcs    sum, sum, r4
+                adcs    sum, sum, r5
+                mov     r4, r6, pull #24
+                tst     ip, #4
+                beq     4f
+3:              load1l  r5
+                orr     r4, r4, r5, push #8
+                str     r4, [dst], #4
+                adcs    sum, sum, r4
+                mov     r4, r5, pull #24
+4:              ands    len, len, #3
+                beq     .done
+                /*
+                 * r5 must hold byte 0 of the leftover word before it is
+                 * stored below or at .exit.  The former
+                 * "mov r5, r4, lsr #byte(0)" was a zero shift, so a plain
+                 * register move does the same job without touching flags.
+                 */
+                mov     r5, r4
+                tst     len, #2
+                beq     .exit
+                strb    r5, [dst], #1
+                adcs    sum, sum, r4
+                load1l  r4
+                mov     r5, r4                  @ byte 0 of the newly loaded word
+                strb    r5, [dst], #1
+                adcs    sum, sum, r4, push #24
+                mov     r5, r4, lsr #byte(1)
+                b       .exit
diff --git a/arch/arm26/lib/csumpartialcopyuser.S b/arch/arm26/lib/csumpartialcopyuser.S
new file mode 100644
index 000000000000..5b821188e479
--- /dev/null
+++ b/arch/arm26/lib/csumpartialcopyuser.S
@@ -0,0 +1,115 @@
+/*
+ *  linux/arch/arm26/lib/csumpartialcopyuser.S
+ *
+ *  Copyright (C) 1995-1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/asm_offsets.h>
+                .text
+                .macro  save_regs
+                stmfd   sp!, {r1 - r2, r4 - r9, fp, ip, lr, pc}  @ push a frame; r1 (dst) and r2 (len) land at [sp], [sp, #4]
+                mov     r9, sp, lsr #13         @ r9 = sp with its low 13 bits
+                mov     r9, r9, lsl #13         @   cleared (8K-aligned base)
+                ldr     r9, [r9, #TSK_ADDR_LIMIT]       @ word at offset TSK_ADDR_LIMIT from that base
+                mov     r9, r9, lsr #24         @ keep only its top byte; the load macros test bit 0 of r9
+                .endm
+                .macro  load_regs,flags
+                ldm\flags       fp, {r1, r2, r4-r9, fp, sp, pc}^        @ reload the saved registers relative to fp; pc is in the list, so this also returns
+                .endm
+                .macro  load1b, reg1
+                tst     r9, #0x01               @ bit 0 of r9 (set up by save_regs) picks the load flavour
+9999:           ldreqbt \reg1, [r0], #1         @ bit clear: byte load with the T (translate) suffix, r0 += 1
+                ldrneb  \reg1, [r0], #1         @ bit set: plain byte load, r0 += 1
+                .section __ex_table, "a"
+                .align  3
+                .long   9999b, 6001f            @ exception table pair: insn at 9999 -> fixup at 6001
+                .previous
+                .endm
+                .macro  load2b, reg1, reg2
+                tst     r9, #0x01               @ bit 0 of r9 picks the load flavour for both bytes
+9999:           ldreqbt \reg1, [r0], #1         @ bit clear: first byte, T-suffixed load
+                ldrneb  \reg1, [r0], #1         @ bit set: first byte, plain load
+9998:           ldreqbt \reg2, [r0], #1         @ bit clear: second byte, T-suffixed load
+                ldrneb  \reg2, [r0], #1         @ bit set: second byte, plain load
+                .section __ex_table, "a"
+                .long   9999b, 6001f            @ both T-suffixed loads share
+                .long   9998b, 6001f            @   the fixup at 6001
+                .previous
+                .endm
+                .macro  load1l, reg1
+                tst     r9, #0x01               @ bit 0 of r9 picks the load flavour
+9999:           ldreqt  \reg1, [r0], #4         @ bit clear: word load with the T suffix, r0 += 4
+                ldrne   \reg1, [r0], #4         @ bit set: plain word load, r0 += 4
+                .section __ex_table, "a"
+                .align  3
+                .long   9999b, 6001f            @ exception table pair: insn at 9999 -> fixup at 6001
+                .previous
+                .endm
+                .macro  load2l, reg1, reg2
+                tst     r9, #0x01               @ bit 0 of r9 picks the load flavour
+                ldmneia r0!, {\reg1, \reg2}     @ bit set: fetch both words with one ldm, r0 += 8
+9999:           ldreqt  \reg1, [r0], #4         @ bit clear: two separate
+9998:           ldreqt  \reg2, [r0], #4         @   T-suffixed word loads
+                .section __ex_table, "a"
+                .long   9999b, 6001f            @ each T-suffixed load gets its own entry,
+                .long   9998b, 6001f            @   all pointing at the fixup at 6001
+                .previous
+                .endm
+                .macro  load4l, reg1, reg2, reg3, reg4
+                tst     r9, #0x01               @ bit 0 of r9 picks the load flavour
+                ldmneia r0!, {\reg1, \reg2, \reg3, \reg4}       @ bit set: fetch all four words with one ldm, r0 += 16
+9999:           ldreqt  \reg1, [r0], #4         @ bit clear: four separate
+9998:           ldreqt  \reg2, [r0], #4         @   T-suffixed word loads,
+9997:           ldreqt  \reg3, [r0], #4         @   each advancing r0 by 4
+9996:           ldreqt  \reg4, [r0], #4
+                .section __ex_table, "a"
+                .long   9999b, 6001f            @ one exception table entry per
+                .long   9998b, 6001f            @   T-suffixed load, all pointing
+                .long   9997b, 6001f            @   at the fixup at 6001
+                .long   9996b, 6001f
+                .previous
+                .endm
+/*
+ * unsigned int
+ * csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *err_ptr)
+ *  r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr
+ *  Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT
+ */
+#define FN_ENTRY        ENTRY(csum_partial_copy_from_user)
+#include "csumpartialcopygeneric.S"
+/*
+ * FIXME: minor buglet here
+ * We don't return the checksum for the data present in the buffer.  To do
+ * so properly, we would have to add in whatever registers were loaded before
+ * the fault, which, with the current asm above is not predictable.
+ */
+                .align  4
+6001:           mov     r4, #-EFAULT            @ fixup target named by the __ex_table entries above
+                ldr     r5, [fp, #4]            @ *err_ptr
+                str     r4, [r5]                @ report -EFAULT through err_ptr
+                ldmia   sp, {r1, r2}            @ retrieve dst, len
+                add     r2, r2, r1              @ r2 = dst + len (end of the buffer)
+                mov     r0, #0                  @ zero the buffer
+6002:           teq     r2, r1                  @ reached the end?
+                strneb  r0, [r1], #1            @ no: clear one byte and advance
+                bne     6002b
+                load_regs       ea              @ restore the saved registers and return; r0 is 0 here
diff --git a/arch/arm26/lib/delay.S b/arch/arm26/lib/delay.S
new file mode 100644
index 000000000000..66f2b68e1b13
--- /dev/null
+++ b/arch/arm26/lib/delay.S
@@ -0,0 +1,57 @@
+/*
+ *  linux/arch/arm26/lib/delay.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+LC0:            .word   loops_per_jiffy
+/*
+ * 0 <= r0 <= 2000
+ */
+ENTRY(udelay)
+                mov     r2,     #0x6800
+                orr     r2, r2, #0x00db         @ r2 = 0x68db
+                mul     r1, r0, r2              @ r1 = r0 * 0x68db
+                ldr     r2, LC0                 @ r2 = address of loops_per_jiffy
+                ldr     r2, [r2]                @ r2 = loops_per_jiffy
+                mov     r1, r1, lsr #11         @ both factors lose 11 low bits
+                mov     r2, r2, lsr #11         @   before the second multiply
+                mul     r0, r1, r2              @ r0 = (r1 >> 11) * (r2 >> 11)
+                movs    r0, r0, lsr #6          @ r0 >>= 6 (total shift 28); Z set if the result is zero
+                RETINSTR(moveq,pc,lr)           @ presumably returns when zero (macro from asm/assembler.h); otherwise falls through into __delay
+/*
+ * loops = (r0 * 0x10c6 * 100 * loops_per_jiffy) / 2^32
+ *
+ * Oh, if only we had a cycle counter...
+ */
+@ Delay routine
+ENTRY(__delay)
+                subs    r0, r0, #1              @ r0 -= 1, updating the flags
+#if 0
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+                RETINSTR(movls,pc,lr)
+                subs    r0, r0, #1
+#endif
+                bhi     __delay                 @ loop while the subtract did not borrow and left r0 non-zero
+                RETINSTR(mov,pc,lr)             @ done: presumably a plain return (macro from asm/assembler.h)
diff --git a/arch/arm26/lib/ecard.S b/arch/arm26/lib/ecard.S
new file mode 100644
index 000000000000..b4633150f01c
--- /dev/null
+++ b/arch/arm26/lib/ecard.S
@@ -0,0 +1,41 @@
+/*
+ *  linux/arch/arm26/lib/ecard.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h> /* for CONFIG_CPU_nn */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+#define CPSR2SPSR(rt)
+@ Purpose: call an expansion card loader to read bytes.
+@ Proto  : char ecard_loader_read(int offset, char *card_base, char *loader);
+@ Returns: byte read
+ENTRY(ecard_loader_read)
+                stmfd   sp!, {r4 - r12, lr}     @ save r4-r12 and the return address around the call
+                mov     r11, r1                 @ r11 = card_base (second argument)
+                mov     r1, r0                  @ r1 = offset (first argument)
+                CPSR2SPSR(r0)                   @ defined empty in this file, so this emits nothing
+                mov     lr, pc                  @ return address for the call (pc reads ahead of this insn)
+                mov     pc, r2                  @ jump to the loader code (third argument)
+                LOADREGS(fd, sp!, {r4 - r12, pc})       @ presumably pops the saved registers; pc is in the list, so this returns
+@ Purpose: call an expansion card loader to reset the card
+@ Proto  : void ecard_loader_reset(int card_base, char *loader);
+@ Returns: nothing
+ENTRY(ecard_loader_reset)
+                stmfd   sp!, {r4 - r12, lr}     @ save r4-r12 and the return address around the call
+                mov     r11, r0                 @ r11 = card_base (first argument)
+                CPSR2SPSR(r0)                   @ defined empty in this file, so this emits nothing
+                mov     lr, pc                  @ return address for the call (pc reads ahead of this insn)
+                add     pc, r1, #8              @ jump to loader + 8 (r1 = second argument)
+                LOADREGS(fd, sp!, {r4 - r12, pc})       @ presumably pops the saved registers; pc is in the list, so this returns
diff --git a/arch/arm26/lib/findbit.S b/arch/arm26/lib/findbit.S
new file mode 100644
index 000000000000..26f67cccc37c
--- /dev/null
+++ b/arch/arm26/lib/findbit.S
@@ -0,0 +1,67 @@
+/*
+ *  linux/arch/arm26/lib/findbit.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 16th March 2001 - John Ripley <jripley@sonicblue.com>
+ *   Fixed so that "size" is an exclusive not an inclusive quantity.
+ *   All users of these functions expect exclusive sizes, and may
+ *   also call with zero size.
+ * Reworked by rmk.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Purpose  : Find a 'zero' bit
+ * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
+ */
+ENTRY(_find_first_zero_bit_le)
+                teq     r1, #0  
+                beq     3f                      @ maxbit == 0: nothing to scan
+                mov     r2, #0                  @ r2 = index of the bit being examined
+1:              ldrb    r3, [r0, r2, lsr #3]    @ fetch the byte holding bit r2
+                eors    r3, r3, #0xff           @ invert bits
+                bne     .found                  @ any now set - found zero bit
+                add     r2, r2, #8              @ next bit pointer
+2:              cmp     r2, r1                  @ any more?
+                blo     1b
+3:              mov     r0, r1                  @ no free bits
+                RETINSTR(mov,pc,lr)             @ presumably a plain return (macro from asm/assembler.h)
+/*
+ * Purpose  : Find next 'zero' bit
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ENTRY(_find_next_zero_bit_le)
+                teq     r1, #0
+                beq     2b                      @ maxbit == 0: the shared bound check then returns r1
+                ands    ip, r2, #7              @ ip = position of the start bit within its byte
+                beq     1b                      @ If new byte, goto old routine
+                ldrb    r3, [r0, r2, lsr #3]    @ fetch the byte holding bit r2
+                eor     r3, r3, #0xff           @ now looking for a 1 bit
+                movs    r3, r3, lsr ip          @ shift off unused bits
+                bne     .found
+                orr     r2, r2, #7              @ if zero, then no bits here
+                add     r2, r2, #1              @ align bit pointer
+                b       2b                      @ loop for next bit
+/*
+ * One or more bits in the LSB of r3 are assumed to be set.
+ */
+.found:         tst     r3, #0x0f               @ anything set in bits 0-3?
+                addeq   r2, r2, #4              @ no: the first set bit is at least 4 further on
+                movne   r3, r3, lsl #4          @ yes: move those bits up so both cases look at bits 4-7
+                tst     r3, #0x30               @ anything set in bits 4-5?
+                addeq   r2, r2, #2              @ no: skip 2 more
+                movne   r3, r3, lsl #2          @ yes: move them up to bits 6-7
+                tst     r3, #0x40               @ is bit 6 set?
+                addeq   r2, r2, #1              @ no: skip 1 more
+                mov     r0, r2                  @ return the resulting bit index
+                RETINSTR(mov,pc,lr)             @ presumably a plain return (macro from asm/assembler.h)
diff --git a/arch/arm26/lib/floppydma.S b/arch/arm26/lib/floppydma.S
new file mode 100644
index 000000000000..e99ebbb20353
--- /dev/null
+++ b/arch/arm26/lib/floppydma.S
@@ -0,0 +1,32 @@
+/*
+ *  linux/arch/arm26/lib/floppydma.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+                .global floppy_fiqin_end
+ENTRY(floppy_fiqin_start)
+                subs    r9, r9, #1              @ one fewer byte outstanding (count kept in r9)
+                ldrgtb  r12, [r11, #-4]         @ count still > 0: read the byte from [r11 - 4]
+                ldrleb  r12, [r11], #0          @ count <= 0: read the byte from [r11] instead
+                strb    r12, [r10], #1          @ store it at [r10] and advance r10
+                subs    pc, lr, #4              @ return to lr - 4 (exception-return form of subs)
+floppy_fiqin_end:
+                .global floppy_fiqout_end
+ENTRY(floppy_fiqout_start)
+                subs    r9, r9, #1              @ one fewer byte outstanding (count kept in r9)
+                ldrgeb  r12, [r10], #1          @ count >= 0: take the next byte from [r10], advance r10
+                movlt   r12, #0                 @ count < 0: send a zero byte instead
+                strleb  r12, [r11], #0          @ count <= 0: write the byte to [r11]
+                subles  pc, lr, #4              @   ...and return straight away
+                strb    r12, [r11, #-4]         @ count > 0: write the byte to [r11 - 4]
+                subs    pc, lr, #4              @ return to lr - 4 (exception-return form of subs)
+floppy_fiqout_end:
diff --git a/arch/arm26/lib/gcclib.h b/arch/arm26/lib/gcclib.h
new file mode 100644
index 000000000000..9895e78904b5
--- /dev/null
+++ b/arch/arm26/lib/gcclib.h
@@ -0,0 +1,21 @@
+/* gcclib.h -- definitions for various functions 'borrowed' from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#define BITS_PER_UNIT  8 /* bits per sizeof() unit, used to convert sizes to bit counts */
+#define SI_TYPE_SIZE (sizeof (SItype) * BITS_PER_UNIT) /* width of SItype in bits */
+typedef unsigned int UQItype    __attribute__ ((mode (QI))); /* unsigned, GCC machine mode QI */
+typedef          int SItype     __attribute__ ((mode (SI))); /* signed, GCC machine mode SI */
+typedef unsigned int USItype    __attribute__ ((mode (SI))); /* unsigned, GCC machine mode SI */
+typedef          int DItype     __attribute__ ((mode (DI))); /* signed, GCC machine mode DI */
+typedef          int word_type  __attribute__ ((mode (__word__))); /* signed, one machine word */
+typedef unsigned int UDItype    __attribute__ ((mode (DI))); /* unsigned, GCC machine mode DI */
+struct DIstruct {SItype low, high;}; /* two SItype halves of a DItype, low member first */
+typedef union /* lets a DItype be accessed whole (ll) or as its halves (s) */
+{
+  struct DIstruct s;
+  DItype ll;
+} DIunion;
diff --git a/arch/arm26/lib/getuser.S b/arch/arm26/lib/getuser.S
new file mode 100644
index 000000000000..e6d59b334851
--- /dev/null
+++ b/arch/arm26/lib/getuser.S
@@ -0,0 +1,112 @@
+/*
+ *  linux/arch/arm26/lib/getuser.S
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make them more
+ * efficient, especially as they return an error value in addition to
+ * the "real" return value.
+ *
+ * __get_user_X
+ *
+ * Inputs:      r0 contains the address
+ * Outputs:     r0 is the error code
+ *              r1, r2 contains the zero-extended value
+ *              lr corrupted
+ *
+ * No other registers must be altered.  (see include/asm-arm26/uaccess.h
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000.
+ * Note also that it is intended that __get_user_bad is not global.
+ */
+#include <asm/asm_offsets.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+        .global __get_user_1
+__get_user_1:
+        bic     r1, sp, #0x1f00                 @ r1 = sp with bits 0-12 cleared
+        bic     r1, r1, #0x00ff                 @   (8K-aligned base)
+        str     lr, [sp, #-4]!                  @ push the return address
+        ldr     r1, [r1, #TI_ADDR_LIMIT]        @ r1 = address limit stored at that base
+        sub     r1, r1, #1                      @ r1 = limit - 1 (access size is 1 byte)
+        cmp     r0, r1
+        bge     __get_user_bad                  @ address >= limit - 1 (signed compare): fail
+        cmp     r0, #0x02000000                 @ selects which load flavour runs below
+1:      ldrlsbt r1, [r0]                        @ r0 <= 0x02000000 (unsigned): byte load with the T suffix
+        ldrgeb  r1, [r0]                        @ r0 >= 0x02000000 (signed): plain byte load
+        mov     r0, #0                          @ error code 0 = success
+        ldmfd   sp!, {pc}^                      @ pop the saved return address into pc
+        .global __get_user_2
+__get_user_2:
+        bic     r2, sp, #0x1f00                 @ r2 = sp with bits 0-12 cleared
+        bic     r2, r2, #0x00ff                 @   (8K-aligned base)
+        str     lr, [sp, #-4]!                  @ push the return address
+        ldr     r2, [r2, #TI_ADDR_LIMIT]        @ r2 = address limit stored at that base
+        sub     r2, r2, #2                      @ r2 = limit - 2 (access size is 2 bytes)
+        cmp     r0, r2
+        bge     __get_user_bad                  @ address >= limit - 2 (signed compare): fail
+        cmp     r0, #0x02000000                 @ selects which load flavour runs below
+2:      ldrlsbt r1, [r0], #1                    @ T-suffixed loads: first byte, r0 += 1
+3:      ldrlsbt r2, [r0]                        @   second byte
+        ldrgeb  r1, [r0], #1                    @ plain loads: first byte, r0 += 1
+        ldrgeb  r2, [r0]                        @   second byte
+        orr     r1, r1, r2, lsl #8              @ r1 = first byte | second byte << 8
+        mov     r0, #0                          @ error code 0 = success
+        ldmfd   sp!, {pc}^                      @ pop the saved return address into pc
+        .global __get_user_4
+__get_user_4:
+        bic     r1, sp, #0x1f00                 @ r1 = sp with bits 0-12 cleared
+        bic     r1, r1, #0x00ff                 @   (8K-aligned base)
+        str     lr, [sp, #-4]!                  @ push the return address
+        ldr     r1, [r1, #TI_ADDR_LIMIT]        @ r1 = address limit stored at that base
+        sub     r1, r1, #4                      @ r1 = limit - 4 (access size is 4 bytes)
+        cmp     r0, r1
+        bge     __get_user_bad                  @ address >= limit - 4 (signed compare): fail
+        cmp     r0, #0x02000000                 @ selects which load flavour runs below
+4:      ldrlst  r1, [r0]                        @ r0 <= 0x02000000 (unsigned): word load with the T suffix
+        ldrge   r1, [r0]                        @ r0 >= 0x02000000 (signed): plain word load
+        mov     r0, #0                          @ error code 0 = success
+        ldmfd   sp!, {pc}^                      @ pop the saved return address into pc
+        .global __get_user_8
+__get_user_8:
+        bic     r2, sp, #0x1f00                 @ r2 = sp with bits 0-12 cleared
+        bic     r2, r2, #0x00ff                 @   (8K-aligned base)
+        str     lr, [sp, #-4]!                  @ push the return address
+        ldr     r2, [r2, #TI_ADDR_LIMIT]        @ r2 = address limit stored at that base
+        sub     r2, r2, #8                      @ r2 = limit - 8 (access size is 8 bytes)
+        cmp     r0, r2
+        bge     __get_user_bad_8                @ address >= limit - 8 (signed compare): fail, zeroing r2 as well
+        cmp     r0, #0x02000000                 @ selects which load flavour runs below
+5:      ldrlst  r1, [r0], #4                    @ T-suffixed loads: first word into r1, r0 += 4
+6:      ldrlst  r2, [r0]                        @   second word into r2
+        ldrge   r1, [r0], #4                    @ plain loads: first word into r1, r0 += 4
+        ldrge   r2, [r0]                        @   second word into r2
+        mov     r0, #0                          @ error code 0 = success
+        ldmfd   sp!, {pc}^                      @ pop the saved return address into pc
+__get_user_bad_8:
+        mov     r2, #0                          @ 8-byte variant: clear the second result register too
+__get_user_bad:
+        mov     r1, #0                          @ returned value is zero on failure
+        mov     r0, #-EFAULT                    @ error code
+        ldmfd   sp!, {pc}^                      @ pop the saved return address into pc
+.section __ex_table, "a"
+        .long   1b, __get_user_bad              @ each pair: address of a T-suffixed load above,
+        .long   2b, __get_user_bad              @   then the handler to continue at
+        .long   3b, __get_user_bad
+        .long   4b, __get_user_bad
+        .long   5b, __get_user_bad_8            @ the 8-byte loads use the handler that also clears r2
+        .long   6b, __get_user_bad_8
+.previous
diff --git a/arch/arm26/lib/io-acorn.S b/arch/arm26/lib/io-acorn.S
new file mode 100644
index 000000000000..f6c3e30b1b4f
--- /dev/null
+++ b/arch/arm26/lib/io-acorn.S
@@ -0,0 +1,71 @@
+/*
+ *  linux/arch/arm26/lib/io-acorn.S
+ *
+ *  Copyright (C) 1995, 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h> /* for CONFIG_CPU_nn */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+                .text
+                .align
+                .equ    diff_pcio_base, PCIO_BASE - IO_BASE
+                .macro  outw2   rd
+                mov     r8, \rd, lsl #16        @ take the low halfword of rd...
+                orr     r8, r8, r8, lsr #16     @   ...and replicate it into both halves of r8
+                str     r8, [r3, r0, lsl #2]    @ write it to [r3 + r0 * 4]
+                mov     r8, \rd, lsr #16        @ take the high halfword of rd...
+                orr     r8, r8, r8, lsl #16     @   ...and replicate it into both halves of r8
+                str     r8, [r3, r0, lsl #2]    @ write it to the same address
+                .endm
+                .macro  inw2    rd, mask, temp
+                ldr     \rd, [r0]               @ first read from [r0]
+                and     \rd, \rd, \mask         @ keep only the bits selected by mask
+                ldr     \temp, [r0]             @ second read from the same address
+                orr     \rd, \rd, \temp, lsl #16        @ second value goes into the upper half of rd
+                .endm
+                .macro  addr    rd
+                tst     \rd, #0x80000000        @ remember whether the top bit is set (flags survive until addeq)
+                mov     \rd, \rd, lsl #2        @ rd *= 4 (also drops the top bit)
+                add     \rd, \rd, #IO_BASE      @ offset from IO_BASE
+                addeq   \rd, \rd, #diff_pcio_base       @ top bit was clear: offset from PCIO_BASE instead
+                .endm
+.iosl_warning:
+                .ascii  "<4>insl/outsl not implemented, called from %08lX\0"
+                .align
+/*
+ * These make no sense on Acorn machines.
+ * Print a warning message.
+ */
+ENTRY(insl)
+ENTRY(outsl)
+                adr     r0, .iosl_warning       @ r0 = format string
+                mov     r1, lr                  @ r1 = caller address for the %08lX in the message
+                b       printk                  @ tail call: printk returns directly to our caller
+@ Purpose: write a memc register
+@ Proto  : void memc_write(int register, int value);
+@ Returns: nothing
+ENTRY(memc_write)
+                cmp     r0, #7
+                RETINSTR(movgt,pc,lr)           @ register number > 7: presumably returns without writing
+                mov     r0, r0, lsl #17         @ register number goes into bits 17 and up
+                mov     r1, r1, lsl #15         @ drop the top 15 bits of the value...
+                mov     r1, r1, lsr #17         @   ...and its bottom 2 bits
+                orr     r0, r0, r1, lsl #2      @ place the remaining value bits at bits 2-16
+                add     r0, r0, #0x03600000     @ add the base address 0x03600000
+                strb    r0, [r0]                @ store to the computed address, which carries both register number and value
+                RETINSTR(mov,pc,lr)             @ presumably a plain return (macro from asm/assembler.h)
diff --git a/arch/arm26/lib/io-readsb.S b/arch/arm26/lib/io-readsb.S
new file mode 100644
index 000000000000..4c4d99c05856
--- /dev/null
+++ b/arch/arm26/lib/io-readsb.S
@@ -0,0 +1,116 @@
+/*
+ *  linux/arch/arm26/lib/io-readsb.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+.insb_align:    rsb     ip, ip, #4              @ ip = bytes needed to word-align the buffer (r1)
+                cmp     ip, r2
+                movgt   ip, r2                  @ ...but never more than the remaining count
+                cmp     ip, #2                  @ flags choose between 1, 2 or 3 byte copies below
+                ldrb    r3, [r0]                @ always copy one byte; r0 is never advanced
+                strb    r3, [r1], #1
+                ldrgeb  r3, [r0]                @ second byte if ip >= 2
+                strgeb  r3, [r1], #1
+                ldrgtb  r3, [r0]                @ third byte if ip > 2
+                strgtb  r3, [r1], #1
+                subs    r2, r2, ip
+                bne     .insb_aligned           @ bytes left: carry on with the aligned copy; else fall into the zero-length return
+ENTRY(__raw_readsb)
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                ands    ip, r1, #3              @ ip = misalignment of the buffer pointer
+                bne     .insb_align
+.insb_aligned:  stmfd   sp!, {r4 - r6, lr}
+                subs    r2, r2, #16             @ at least 16 bytes to go?
+                bmi     .insb_no_16
+.insb_16_lp:    ldrb    r3, [r0]                @ build four words in r3-r6, lowest byte read first
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #8
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #24
+                ldrb    r4, [r0]
+                ldrb    r5, [r0]
+                orr     r4, r4, r5, lsl #8
+                ldrb    r5, [r0]
+                orr     r4, r4, r5, lsl #16
+                ldrb    r5, [r0]
+                orr     r4, r4, r5, lsl #24
+                ldrb    r5, [r0]
+                ldrb    r6, [r0]
+                orr     r5, r5, r6, lsl #8
+                ldrb    r6, [r0]
+                orr     r5, r5, r6, lsl #16
+                ldrb    r6, [r0]
+                orr     r5, r5, r6, lsl #24
+                ldrb    r6, [r0]
+                ldrb    ip, [r0]
+                orr     r6, r6, ip, lsl #8
+                ldrb    ip, [r0]
+                orr     r6, r6, ip, lsl #16
+                ldrb    ip, [r0]
+                orr     r6, r6, ip, lsl #24
+                stmia   r1!, {r3 - r6}          @ store 16 bytes, advance the buffer
+                subs    r2, r2, #16
+                bpl     .insb_16_lp
+                tst     r2, #15                 @ any remainder below 16?
+                LOADREGS(eqfd, sp!, {r4 - r6, pc})      @ none: presumably pops the saved registers and returns
+.insb_no_16:    tst     r2, #8                  @ 8-byte chunk in the remainder?
+                beq     .insb_no_8
+                ldrb    r3, [r0]
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #8
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #24
+                ldrb    r4, [r0]
+                ldrb    r5, [r0]
+                orr     r4, r4, r5, lsl #8
+                ldrb    r5, [r0]
+                orr     r4, r4, r5, lsl #16
+                ldrb    r5, [r0]
+                orr     r4, r4, r5, lsl #24
+                stmia   r1!, {r3, r4}
+.insb_no_8:     tst     r2, #4                  @ 4-byte chunk in the remainder?
+                beq     .insb_no_4
+                ldrb    r3, [r0]
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #8
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                ldrb    r4, [r0]
+                orr     r3, r3, r4, lsl #24
+                str     r3, [r1], #4
+.insb_no_4:     ands    r2, r2, #3              @ 0-3 trailing bytes
+                LOADREGS(eqfd, sp!, {r4 - r6, pc})      @ none: presumably pops the saved registers and returns
+                cmp     r2, #2                  @ flags choose between 1, 2 or 3 byte copies below
+                ldrb    r3, [r0]
+                strb    r3, [r1], #1
+                ldrgeb  r3, [r0]
+                strgeb  r3, [r1], #1
+                ldrgtb  r3, [r0]
+                strgtb  r3, [r1]
+                LOADREGS(fd, sp!, {r4 - r6, pc})
diff --git a/arch/arm26/lib/io-readsl.S b/arch/arm26/lib/io-readsl.S
new file mode 100644
index 000000000000..7be208bd23c6
--- /dev/null
+++ b/arch/arm26/lib/io-readsl.S
@@ -0,0 +1,78 @@
+/*
+ *  linux/arch/arm26/lib/io-readsl.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+/*
+ * Note that some reads can be aligned on half-word boundaries.
+ */
+ENTRY(__raw_readsl)
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                ands    ip, r1, #3              @ ip = misalignment of the buffer pointer (r1)
+                bne     2f
+1:              ldr     r3, [r0]                @ aligned case: one word per iteration; r0 is never advanced
+                str     r3, [r1], #4
+                subs    r2, r2, #1              @ r2 counts words
+                bne     1b
+                mov     pc, lr
+2:              cmp     ip, #2                  @ dispatch on the misalignment (ldr below leaves the flags alone)
+                ldr     ip, [r0]                @ first word; ip now carries data, not the misalignment
+                blt     4f                      @ misalignment 1
+                bgt     6f                      @ misalignment 3
+                strb    ip, [r1], #1            @ misalignment 2: store the low two bytes...
+                mov     ip, ip, lsr #8
+                strb    ip, [r1], #1
+                mov     ip, ip, lsr #8          @ ...leaving the upper halfword pending in ip
+3:              subs    r2, r2, #1
+                ldrne   r3, [r0]                @ more words: merge the pending halfword with the
+                orrne   ip, ip, r3, lsl #16     @   low half of the next word,
+                strne   ip, [r1], #4            @   store one aligned word,
+                movne   ip, r3, lsr #16         @   and keep the new upper halfword pending
+                bne     3b
+                strb    ip, [r1], #1            @ flush the last two pending bytes
+                mov     ip, ip, lsr #8
+                strb    ip, [r1], #1
+                mov     pc, lr
+4:              strb    ip, [r1], #1            @ misalignment 1: store the low three bytes...
+                mov     ip, ip, lsr #8
+                strb    ip, [r1], #1
+                mov     ip, ip, lsr #8
+                strb    ip, [r1], #1
+                mov     ip, ip, lsr #8          @ ...leaving the top byte pending in ip
+5:              subs    r2, r2, #1
+                ldrne   r3, [r0]                @ more words: pending byte plus the low three bytes
+                orrne   ip, ip, r3, lsl #8      @   of the next word form one aligned word
+                strne   ip, [r1], #4
+                movne   ip, r3, lsr #24         @ keep the new top byte pending
+                bne     5b
+                strb    ip, [r1], #1            @ flush the last pending byte
+                mov     pc, lr
+6:              strb    ip, [r1], #1            @ misalignment 3: store the low byte...
+                mov     ip, ip, lsr #8          @ ...leaving the upper three bytes pending in ip
+7:              subs    r2, r2, #1
+                ldrne   r3, [r0]                @ more words: pending three bytes plus the low byte
+                orrne   ip, ip, r3, lsl #24     @   of the next word form one aligned word
+                strne   ip, [r1], #4
+                movne   ip, r3, lsr #8          @ keep the new upper three bytes pending
+                bne     7b
+                strb    ip, [r1], #1            @ flush the last three pending bytes
+                mov     ip, ip, lsr #8
+                strb    ip, [r1], #1
+                mov     ip, ip, lsr #8
+                strb    ip, [r1], #1
+                mov     pc, lr
diff --git a/arch/arm26/lib/io-readsw.S b/arch/arm26/lib/io-readsw.S
new file mode 100644
index 000000000000..c65c1f28fcff
--- /dev/null
+++ b/arch/arm26/lib/io-readsw.S
@@ -0,0 +1,107 @@
+/*
+ *  linux/arch/arm26/lib/io-readsw.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+.insw_bad_alignment:
+                adr     r0, .insw_bad_align_msg @ r0 = format string; r1 still holds the buffer pointer
+                mov     r2, lr                  @ r2 = caller address for the lr= field
+                b       panic
+.insw_bad_align_msg:
+                .asciz  "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+                .align
+.insw_align:    tst     r1, #1                  @ odd buffer address cannot be handled
+                bne     .insw_bad_alignment
+                ldr     r3, [r0]                @ buffer is halfword- but not word-aligned:
+                strb    r3, [r1], #1            @   store one halfword as two bytes
+                mov     r3, r3, lsr #8
+                strb    r3, [r1], #1
+                subs    r2, r2, #1              @ one fewer halfword to go
+                RETINSTR(moveq, pc, lr)         @ presumably returns when none are left; otherwise falls into __raw_readsw
+ENTRY(__raw_readsw)
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                tst     r1, #3                  @ buffer pointer word-aligned?
+                bne     .insw_align
+.insw_aligned:  mov     ip, #0xff
+                orr     ip, ip, ip, lsl #8      @ ip = 0xffff, mask for the low halfword
+                stmfd   sp!, {r4, r5, r6, lr}
+                subs    r2, r2, #8              @ at least 8 halfwords to go?
+                bmi     .no_insw_8
+.insw_8_lp:     ldr     r3, [r0]                @ each pair of reads becomes one word: first read masked
+                and     r3, r3, ip              @   into the low half, second shifted into the high half
+                ldr     r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                ldr     r4, [r0]
+                and     r4, r4, ip
+                ldr     r5, [r0]
+                orr     r4, r4, r5, lsl #16
+                ldr     r5, [r0]
+                and     r5, r5, ip
+                ldr     r6, [r0]
+                orr     r5, r5, r6, lsl #16
+                ldr     r6, [r0]
+                and     r6, r6, ip
+                ldr     lr, [r0]                @ lr is free as scratch: it was saved on the stack above
+                orr     r6, r6, lr, lsl #16
+                stmia   r1!, {r3 - r6}          @ store 8 halfwords, advance the buffer
+                subs    r2, r2, #8
+                bpl     .insw_8_lp
+                tst     r2, #7                  @ any remainder below 8?
+                LOADREGS(eqfd, sp!, {r4, r5, r6, pc})   @ none: presumably pops the saved registers and returns
+.no_insw_8:     tst     r2, #4                  @ 4 halfwords in the remainder?
+                beq     .no_insw_4
+                ldr     r3, [r0]
+                and     r3, r3, ip
+                ldr     r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                ldr     r4, [r0]
+                and     r4, r4, ip
+                ldr     r5, [r0]
+                orr     r4, r4, r5, lsl #16
+                stmia   r1!, {r3, r4}
+.no_insw_4:     tst     r2, #2                  @ 2 halfwords in the remainder?
+                beq     .no_insw_2
+                ldr     r3, [r0]
+                and     r3, r3, ip
+                ldr     r4, [r0]
+                orr     r3, r3, r4, lsl #16
+                str     r3, [r1], #4
+.no_insw_2:     tst     r2, #1                  @ one last halfword?
+                ldrne   r3, [r0]
+                strneb  r3, [r1], #1            @ stored as two separate bytes
+                movne   r3, r3, lsr #8
+                strneb  r3, [r1]
+                LOADREGS(fd, sp!, {r4, r5, r6, pc})
diff --git a/arch/arm26/lib/io-writesb.S b/arch/arm26/lib/io-writesb.S
new file mode 100644
index 000000000000..16251b4d5101
--- /dev/null
+++ b/arch/arm26/lib/io-writesb.S
@@ -0,0 +1,122 @@
+/*
+ *  linux/arch/arm26/lib/io-writesb.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+.outsb_align:   rsb     ip, ip, #4              @ ip = bytes needed to word-align the buffer (r1)
+                cmp     ip, r2
+                movgt   ip, r2                  @ ...but never more than the remaining count
+                cmp     ip, #2                  @ flags choose between 1, 2 or 3 byte copies below
+                ldrb    r3, [r1], #1            @ always write one byte; r0 is never advanced
+                strb    r3, [r0]
+                ldrgeb  r3, [r1], #1            @ second byte if ip >= 2
+                strgeb  r3, [r0]
+                ldrgtb  r3, [r1], #1            @ third byte if ip > 2
+                strgtb  r3, [r0]
+                subs    r2, r2, ip
+                bne     .outsb_aligned          @ bytes left: carry on with the aligned copy; else fall into the zero-length return
+ENTRY(__raw_writesb)
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                ands    ip, r1, #3              @ ip = misalignment of the buffer pointer
+                bne     .outsb_align
+.outsb_aligned: stmfd   sp!, {r4 - r6, lr}
+                subs    r2, r2, #16             @ at least 16 bytes to go?
+                bmi     .outsb_no_16
+.outsb_16_lp:   ldmia   r1!, {r3 - r6}          @ fetch 16 bytes, then emit each word lowest byte first
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                strb    r4, [r0]
+                mov     r4, r4, lsr #8
+                strb    r4, [r0]
+                mov     r4, r4, lsr #8
+                strb    r4, [r0]
+                mov     r4, r4, lsr #8
+                strb    r4, [r0]
+                strb    r5, [r0]
+                mov     r5, r5, lsr #8
+                strb    r5, [r0]
+                mov     r5, r5, lsr #8
+                strb    r5, [r0]
+                mov     r5, r5, lsr #8
+                strb    r5, [r0]
+                strb    r6, [r0]
+                mov     r6, r6, lsr #8
+                strb    r6, [r0]
+                mov     r6, r6, lsr #8
+                strb    r6, [r0]
+                mov     r6, r6, lsr #8
+                strb    r6, [r0]
+                subs    r2, r2, #16
+                bpl     .outsb_16_lp
+                tst     r2, #15                 @ any remainder below 16?
+                LOADREGS(eqfd, sp!, {r4 - r6, pc})      @ none: presumably pops the saved registers and returns
+.outsb_no_16:   tst     r2, #8                  @ 8-byte chunk in the remainder?
+                beq     .outsb_no_8
+                ldmia   r1!, {r3, r4}
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                strb    r4, [r0]
+                mov     r4, r4, lsr #8
+                strb    r4, [r0]
+                mov     r4, r4, lsr #8
+                strb    r4, [r0]
+                mov     r4, r4, lsr #8
+                strb    r4, [r0]
+.outsb_no_8:    tst     r2, #4                  @ 4-byte chunk in the remainder?
+                beq     .outsb_no_4
+                ldr     r3, [r1], #4
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+                mov     r3, r3, lsr #8
+                strb    r3, [r0]
+.outsb_no_4:    ands    r2, r2, #3              @ 0-3 trailing bytes
+                LOADREGS(eqfd, sp!, {r4 - r6, pc})      @ none: presumably pops the saved registers and returns
+                cmp     r2, #2                  @ flags choose between 1, 2 or 3 byte copies below
+                ldrb    r3, [r1], #1
+                strb    r3, [r0]
+                ldrgeb  r3, [r1], #1
+                strgeb  r3, [r0]
+                ldrgtb  r3, [r1]
+                strgtb  r3, [r0]
+                LOADREGS(fd, sp!, {r4 - r6, pc})
diff --git a/arch/arm26/lib/io-writesl.S b/arch/arm26/lib/io-writesl.S
new file mode 100644
index 000000000000..4d6049b16e71
--- /dev/null
+++ b/arch/arm26/lib/io-writesl.S
@@ -0,0 +1,56 @@
+/*
+ *  linux/arch/arm26/lib/io-writesl.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+/*
+ * __raw_writesl - store a run of 32-bit words to one fixed address.
+ *
+ *  r0 = destination address; every word is stored here, r0 never advances
+ *  r1 = source buffer; any byte alignment is accepted
+ *  r2 = number of 32-bit words to write (zero returns immediately)
+ *
+ * r3 and ip are used as scratch; the stack is not touched.
+ * NOTE(review): r0 is presumably a device/FIFO register - confirm against
+ * the callers.
+ */
+ENTRY(__raw_writesl)
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                ands    ip, r1, #3      @ ip = byte offset of the buffer within a word
+                bne     2f              @ non-zero offset: use a misaligned path
+1:              ldr     r3, [r1], #4    @ aligned buffer: plain word-by-word copy
+                str     r3, [r0]
+                subs    r2, r2, #1
+                bne     1b
+                mov     pc, lr
+                @ Misaligned buffer: r1 is rounded down to a word boundary and r3
+                @ is primed with the first aligned word.  Every output word is then
+                @ built from the upper bytes of the previous aligned word and the
+                @ lower bytes of the next one.  The shift pairs 16/16, 24/8 and 8/24
+                @ serve byte offsets 2, 3 and 1; the shift directions correspond to
+                @ little-endian byte order.  Note that one aligned word more than
+                @ the requested count is read from the buffer.
+2:              bic     r1, r1, #3
+                cmp     ip, #2
+                ldr     r3, [r1], #4    @ load does not disturb the flags set by cmp
+                bgt     4f              @ offset 3
+                blt     5f              @ offset 1
+3:              mov     ip, r3, lsr #16 @ offset 2 (reached by falling through)
+                ldr     r3, [r1], #4
+                orr     ip, ip, r3, lsl #16
+                str     ip, [r0]
+                subs    r2, r2, #1
+                bne     3b
+                mov     pc, lr
+4:              mov     ip, r3, lsr #24 @ offset 3: one byte from the old word, three from the new
+                ldr     r3, [r1], #4
+                orr     ip, ip, r3, lsl #8
+                str     ip, [r0]
+                subs    r2, r2, #1
+                bne     4b
+                mov     pc, lr
+5:              mov     ip, r3, lsr #8  @ offset 1: three bytes from the old word, one from the new
+                ldr     r3, [r1], #4
+                orr     ip, ip, r3, lsl #24
+                str     ip, [r0]
+                subs    r2, r2, #1
+                bne     5b
+                mov     pc, lr
diff --git a/arch/arm26/lib/io-writesw.S b/arch/arm26/lib/io-writesw.S
new file mode 100644
index 000000000000..a24f891f6b1c
--- /dev/null
+++ b/arch/arm26/lib/io-writesw.S
@@ -0,0 +1,127 @@
+/*
+ *  linux/arch/arm26/lib/io-writesw.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+/*
+ * Reached from .outsw_align when the buffer address in r1 is odd.
+ * Sets up the arguments for panic(): r0 = format string, r1 = the
+ * offending buffer address (left as the caller passed it) and r2 = the
+ * caller's return address.  panic is entered with a plain branch, so
+ * there is no return path from here.
+ */
+.outsw_bad_alignment:
+                adr     r0, .outsw_bad_align_msg
+                mov     r2, lr
+                b       panic
+.outsw_bad_align_msg:
+                .asciz  "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+                .align
+/*
+ * Entered from __raw_writesw when r1 is not word aligned.  An odd address
+ * is rejected via .outsw_bad_alignment.  Otherwise r1 sits two bytes past
+ * a word boundary, so exactly one halfword is written here, which leaves
+ * r1 word aligned.  If that was the last halfword we return; if not,
+ * execution falls through into __raw_writesw directly below, which
+ * re-tests the count and the (now satisfied) alignment.
+ */
+.outsw_align:   tst     r1, #1
+                bne     .outsw_bad_alignment
+                add     r1, r1, #2              @ r1 is now word aligned
+                ldr     r3, [r1, #-4]           @ aligned word whose top 16 bits are the halfword
+                mov     r3, r3, lsr #16
+                orr     r3, r3, r3, lsl #16     @ same halfword in both halves of the word
+                str     r3, [r0]
+                subs    r2, r2, #1
+                RETINSTR(moveq, pc, lr)         @ macro from <asm/assembler.h>; returns when count hit zero
+/*
+ * __raw_writesw - write a run of 16-bit values to one fixed address.
+ *
+ *  r0 = destination address; every store goes here, r0 never advances
+ *  r1 = source buffer; must be halfword aligned (see .outsw_align)
+ *  r2 = number of 16-bit values to write (zero returns immediately)
+ *
+ * Each halfword is copied into both halves of a 32-bit word and stored
+ * with a full-word str.
+ * NOTE(review): presumably the target bus picks the 16 bits it needs from
+ * the duplicated word - confirm against the hardware documentation.
+ *
+ * r3 and ip are scratch; r4-r6 and lr are saved on the stack and restored
+ * on return through the LOADREGS macro from <asm/assembler.h>.
+ */
+ENTRY(__raw_writesw)
+                teq     r2, #0          @ do we have to check for the zero len?
+                moveq   pc, lr
+                tst     r1, #3
+                bne     .outsw_align    @ not word aligned: emit one halfword first
+.outsw_aligned: stmfd   sp!, {r4, r5, r6, lr}
+                subs    r2, r2, #8      @ bias the count by -8 for the unrolled loop
+                bmi     .no_outsw_8     @ fewer than 8 halfwords in total
+                @ Main loop: four words = eight halfwords per pass.  For each word
+                @ the low halfword is sent first (lsl/lsr pair), then the high
+                @ halfword (lsr/lsl pair).
+.outsw_8_lp:    ldmia   r1!, {r3, r4, r5, r6}
+                mov     ip, r3, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r3, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r4, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r4, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r5, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r5, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r6, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r6, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                subs    r2, r2, #8
+                bpl     .outsw_8_lp
+                @ r2 is negative here, but subtracting multiples of 8 has left its
+                @ low three bits equal to the number of halfwords still to write.
+                tst     r2, #7
+                LOADREGS(eqfd, sp!, {r4, r5, r6, pc})   @ nothing left: return
+.no_outsw_8:    tst     r2, #4          @ four halfwords (two words) pending?
+                beq     .no_outsw_4
+                ldmia   r1!, {r3, r4}
+                mov     ip, r3, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r3, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+                mov     ip, r4, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r4, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+.no_outsw_4:    tst     r2, #2          @ two halfwords (one word) pending?
+                beq     .no_outsw_2
+                ldr     r3, [r1], #4
+                mov     ip, r3, lsl #16
+                orr     ip, ip, ip, lsr #16
+                str     ip, [r0]
+                mov     ip, r3, lsr #16
+                orr     ip, ip, ip, lsl #16
+                str     ip, [r0]
+.no_outsw_2:    tst     r2, #1          @ a single trailing halfword?
+                ldrne   r3, [r1]        @ full word is read; only its low half is used
+                movne   ip, r3, lsl #16
+                orrne   ip, ip, ip, lsr #16
+                strne   ip, [r0]
+                LOADREGS(fd, sp!, {r4, r5, r6, pc})
diff --git a/arch/arm26/lib/kbd.c b/arch/arm26/lib/kbd.c
new file mode 100644
index 000000000000..22d2c93aaf1a
--- /dev/null
+++ b/arch/arm26/lib/kbd.c
@@ -0,0 +1,279 @@
+#include <linux/config.h>
+#include <linux/kd.h>
+//#include <linux/kbd_ll.h>
+#include <linux/kbd_kern.h>
+/*
+ * Translation of escaped scancodes to keycodes.
+ * This is now user-settable.
+ * The keycodes 1-88,96-111,119 are fairly standard, and
+ * should probably not be changed - changing might confuse X.
+ * X also interprets scancode 0x5d (KEY_Begin).
+ *
+ * For 1-88 keycode equals scancode.
+ */
+/* Keycodes reported for scancodes that follow an 0xe0 prefix (see e0_keys[]). */
+#define E0_KPENTER 96
+#define E0_RCTRL   97
+#define E0_KPSLASH 98
+#define E0_PRSCR   99
+#define E0_RALT    100
+#define E0_BREAK   101          /* (control-pause) */
+#define E0_HOME    102
+#define E0_UP      103
+#define E0_PGUP    104
+#define E0_LEFT    105
+#define E0_RIGHT   106
+#define E0_END     107
+#define E0_DOWN    108
+#define E0_PGDN    109
+#define E0_INS     110
+#define E0_DEL     111
+/* for USB 106 keyboard */
+#define E0_YEN         124
+#define E0_BACKSLASH   89
+/* Keycode reported once the e1 1d 45 sequence has been seen (see gen_translate()). */
+#define E1_PAUSE   119
+/*
+ * The keycodes below are randomly located in 89-95,112-118,120-127.
+ * They could be thrown away (and all occurrences below replaced by 0),
+ * but that would force many users to use the `setkeycodes' utility, where
+ * they needed not before. It does not matter that there are duplicates, as
+ * long as no duplication occurs for any single keyboard.
+ */
+/*
+ * Unprefixed scancodes below SC_LIM are used directly as keycodes;
+ * those from SC_LIM up to 127 are looked up in high_keys[].
+ */
+#define SC_LIM 89
+/* Keycodes for the FOCUS keyboard's PF1..PF12 keys. */
+#define FOCUS_PF1 85            /* actual code! */
+#define FOCUS_PF2 89
+#define FOCUS_PF3 90
+#define FOCUS_PF4 91
+#define FOCUS_PF5 92
+#define FOCUS_PF6 93
+#define FOCUS_PF7 94
+#define FOCUS_PF8 95
+#define FOCUS_PF9 120
+#define FOCUS_PF10 121
+#define FOCUS_PF11 122
+#define FOCUS_PF12 123
+#define JAP_86     124
+/* tfj@olivia.ping.dk:
+ * The four keys are located over the numeric keypad, and are
+ * labelled A1-A4. It's an rc930 keyboard, from
+ * Regnecentralen/RC International, Now ICL.
+ * Scancodes: 59, 5a, 5b, 5c.
+ */
+#define RGN1 124
+#define RGN2 125
+#define RGN3 126
+#define RGN4 127
+/*
+ * Keycodes for unprefixed scancodes SC_LIM..127, indexed by
+ * (scancode - SC_LIM).  A zero entry means "no keycode assigned".
+ * Entries can be changed at run time through gen_setkeycode().
+ */
+static unsigned char high_keys[128 - SC_LIM] = {
+        RGN1, RGN2, RGN3, RGN4, 0, 0, 0,        /* 0x59-0x5f */
+        0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 */
+        0, 0, 0, 0, 0, FOCUS_PF11, 0, FOCUS_PF12,       /* 0x68-0x6f */
+        0, 0, 0, FOCUS_PF2, FOCUS_PF9, 0, 0, FOCUS_PF3, /* 0x70-0x77 */
+        FOCUS_PF4, FOCUS_PF5, FOCUS_PF6, FOCUS_PF7,     /* 0x78-0x7b */
+        FOCUS_PF8, JAP_86, FOCUS_PF10, 0        /* 0x7c-0x7f */
+};
+/* BTC */
+/* Further keycodes for 0xe0-prefixed scancodes on particular keyboards. */
+#define E0_MACRO   112
+/* LK450 */
+#define E0_F13     113
+#define E0_F14     114
+#define E0_HELP    115
+#define E0_DO      116
+#define E0_F17     117
+#define E0_KPMINPLUS 118
+/*
+ * My OmniKey generates e0 4c for  the "OMNI" key and the
+ * right alt key does nada. [kkoller@nyx10.cs.du.edu]
+ */
+#define E0_OK  124
+/*
+ * New microsoft keyboard is rumoured to have
+ * e0 5b (left window button), e0 5c (right window button),
+ * e0 5d (menu button). [or: LBANNER, RBANNER, RMENU]
+ * [or: Windows_L, Windows_R, TaskMan]
+ */
+#define E0_MSLW        125
+#define E0_MSRW        126
+#define E0_MSTM        127
+/*
+ * Keycodes for scancodes that follow an 0xe0 prefix, indexed by the low
+ * seven bits of the scancode.  A zero entry means "no keycode assigned".
+ * gen_setkeycode()/gen_getkeycode() address this table as scancodes
+ * 128..255 (index = scancode - 128).
+ */
+static unsigned char e0_keys[128] = {
+        0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x07 */
+        0, 0, 0, 0, 0, 0, 0, 0, /* 0x08-0x0f */
+        0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x17 */
+        0, 0, 0, 0, E0_KPENTER, E0_RCTRL, 0, 0, /* 0x18-0x1f */
+        0, 0, 0, 0, 0, 0, 0, 0, /* 0x20-0x27 */
+        0, 0, 0, 0, 0, 0, 0, 0, /* 0x28-0x2f */
+        0, 0, 0, 0, 0, E0_KPSLASH, 0, E0_PRSCR, /* 0x30-0x37 */
+        E0_RALT, 0, 0, 0, 0, E0_F13, E0_F14, E0_HELP,   /* 0x38-0x3f */
+        E0_DO, E0_F17, 0, 0, 0, 0, E0_BREAK, E0_HOME,   /* 0x40-0x47 */
+        E0_UP, E0_PGUP, 0, E0_LEFT, E0_OK, E0_RIGHT, E0_KPMINPLUS, E0_END,      /* 0x48-0x4f */
+        E0_DOWN, E0_PGDN, E0_INS, E0_DEL, 0, 0, 0, 0,   /* 0x50-0x57 */
+        0, 0, 0, E0_MSLW, E0_MSRW, E0_MSTM, 0, 0,       /* 0x58-0x5f */
+        0, 0, 0, 0, 0, 0, 0, 0, /* 0x60-0x67 */
+        0, 0, 0, 0, 0, 0, 0, E0_MACRO,  /* 0x68-0x6f */
+        /* 0x75 and 0x7b carry the USB 106 keyboard entries; the rest of these rows are empty */
+        0, 0, 0, 0, 0, E0_BACKSLASH, 0, 0,      /* 0x70-0x77 */
+        0, 0, 0, E0_YEN, 0, 0, 0, 0     /* 0x78-0x7f */
+};
+/*
+ * Assign @keycode to a remappable @scancode.
+ *
+ * Scancodes SC_LIM..127 address high_keys[]; scancodes 128..255 address
+ * e0_keys[] (i.e. the 0xe0-prefixed codes).  Returns 0 on success, or
+ * -EINVAL when the scancode is outside SC_LIM..255 or the keycode is
+ * larger than 127.
+ */
+static int gen_setkeycode(unsigned int scancode, unsigned int keycode)
+{
+        unsigned char *slot;
+        if (keycode > 127)
+                return -EINVAL;
+        if (scancode < SC_LIM || scancode > 255)
+                return -EINVAL;
+        /* pick the table entry first, then store through it */
+        if (scancode >= 128)
+                slot = &e0_keys[scancode - 128];
+        else
+                slot = &high_keys[scancode - SC_LIM];
+        *slot = keycode;
+        return 0;
+}
+/*
+ * Look up the keycode currently assigned to a remappable @scancode.
+ *
+ * Scancodes SC_LIM..127 are read from high_keys[], 128..255 from
+ * e0_keys[].  Returns the keycode (0 when none is assigned), or -EINVAL
+ * for a scancode outside SC_LIM..255.
+ */
+static int gen_getkeycode(unsigned int scancode)
+{
+        if (scancode < SC_LIM || scancode > 255)
+                return -EINVAL;
+        if (scancode >= 128)
+                return e0_keys[scancode - 128];
+        return high_keys[scancode - SC_LIM];
+}
+/*
+ * Translate one raw scancode byte into a keycode.
+ *
+ * Returns 1 when *keycode has been filled in with a usable keycode, and 0
+ * when the byte was consumed without producing one (prefix byte, ignored
+ * code, or unknown code).  Prefix state is carried between calls in a
+ * function-local static, so the routine is not reentrant.
+ *
+ * @raw_mode only suppresses the diagnostics that are compiled in when
+ * KBD_REPORT_UNKN is defined.
+ */
+static int
+gen_translate(unsigned char scancode, unsigned char *keycode, char raw_mode)
+{
+        /* pending prefix: 0 (none), 0xe0, 0xe1, or 0x100 once "e1 1d" was seen */
+        static int prev_scancode;
+        int pending;
+        switch (scancode) {
+        case 0xe0:
+        case 0xe1:
+                /* special prefix scancodes: remember and wait for more */
+                prev_scancode = scancode;
+                return 0;
+        case 0x00:
+        case 0xff:
+                /* 0xFF is sent by a few keyboards, ignore it. 0x00 is error */
+                prev_scancode = 0;
+                return 0;
+        default:
+                break;
+        }
+        /* from here on only the low seven bits select the key */
+        scancode &= 0x7f;
+        pending = prev_scancode;
+        if (!pending) {
+                /* plain scancode: below SC_LIM it is its own keycode */
+                if (scancode < SC_LIM) {
+                        *keycode = scancode;
+                        return 1;
+                }
+                /*
+                 * This happens with the FOCUS 9000 keyboard: its keys
+                 * PF1..PF12 are reported to generate
+                 * 55 73 77 78 79 7a 7b 7c 74 7e 6d 6f.
+                 * Moreover, unless repeated, they do not generate key-down
+                 * events, so up_flag has to be zeroed for them (see
+                 * gen_unexpected_up()).
+                 * Japanese 86/106 keyboards are reported to generate 0x73
+                 * and 0x7d for \ - and \ | respectively, and some Brazilian
+                 * keyboard 0x73 and 0x7e for \ ? and KP-dot.
+                 */
+                *keycode = high_keys[scancode - SC_LIM];
+                if (*keycode)
+                        return 1;
+#ifdef KBD_REPORT_UNKN
+                if (!raw_mode)
+                        printk(KERN_INFO
+                               "keyboard: unrecognized scancode (%02x)"
+                               " - ignored\n", scancode);
+#endif
+                return 0;
+        }
+        if (pending == 0xe0) {
+                prev_scancode = 0;
+                /*
+                 * The keyboard keeps its own caps lock / num lock state and
+                 * brackets make and break codes with E0 2A / E0 AA pairs;
+                 * some keyboards do the same for Shift-R with E0 B6 / E0 36
+                 * (aeb@cwi.nl).  We do our own book-keeping, so both fake
+                 * shifts are dropped.
+                 */
+                if (scancode == 0x2a || scancode == 0x36)
+                        return 0;
+                if (!e0_keys[scancode]) {
+#ifdef KBD_REPORT_UNKN
+                        if (!raw_mode)
+                                printk(KERN_INFO
+                                       "keyboard: unknown scancode e0 %02x\n",
+                                       scancode);
+#endif
+                        return 0;
+                }
+                *keycode = e0_keys[scancode];
+                return 1;
+        }
+        /*
+         * Anything else is part of an e1 sequence: a Pause key generates
+         * e1 1d 45 e1 9d c5 when pressed, and nothing when released.
+         */
+        if (pending == 0xe1 && scancode == 0x1d) {
+                prev_scancode = 0x100;
+                return 0;
+        }
+        if (pending == 0x100 && scancode == 0x45) {
+                *keycode = E1_PAUSE;
+                prev_scancode = 0;
+                return 1;
+        }
+#ifdef KBD_REPORT_UNKN
+        if (!raw_mode)
+                printk(KERN_INFO
+                       "keyboard: unknown e1 escape sequence\n");
+#endif
+        prev_scancode = 0;
+        return 0;
+}
+/*
+ * Decide what to do with a key release that had no matching key press.
+ *
+ * Unexpected, but it can happen: it may be the release of a FOCUS 9000 PF
+ * key.  For those keycodes (FOCUS_PF1, or anything from SC_LIM upwards)
+ * return 0 so that up_flag is cleared and the event is seen; for every
+ * other keycode return 0200.
+ */
+static char gen_unexpected_up(unsigned char keycode)
+{
+        int maybe_focus_pf = (keycode == FOCUS_PF1) || (keycode >= SC_LIM);
+        return maybe_focus_pf ? 0 : 0200;
+}
+/*
+ * These are the default mappings: global hooks initialised to the generic
+ * gen_* routines in this file.  They are plain (non-const) pointers, so
+ * other code can install its own handlers.
+ * k_leds has no default here and therefore starts out as a null pointer.
+ */
+int  (*k_setkeycode)(unsigned int, unsigned int) = gen_setkeycode;
+int  (*k_getkeycode)(unsigned int) = gen_getkeycode;
+int  (*k_translate)(unsigned char, unsigned char *, char) = gen_translate;
+char (*k_unexpected_up)(unsigned char) = gen_unexpected_up;
+void (*k_leds)(unsigned char);
+/*
+ * Simple translation table for the SysRq keys: one byte per keycode,
+ * initialised from the string below.  Entries past the end of the string
+ * are zero.
+ * NOTE(review): the \201..\214 bytes presumably stand for function keys -
+ * confirm against the SysRq handler that consumes this table.
+ */
+#ifdef CONFIG_MAGIC_SYSRQ
+static unsigned char gen_sysrq_xlate[128] =
+        "\000\0331234567890-=\177\t"    /* 0x00 - 0x0f */
+        "qwertyuiop[]\r\000as"  /* 0x10 - 0x1f */
+        "dfghjkl;'`\000\\zxcv"  /* 0x20 - 0x2f */
+        "bnm,./\000*\000 \000\201\202\203\204\205"      /* 0x30 - 0x3f */
+        "\206\207\210\211\212\000\000789-456+1" /* 0x40 - 0x4f */
+        "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000"       /* 0x50 - 0x5f */
+        "\r\000/";                      /* 0x60 - 0x6f */
+/* Exported pointer to the table in use; defaults to the generic one. */
+unsigned char *k_sysrq_xlate = gen_sysrq_xlate;
+/* NOTE(review): presumably the keycode treated as SysRq - confirm against users of k_sysrq_key. */
+int k_sysrq_key = 0x54;
+#endif
diff --git a/arch/arm26/lib/lib1funcs.S b/arch/arm26/lib/lib1funcs.S
new file mode 100644
index 000000000000..b8f9518db871
--- /dev/null
+++ b/arch/arm26/lib/lib1funcs.S
@@ -0,0 +1,314 @@
+@ libgcc1 routines for ARM cpu.
+@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)
+/* Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file.  (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* This code is derived from gcc 2.95.3 */
+/* I Molton     29/07/01 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware.h>
+#include <linux/config.h>
+/*
+ * Return helpers: RET is `movs`, RETc(cond) is `mov<cond>s`, and RETCOND
+ * is the `^` suffix appended to a returning ldm.
+ * NOTE(review): on 26-bit ARM the S-suffixed move to pc and the `^` form
+ * of ldm presumably also restore the PSR bits held in r15 - confirm
+ * against the ARM architecture manual.
+ */
+#define RET     movs
+#define RETc(x) mov##x##s
+#define RETCOND ^
+@ Symbolic register names used by the division routines below.
+@ Note that "result" and "overdone" are two names for the same register (r2).
+dividend        .req    r0
+divisor         .req    r1
+result          .req    r2
+overdone        .req    r2
+curbit          .req    r3
+ip              .req    r12
+sp              .req    r13
+lr              .req    r14
+pc              .req    r15
+        
+/*
+ * __udivsi3 - unsigned 32-bit divide.
+ *
+ *  In:   r0 = dividend, r1 = divisor
+ *  Out:  r0 = quotient
+ *  Uses: r1, r2 (result) and r3 (curbit) as scratch
+ *
+ * A zero divisor is routed to Ldiv0 at the end of this routine.
+ */
+ENTRY(__udivsi3)
+        cmp     divisor, #0
+        beq     Ldiv0
+        mov     curbit, #1
+        mov     result, #0
+        cmp     dividend, divisor
+        bcc     Lgot_result_udivsi3             @ dividend < divisor: quotient stays 0
+1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp     divisor, #0x10000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #4
+        movcc   curbit, curbit, lsl #4
+        bcc     1b
+2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp     divisor, #0x80000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #1
+        movcc   curbit, curbit, lsl #1
+        bcc     2b
+3:
+        @ Test for possible subtractions, and note which bits
+        @ are done in the result.  On the final pass, this may subtract
+        @ too much from the dividend, but the result will be ok, since the
+        @ "bit" will have been shifted out at the bottom.
+        cmp     dividend, divisor
+        subcs   dividend, dividend, divisor
+        orrcs   result, result, curbit
+        cmp     dividend, divisor, lsr #1
+        subcs   dividend, dividend, divisor, lsr #1
+        orrcs   result, result, curbit, lsr #1
+        cmp     dividend, divisor, lsr #2
+        subcs   dividend, dividend, divisor, lsr #2
+        orrcs   result, result, curbit, lsr #2
+        cmp     dividend, divisor, lsr #3
+        subcs   dividend, dividend, divisor, lsr #3
+        orrcs   result, result, curbit, lsr #3
+        cmp     dividend, #0                    @ Early termination?
+        movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
+        movne   divisor, divisor, lsr #4
+        bne     3b
+Lgot_result_udivsi3:
+        mov     r0, result                      @ quotient goes back in r0
+        RET     pc, lr
+        @ Divide-by-zero exit, branched to by __udivsi3, __umodsi3,
+        @ __divsi3 and __modsi3: save lr, call __div0 (defined elsewhere),
+        @ then return 0 in r0.
+Ldiv0:
+        str     lr, [sp, #-4]!
+        bl      __div0
+        mov     r0, #0                  @ about as wrong as it could be
+        ldmia   sp!, {pc}RETCOND
+/*
+ * __umodsi3 - unsigned 32-bit remainder.
+ *
+ *  In:   r0 = dividend, r1 = divisor
+ *  Out:  r0 = dividend modulo divisor
+ *  Uses: r1, r2 (overdone), r3 (curbit) and ip as scratch
+ *
+ * A zero divisor is routed to Ldiv0.  When the dividend is already
+ * smaller than the divisor the routine returns at once with r0 unchanged.
+ */
+ENTRY(__umodsi3)
+        cmp     divisor, #0
+        beq     Ldiv0
+        mov     curbit, #1
+        cmp     dividend, divisor
+        RETc(cc)        pc, lr                  @ dividend < divisor: it is the remainder
+1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp     divisor, #0x10000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #4
+        movcc   curbit, curbit, lsl #4
+        bcc     1b
+2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp     divisor, #0x80000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #1
+        movcc   curbit, curbit, lsl #1
+        bcc     2b
+3:
+        @ Test for possible subtractions.  On the final pass, this may
+        @ subtract too much from the dividend, so keep track of which
+        @ subtractions are done, we can fix them up afterwards...
+        mov     overdone, #0
+        cmp     dividend, divisor
+        subcs   dividend, dividend, divisor
+        cmp     dividend, divisor, lsr #1
+        subcs   dividend, dividend, divisor, lsr #1
+        orrcs   overdone, overdone, curbit, ror #1
+        cmp     dividend, divisor, lsr #2
+        subcs   dividend, dividend, divisor, lsr #2
+        orrcs   overdone, overdone, curbit, ror #2
+        cmp     dividend, divisor, lsr #3
+        subcs   dividend, dividend, divisor, lsr #3
+        orrcs   overdone, overdone, curbit, ror #3
+        mov     ip, curbit                      @ remember the bit position of this pass
+        cmp     dividend, #0                    @ Early termination?
+        movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
+        movne   divisor, divisor, lsr #4
+        bne     3b
+        @ Any subtractions that we should not have done will be recorded in
+        @ the top three bits of "overdone".  Exactly which were not needed
+        @ are governed by the position of the bit, stored in ip.
+        @ If we terminated early, because dividend became zero,
+        @ then none of the below will match, since the bit in ip will not be
+        @ in the bottom nibble.
+        ands    overdone, overdone, #0xe0000000
+        RETc(eq)        pc, lr                          @ No fixups needed
+        tst     overdone, ip, ror #3
+        addne   dividend, dividend, divisor, lsr #3
+        tst     overdone, ip, ror #2
+        addne   dividend, dividend, divisor, lsr #2
+        tst     overdone, ip, ror #1
+        addne   dividend, dividend, divisor, lsr #1
+        RET     pc, lr
+/*
+ * __divsi3 - signed 32-bit divide.
+ *
+ *  In:   r0 = dividend, r1 = divisor
+ *  Out:  r0 = quotient
+ *  Uses: r1, r2 (result), r3 (curbit) and ip as scratch
+ *
+ * The sign of the result (dividend EOR divisor) is parked in ip, both
+ * operands are made non-negative, the unsigned loop runs, and the
+ * quotient is negated at the end when ip is negative.  A zero divisor is
+ * routed to Ldiv0.
+ */
+ENTRY(__divsi3)
+        eor     ip, dividend, divisor           @ Save the sign of the result.
+        mov     curbit, #1
+        mov     result, #0
+        cmp     divisor, #0
+        rsbmi   divisor, divisor, #0            @ Loops below use unsigned.
+        beq     Ldiv0                           @ flags still come from the cmp above
+        cmp     dividend, #0
+        rsbmi   dividend, dividend, #0
+        cmp     dividend, divisor
+        bcc     Lgot_result_divsi3              @ |dividend| < |divisor|: quotient is 0
+1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp     divisor, #0x10000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #4
+        movcc   curbit, curbit, lsl #4
+        bcc     1b
+2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp     divisor, #0x80000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #1
+        movcc   curbit, curbit, lsl #1
+        bcc     2b
+3:
+        @ Test for possible subtractions, and note which bits
+        @ are done in the result.  On the final pass, this may subtract
+        @ too much from the dividend, but the result will be ok, since the
+        @ "bit" will have been shifted out at the bottom.
+        cmp     dividend, divisor
+        subcs   dividend, dividend, divisor
+        orrcs   result, result, curbit
+        cmp     dividend, divisor, lsr #1
+        subcs   dividend, dividend, divisor, lsr #1
+        orrcs   result, result, curbit, lsr #1
+        cmp     dividend, divisor, lsr #2
+        subcs   dividend, dividend, divisor, lsr #2
+        orrcs   result, result, curbit, lsr #2
+        cmp     dividend, divisor, lsr #3
+        subcs   dividend, dividend, divisor, lsr #3
+        orrcs   result, result, curbit, lsr #3
+        cmp     dividend, #0                    @ Early termination?
+        movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
+        movne   divisor, divisor, lsr #4
+        bne     3b
+Lgot_result_divsi3:
+        mov     r0, result
+        cmp     ip, #0                          @ operand signs differed?
+        rsbmi   r0, r0, #0                      @ yes: negate the quotient
+        RET     pc, lr
+/*
+ * __modsi3 - signed 32-bit remainder.
+ *
+ *  In:   r0 = dividend, r1 = divisor
+ *  Out:  r0 = remainder, negated when the original dividend was negative
+ *  Uses: r1, r2 (overdone), r3 (curbit) and ip as scratch, plus one word
+ *        of stack holding the original dividend
+ *
+ * The zero-divisor branch to Ldiv0 is taken before anything is pushed,
+ * so the stack is balanced on that path.
+ */
+ENTRY(__modsi3)
+        mov     curbit, #1
+        cmp     divisor, #0
+        rsbmi   divisor, divisor, #0            @ Loops below use unsigned.
+        beq     Ldiv0
+        @ Need to save the sign of the dividend, unfortunately, we need
+        @ ip later on; this is faster than pushing lr and using that.
+        str     dividend, [sp, #-4]!
+        cmp     dividend, #0
+        rsbmi   dividend, dividend, #0
+        cmp     dividend, divisor
+        bcc     Lgot_result_modsi3              @ |dividend| < |divisor|: it is the remainder
+1:
+        @ Unless the divisor is very big, shift it up in multiples of
+        @ four bits, since this is the amount of unwinding in the main
+        @ division loop.  Continue shifting until the divisor is
+        @ larger than the dividend.
+        cmp     divisor, #0x10000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #4
+        movcc   curbit, curbit, lsl #4
+        bcc     1b
+2:
+        @ For very big divisors, we must shift it a bit at a time, or
+        @ we will be in danger of overflowing.
+        cmp     divisor, #0x80000000
+        cmpcc   divisor, dividend
+        movcc   divisor, divisor, lsl #1
+        movcc   curbit, curbit, lsl #1
+        bcc     2b
+3:
+        @ Test for possible subtractions.  On the final pass, this may
+        @ subtract too much from the dividend, so keep track of which
+        @ subtractions are done, we can fix them up afterwards...
+        mov     overdone, #0
+        cmp     dividend, divisor
+        subcs   dividend, dividend, divisor
+        cmp     dividend, divisor, lsr #1
+        subcs   dividend, dividend, divisor, lsr #1
+        orrcs   overdone, overdone, curbit, ror #1
+        cmp     dividend, divisor, lsr #2
+        subcs   dividend, dividend, divisor, lsr #2
+        orrcs   overdone, overdone, curbit, ror #2
+        cmp     dividend, divisor, lsr #3
+        subcs   dividend, dividend, divisor, lsr #3
+        orrcs   overdone, overdone, curbit, ror #3
+        mov     ip, curbit                      @ remember the bit position of this pass
+        cmp     dividend, #0                    @ Early termination?
+        movnes  curbit, curbit, lsr #4          @ No, any more bits to do?
+        movne   divisor, divisor, lsr #4
+        bne     3b
+        @ Any subtractions that we should not have done will be recorded in
+        @ the top three bits of "overdone".  Exactly which were not needed
+        @ are governed by the position of the bit, stored in ip.
+        @ If we terminated early, because dividend became zero,
+        @ then none of the below will match, since the bit in ip will not be
+        @ in the bottom nibble.
+        ands    overdone, overdone, #0xe0000000
+        beq     Lgot_result_modsi3
+        tst     overdone, ip, ror #3
+        addne   dividend, dividend, divisor, lsr #3
+        tst     overdone, ip, ror #2
+        addne   dividend, dividend, divisor, lsr #2
+        tst     overdone, ip, ror #1
+        addne   dividend, dividend, divisor, lsr #1
+Lgot_result_modsi3:
+        ldr     ip, [sp], #4                    @ pop the original (signed) dividend
+        cmp     ip, #0
+        rsbmi   dividend, dividend, #0          @ remainder takes the dividend's sign
+        RET     pc, lr
diff --git a/arch/arm26/lib/longlong.h b/arch/arm26/lib/longlong.h
new file mode 100644
index 000000000000..05ec1abd6a2c
--- /dev/null
+++ b/arch/arm26/lib/longlong.h
@@ -0,0 +1,184 @@
+/* longlong.h -- based on code from gcc-2.95.3
+   definitions for mixed size 32/64 bit arithmetic.
+   Copyright (C) 1991, 92, 94, 95, 96, 1997, 1998 Free Software Foundation, Inc.
+   This definition file is free software; you can redistribute it
+   and/or modify it under the terms of the GNU General Public
+   License as published by the Free Software Foundation; either
+   version 2, or (at your option) any later version.
+   This definition file is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied
+   warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+   See the GNU General Public License for more details.
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+/* Borrowed from GCC 2.95.3, I Molton 29/07/01 */
+/* Number of bits in a single word (the USItype operands used below);
+   defaults to 32 unless the includer has already chosen a value.  */
+#ifndef SI_TYPE_SIZE
+#define SI_TYPE_SIZE 32
+#endif
+/* A quarter of the word width, in bits; used by count_leading_zeros.  */
+#define __BITS4 (SI_TYPE_SIZE / 4)
+/* 2^(SI_TYPE_SIZE/2): the base used to split a word into two half-words.  */
+#define __ll_B (1L << (SI_TYPE_SIZE / 2))
+/* Low and high half-word of T, i.e. T modulo __ll_B and T divided by
+   __ll_B, both computed on T converted to USItype.  */
+#define __ll_lowpart(t) ((USItype) (t) % __ll_B)
+#define __ll_highpart(t) ((USItype) (t) / __ll_B)
+/* Define auxiliary asm macros.
+   1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand)
+   multiplies two USItype integers MULTIPLIER and MULTIPLICAND,
+   and generates a two-part USItype product in HIGH_PROD and
+   LOW_PROD.
+   2) __umulsidi3(a,b) multiplies two USItype integers A and B,
+   and returns a UDItype product.  This is just a variant of umul_ppmm.
+   3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator) divides a two-word unsigned integer, composed by the
+   integers HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and
+   places the quotient in QUOTIENT and the remainder in REMAINDER.
+   HIGH_NUMERATOR must be less than DENOMINATOR for correct operation.
+   If, in addition, the most significant bit of DENOMINATOR must be 1,
+   then the pre-processor symbol UDIV_NEEDS_NORMALIZATION is defined to 1.
+   4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+   denominator).  Like udiv_qrnnd but the numbers are signed.  The
+   quotient is rounded towards 0.
+   5) count_leading_zeros(count, x) counts the number of zero-bits from
+   the msb to the first non-zero bit.  This is the number of steps X
+   needs to be shifted left to set the msb.  Undefined for X == 0.
+   6) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+   high_addend_2, low_addend_2) adds two two-word unsigned integers,
+   composed by HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and
+   LOW_ADDEND_2 respectively.  The result is placed in HIGH_SUM and
+   LOW_SUM.  Overflow (i.e. carry out) is not stored anywhere, and is
+   lost.
+   7) sub_ddmmss(high_difference, low_difference, high_minuend,
+   low_minuend, high_subtrahend, low_subtrahend) subtracts two
+   two-word unsigned integers, composed by HIGH_MINUEND and
+   LOW_MINUEND, and HIGH_SUBTRAHEND and LOW_SUBTRAHEND
+   respectively.  The result is placed in HIGH_DIFFERENCE and
+   LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+   and is lost.
+   If any of these macros are left undefined for a particular CPU,
+   C macros are used.  */
+#if defined (__arm__)
+/* add_ssaaaa(sh, sl, ah, al, bh, bl): two-word addition,
+   (sh:sl) = (ah:al) + (bh:bl).  The low words are added with ADDS and the
+   resulting carry is folded into the high words with ADC; the final carry
+   out is discarded.  SL is an early-clobber output because it is written
+   before AH and BH are read.  Both instructions depend on or change the
+   condition flags, so "cc" is declared as clobbered; without it the
+   compiler may keep a comparison result live across the asm.  */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds        %1, %4, %5                                      \n\
+        adc     %0, %2, %3"                                             \
+           : "=r" ((USItype) (sh)),                                     \
+             "=&r" ((USItype) (sl))                                     \
+           : "%r" ((USItype) (ah)),                                     \
+             "rI" ((USItype) (bh)),                                     \
+             "%r" ((USItype) (al)),                                     \
+             "rI" ((USItype) (bl))                                      \
+           : "cc")
+/* sub_ddmmss(sh, sl, ah, al, bh, bl): two-word subtraction,
+   (sh:sl) = (ah:al) - (bh:bl).  The low words are subtracted with SUBS and
+   the borrow is propagated into the high words with SBC; the final borrow
+   is discarded.  SL is an early-clobber output because it is written
+   before AH and BH are read.  The sequence changes the condition flags,
+   so "cc" is declared as clobbered; without it the compiler may keep a
+   comparison result live across the asm.  */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs        %1, %4, %5                                      \n\
+        sbc     %0, %2, %3"                                             \
+           : "=r" ((USItype) (sh)),                                     \
+             "=&r" ((USItype) (sl))                                     \
+           : "r" ((USItype) (ah)),                                      \
+             "rI" ((USItype) (bh)),                                     \
+             "r" ((USItype) (al)),                                      \
+             "rI" ((USItype) (bl))                                      \
+           : "cc")
+/* umul_ppmm(xh, xl, a, b): full 32x32 -> 64 bit unsigned multiply built
+   from 16x16 partial products; the result is (xh:xl) = a * b.
+     %2 = high half of a, %0 = high half of b,
+     %3 = low half of a,  %4 = low half of b,
+   then
+     %1 = a_lo * b_lo, %4 = a_hi * b_lo, %3 = b_hi * a_lo, %0 = a_hi * b_hi.
+   The two cross products are summed into %3; a carry out of that sum is
+   worth 2^16 in the high word (the ADDCS of 65536).  Finally the cross
+   sum is added in, shifted left 16 into the low word and right 16 (plus
+   carry) into the high word.
+   The ADDS/ADDCS/ADC instructions use and change the condition flags, so
+   "cc" is declared as clobbered; without it the compiler may keep a
+   comparison result live across the asm.
+   Note that this expands to a bare brace block, not do { } while (0).  */
+#define umul_ppmm(xh, xl, a, b) \
+{register USItype __t0, __t1, __t2;                                     \
+  __asm__ ("%@ Inlined umul_ppmm                                        \n\
+        mov     %2, %5, lsr #16                                         \n\
+        mov     %0, %6, lsr #16                                         \n\
+        bic     %3, %5, %2, lsl #16                                     \n\
+        bic     %4, %6, %0, lsl #16                                     \n\
+        mul     %1, %3, %4                                              \n\
+        mul     %4, %2, %4                                              \n\
+        mul     %3, %0, %3                                              \n\
+        mul     %0, %2, %0                                              \n\
+        adds    %3, %4, %3                                              \n\
+        addcs   %0, %0, #65536                                          \n\
+        adds    %1, %1, %3, lsl #16                                     \n\
+        adc     %0, %0, %3, lsr #16"                                    \
+           : "=&r" ((USItype) (xh)),                                    \
+             "=r" ((USItype) (xl)),                                     \
+             "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
+           : "r" ((USItype) (a)),                                       \
+             "r" ((USItype) (b))                                        \
+           : "cc");}
+/* Cost estimates for a multiply and a divide on this CPU.  Nothing in this
+   header reads them.  NOTE(review): the unit is presumably cycles, as in
+   GCC's longlong.h -- confirm before relying on the values.  */
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+/* __umulsidi3(u, v): multiply the words U and V and yield the two-word
+   product.  umul_ppmm fills the high and low halves of a DIunion, whose
+   combined .ll member is the value of the statement expression.  */
+#define __umulsidi3(u, v) \
+  ({                                                                    \
+    DIunion __prod;                                                     \
+    umul_ppmm (__prod.s.high, __prod.s.low, u, v);                      \
+    __prod.ll;                                                          \
+  })
+/* __udiv_qrnnd_c(q, r, n1, n0, d): C implementation of udiv_qrnnd.
+   Divides the two-word value (n1:n0) by d, storing the quotient in q and
+   the remainder in r.  The divisor is split into half-words (__d1:__d0)
+   and the quotient is produced one half-word at a time (__q1, then __q0).
+   Each step estimates a quotient digit by dividing by __d1 alone, then
+   lowers the estimate by one, at most twice, while the partial remainder
+   is smaller than digit * __d0.  The "__r >= (d)" test detects that the
+   preceding "+= (d)" did not wrap around.
+   As stated in the description of udiv_qrnnd in this file, n1 must be
+   less than d; and because UDIV_NEEDS_NORMALIZATION is defined to 1 for
+   this version, d must have its most significant bit set.
+   The arguments n1, n0 and d are evaluated more than once.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+  do {                                                                  \
+    USItype __d1, __d0, __q1, __q0;                                     \
+    USItype __r1, __r0, __m;                                            \
+    __d1 = __ll_highpart (d);                                           \
+    __d0 = __ll_lowpart (d);                                            \
+                                                                        \
+    __r1 = (n1) % __d1;                                                 \
+    __q1 = (n1) / __d1;                                                 \
+    __m = (USItype) __q1 * __d0;                                        \
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
+    if (__r1 < __m)                                                     \
+      {                                                                 \
+        __q1--, __r1 += (d);                                            \
+        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+          if (__r1 < __m)                                               \
+            __q1--, __r1 += (d);                                        \
+      }                                                                 \
+    __r1 -= __m;                                                        \
+                                                                        \
+    __r0 = __r1 % __d1;                                                 \
+    __q0 = __r1 / __d1;                                                 \
+    __m = (USItype) __q0 * __d0;                                        \
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
+    if (__r0 < __m)                                                     \
+      {                                                                 \
+        __q0--, __r0 += (d);                                            \
+        if (__r0 >= (d))                                                \
+          if (__r0 < __m)                                               \
+            __q0--, __r0 += (d);                                        \
+      }                                                                 \
+    __r0 -= __m;                                                        \
+                                                                        \
+    (q) = (USItype) __q1 * __ll_B | __q0;                               \
+    (r) = __r0;                                                         \
+  } while (0)
+/* udiv_qrnnd as provided here needs a divisor whose most significant bit
+   is set (see the description of udiv_qrnnd earlier in this file).  */
+#define UDIV_NEEDS_NORMALIZATION 1
+/* No assembler udiv_qrnnd is defined in this header; use the C version.  */
+#define udiv_qrnnd __udiv_qrnnd_c
+/* Lookup table consulted by count_leading_zeros; it is only declared
+   here, the definition lives outside this header.  */
+extern const UQItype __clz_tab[];
+/* count_leading_zeros(count, x): store in COUNT the number of zero bits
+   above the most significant set bit of X (undefined for X == 0, as noted
+   in the macro overview in this file).
+   First a shift is chosen that brings the topmost non-zero chunk of X down
+   to the bottom: for word sizes up to 32 bits by comparing against
+   multiples of __BITS4, otherwise by scanning down a byte at a time.
+   The result is SI_TYPE_SIZE minus (table entry for that chunk + shift).  */
+#define count_leading_zeros(count, x) \
+  do {                                                                  \
+    USItype __val = (x);                                                \
+    USItype __shift;                                                    \
+                                                                        \
+    if (SI_TYPE_SIZE <= 32)                                             \
+      {                                                                 \
+        if (__val < ((USItype)1<<2*__BITS4))                            \
+          {                                                             \
+            if (__val < ((USItype)1<<__BITS4))                          \
+              __shift = 0;                                              \
+            else                                                        \
+              __shift = __BITS4;                                        \
+          }                                                             \
+        else if (__val < ((USItype)1<<3*__BITS4))                       \
+          __shift = 2*__BITS4;                                          \
+        else                                                            \
+          __shift = 3*__BITS4;                                          \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        __shift = SI_TYPE_SIZE - 8;                                     \
+        while (__shift > 0 && ((__val >> __shift) & 0xff) == 0)         \
+          __shift -= 8;                                                 \
+      }                                                                 \
+                                                                        \
+    (count) = SI_TYPE_SIZE - (__clz_tab[__val >> __shift] + __shift);   \
+  } while (0)
diff --git a/arch/arm26/lib/lshrdi3.c b/arch/arm26/lib/lshrdi3.c
new file mode 100644
index 000000000000..b666f1bad451
--- /dev/null
+++ b/arch/arm26/lib/lshrdi3.c
@@ -0,0 +1,61 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/* Logical right shift of the double-word value U by B bits; vacated high
+   bits are filled with zeros.  The value is handled as a high/low pair of
+   words through DIunion.  */
+DItype
+__lshrdi3 (DItype u, word_type b)
+{
+  DIunion src;
+  DIunion dst;
+  word_type spare;
+  /* Shifting by nothing: hand the argument straight back.  */
+  if (b == 0)
+    return u;
+  src.ll = u;
+  /* Word width minus the shift count: positive when the shift is shorter
+     than one word, zero or negative otherwise.  */
+  spare = (sizeof (SItype) * BITS_PER_UNIT) - b;
+  if (spare > 0)
+    {
+      /* Short shift: the low word receives the bits that fall out of the
+	 bottom of the high word.  */
+      dst.s.high = (USItype)src.s.high >> b;
+      dst.s.low = ((USItype)src.s.high << spare) | ((USItype)src.s.low >> b);
+    }
+  else
+    {
+      /* Shift of a whole word or more: only bits of the old high word
+	 survive, and they all land in the low word.  */
+      dst.s.high = 0;
+      dst.s.low = (USItype)src.s.high >> -spare;
+    }
+  return dst.ll;
+}
diff --git a/arch/arm26/lib/memchr.S b/arch/arm26/lib/memchr.S
new file mode 100644
index 000000000000..34e7c14c08ad
--- /dev/null
+++ b/arch/arm26/lib/memchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/lib/memchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+        .text
+        .align  5
+/*
+ * memchr: find the first occurrence of a byte in a buffer.
+ *
+ * In:  r0 = start of buffer, r1 = value to look for, r2 = number of bytes
+ * Out: r0 = address of the first matching byte, or 0 if there is none
+ * Uses r3 as scratch.
+ *
+ * The search value is reduced to its low 8 bits first, because each
+ * buffer byte is loaded zero-extended with ldrb; without the mask a
+ * value with any bit above bit 7 set could never compare equal.
+ */
+ENTRY(memchr)
+        and     r1, r1, #255            @ compare against the low byte only
+1:      subs    r2, r2, #1              @ any bytes left to look at?
+        bmi     2f                      @ no: leave with flags NE
+        ldrb    r3, [r0], #1            @ fetch next byte, advance pointer
+        teq     r3, r1                  @ is it the one we want?
+        bne     1b
+        sub     r0, r0, #1              @ found: step back onto the match
+2:      movne   r0, #0                  @ NE only if the count ran out
+        RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/memcpy.S b/arch/arm26/lib/memcpy.S
new file mode 100644
index 000000000000..3f719e412069
--- /dev/null
+++ b/arch/arm26/lib/memcpy.S
@@ -0,0 +1,318 @@
+/*
+ *  linux/arch/arm26/lib/memcpy.S
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * ENTER builds a stack frame: the incoming sp is copied to ip, then
+ * r4-r9, fp, ip (the old sp), lr and pc are pushed, and fp is set to
+ * the old sp minus 4.
+ */
+#define ENTER   \
+                mov     ip,sp   ;\
+                stmfd   sp!,{r4-r9,fp,ip,lr,pc} ;\
+                sub     fp,ip,#4
+/*
+ * EXIT reloads r4-r9, fp, sp and pc from the frame addressed by fp, which
+ * undoes ENTER and returns to the caller.  LOADREGS is not defined in this
+ * file (NOTE(review): presumably it comes from <asm/assembler.h>).
+ */
+#define EXIT    \
+                LOADREGS(ea, fp, {r4 - r9, fp, sp, pc})
+/*
+ * EXITEQ is the same as EXIT, but made conditional on EQ.
+ */
+#define EXITEQ  \
+                LOADREGS(eqea, fp, {r4 - r9, fp, sp, pc})
+/*
+ * Prototype: void memcpy(void *to,const void *from,unsigned long n);
+ * ARM3: cant use memcopy here!!!
+ *
+ * memcpy and memmove share this one body.
+ * In: r0 = destination, r1 = source, r2 = byte count.
+ * If the source address is not below the destination, the copy runs
+ * upwards from the start (labels 1-18 and 100); otherwise it runs
+ * downwards from the end (labels 19-37).
+ * r3-r9 and ip are used as data registers; r4-r9 are saved by ENTER and
+ * restored by EXIT.  r0 and r1 are advanced as the copy proceeds.
+ *
+ * "pull" and "push" are shift operators that are not defined in this
+ * file (NOTE(review): presumably lsr/lsl from <asm/assembler.h>).
+ */
+ENTRY(memcpy)
+ENTRY(memmove)
+                ENTER
+                cmp     r1, r0
+                bcc     19f             /* source below dest: copy downwards */
+                subs    r2, r2, #4
+                blt     6f              /* fewer than 4 bytes: byte copy only */
+                ands    ip, r0, #3
+                bne     7f              /* destination not word aligned */
+                ands    ip, r1, #3
+                bne     8f              /* source not word aligned */
+/*
+ * Upwards, both pointers word aligned.  r2 is biased throughout: it holds
+ * the remaining count minus the size of the next chunk being tested for.
+ */
+1:              subs    r2, r2, #8
+                blt     5f              /* fewer than 12 bytes left */
+                subs    r2, r2, #0x14
+                blt     3f              /* fewer than 32 bytes left */
+2:              ldmia   r1!,{r3 - r9, ip}       /* 32 bytes per pass */
+                stmia   r0!,{r3 - r9, ip}
+                subs    r2, r2, #32
+                bge     2b
+                cmn     r2, #16                 /* at least 16 left? */
+                ldmgeia r1!, {r3 - r6}
+                stmgeia r0!, {r3 - r6}
+                subge   r2, r2, #0x10
+3:              adds    r2, r2, #0x14
+4:              ldmgeia r1!, {r3 - r5}          /* 12 bytes per pass */
+                stmgeia r0!, {r3 - r5}
+                subges  r2, r2, #12
+                bge     4b
+5:              adds    r2, r2, #8
+                blt     6f
+                subs    r2, r2, #4
+                ldrlt   r3, [r1], #4            /* 4-7 left: one word */
+                ldmgeia r1!, {r4, r5}           /* 8-11 left: two words */
+                strlt   r3, [r0], #4
+                stmgeia r0!, {r4, r5}
+                subge   r2, r2, #4
+/*
+ * Upwards tail: after the add r2 is the number of bytes left (0 to 3).
+ */
+6:              adds    r2, r2, #4
+                EXITEQ
+                cmp     r2, #2
+                ldrb    r3, [r1], #1
+                ldrgeb  r4, [r1], #1
+                ldrgtb  r5, [r1], #1
+                strb    r3, [r0], #1
+                strgeb  r4, [r0], #1
+                strgtb  r5, [r0], #1
+                EXIT
+/*
+ * Upwards, destination unaligned: copy 4 - (dest & 3) bytes so that the
+ * destination becomes word aligned, then re-check the source alignment.
+ */
+7:              rsb     ip, ip, #4
+                cmp     ip, #2
+                ldrb    r3, [r1], #1
+                ldrgeb  r4, [r1], #1
+                ldrgtb  r5, [r1], #1
+                strb    r3, [r0], #1
+                strgeb  r4, [r0], #1
+                strgtb  r5, [r0], #1
+                subs    r2, r2, ip
+                blt     6b
+                ands    ip, r1, #3
+                beq     1b
+/*
+ * Upwards, destination aligned but source is not (ip = source & 3).
+ * The source is rounded down to a word boundary and read a word at a
+ * time; each output word is assembled from two neighbouring source words.
+ * r7 always holds the most recently loaded source word.
+ */
+8:              bic     r1, r1, #3
+                ldr     r7, [r1], #4
+                cmp     ip, #2
+                bgt     15f             /* source offset 3 */
+                beq     11f             /* source offset 2 */
+                /* source offset 1 */
+                cmp     r2, #12
+                blt     10f
+                sub     r2, r2, #12
+9:              mov     r3, r7, pull #8         /* 16 bytes per pass */
+                ldmia   r1!, {r4 - r7}
+                orr     r3, r3, r4, push #24
+                mov     r4, r4, pull #8
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                stmia   r0!, {r3 - r6}
+                subs    r2, r2, #16
+                bge     9b
+                adds    r2, r2, #12
+                blt     100f
+10:             mov     r3, r7, pull #8         /* one word per pass */
+                ldr     r7, [r1], #4
+                subs    r2, r2, #4
+                orr     r3, r3, r7, push #24
+                str     r3, [r0], #4
+                bge     10b
+100:            sub     r1, r1, #3      /* back to the first uncopied source byte */
+                b       6b
+                /* source offset 2 */
+11:             cmp     r2, #12
+                blt     13f             /* */
+                sub     r2, r2, #12
+12:             mov     r3, r7, pull #16        /* 16 bytes per pass */
+                ldmia   r1!, {r4 - r7}
+                orr     r3, r3, r4, push #16
+                mov     r4, r4, pull #16
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                stmia   r0!, {r3 - r6}
+                subs    r2, r2, #16
+                bge     12b
+                adds    r2, r2, #12
+                blt     14f
+13:             mov     r3, r7, pull #16        /* one word per pass */
+                ldr     r7, [r1], #4
+                subs    r2, r2, #4
+                orr     r3, r3, r7, push #16
+                str     r3, [r0], #4
+                bge     13b
+14:             sub     r1, r1, #2      /* back to the first uncopied source byte */
+                b       6b
+                /* source offset 3 */
+15:             cmp     r2, #12
+                blt     17f
+                sub     r2, r2, #12
+16:             mov     r3, r7, pull #24        /* 16 bytes per pass */
+                ldmia   r1!, {r4 - r7}
+                orr     r3, r3, r4, push #8
+                mov     r4, r4, pull #24
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                stmia   r0!, {r3 - r6}
+                subs    r2, r2, #16
+                bge     16b
+                adds    r2, r2, #12
+                blt     18f
+17:             mov     r3, r7, pull #24        /* one word per pass */
+                ldr     r7, [r1], #4
+                subs    r2, r2, #4
+                orr     r3, r3, r7, push #8
+                str     r3, [r0], #4
+                bge     17b
+18:             sub     r1, r1, #1      /* back to the first uncopied source byte */
+                b       6b
+/*
+ * Downwards copy: both pointers are first moved to one past the end of
+ * their buffers, and everything below mirrors the upwards code using
+ * decrementing addressing.
+ */
+19:             add     r1, r1, r2
+                add     r0, r0, r2
+                subs    r2, r2, #4
+                blt     24f             /* fewer than 4 bytes: byte copy only */
+                ands    ip, r0, #3
+                bne     25f             /* destination end not word aligned */
+                ands    ip, r1, #3
+                bne     26f             /* source end not word aligned */
+20:             subs    r2, r2, #8
+                blt     23f             /* fewer than 12 bytes left */
+                subs    r2, r2, #0x14
+                blt     22f             /* fewer than 32 bytes left */
+21:             ldmdb   r1!, {r3 - r9, ip}      /* 32 bytes per pass */
+                stmdb   r0!, {r3 - r9, ip}
+                subs    r2, r2, #32
+                bge     21b
+22:             cmn     r2, #16                 /* at least 16 left? */
+                ldmgedb r1!, {r3 - r6}
+                stmgedb r0!, {r3 - r6}
+                subge   r2, r2, #16
+                adds    r2, r2, #20             /* at least 12 left? */
+                ldmgedb r1!, {r3 - r5}
+                stmgedb r0!, {r3 - r5}
+                subge   r2, r2, #12
+23:             adds    r2, r2, #8
+                blt     24f
+                subs    r2, r2, #4
+                ldrlt   r3, [r1, #-4]!          /* 4-7 left: one word */
+                ldmgedb r1!, {r4, r5}           /* 8-11 left: two words */
+                strlt   r3, [r0, #-4]!
+                stmgedb r0!, {r4, r5}
+                subge   r2, r2, #4
+/*
+ * Downwards tail: after the add r2 is the number of bytes left (0 to 3).
+ */
+24:             adds    r2, r2, #4
+                EXITEQ
+                cmp     r2, #2
+                ldrb    r3, [r1, #-1]!
+                ldrgeb  r4, [r1, #-1]!
+                ldrgtb  r5, [r1, #-1]!
+                strb    r3, [r0, #-1]!
+                strgeb  r4, [r0, #-1]!
+                strgtb  r5, [r0, #-1]!
+                EXIT
+/*
+ * Downwards, destination end unaligned: copy (dest & 3) bytes so that the
+ * destination becomes word aligned, then re-check the source alignment.
+ */
+25:             cmp     ip, #2
+                ldrb    r3, [r1, #-1]!
+                ldrgeb  r4, [r1, #-1]!
+                ldrgtb  r5, [r1, #-1]!
+                strb    r3, [r0, #-1]!
+                strgeb  r4, [r0, #-1]!
+                strgtb  r5, [r0, #-1]!
+                subs    r2, r2, ip
+                blt     24b
+                ands    ip, r1, #3
+                beq     20b
+/*
+ * Downwards, destination aligned but source is not (ip = source & 3).
+ * r3 always holds the most recently loaded source word.
+ */
+26:             bic     r1, r1, #3
+                ldr     r3, [r1], #0
+                cmp     ip, #2
+                blt     34f             /* source offset 1 */
+                beq     30f             /* source offset 2 */
+                /* source offset 3 */
+                cmp     r2, #12
+                blt     28f
+                sub     r2, r2, #12
+27:             mov     r7, r3, push #8         /* 16 bytes per pass */
+                ldmdb   r1!, {r3, r4, r5, r6}
+                orr     r7, r7, r6, pull #24
+                mov     r6, r6, push #8
+                orr     r6, r6, r5, pull #24
+                mov     r5, r5, push #8
+                orr     r5, r5, r4, pull #24
+                mov     r4, r4, push #8
+                orr     r4, r4, r3, pull #24
+                stmdb   r0!, {r4, r5, r6, r7}
+                subs    r2, r2, #16
+                bge     27b
+                adds    r2, r2, #12
+                blt     29f
+28:             mov     ip, r3, push #8         /* one word per pass */
+                ldr     r3, [r1, #-4]!
+                subs    r2, r2, #4
+                orr     ip, ip, r3, pull #24
+                str     ip, [r0, #-4]!
+                bge     28b
+29:             add     r1, r1, #3      /* forward to just past the last uncopied source byte */
+                b       24b
+                /* source offset 2 */
+30:             cmp     r2, #12
+                blt     32f
+                sub     r2, r2, #12
+31:             mov     r7, r3, push #16        /* 16 bytes per pass */
+                ldmdb   r1!, {r3, r4, r5, r6}
+                orr     r7, r7, r6, pull #16
+                mov     r6, r6, push #16
+                orr     r6, r6, r5, pull #16
+                mov     r5, r5, push #16
+                orr     r5, r5, r4, pull #16
+                mov     r4, r4, push #16
+                orr     r4, r4, r3, pull #16
+                stmdb   r0!, {r4, r5, r6, r7}
+                subs    r2, r2, #16
+                bge     31b
+                adds    r2, r2, #12
+                blt     33f
+32:             mov     ip, r3, push #16        /* one word per pass */
+                ldr     r3, [r1, #-4]!
+                subs    r2, r2, #4
+                orr     ip, ip, r3, pull #16
+                str     ip, [r0, #-4]!
+                bge     32b
+33:             add     r1, r1, #2      /* forward to just past the last uncopied source byte */
+                b       24b
+                /* source offset 1 */
+34:             cmp     r2, #12
+                blt     36f
+                sub     r2, r2, #12
+35:             mov     r7, r3, push #24        /* 16 bytes per pass */
+                ldmdb   r1!, {r3, r4, r5, r6}
+                orr     r7, r7, r6, pull #8
+                mov     r6, r6, push #24
+                orr     r6, r6, r5, pull #8
+                mov     r5, r5, push #24
+                orr     r5, r5, r4, pull #8
+                mov     r4, r4, push #24
+                orr     r4, r4, r3, pull #8
+                stmdb   r0!, {r4, r5, r6, r7}
+                subs    r2, r2, #16
+                bge     35b
+                adds    r2, r2, #12
+                blt     37f
+36:             mov     ip, r3, push #24        /* one word per pass */
+                ldr     r3, [r1, #-4]!
+                subs    r2, r2, #4
+                orr     ip, ip, r3, pull #8
+                str     ip, [r0, #-4]!
+                bge     36b
+37:             add     r1, r1, #1      /* forward to just past the last uncopied source byte */
+                b       24b
+                .align
diff --git a/arch/arm26/lib/memset.S b/arch/arm26/lib/memset.S
new file mode 100644
index 000000000000..aedec10b58f5
--- /dev/null
+++ b/arch/arm26/lib/memset.S
@@ -0,0 +1,80 @@
+/*
+ *  linux/arch/arm26/lib/memset.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * memset: fill a buffer with a byte value.
+ * In: r0 = start of buffer, r1 = fill value, r2 = number of bytes.
+ * r3 and ip are used as scratch; lr is saved on the stack while it is
+ * borrowed as a fourth data register.
+ * NOTE(review): r0 is advanced by every store and is not restored, so on
+ * return it does not hold the original pointer -- confirm callers do not
+ * use the value left in r0.
+ */
+        .text
+        .align  5
+        @ One padding word.  With the seven instructions of the fixup code
+        @ below this makes eight words, which presumably is here to place
+        @ the memset entry point on the boundary requested by .align 5.
+        .word   0
+/*
+ * Align the pointer in r0.  r3 contains the number of bytes that we are
+ * mis-aligned by, and r2 is the number of bytes.  If r2 < 4, then we
+ * do not bother; we use byte stores instead.  Otherwise 4 - r3 bytes are
+ * stored (three if r3 is 1, two if r3 is 2, one if r3 is 3).
+ */
+1:      subs    r2, r2, #4              @ 1 do we have enough
+        blt     5f                      @ 1 bytes to align with?
+        cmp     r3, #2                  @ 1
+        strltb  r1, [r0], #1            @ 1
+        strleb  r1, [r0], #1            @ 1
+        strb    r1, [r0], #1            @ 1
+        add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
+/*
+ * When the fixup code above falls through to here, the pointer is aligned
+ * and the length has been adjusted, so the alignment test now succeeds.
+ */
+ENTRY(memset)
+        ands    r3, r0, #3              @ 1 unaligned?
+        bne     1b                      @ 1
+/*
+ * we know that the pointer in r0 is aligned to a word boundary.
+ * Replicate the low byte of r1 into all four bytes of r1 and r3.
+ */
+        orr     r1, r1, r1, lsl #8
+        orr     r1, r1, r1, lsl #16
+        mov     r3, r1
+        cmp     r2, #16
+        blt     4f
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+        str     lr, [sp, #-4]!
+        mov     ip, r1
+        mov     lr, r1
+2:      subs    r2, r2, #64
+        stmgeia r0!, {r1, r3, ip, lr}   @ 64 bytes at a time.
+        stmgeia r0!, {r1, r3, ip, lr}
+        stmgeia r0!, {r1, r3, ip, lr}
+        stmgeia r0!, {r1, r3, ip, lr}
+        bgt     2b
+        LOADREGS(eqfd, sp!, {pc})       @ Now <64 bytes to go.
+/*
+ * No need to correct the count; we are only testing bits from now on.
+ * (Subtracting multiples of 64 or 4 leaves the low bits of the count
+ * intact, even once r2 has gone negative.)
+ */
+        tst     r2, #32
+        stmneia r0!, {r1, r3, ip, lr}
+        stmneia r0!, {r1, r3, ip, lr}
+        tst     r2, #16
+        stmneia r0!, {r1, r3, ip, lr}
+        ldr     lr, [sp], #4            @ restore the return address
+4:      tst     r2, #8
+        stmneia r0!, {r1, r3}
+        tst     r2, #4
+        strne   r1, [r0], #4
+/*
+ * When we get here, we have less than 4 bytes left to set.  We
+ * may have an unaligned pointer as well.
+ */
+5:      tst     r2, #2
+        strneb  r1, [r0], #1
+        strneb  r1, [r0], #1
+        tst     r2, #1
+        strneb  r1, [r0], #1
+        RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/memzero.S b/arch/arm26/lib/memzero.S
new file mode 100644
index 000000000000..cc5bf6860061
--- /dev/null
+++ b/arch/arm26/lib/memzero.S
@@ -0,0 +1,80 @@
+/*
+ *  linux/arch/arm26/lib/memzero.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * __memzero: fill a buffer with zero bytes.
+ * In: r0 = start of buffer, r1 = number of bytes.
+ * r2, r3 and ip hold zero while storing; lr is saved on the stack while
+ * it is borrowed as a fourth zero register.  r0 is advanced by every
+ * store and is not restored.
+ */
+        .text
+        .align  5
+        @ One padding word.  With the seven instructions of the fixup code
+        @ below this makes eight words, which presumably is here to place
+        @ the __memzero entry point on the boundary requested by .align 5.
+        .word   0
+/*
+ * Align the pointer in r0.  r3 contains the number of bytes that we are
+ * mis-aligned by, and r1 is the number of bytes.  If r1 < 4, then we
+ * do not bother; we use byte stores instead.  Otherwise 4 - r3 bytes are
+ * stored (three if r3 is 1, two if r3 is 2, one if r3 is 3).
+ */
+1:      subs    r1, r1, #4              @ 1 do we have enough
+        blt     5f                      @ 1 bytes to align with?
+        cmp     r3, #2                  @ 1
+        strltb  r2, [r0], #1            @ 1
+        strleb  r2, [r0], #1            @ 1
+        strb    r2, [r0], #1            @ 1
+        add     r1, r1, r3              @ 1 (r1 = r1 - (4 - r3))
+/*
+ * The pointer is now aligned and the length is adjusted.  Try doing the
+ * memzero again.
+ */
+ENTRY(__memzero)
+        mov     r2, #0                  @ 1
+        ands    r3, r0, #3              @ 1 unaligned?
+        bne     1b                      @ 1
+/*
+ * r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
+ * r3 therefore serves as a second zero register below.
+ */
+        cmp     r1, #16                 @ 1 we can skip this chunk if we
+        blt     4f                      @ 1 have < 16 bytes
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+        str     lr, [sp, #-4]!          @ 1
+        mov     ip, r2                  @ 1
+        mov     lr, r2                  @ 1
+3:      subs    r1, r1, #64             @ 1 write 64 bytes out per loop
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        stmgeia r0!, {r2, r3, ip, lr}   @ 4
+        bgt     3b                      @ 1
+        LOADREGS(eqfd, sp!, {pc})       @ 1/2 quick exit
+/*
+ * No need to correct the count; we are only testing bits from now on.
+ * (Subtracting multiples of 64 or 4 leaves the low bits of the count
+ * intact, even once r1 has gone negative.)
+ */
+        tst     r1, #32                 @ 1
+        stmneia r0!, {r2, r3, ip, lr}   @ 4
+        stmneia r0!, {r2, r3, ip, lr}   @ 4
+        tst     r1, #16                 @ 1 16 bytes or more?
+        stmneia r0!, {r2, r3, ip, lr}   @ 4
+        ldr     lr, [sp], #4            @ 1 restore the return address
+4:      tst     r1, #8                  @ 1 8 bytes or more?
+        stmneia r0!, {r2, r3}           @ 2
+        tst     r1, #4                  @ 1 4 bytes or more?
+        strne   r2, [r0], #4            @ 1
+/*
+ * When we get here, we have less than 4 bytes to zero.  We
+ * may have an unaligned pointer as well.
+ */
+5:      tst     r1, #2                  @ 1 2 bytes or more?
+        strneb  r2, [r0], #1            @ 1
+        strneb  r2, [r0], #1            @ 1
+        tst     r1, #1                  @ 1 a byte left over
+        strneb  r2, [r0], #1            @ 1
+        RETINSTR(mov,pc,lr)             @ 1
diff --git a/arch/arm26/lib/muldi3.c b/arch/arm26/lib/muldi3.c
new file mode 100644
index 000000000000..44d611b1cfdb
--- /dev/null
+++ b/arch/arm26/lib/muldi3.c
@@ -0,0 +1,77 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+#define umul_ppmm(xh, xl, a, b) /* (xh:xl) = a * b, built from four 16x16 multiplies */ \
+{register USItype __t0, __t1, __t2;                                     \
+  __asm__ ("%@ Inlined umul_ppmm                                        \n\
+        mov     %2, %5, lsr #16                                         \n\
+        mov     %0, %6, lsr #16                                         \n\
+        bic     %3, %5, %2, lsl #16                                     \n\
+        bic     %4, %6, %0, lsl #16                                     \n\
+        mul     %1, %3, %4                                              \n\
+        mul     %4, %2, %4                                              \n\
+        mul     %3, %0, %3                                              \n\
+        mul     %0, %2, %0                                              \n\
+        adds    %3, %4, %3                                              \n\
+        addcs   %0, %0, #65536                                          \n\
+        adds    %1, %1, %3, lsl #16                                     \n\
+        adc     %0, %0, %3, lsr #16"                                    \
+           : "=&r" ((USItype) (xh)),          /* %0: high word */       \
+             "=r" ((USItype) (xl)),           /* %1: low word  */       \
+             "=&r" (__t0), "=&r" (__t1), "=r" (__t2)                    \
+           : "r" ((USItype) (a)),                                       \
+             "r" ((USItype) (b)) : "cc");} /* adds/addcs/adc change the condition flags */
+#define __umulsidi3(u, v) /* yields __w.ll after umul_ppmm fills the high/low halves with u * v */ \
+  ({DIunion __w;                                                        \
+    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
+    __w.ll; })
+DItype
+__muldi3 (DItype u, DItype v)
+{
+  DIunion lhs, rhs;   /* operands, viewed as high/low halves */
+  DIunion prod;       /* running result */
+  lhs.ll = u;
+  rhs.ll = v;
+  prod.ll = __umulsidi3 (lhs.s.low, rhs.s.low);   /* full product of the low halves */
+  prod.s.high += (USItype) lhs.s.high * (USItype) rhs.s.low   /* cross terms only reach the high half */
+                 + (USItype) lhs.s.low * (USItype) rhs.s.high;
+  return prod.ll;
+}
diff --git a/arch/arm26/lib/putuser.S b/arch/arm26/lib/putuser.S
new file mode 100644
index 000000000000..87588cbe46ae
--- /dev/null
+++ b/arch/arm26/lib/putuser.S
@@ -0,0 +1,109 @@
+/*
+ *  linux/arch/arm26/lib/putuser.S
+ *
+ *  Copyright (C) 2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make
+ * them more efficient, especially as they return an error
+ * value in addition to the "real" return value.
+ *
+ * __put_user_X
+ *
+ * Inputs:      r0 contains the address
+ *              r1, r2 contains the value
+ * Outputs:     r0 is the error code
+ *              lr corrupted
+ *
+ * No other registers must be altered.  (see include/asm-arm/uaccess.h
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000
+ * Note also that it is intended that __put_user_bad is not global.
+ */
+#include <asm/asm_offsets.h>
+#include <asm/thread_info.h>
+#include <asm/errno.h>
+        .global __put_user_1
+__put_user_1:
+        bic     r2, sp, #0x1f00         @ r2 = sp rounded down to an 8K boundary
+        bic     r2, r2, #0x00ff         @ (0x1fff cleared as two immediates)
+        str     lr, [sp, #-4]!          @ push return address
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #1              @ 1 = access size in bytes
+        cmp     r0, r2
+        bge     __put_user_bad          @ reject addr >= limit - size
+        cmp     r0, #0x02000000         @ pick the store flavour by address
+1:      strltbt r1, [r0]                @ below 0x02000000: translated (T) store; label 1 is its __ex_table key
+        strgeb  r1, [r0]                @ otherwise plain store; lt/ge never both fire
+        mov     r0, #0                  @ success
+        ldmfd   sp!, {pc}^              @ pop the pushed lr into pc
+        .global __put_user_2
+__put_user_2:
+        bic     r2, sp, #0x1f00         @ r2 = sp rounded down to an 8K boundary
+        bic     r2, r2, #0x00ff         @ (0x1fff cleared as two immediates)
+        str     lr, [sp, #-4]!          @ push return address
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #2              @ 2 = access size in bytes
+        cmp     r0, r2
+        bge     __put_user_bad          @ reject addr >= limit - size
+        cmp     r0, #0x02000000         @ flags stay valid for all four stores below
+2:      strltbt r1, [r0], #1            @ low byte, translated (T) store; label 2 is its __ex_table key
+        strgeb  r1, [r0], #1            @ low byte, plain store; lt/ge keep the writeback to one step
+        mov     r1, r1, lsr #8          @ move the second byte down (does not touch the flags)
+3:      strltbt r1, [r0]                @ second byte, translated store
+        strgeb  r1, [r0]                @ second byte, plain store
+        mov     r0, #0                  @ success
+        ldmfd   sp!, {pc}^              @ pop the pushed lr into pc
+        .global __put_user_4
+__put_user_4:
+        bic     r2, sp, #0x1f00         @ r2 = sp rounded down to an 8K boundary
+        bic     r2, r2, #0x00ff         @ (0x1fff cleared as two immediates)
+        str     lr, [sp, #-4]!          @ push return address
+        ldr     r2, [r2, #TI_ADDR_LIMIT]
+        sub     r2, r2, #4              @ 4 = access size in bytes
+        cmp     r0, r2
+        bge     __put_user_bad          @ reject addr >= limit - size
+        cmp     r0, #0x02000000         @ pick the store flavour by address
+4:      strltt  r1, [r0]                @ below 0x02000000: translated (T) store; label 4 is its __ex_table key
+        strge   r1, [r0]                @ otherwise plain store; lt/ge never both fire
+        mov     r0, #0                  @ success
+        ldmfd   sp!, {pc}^              @ pop the pushed lr into pc
+        .global __put_user_8
+__put_user_8:
+        bic     ip, sp, #0x1f00         @ ip = sp rounded down to an 8K boundary (r2 holds data here)
+        bic     ip, ip, #0x00ff         @ (0x1fff cleared as two immediates)
+        str     lr, [sp, #-4]!          @ push return address
+        ldr     ip, [ip, #TI_ADDR_LIMIT]
+        sub     ip, ip, #8              @ 8 = access size in bytes
+        cmp     r0, ip
+        bge     __put_user_bad          @ reject addr >= limit - size
+        cmp     r0, #0x02000000         @ flags stay valid for all four stores below
+5:      strltt  r1, [r0], #4            @ first word, translated (T) store
+6:      strltt  r2, [r0]                @ second word, translated store
+        strge   r1, [r0], #4            @ first word, plain store; lt/ge keep the pairs exclusive
+        strge   r2, [r0]                @ second word, plain store
+        mov     r0, #0                  @ success
+        ldmfd   sp!, {pc}^              @ pop the pushed lr into pc
+__put_user_bad:                         @ common failure exit; every path here has already pushed lr
+        mov     r0, #-EFAULT            @ error code returned in r0
+        ldmfd   sp!, {pc}^              @ pop the pushed lr so sp is balanced on return
+.section __ex_table, "a"
+        .long   1b, __put_user_bad      @ each entry pairs a local label in a __put_user_N routine
+        .long   2b, __put_user_bad      @ with the address to resume at - presumably consulted by
+        .long   3b, __put_user_bad      @ the fault handler when the labelled instruction faults
+        .long   4b, __put_user_bad      @ (TODO confirm against the arm26 fault code)
+        .long   5b, __put_user_bad
+        .long   6b, __put_user_bad
+.previous
diff --git a/arch/arm26/lib/setbit.S b/arch/arm26/lib/setbit.S
new file mode 100644
index 000000000000..e180c1a1b2f1
--- /dev/null
+++ b/arch/arm26/lib/setbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/setbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+/*
+ * Purpose  : Function to set a bit
+ * Prototype: int set_bit(int bit, void *addr)
+ */
+ENTRY(_set_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering: flip bits 3-4 of the bit number, then fall through
+ENTRY(_set_bit_le)
+                and     r2, r0, #7              @ r2 = bit position inside the byte
+                mov     r3, #1
+                mov     r3, r3, lsl r2          @ r3 = mask with that one bit set
+                save_and_disable_irqs ip, r2    @ macro defined outside this file; ip later goes to restore_irqs
+                ldrb    r2, [r1, r0, lsr #3]    @ load byte at addr + bit / 8
+                orr     r2, r2, r3              @ set the bit
+                strb    r2, [r1, r0, lsr #3]    @ write the byte back
+                restore_irqs ip
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/strchr.S b/arch/arm26/lib/strchr.S
new file mode 100644
index 000000000000..ecfff21aa7c7
--- /dev/null
+++ b/arch/arm26/lib/strchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/lib/strchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+                .align  5
+ENTRY(strchr)
+1:              ldrb    r2, [r0], #1            @ r2 = *s++
+                teq     r2, r1                  @ wanted character?
+                teqne   r2, #0                  @ ...or end of string?
+                bne     1b
+                teq     r2, r1                  @ decide on the match itself, so c == 0 finds the terminator
+                movne   r0, #0                  @ no match: return NULL
+                subeq   r0, r0, #1              @ match: step back over the post-increment
+                RETINSTR(mov,pc,lr)             @ NOTE(review): r1 is compared as a full word, not masked to 8 bits
diff --git a/arch/arm26/lib/strrchr.S b/arch/arm26/lib/strrchr.S
new file mode 100644
index 000000000000..db43b28e78dc
--- /dev/null
+++ b/arch/arm26/lib/strrchr.S
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/lib/strrchr.S
+ *
+ *  Copyright (C) 1995-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+                .align  5
+ENTRY(strrchr)
+                mov     r3, #0                  @ r3 = last match seen so far (0 if none)
+1:              ldrb    r2, [r0], #1            @ r2 = *s++
+                teq     r2, r1
+                subeq   r3, r0, #1              @ remember the address of this match
+                teq     r2, #0                  @ loop until the terminator has been examined
+                bne     1b
+                mov     r0, r3                  @ return the last match
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/testchangebit.S b/arch/arm26/lib/testchangebit.S
new file mode 100644
index 000000000000..17049a2d93a4
--- /dev/null
+++ b/arch/arm26/lib/testchangebit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/testchangebit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+ENTRY(_test_and_change_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering: flip bits 3-4 of the bit number, then fall through
+ENTRY(_test_and_change_bit_le)
+                add     r1, r1, r0, lsr #3      @ r1 = address of the byte holding the bit
+                and     r3, r0, #7              @ r3 = bit position inside that byte
+                mov     r0, #1                  @ r0 = 1: mask seed and default return value
+                save_and_disable_irqs ip, r2    @ macro defined outside this file
+                ldrb    r2, [r1]
+                tst     r2, r0, lsl r3          @ Z set when the bit was clear
+                eor     r2, r2, r0, lsl r3      @ toggle the bit
+                strb    r2, [r1]
+                restore_irqs ip                 @ NOTE(review): moveq below needs Z from tst to survive this macro - confirm
+                moveq   r0, #0                  @ return 0 if the bit was clear, else the 1 already in r0
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/testclearbit.S b/arch/arm26/lib/testclearbit.S
new file mode 100644
index 000000000000..2506bd743ab4
--- /dev/null
+++ b/arch/arm26/lib/testclearbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/testclearbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+ENTRY(_test_and_clear_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering: flip bits 3-4 of the bit number, then fall through
+ENTRY(_test_and_clear_bit_le)
+                add     r1, r1, r0, lsr #3      @ Get byte offset
+                and     r3, r0, #7              @ Get bit offset
+                mov     r0, #1                  @ r0 = 1: mask seed and default return value
+                save_and_disable_irqs ip, r2    @ macro defined outside this file
+                ldrb    r2, [r1]
+                tst     r2, r0, lsl r3          @ Z set when the bit was clear
+                bic     r2, r2, r0, lsl r3      @ clear the bit
+                strb    r2, [r1]
+                restore_irqs ip                 @ NOTE(review): moveq below needs Z from tst to survive this macro - confirm
+                moveq   r0, #0                  @ return 0 if the bit was clear, else the 1 already in r0
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/testsetbit.S b/arch/arm26/lib/testsetbit.S
new file mode 100644
index 000000000000..f827de64b22d
--- /dev/null
+++ b/arch/arm26/lib/testsetbit.S
@@ -0,0 +1,29 @@
+/*
+ *  linux/arch/arm26/lib/testsetbit.S
+ *
+ *  Copyright (C) 1995-1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+ENTRY(_test_and_set_bit_be)
+                eor     r0, r0, #0x18           @ big endian byte ordering: flip bits 3-4 of the bit number, then fall through
+ENTRY(_test_and_set_bit_le)
+                add     r1, r1, r0, lsr #3      @ Get byte offset
+                and     r3, r0, #7              @ Get bit offset
+                mov     r0, #1                  @ r0 = 1: mask seed and default return value
+                save_and_disable_irqs ip, r2    @ macro defined outside this file
+                ldrb    r2, [r1]
+                tst     r2, r0, lsl r3          @ Z set when the bit was clear
+                orr     r2, r2, r0, lsl r3      @ set the bit
+                strb    r2, [r1]
+                restore_irqs ip                 @ NOTE(review): moveq below needs Z from tst to survive this macro - confirm
+                moveq   r0, #0                  @ return 0 if the bit was clear, else the 1 already in r0
+                RETINSTR(mov,pc,lr)
diff --git a/arch/arm26/lib/uaccess-kernel.S b/arch/arm26/lib/uaccess-kernel.S
new file mode 100644
index 000000000000..3950a1f6bc99
--- /dev/null
+++ b/arch/arm26/lib/uaccess-kernel.S
@@ -0,0 +1,173 @@
+/*
+ *  linux/arch/arm26/lib/uaccess-kernel.S
+ *
+ *  Copyright (C) 1998 Russell King
+ *
+ *  Note!  Some code fragments found in here have a special calling
+ *  convention - they are not APCS compliant!
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+                .text
+//FIXME - surely this can be done in C not asm, removing the problem of keeping C and asm in sync? (this is a struct uaccess_t)
+                .globl  uaccess_kernel
+uaccess_kernel:                                         @ table of routine addresses, same slot order as uaccess_user
+                .word   uaccess_kernel_put_byte
+                .word   uaccess_kernel_get_byte
+                .word   uaccess_kernel_put_half
+                .word   uaccess_kernel_get_half
+                .word   uaccess_kernel_put_word
+                .word   uaccess_kernel_get_word
+                .word   uaccess_kernel_put_dword
+                .word   uaccess_kernel_copy             @ slot that uaccess_user fills with copy_from_user
+                .word   uaccess_kernel_copy             @ slot that uaccess_user fills with copy_to_user
+                .word   uaccess_kernel_clear
+                .word   uaccess_kernel_strncpy
+                .word   uaccess_kernel_strnlen
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_byte:
+                stmfd   sp!, {lr}
+                strb    r0, [r1]                @ plain byte store; r2 is never written here
+                ldmfd   sp!, {pc}^
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_half:
+                stmfd   sp!, {lr}
+                strb    r0, [r1]                @ bits 0-7 of x at addr
+                mov     r0, r0, lsr #8
+                strb    r0, [r1, #1]            @ bits 8-15 of x at addr + 1
+                ldmfd   sp!, {pc}^
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_word:
+                stmfd   sp!, {lr}
+                str     r0, [r1]                @ plain word store; r2 is never written here
+                ldmfd   sp!, {pc}^
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_kernel_put_dword:
+                stmfd   sp!, {lr}
+                str     r0, [r1], #4            @ first word at addr, then r1 += 4
+                str     r0, [r1], #0            @ NOTE(review): second word is r0 again, so both halves get the same value - confirm intent
+                ldmfd   sp!, {pc}^
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_kernel_get_byte:
+                stmfd   sp!, {lr}
+                ldrb    r0, [r0]                @ zero-extended byte; r1 is never written here
+                ldmfd   sp!, {pc}^
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_kernel_get_half:
+                stmfd   sp!, {lr}
+                ldr     r0, [r0]                @ word load; NOTE(review): presumably relies on addr being halfword aligned - confirm
+                mov     r0, r0, lsl #16         @ shift up then down:
+                mov     r0, r0, lsr #16         @ keeps only bits 0-15 of the loaded word
+                ldmfd   sp!, {pc}^
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_kernel_get_word:
+                stmfd   sp!, {lr}
+                ldr     r0, [r0]                @ plain word load; r1 is never written here
+                ldmfd   sp!, {pc}^
+/* Prototype: int uaccess_kernel_copy(void *to, const char *from, size_t n)
+ * Purpose  : copy a block to kernel memory from kernel memory
+ * Params   : to   - kernel memory
+ *          : from - kernel memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied (this routine always reports 0).
+ */
+uaccess_kernel_copy:
+                stmfd   sp!, {lr}               @ lr must be saved across the call
+                bl      memcpy                  @ r0-r2 are passed through unchanged
+                mov     r0, #0                  @ report zero bytes left over
+                ldmfd   sp!, {pc}^
+/* Prototype: int uaccess_kernel_clear(void *addr, size_t sz)
+ * Purpose  : clear some kernel memory
+ * Params   : addr - kernel memory address to clear
+ *          : sz   - number of bytes to clear
+ * Returns  : number of bytes NOT cleared (this routine always reports 0)
+ */
+uaccess_kernel_clear:
+                stmfd   sp!, {lr}
+                mov     r2, #0                  @ r2 = fill value
+                cmp     r1, #4
+                blt     2f                      @ fewer than 4 bytes: byte tail only
+                ands    ip, r0, #3              @ ip = misalignment of addr
+                beq     1f
+                cmp     ip, #2                  @ store 4 - ip bytes to reach a word boundary
+                strb    r2, [r0], #1            @ ip = 1, 2 or 3
+                strleb  r2, [r0], #1            @ ip = 1 or 2
+                strltb  r2, [r0], #1            @ ip = 1
+                rsb     ip, ip, #4
+                sub     r1, r1, ip              @  7  6  5  4  3  2  1
+1:              subs    r1, r1, #8              @ -1 -2 -3 -4 -5 -6 -7
+                bmi     3f                      @ fewer than 8 left: leave the two-word loop
+                str     r2, [r0], #4
+                str     r2, [r0], #4
+                b       1b
+3:              adds    r1, r1, #4              @  3  2  1  0 -1 -2 -3
+                strpl   r2, [r0], #4            @ one more word if at least 4 were left
+2:              tst     r1, #2                  @ 1x 1x 0x 0x 1x 1x 0x
+                strneb  r2, [r0], #1
+                strneb  r2, [r0], #1
+                tst     r1, #1                  @ x1 x0 x1 x0 x1 x0 x1
+                strneb  r2, [r0], #1
+                mov     r0, #0                  @ everything was cleared
+                ldmfd   sp!, {pc}^
+/* Prototype: size_t uaccess_kernel_strncpy(char *dst, char *src, size_t len)
+ * Purpose  : copy a string from kernel memory to kernel memory
+ * Params   : dst - kernel memory destination
+ *          : src - kernel memory source
+ *          : len - maximum length of string
+ * Returns  : number of characters copied
+ */
+uaccess_kernel_strncpy:
+                stmfd   sp!, {lr}
+                mov     ip, r2                  @ ip = original len
+1:              subs    r2, r2, #1              @ one fewer byte allowed
+                bmi     2f                      @ len exhausted
+                ldrb    r3, [r1], #1
+                strb    r3, [r0], #1            @ the terminator is copied too
+                teq     r3, #0
+                bne     1b
+2:              subs    r0, ip, r2              @ NOTE(review): counts the NUL, and gives len + 1 when len ran out - confirm callers expect this
+                ldmfd   sp!, {pc}^
+/* Prototype: int uaccess_kernel_strnlen(char *str, long n)
+ * Purpose  : get length of a string in kernel memory
+ * Params   : str - address of string in kernel memory; n - maximum number of bytes to scan
+ * Returns  : length of string *including terminator*,
+ *            or n + 1 if too long (no fault handling in this kernel-memory variant)
+ */
+uaccess_kernel_strnlen:
+                stmfd   sp!, {lr}
+                mov     r2, r0                  @ r2 = start of string
+1:              ldrb    r3, [r0], #1            @ byte goes to r3 so the limit in r1 survives
+                teq     r3, #0
+                beq     2f                      @ terminator found; r0 already points past it
+                subs    r1, r1, #1              @ NOTE(review): n == 0 wraps around, so the limit is effectively off
+                bne     1b
+                add     r0, r0, #1              @ limit reached: makes the result n + 1
+2:              sub     r0, r0, r2
+                ldmfd   sp!, {pc}^
diff --git a/arch/arm26/lib/uaccess-user.S b/arch/arm26/lib/uaccess-user.S
new file mode 100644
index 000000000000..130b8f28610a
--- /dev/null
+++ b/arch/arm26/lib/uaccess-user.S
@@ -0,0 +1,718 @@
+/*
+ *  linux/arch/arm26/lib/uaccess-user.S
+ *
+ *  Copyright (C) 1995, 1996,1997,1998 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Routines to block copy data to/from user memory
+ *   These are highly optimised both for the 4k page size
+ *   and for various alignments.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/page.h>
+                .text
+//FIXME - surely this can be done in C not asm, removing the problem of keeping C and asm in sync? (this is a struct uaccess_t)
+                .globl  uaccess_user
+uaccess_user:                                           @ table of routine addresses, same slot order as uaccess_kernel
+                .word   uaccess_user_put_byte
+                .word   uaccess_user_get_byte
+                .word   uaccess_user_put_half
+                .word   uaccess_user_get_half
+                .word   uaccess_user_put_word
+                .word   uaccess_user_get_word
+                .word   uaccess_user_put_dword
+                .word   uaccess_user_copy_from_user
+                .word   uaccess_user_copy_to_user
+                .word   uaccess_user_clear_user
+                .word   uaccess_user_strncpy_from_user
+                .word   uaccess_user_strnlen_user
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_byte:
+                stmfd   sp!, {lr}
+USER(           strbt   r0, [r1])               @ translated (T) byte store; USER() presumably routes a fault to 9001 - confirm
+                ldmfd   sp!, {pc}^
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_half:
+                stmfd   sp!, {lr}
+USER(           strbt   r0, [r1], #1)           @ bits 0-7 of x at addr, then r1 += 1
+                mov     r0, r0, lsr #8
+USER(           strbt   r0, [r1])               @ bits 8-15 of x at addr + 1
+                ldmfd   sp!, {pc}^
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_word:
+                stmfd   sp!, {lr}
+USER(           strt    r0, [r1])               @ translated (T) word store; r2 is only written on the 9001 path
+                ldmfd   sp!, {pc}^
+@ In : r0 = x, r1 = addr, r2 = error
+@ Out: r2 = error
+uaccess_user_put_dword:
+                stmfd   sp!, {lr}
+USER(           strt    r0, [r1], #4)           @ first word at addr, then r1 += 4
+USER(           strt    r0, [r1], #0)           @ NOTE(review): second word is r0 again, so both halves get the same value - confirm intent
+                ldmfd   sp!, {pc}^
+9001:           mov     r2, #-EFAULT            @ error exit, presumably the USER() fixup target for the put routines above
+                ldmfd   sp!, {pc}^              @ pops the lr each of those routines pushed
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_user_get_byte:
+                stmfd   sp!, {lr}
+USER(           ldrbt   r0, [r0])               @ translated (T) byte load, zero-extended
+                ldmfd   sp!, {pc}^
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_user_get_half:
+                stmfd   sp!, {lr}
+USER(           ldrt    r0, [r0])               @ word load; NOTE(review): presumably relies on addr being halfword aligned - confirm
+                mov     r0, r0, lsl #16         @ shift up then down:
+                mov     r0, r0, lsr #16         @ keeps only bits 0-15 of the loaded word
+                ldmfd   sp!, {pc}^
+@ In : r0 = addr, r1 = error
+@ Out: r0 = x, r1 = error
+uaccess_user_get_word:
+                stmfd   sp!, {lr}
+USER(           ldrt    r0, [r0])               @ translated (T) word load
+                ldmfd   sp!, {pc}^
+9001:           mov     r1, #-EFAULT            @ error exit, presumably the USER() fixup target for the get routines above
+                ldmfd   sp!, {pc}^              @ pops the lr each of those routines pushed
+/* Prototype: int uaccess_user_copy_to_user(void *to, const char *from, size_t n)
+ * Purpose  : copy a block to user memory from kernel memory
+ * Params   : to   - user memory
+ *          : from - kernel memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied.
+ */
+.c2u_dest_not_aligned:
+                rsb     ip, ip, #4                      @ ip = bytes needed to word-align the destination (1-3)
+                cmp     ip, #2
+                ldrb    r3, [r1], #1
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                ldrgeb  r3, [r1], #1
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #1
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                sub     r2, r2, ip
+                b       .c2u_dest_aligned
+ENTRY(uaccess_user_copy_to_user)
+                stmfd   sp!, {r2, r4 - r7, lr}          @ n is saved first so the fault exit can pop it into r0
+                cmp     r2, #4
+                blt     .c2u_not_enough                 @ under 4 bytes: byte copies only
+                ands    ip, r0, #3
+                bne     .c2u_dest_not_aligned
+.c2u_dest_aligned:
+                ands    ip, r1, #3                      @ ip = source misalignment
+                bne     .c2u_src_not_aligned
+/*
+ * Seeing as there has to be at least 8 bytes to copy, we can
+ * copy one word, and force a user-mode page fault...
+ */
+.c2u_0fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4                      @ under a word left: ip = leftover byte count
+                bmi     .c2u_0nowords
+                ldr     r3, [r1], #4
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT    @ On each page, use a ld/st??t instruction
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    @ ip = bytes from r0 to the end of its page (0 on a boundary)
+                beq     .c2u_0fupi
+/*
+ * ip = max no. of bytes to copy before needing another "strt" insn
+ */
+                cmp     r2, ip
+                movlt   ip, r2                          @ never more than what is left
+                sub     r2, r2, ip
+                subs    ip, ip, #32
+                blt     .c2u_0rem8lp
+.c2u_0cpy8lp:   ldmia   r1!, {r3 - r6}
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                ldmia   r1!, {r3 - r6}
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                subs    ip, ip, #32                     @ 32 bytes moved per pass
+                bpl     .c2u_0cpy8lp
+.c2u_0rem8lp:   cmn     ip, #16                         @ ip is 32 too low here; ge means 16 or more remain
+                ldmgeia r1!, {r3 - r6}
+                stmgeia r0!, {r3 - r6}                  @ Shouldnt fault
+                tst     ip, #8
+                ldmneia r1!, {r3 - r4}
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                ldrne   r3, [r1], #4
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_0fupi
+.c2u_0nowords:  teq     ip, #0
+                beq     .c2u_finished
+.c2u_nowords:   cmp     ip, #2                          @ copy the last ip (1-3) bytes
+                ldrb    r3, [r1], #1
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                ldrgeb  r3, [r1], #1
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #1
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+.c2u_not_enough:
+                movs    ip, r2
+                bne     .c2u_nowords
+.c2u_finished:  mov     r0, #0                          @ nothing left uncopied
+                LOADREGS(fd,sp!,{r2, r4 - r7, pc})
+.c2u_src_not_aligned:
+                bic     r1, r1, #3                      @ round the source down to a word boundary
+                ldr     r7, [r1], #4                    @ r7 = word holding the first wanted source bytes
+                cmp     ip, #2                          @ dispatch on source misalignment: 1, 2 or 3
+                bgt     .c2u_3fupi
+                beq     .c2u_2fupi
+.c2u_1fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4                      @ under a word left: ip = leftover byte count
+                bmi     .c2u_1nowords
+                mov     r3, r7, pull #8                 @ pull/push: shift macros defined outside this file
+                ldr     r7, [r1], #4
+                orr     r3, r3, r7, push #24            @ merge 3 bytes of the old word with 1 of the new
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    @ ip = bytes from r0 to the end of its page (0 on a boundary)
+                beq     .c2u_1fupi
+                cmp     r2, ip
+                movlt   ip, r2                          @ never more than what is left
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .c2u_1rem8lp
+.c2u_1cpy8lp:   mov     r3, r7, pull #8
+                ldmia   r1!, {r4 - r7}
+                orr     r3, r3, r4, push #24
+                mov     r4, r4, pull #8
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                subs    ip, ip, #16                     @ 16 bytes moved per pass
+                bpl     .c2u_1cpy8lp
+.c2u_1rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #8
+                ldmneia r1!, {r4, r7}
+                orrne   r3, r3, r4, push #24
+                movne   r4, r4, pull #8
+                orrne   r4, r4, r7, push #24
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                movne   r3, r7, pull #8
+                ldrne   r7, [r1], #4
+                orrne   r3, r3, r7, push #24
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_1fupi
+.c2u_1nowords:  mov     r3, r7, lsr #byte(1)            @ byte(): macro defined outside this file
+                teq     ip, #0
+                beq     .c2u_finished
+                cmp     ip, #2                          @ store the last ip (1-3) bytes out of r7
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                movge   r3, r7, lsr #byte(2)
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                movgt   r3, r7, lsr #byte(3)
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+.c2u_2fupi:     subs    r2, r2, #4                      @ source misaligned by 2
+                addmi   ip, r2, #4                      @ under a word left: ip = leftover byte count
+                bmi     .c2u_2nowords
+                mov     r3, r7, pull #16                @ pull/push: shift macros defined outside this file
+                ldr     r7, [r1], #4
+                orr     r3, r3, r7, push #16            @ merge 2 bytes of the old word with 2 of the new
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    @ ip = bytes from r0 to the end of its page (0 on a boundary)
+                beq     .c2u_2fupi
+                cmp     r2, ip
+                movlt   ip, r2                          @ never more than what is left
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .c2u_2rem8lp
+.c2u_2cpy8lp:   mov     r3, r7, pull #16
+                ldmia   r1!, {r4 - r7}
+                orr     r3, r3, r4, push #16
+                mov     r4, r4, pull #16
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                subs    ip, ip, #16                     @ 16 bytes moved per pass
+                bpl     .c2u_2cpy8lp
+.c2u_2rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #16
+                ldmneia r1!, {r4, r7}
+                orrne   r3, r3, r4, push #16
+                movne   r4, r4, pull #16
+                orrne   r4, r4, r7, push #16
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                movne   r3, r7, pull #16
+                ldrne   r7, [r1], #4
+                orrne   r3, r3, r7, push #16
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_2fupi
+.c2u_2nowords:  mov     r3, r7, lsr #byte(2)            @ byte(): macro defined outside this file
+                teq     ip, #0
+                beq     .c2u_finished
+                cmp     ip, #2                          @ store the last ip (1-3) bytes
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                movge   r3, r7, lsr #byte(3)
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #0                    @ third byte comes from the next source word
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+.c2u_3fupi:     subs    r2, r2, #4                      @ source misaligned by 3
+                addmi   ip, r2, #4                      @ under a word left: ip = leftover byte count
+                bmi     .c2u_3nowords
+                mov     r3, r7, pull #24                @ pull/push: shift macros defined outside this file
+                ldr     r7, [r1], #4
+                orr     r3, r3, r7, push #8             @ merge 1 byte of the old word with 3 of the new
+USER(           strt    r3, [r0], #4)                   @ May fault
+                mov     ip, r0, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT    @ ip = bytes from r0 to the end of its page (0 on a boundary)
+                beq     .c2u_3fupi
+                cmp     r2, ip
+                movlt   ip, r2                          @ never more than what is left
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .c2u_3rem8lp
+.c2u_3cpy8lp:   mov     r3, r7, pull #24
+                ldmia   r1!, {r4 - r7}
+                orr     r3, r3, r4, push #8
+                mov     r4, r4, pull #24
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                stmia   r0!, {r3 - r6}                  @ Shouldnt fault
+                subs    ip, ip, #16                     @ 16 bytes moved per pass
+                bpl     .c2u_3cpy8lp
+.c2u_3rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #24
+                ldmneia r1!, {r4, r7}
+                orrne   r3, r3, r4, push #8
+                movne   r4, r4, pull #24
+                orrne   r4, r4, r7, push #8
+                stmneia r0!, {r3 - r4}                  @ Shouldnt fault
+                tst     ip, #4
+                movne   r3, r7, pull #24
+                ldrne   r7, [r1], #4
+                orrne   r3, r3, r7, push #8
+                strnet  r3, [r0], #4                    @ Shouldnt fault
+                ands    ip, ip, #3
+                beq     .c2u_3fupi
+.c2u_3nowords:  mov     r3, r7, lsr #byte(3)            @ byte(): macro defined outside this file
+                teq     ip, #0
+                beq     .c2u_finished
+                cmp     ip, #2                          @ store the last ip (1-3) bytes
+USER(           strbt   r3, [r0], #1)                   @ May fault
+                ldrgeb  r3, [r1], #1                    @ later bytes come from the next source word
+USER(           strgebt r3, [r0], #1)                   @ May fault
+                ldrgtb  r3, [r1], #0
+USER(           strgtbt r3, [r0], #1)                   @ May fault
+                b       .c2u_finished
+                .section .fixup,"ax"
+                .align  0
+9001:           LOADREGS(fd,sp!, {r0, r4 - r7, pc})
+                .previous
+/* Prototype: unsigned long uaccess_user_copy_from_user(void *to,const void *from,unsigned long n);
+ * Purpose  : copy a block from user memory to kernel memory
+ * Params   : to   - kernel memory
+ *          : from - user memory
+ *          : n    - number of bytes to copy
+ * Returns  : Number of bytes NOT copied.
+ *
+ * Registers: r0 = to, r1 = from, r2 = n on entry.  r0 and r2 are saved on
+ *            the stack so that the fixup code at 9001 can recover the
+ *            original destination and count after a fault.
+ * Layout   : the destination is byte-copied up to a word boundary, then
+ *            the code branches on the source alignment (r1 & 3): aligned
+ *            sources use the .cfu_0* loops, while .cfu_1*, .cfu_2* and
+ *            .cfu_3* rebuild each destination word from two source words
+ *            with the pull/push shifts.
+ * Success  : .cfu_finished returns 0 after dropping the saved r0/r2.
+ */
+.cfu_dest_not_aligned:
+                rsb     ip, ip, #4
+                cmp     ip, #2
+USER(           ldrbt   r3, [r1], #1)                   @ May fault
+                strb    r3, [r0], #1
+USER(           ldrgebt r3, [r1], #1)                   @ May fault
+                strgeb  r3, [r0], #1
+USER(           ldrgtbt r3, [r1], #1)                   @ May fault
+                strgtb  r3, [r0], #1
+                sub     r2, r2, ip
+                b       .cfu_dest_aligned
+ENTRY(uaccess_user_copy_from_user)
+                stmfd   sp!, {r0, r2, r4 - r7, lr}
+                cmp     r2, #4
+                blt     .cfu_not_enough
+                ands    ip, r0, #3
+                bne     .cfu_dest_not_aligned
+.cfu_dest_aligned:
+                ands    ip, r1, #3
+                bne     .cfu_src_not_aligned
+/*
+ * Seeing as there has to be at least 8 bytes to copy, we can
+ * copy one word, and force a user-mode page fault...
+ * NOTE(review): the entry check above only rejects n < 4, and the
+ * "subs r2, r2, #4 / bmi" pair below guards the word copy itself.
+ */
+.cfu_0fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4
+                bmi     .cfu_0nowords
+USER(           ldrt    r3, [r1], #4)
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT    @ On each page, use a ld/st??t instruction
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT
+                beq     .cfu_0fupi
+/*
+ * ip = max no. of bytes to copy before needing another "strt" insn
+ * (in this routine the user-mode access is the "ldrt" above; ip is the
+ * distance from r1 to the end of its page, clamped to r2 below)
+ */
+                cmp     r2, ip
+                movlt   ip, r2
+                sub     r2, r2, ip
+                subs    ip, ip, #32
+                blt     .cfu_0rem8lp
+.cfu_0cpy8lp:   ldmia   r1!, {r3 - r6}                  @ Shouldnt fault
+                stmia   r0!, {r3 - r6}
+                ldmia   r1!, {r3 - r6}                  @ Shouldnt fault
+                stmia   r0!, {r3 - r6}
+                subs    ip, ip, #32
+                bpl     .cfu_0cpy8lp
+.cfu_0rem8lp:   cmn     ip, #16
+                ldmgeia r1!, {r3 - r6}                  @ Shouldnt fault
+                stmgeia r0!, {r3 - r6}
+                tst     ip, #8
+                ldmneia r1!, {r3 - r4}                  @ Shouldnt fault
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4
+                ldrnet  r3, [r1], #4                    @ Shouldnt fault
+                strne   r3, [r0], #4
+                ands    ip, ip, #3
+                beq     .cfu_0fupi
+.cfu_0nowords:  teq     ip, #0
+                beq     .cfu_finished
+.cfu_nowords:   cmp     ip, #2
+USER(           ldrbt   r3, [r1], #1)                   @ May fault
+                strb    r3, [r0], #1
+USER(           ldrgebt r3, [r1], #1)                   @ May fault
+                strgeb  r3, [r0], #1
+USER(           ldrgtbt r3, [r1], #1)                   @ May fault
+                strgtb  r3, [r0], #1
+                b       .cfu_finished
+.cfu_not_enough:
+                movs    ip, r2
+                bne     .cfu_nowords
+.cfu_finished:  mov     r0, #0
+                add     sp, sp, #8
+                LOADREGS(fd,sp!,{r4 - r7, pc})
+.cfu_src_not_aligned:
+                bic     r1, r1, #3
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                cmp     ip, #2
+                bgt     .cfu_3fupi
+                beq     .cfu_2fupi
+.cfu_1fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4
+                bmi     .cfu_1nowords
+                mov     r3, r7, pull #8
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                orr     r3, r3, r7, push #24
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT
+                beq     .cfu_1fupi
+                cmp     r2, ip
+                movlt   ip, r2
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .cfu_1rem8lp
+.cfu_1cpy8lp:   mov     r3, r7, pull #8
+                ldmia   r1!, {r4 - r7}                  @ Shouldnt fault
+                orr     r3, r3, r4, push #24
+                mov     r4, r4, pull #8
+                orr     r4, r4, r5, push #24
+                mov     r5, r5, pull #8
+                orr     r5, r5, r6, push #24
+                mov     r6, r6, pull #8
+                orr     r6, r6, r7, push #24
+                stmia   r0!, {r3 - r6}
+                subs    ip, ip, #16
+                bpl     .cfu_1cpy8lp
+.cfu_1rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #8
+                ldmneia r1!, {r4, r7}                   @ Shouldnt fault
+                orrne   r3, r3, r4, push #24
+                movne   r4, r4, pull #8
+                orrne   r4, r4, r7, push #24
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4
+                movne   r3, r7, pull #8
+USER(           ldrnet  r7, [r1], #4)                   @ May fault
+                orrne   r3, r3, r7, push #24
+                strne   r3, [r0], #4
+                ands    ip, ip, #3
+                beq     .cfu_1fupi
+.cfu_1nowords:  mov     r3, r7, lsr #byte(1)
+                teq     ip, #0
+                beq     .cfu_finished
+                cmp     ip, #2
+                strb    r3, [r0], #1
+                movge   r3, r7, lsr #byte(2)
+                strgeb  r3, [r0], #1
+                movgt   r3, r7, lsr #byte(3)
+                strgtb  r3, [r0], #1
+                b       .cfu_finished
+.cfu_2fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4
+                bmi     .cfu_2nowords
+                mov     r3, r7, pull #16
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                orr     r3, r3, r7, push #16
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT
+                beq     .cfu_2fupi
+                cmp     r2, ip
+                movlt   ip, r2
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .cfu_2rem8lp
+.cfu_2cpy8lp:   mov     r3, r7, pull #16
+                ldmia   r1!, {r4 - r7}                  @ Shouldnt fault
+                orr     r3, r3, r4, push #16
+                mov     r4, r4, pull #16
+                orr     r4, r4, r5, push #16
+                mov     r5, r5, pull #16
+                orr     r5, r5, r6, push #16
+                mov     r6, r6, pull #16
+                orr     r6, r6, r7, push #16
+                stmia   r0!, {r3 - r6}
+                subs    ip, ip, #16
+                bpl     .cfu_2cpy8lp
+.cfu_2rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #16
+                ldmneia r1!, {r4, r7}                   @ Shouldnt fault
+                orrne   r3, r3, r4, push #16
+                movne   r4, r4, pull #16
+                orrne   r4, r4, r7, push #16
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4
+                movne   r3, r7, pull #16
+USER(           ldrnet  r7, [r1], #4)                   @ May fault
+                orrne   r3, r3, r7, push #16
+                strne   r3, [r0], #4
+                ands    ip, ip, #3
+                beq     .cfu_2fupi
+.cfu_2nowords:  mov     r3, r7, lsr #byte(2)
+                teq     ip, #0
+                beq     .cfu_finished
+                cmp     ip, #2
+                strb    r3, [r0], #1
+                movge   r3, r7, lsr #byte(3)
+                strgeb  r3, [r0], #1
+USER(           ldrgtbt r3, [r1], #0)                   @ May fault
+                strgtb  r3, [r0], #1
+                b       .cfu_finished
+.cfu_3fupi:     subs    r2, r2, #4
+                addmi   ip, r2, #4
+                bmi     .cfu_3nowords
+                mov     r3, r7, pull #24
+USER(           ldrt    r7, [r1], #4)                   @ May fault
+                orr     r3, r3, r7, push #8
+                str     r3, [r0], #4
+                mov     ip, r1, lsl #32 - PAGE_SHIFT
+                rsb     ip, ip, #0
+                movs    ip, ip, lsr #32 - PAGE_SHIFT
+                beq     .cfu_3fupi
+                cmp     r2, ip
+                movlt   ip, r2
+                sub     r2, r2, ip
+                subs    ip, ip, #16
+                blt     .cfu_3rem8lp
+.cfu_3cpy8lp:   mov     r3, r7, pull #24
+                ldmia   r1!, {r4 - r7}                  @ Shouldnt fault
+                orr     r3, r3, r4, push #8
+                mov     r4, r4, pull #24
+                orr     r4, r4, r5, push #8
+                mov     r5, r5, pull #24
+                orr     r5, r5, r6, push #8
+                mov     r6, r6, pull #24
+                orr     r6, r6, r7, push #8
+                stmia   r0!, {r3 - r6}
+                subs    ip, ip, #16
+                bpl     .cfu_3cpy8lp
+.cfu_3rem8lp:   tst     ip, #8
+                movne   r3, r7, pull #24
+                ldmneia r1!, {r4, r7}                   @ Shouldnt fault
+                orrne   r3, r3, r4, push #8
+                movne   r4, r4, pull #24
+                orrne   r4, r4, r7, push #8
+                stmneia r0!, {r3 - r4}
+                tst     ip, #4
+                movne   r3, r7, pull #24
+USER(           ldrnet  r7, [r1], #4)                   @ May fault
+                orrne   r3, r3, r7, push #8
+                strne   r3, [r0], #4
+                ands    ip, ip, #3
+                beq     .cfu_3fupi
+.cfu_3nowords:  mov     r3, r7, lsr #byte(3)
+                teq     ip, #0
+                beq     .cfu_finished
+                cmp     ip, #2
+                strb    r3, [r0], #1
+USER(           ldrgebt r3, [r1], #1)                   @ May fault
+                strgeb  r3, [r0], #1
+USER(           ldrgtbt r3, [r1], #1)                   @ May fault
+                strgtb  r3, [r0], #1
+                b       .cfu_finished
+                .section .fixup,"ax"
+                .align  0
+                /*
+                 * We took an exception.  r0 contains a pointer to
+                 * the byte not copied.
+                 * The two words popped below are the r0 (to) and r2 (n)
+                 * saved on entry.  Whatever was not copied is handed to
+                 * __memzero (r0 = first uncopied byte, r1 = bytes left;
+                 * presumably this zero-fills the rest of the kernel
+                 * buffer) and the same count is returned in r0.
+                 */
+9001:           ldr     r2, [sp], #4                    @ void *to
+                sub     r2, r0, r2                      @ bytes copied
+                ldr     r1, [sp], #4                    @ unsigned long count
+                subs    r4, r1, r2                      @ bytes left to copy
+                movne   r1, r4
+                blne    __memzero
+                mov     r0, r4
+                LOADREGS(fd,sp!, {r4 - r7, pc})
+                .previous
+/* Prototype: int uaccess_user_clear_user(void *addr, size_t sz)
+ * Purpose  : clear some user memory
+ * Params   : addr - user memory address to clear
+ *          : sz   - number of bytes to clear
+ * Returns  : number of bytes NOT cleared
+ *
+ * Registers: r0 = addr, r1 = sz on entry; r2 holds the zero being stored.
+ * Method   : for sz >= 4, bytes are stored until r0 is word aligned, then
+ *            words are stored two per loop iteration plus at most one
+ *            more.  The leftover 0-3 bytes (or the whole request when
+ *            sz < 4) are written by the bit tests at label 2; the number
+ *            columns beside the code trace r1 through that tail.
+ * Faults   : the fixup at 9001 reloads the sz saved on entry into r0
+ *            (assuming LOADREGS expands to a load-multiple), so a fault
+ *            reports the full original size as not cleared rather than
+ *            only the part that was still outstanding.
+ */
+ENTRY(uaccess_user_clear_user)
+                stmfd   sp!, {r1, lr}
+                mov     r2, #0
+                cmp     r1, #4
+                blt     2f
+                ands    ip, r0, #3
+                beq     1f
+                cmp     ip, #2
+USER(           strbt   r2, [r0], #1)
+USER(           strlebt r2, [r0], #1)
+USER(           strltbt r2, [r0], #1)
+                rsb     ip, ip, #4
+                sub     r1, r1, ip              @  7  6  5  4  3  2  1
+1:              subs    r1, r1, #8              @ -1 -2 -3 -4 -5 -6 -7
+USER(           strplt  r2, [r0], #4)
+USER(           strplt  r2, [r0], #4)
+                bpl     1b
+                adds    r1, r1, #4              @  3  2  1  0 -1 -2 -3
+USER(           strplt  r2, [r0], #4)
+2:              tst     r1, #2                  @ 1x 1x 0x 0x 1x 1x 0x
+USER(           strnebt r2, [r0], #1)
+USER(           strnebt r2, [r0], #1)
+                tst     r1, #1                  @ x1 x0 x1 x0 x1 x0 x1
+USER(           strnebt r2, [r0], #1)
+                mov     r0, #0
+                LOADREGS(fd,sp!, {r1, pc})
+                .section .fixup,"ax"
+                .align  0
+9001:           LOADREGS(fd,sp!, {r0, pc})
+                .previous
+/*
+ * Copy a string from user space to kernel space.
+ *  r0 = dst, r1 = src, r2 = byte length
+ * returns the number of characters copied (strlen of copied string),
+ *  -EFAULT on exception, or "len" if we fill the whole buffer
+ *
+ * ip keeps the original src so the result can be computed as the
+ * distance r1 has advanced.  When the NUL is found it has already been
+ * stored to dst; r1 is stepped back so it is not counted.  When the
+ * length runs out first, exactly "len" bytes have been written and dst
+ * is left without a terminating NUL.
+ * The fixup at 9001 stores a NUL at the current dst position before
+ * returning -EFAULT.
+ */
+ENTRY(uaccess_user_strncpy_from_user)
+        save_lr
+        mov     ip, r1
+1:      subs    r2, r2, #1
+USER(   ldrplbt r3, [r1], #1)
+        bmi     2f
+        strb    r3, [r0], #1
+        teq     r3, #0
+        bne     1b
+        sub     r1, r1, #1      @ take NUL character out of count
+2:      sub     r0, r1, ip
+        restore_pc
+        .section .fixup,"ax"
+        .align  0
+9001:   mov     r3, #0
+        strb    r3, [r0, #0]    @ null terminate
+        mov     r0, #-EFAULT
+        restore_pc
+        .previous
+/* Prototype: unsigned long uaccess_user_strnlen_user(const char *str, long n)
+ * Purpose  : get length of a string in user memory
+ * Params   : str - address of string in user memory
+ *          : n   - maximum number of bytes to examine
+ * Returns  : length of string *including terminator*
+ *            or zero on exception, or n + 1 if too long
+ *
+ * Registers: r0 = str, r1 = n on entry; r2 keeps the start address and
+ *            the result is how far r0 has moved from it.
+ * Note     : n is only decremented and tested after a non-NUL byte has
+ *            been read, so at least one byte is always fetched, and
+ *            n == 0 on entry does not stop the scan (r1 wraps instead of
+ *            reaching zero).
+ */
+ENTRY(uaccess_user_strnlen_user)
+        save_lr
+        mov     r2, r0
+1:
+USER(   ldrbt   r3, [r0], #1)
+        teq     r3, #0
+        beq     2f
+        subs    r1, r1, #1
+        bne     1b
+        add     r0, r0, #1
+2:      sub     r0, r0, r2
+        restore_pc
+        .section .fixup,"ax"
+        .align  0
+9001:   mov     r0, #0
+        restore_pc
+        .previous
diff --git a/arch/arm26/lib/ucmpdi2.c b/arch/arm26/lib/ucmpdi2.c
new file mode 100644
index 000000000000..6c6ae63efa02
--- /dev/null
+++ b/arch/arm26/lib/ucmpdi2.c
@@ -0,0 +1,51 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+/* Three-way unsigned comparison of two 64-bit values, done on their
+   32-bit halves: returns 0 when a < b, 1 when a == b, 2 when a > b.  */
+word_type
+__ucmpdi2 (DItype a, DItype b)
+{
+  DIunion lhs, rhs;
+  USItype lhs_word, rhs_word;
+  lhs.ll = a;
+  rhs.ll = b;
+  /* The high words decide the result unless they are equal, in which
+     case the low words do.  */
+  lhs_word = (USItype) lhs.s.high;
+  rhs_word = (USItype) rhs.s.high;
+  if (lhs_word == rhs_word)
+    {
+      lhs_word = (USItype) lhs.s.low;
+      rhs_word = (USItype) rhs.s.low;
+    }
+  if (lhs_word < rhs_word)
+    return 0;
+  if (lhs_word > rhs_word)
+    return 2;
+  return 1;
+}
diff --git a/arch/arm26/lib/udivdi3.c b/arch/arm26/lib/udivdi3.c
new file mode 100644
index 000000000000..d25195f673f4
--- /dev/null
+++ b/arch/arm26/lib/udivdi3.c
@@ -0,0 +1,242 @@
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989, 92-98, 1999 Free Software Foundation, Inc.
+This file is part of GNU CC.
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+/* As a special exception, if you link this library with other files,
+   some of which are compiled with GCC, to produce an executable,
+   this library does not by itself cause the resulting executable
+   to be covered by the GNU General Public License.
+   This exception does not however invalidate any other reasons why
+   the executable file might be covered by the GNU General Public License.
+ */
+/* support functions required by the kernel. based on code from gcc-2.95.3 */
+/* I Molton     29/07/01 */
+#include "gcclib.h"
+#include "longlong.h"
+static const UQItype __clz_tab[] =
+{ /* __clz_tab[i], for i in 0..255, is the number of significant bits in i
+     (0 for 0, 1 for 1, 2 for 2..3, ... 8 for 128..255).  Presumably
+     consumed by the count_leading_zeros macro from longlong.h, which is
+     not visible in this file.  */
+  0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+  8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+};
+UDItype
+__udivmoddi4 (UDItype n, UDItype d, UDItype *rp)
+{ /* Unsigned 64-bit divide built from 32-bit halves: returns the quotient
+     n / d and, when rp is non-null, stores the remainder through rp.
+     n1:n0 and d1:d0 are the high:low words of n and d; q1:q0 is the
+     quotient being assembled.  udiv_qrnnd, umul_ppmm, sub_ddmmss and
+     count_leading_zeros are presumably the longlong.h primitives.  */
+  DIunion ww;
+  DIunion nn, dd;
+  DIunion rr;
+  USItype d0, d1, n0, n1, n2;
+  USItype q0, q1;
+  USItype b, bm;
+  nn.ll = n;
+  dd.ll = d;
+  d0 = dd.s.low;
+  d1 = dd.s.high;
+  n0 = nn.s.low;
+  n1 = nn.s.high;
+  if (d1 == 0)
+    {
+      if (d0 > n1)
+        {
+          /* 0q = nn / 0D
+             (single-word divisor larger than the high word of n, so the
+             quotient fits in one word and q1 is 0)  */
+          count_leading_zeros (bm, d0);
+          if (bm != 0)
+            {
+              /* Normalize, i.e. make the most significant bit of the
+                 denominator set.  */
+              d0 = d0 << bm;
+              n1 = (n1 << bm) | (n0 >> (SI_TYPE_SIZE - bm));
+              n0 = n0 << bm;
+            }
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          q1 = 0;
+          /* Remainder in n0 >> bm.  */
+        }
+      else
+        {
+          /* qq = NN / 0d
+             (single-word divisor not larger than the high word of n, so
+             both quotient words are computed)  */
+          if (d0 == 0)
+            d0 = 1 / d0;        /* Divide intentionally by zero.  */
+          count_leading_zeros (bm, d0);
+          if (bm == 0)
+            {
+              /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 leading quotient digit q1 = 1).
+                 This special case is necessary, not an optimization.
+                 (Shifts counts of SI_TYPE_SIZE are undefined.)  */
+              n1 -= d0;
+              q1 = 1;
+            }
+          else
+            {
+              /* Normalize.  */
+              b = SI_TYPE_SIZE - bm;
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+              udiv_qrnnd (q1, n1, n2, n1, d0);
+            }
+          /* n1 != d0...  */
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          /* Remainder in n0 >> bm.  */
+        }
+      if (rp != 0)
+        {
+          rr.s.low = n0 >> bm;
+          rr.s.high = 0;
+          *rp = rr.ll;
+        }
+    }
+  else
+    {
+      if (d1 > n1)
+        {
+          /* 00 = nn / DD
+             (the divisor's high word exceeds n's, so the quotient is 0
+             and n itself is the remainder)  */
+          q0 = 0;
+          q1 = 0;
+          /* Remainder in n1n0.  */
+          if (rp != 0)
+            {
+              rr.s.low = n0;
+              rr.s.high = n1;
+              *rp = rr.ll;
+            }
+        }
+      else
+        {
+          /* 0q = NN / dd */
+          count_leading_zeros (bm, d1);
+          if (bm == 0)
+            {
+              /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 quotient digit q0 = 0 or 1).
+                 This special case is necessary, not an optimization.  */
+              /* The condition on the next line takes advantage of that
+                 n1 >= d1 (true due to program flow).  */
+              if (n1 > d1 || n0 >= d0)
+                {
+                  q0 = 1;
+                  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+                }
+              else
+                q0 = 0;
+              q1 = 0;
+              if (rp != 0)
+                {
+                  rr.s.low = n0;
+                  rr.s.high = n1;
+                  *rp = rr.ll;
+                }
+            }
+          else
+            {
+              USItype m1, m0;
+              /* Normalize.  */
+              b = SI_TYPE_SIZE - bm;
+              d1 = (d1 << bm) | (d0 >> b);
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+              udiv_qrnnd (q0, n1, n2, n1, d1);
+              umul_ppmm (m1, m0, q0, d0);
+              if (m1 > n1 || (m1 == n1 && m0 > n0))
+                {
+                  q0--;
+                  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+                }
+              q1 = 0;
+              /* Remainder in (n1n0 - m1m0) >> bm.  */
+              if (rp != 0)
+                {
+                  sub_ddmmss (n1, n0, n1, n0, m1, m0);
+                  rr.s.low = (n1 << b) | (n0 >> bm);
+                  rr.s.high = n1 >> bm;
+                  *rp = rr.ll;
+                }
+            }
+        }
+    }
+  ww.s.low = q0;        /* assemble the 64-bit quotient from q1:q0 */
+  ww.s.high = q1;
+  return ww.ll;
+}
+/* Unsigned 64-bit quotient n / d; the remainder is not requested.  */
+UDItype
+__udivdi3 (UDItype n, UDItype d)
+{
+  UDItype *no_remainder = (UDItype *) 0;
+  return __udivmoddi4 (n, d, no_remainder);
+}
+/* Unsigned 64-bit remainder u % v; the quotient returned by
+   __udivmoddi4 is discarded.  */
+UDItype
+__umoddi3 (UDItype u, UDItype v)
+{
+  UDItype remainder;
+  (void) __udivmoddi4 (u, v, &remainder);
+  return remainder;
+}
diff --git a/arch/arm26/machine/Makefile b/arch/arm26/machine/Makefile
new file mode 100644
index 000000000000..86ea97cc07fc
--- /dev/null
+++ b/arch/arm26/machine/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the linux kernel.
+#
+# Object file lists.
+obj-y                   := dma.o irq.o latches.o
diff --git a/arch/arm26/machine/dma.c b/arch/arm26/machine/dma.c
new file mode 100644
index 000000000000..cbc7c61d5b32
--- /dev/null
+++ b/arch/arm26/machine/dma.c
@@ -0,0 +1,215 @@
+/*
+ *  linux/arch/arm26/machine/dma.c
+ *
+ *  Copyright (C) 1998-1999 Dave Gilbert / Russell King
+ *  Copyright (C) 2003 Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  DMA functions specific to Archimedes and A5000 architecture
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <asm/dma.h>
+#include <asm/fiq.h>
+#include <asm/irq.h>
+#include <asm/io.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#define DPRINTK(x...) printk(KERN_DEBUG x)
+#if defined(CONFIG_BLK_DEV_FD1772) || defined(CONFIG_BLK_DEV_FD1772_MODULE)
+extern unsigned char fdc1772_dma_read, fdc1772_dma_read_end;
+extern unsigned char fdc1772_dma_write, fdc1772_dma_write_end;
+extern void fdc1772_setupdma(unsigned int count,unsigned int addr);
+/*
+ * Start a floppy data transfer on the Archimedes: copy the read or write
+ * FIQ routine to address 0x1c, hand the buffer to fdc1772_setupdma() and
+ * enable the floppy data FIQ.  Scatter-gather requests are a BUG(), and
+ * any mode other than read/write is only reported.
+ */
+static void arc_floppy_data_enable_dma(dmach_t channel, dma_t *dma)
+{
+        unsigned char *handler, *handler_end;
+        unsigned long flags;
+        DPRINTK("arc_floppy_data_enable_dma\n");
+        if (dma->using_sg)
+                BUG();
+        /* Select the FIQ routine matching the transfer direction. */
+        if (dma->dma_mode == DMA_MODE_READ) {
+                DPRINTK("enable_dma fdc1772 data read\n");
+                handler = &fdc1772_dma_read;
+                handler_end = &fdc1772_dma_read_end;
+        } else if (dma->dma_mode == DMA_MODE_WRITE) {
+                DPRINTK("enable_dma fdc1772 data write\n");
+                handler = &fdc1772_dma_write;
+                handler_end = &fdc1772_dma_write_end;
+        } else {
+                printk ("enable_dma: dma%d not initialised\n", channel);
+                return;
+        }
+        /* Install the routine and arm the FIQ with interrupts held off. */
+        local_save_flags_cli(flags);
+        clf();
+        memcpy ((void *)0x1c, (void *)handler, handler_end - handler);
+        fdc1772_setupdma(dma->buf.length, dma->buf.__address); /* Sets data pointer up */
+        enable_fiq(FIQ_FLOPPYDATA);
+        local_irq_restore(flags);
+}
+/*
+ * Residue for the floppy data channel: the current value of
+ * fdc1772_bytestogo (presumably the bytes still to transfer, maintained
+ * by the fdc1772 code - not visible here).
+ */
+static int arc_floppy_data_get_dma_residue(dmach_t channel, dma_t *dma)
+{
+        extern unsigned int fdc1772_bytestogo;
+        int bytes_outstanding = fdc1772_bytestogo;
+        return bytes_outstanding;
+}
+/*
+ * Arm the floppy "command end" FIQ: write a branch instruction to
+ * fdc1772_comendhandler at address 0x1c.
+ *
+ * The flags are captured with local_save_flags() so that the save and the
+ * local_irq_restore() below come from the same API family; the deprecated
+ * save_flags() wrapper this replaces expands to the same operation on UP.
+ */
+static void arc_floppy_cmdend_enable_dma(dmach_t channel, dma_t *dma)
+{
+        /* Need to build a branch at the FIQ address */
+        extern void fdc1772_comendhandler(void);
+        unsigned long flags;
+        DPRINTK("arc_floppy_cmdend_enable_dma\n");
+        local_save_flags(flags);
+        clf();
+        /*
+         * B fdc1772_comendhandler: 0xea000000 is the branch opcode and the
+         * low bits hold the word offset measured from 0x1c + 8.
+         */
+        *((unsigned int *)0x1c)=0xea000000 |
+                        (((unsigned int)fdc1772_comendhandler-(0x1c+8))/4);
+        local_irq_restore(flags);
+}
+/*
+ * Residue for the command-end channel: 1 while fdc1772_fdc_int_done is
+ * still zero (one interrupt outstanding), otherwise 0.
+ */
+static int arc_floppy_cmdend_get_dma_residue(dmach_t channel, dma_t *dma)
+{
+        extern unsigned int fdc1772_fdc_int_done;
+        if (fdc1772_fdc_int_done == 0)
+                return 1;
+        return 0;
+}
+static void arc_disable_dma(dmach_t channel, dma_t *dma)
+{       /* .disable hook shared by both Archimedes floppy ops tables; channel is unused */
+        disable_fiq(dma->dma_irq);      /* dma_irq is assigned per channel in arch_dma_init() */
+}
+static struct dma_ops arc_floppy_data_dma_ops = {       /* Archimedes floppy data channel; set on DMA_VIRTUAL_FLOPPY0 by arch_dma_init() */
+        .type           = "FIQDMA",
+        .enable         = arc_floppy_data_enable_dma,
+        .disable        = arc_disable_dma,
+        .residue        = arc_floppy_data_get_dma_residue,
+};
+static struct dma_ops arc_floppy_cmdend_dma_ops = {     /* Archimedes floppy command-end channel; set on DMA_VIRTUAL_FLOPPY1 by arch_dma_init() */
+        .type           = "FIQCMD",
+        .enable         = arc_floppy_cmdend_enable_dma,
+        .disable        = arc_disable_dma,
+        .residue        = arc_floppy_cmdend_get_dma_residue,
+};
+#endif
+#ifdef CONFIG_ARCH_A5K
+static struct fiq_handler fh = {        /* passed to claim_fiq()/release_fiq() by the A5000 floppy enable/disable hooks */
+        .name   = "floppydata"
+};
+/*
+ * Residue for the A5000 floppy channel: the current FIQ-mode r9, which
+ * a5k_floppy_enable_dma() loads with the buffer length (presumably the
+ * FIQ routine counts it down - that code is not visible here).
+ */
+static int a5k_floppy_get_dma_residue(dmach_t channel, dma_t *dma)
+{
+        struct pt_regs fiq_regs;
+        get_fiq_regs(&fiq_regs);
+        return fiq_regs.ARM_r9;
+}
+/*
+ * Start a floppy transfer on the A5000: claim the FIQ, copy the input or
+ * output FIQ routine to address 0x1c, load the FIQ registers with the
+ * buffer length (r9), buffer address (r10) and FLOPPYDMA_BASE (fp), then
+ * enable the channel's FIQ.  Scatter-gather requests are a BUG(); any
+ * mode other than DMA_MODE_READ selects the output routine.
+ */
+static void a5k_floppy_enable_dma(dmach_t channel, dma_t *dma)
+{
+        extern void floppy_fiqsetup(unsigned long len, unsigned long addr,
+                                     unsigned long port);
+        extern unsigned char floppy_fiqin_start, floppy_fiqin_end;
+        extern unsigned char floppy_fiqout_start, floppy_fiqout_end;
+        struct pt_regs fiq_regs;
+        unsigned char *code, *code_end;
+        if (dma->using_sg)
+                BUG();
+        /* Choose the FIQ routine for the transfer direction. */
+        if (dma->dma_mode == DMA_MODE_READ) {
+                code = &floppy_fiqin_start;
+                code_end = &floppy_fiqin_end;
+        } else {
+                code = &floppy_fiqout_start;
+                code_end = &floppy_fiqout_end;
+        }
+        if (claim_fiq(&fh)) {
+                printk("floppydma: couldn't claim FIQ.\n");
+                return;
+        }
+        memcpy((void *)0x1c, code, (unsigned int)(code_end - code));
+        /* Only r9, r10 and fp are filled in before set_fiq_regs(). */
+        fiq_regs.ARM_r9 = dma->buf.length;
+        fiq_regs.ARM_r10 = (unsigned long)dma->buf.__address;
+        fiq_regs.ARM_fp = FLOPPYDMA_BASE;
+        set_fiq_regs(&fiq_regs);
+        enable_fiq(dma->dma_irq);
+}
+static void a5k_floppy_disable_dma(dmach_t channel, dma_t *dma)
+{       /* undoes a5k_floppy_enable_dma(): channel is unused */
+        disable_fiq(dma->dma_irq);
+        release_fiq(&fh);       /* give back the FIQ claimed with claim_fiq(&fh) */
+}
+static struct dma_ops a5k_floppy_dma_ops = {    /* A5000 floppy channel; set on DMA_VIRTUAL_FLOPPY0 by arch_dma_init() */
+        .type           = "FIQDMA",
+        .enable         = a5k_floppy_enable_dma,
+        .disable        = a5k_floppy_disable_dma,
+        .residue        = a5k_floppy_get_dma_residue,
+};
+#endif
+/*
+ * This is virtual DMA - we don't need anything here
+ * The same empty function serves as both the .enable and the .disable
+ * hook of sound_dma_ops; neither argument is used.
+ */
+static void sound_enable_disable_dma(dmach_t channel, dma_t *dma)
+{
+}
+static struct dma_ops sound_dma_ops = { /* set on DMA_VIRTUAL_SOUND by arch_dma_init(); no .residue hook is provided */
+        .type           = "VIRTUAL",
+        .enable         = sound_enable_disable_dma,
+        .disable        = sound_enable_disable_dma,
+};
+/*
+ * Attach the machine-specific operations to the virtual DMA channels in
+ * the dma[] table: the floppy channels depend on the configured floppy
+ * driver and the machine type, the sound channel is always set up.
+ */
+void __init arch_dma_init(dma_t *dma)
+{
+#if defined(CONFIG_BLK_DEV_FD1772) || defined(CONFIG_BLK_DEV_FD1772_MODULE)
+        if (machine_is_archimedes()) {
+                dma_t *data_chan = dma + DMA_VIRTUAL_FLOPPY0;
+                dma_t *cmdend_chan = dma + DMA_VIRTUAL_FLOPPY1;
+                data_chan->dma_irq = FIQ_FLOPPYDATA;
+                data_chan->d_ops = &arc_floppy_data_dma_ops;
+                cmdend_chan->dma_irq = 1;
+                cmdend_chan->d_ops = &arc_floppy_cmdend_dma_ops;
+        }
+#endif
+#ifdef CONFIG_ARCH_A5K
+        if (machine_is_a5k()) {
+                dma_t *floppy_chan = dma + DMA_VIRTUAL_FLOPPY0;
+                floppy_chan->dma_irq = FIQ_FLOPPYDATA;
+                floppy_chan->d_ops = &a5k_floppy_dma_ops;
+        }
+#endif
+        (dma + DMA_VIRTUAL_SOUND)->d_ops = &sound_dma_ops;
+}
diff --git a/arch/arm26/machine/irq.c b/arch/arm26/machine/irq.c
new file mode 100644
index 000000000000..4361863f7ed2
--- /dev/null
+++ b/arch/arm26/machine/irq.c
@@ -0,0 +1,165 @@
+/*
+ *  linux/arch/arm26/machine/irq.c
+ *
+ *  Copyright (C) 1996 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Changelog:
+ *   24-09-1996 RMK     Created
+ *   10-10-1996 RMK     Brought up to date with arch-sa110eval
+ *   22-10-1996 RMK     Changed interrupt numbers & uses new inb/outb macros
+ *   11-01-1998 RMK     Added mask_and_ack_irq
+ *   22-08-1998 RMK     Restructured IRQ routines
+ *   08-09-2002 IM      Brought up to date for 2.5
+ *   01-06-2003 JMA     Removed arc_fiq_chip
+ */
+#include <linux/config.h>
+#include <linux/init.h>
+#include <asm/irq.h>
+#include <asm/irqchip.h>
+#include <asm/ioc.h>
+#include <asm/io.h>
+#include <asm/system.h>
+extern void init_FIQ(void);
+/*
+ * Local aliases for clf()/stf().  They bracket the read-modify-write
+ * sequences on the IOC IRQ mask A register in this file.
+ * NOTE(review): presumably clf()/stf() disable/re-enable FIQs - confirm.
+ */
+#define a_clf() clf()
+#define a_stf() stf()
+/*
+ * Acknowledge an IRQ in bank A: clear its bit in the IRQ mask A
+ * register, then write the same bit to the IRQ clear A register.
+ * The whole sequence runs between a_clf() and a_stf().
+ */
+static void arc_ack_irq_a(unsigned int irq)
+{
+        const unsigned int bit = 1 << irq;
+        unsigned int enabled;
+        a_clf();
+        enabled = ioc_readb(IOC_IRQMASKA);
+        enabled &= ~bit;
+        ioc_writeb(enabled, IOC_IRQMASKA);
+        ioc_writeb(bit, IOC_IRQCLRA);
+        a_stf();
+}
+/*
+ * Mask an IRQ in bank A by clearing its bit in the IRQ mask A register.
+ * The read-modify-write runs between a_clf() and a_stf().
+ */
+static void arc_mask_irq_a(unsigned int irq)
+{
+        const unsigned int bit = 1 << irq;
+        unsigned int enabled;
+        a_clf();
+        enabled = ioc_readb(IOC_IRQMASKA);
+        enabled &= ~bit;
+        ioc_writeb(enabled, IOC_IRQMASKA);
+        a_stf();
+}
+/*
+ * Unmask an IRQ in bank A by setting its bit in the IRQ mask A register.
+ * The read-modify-write runs between a_clf() and a_stf().
+ */
+static void arc_unmask_irq_a(unsigned int irq)
+{
+        const unsigned int bit = 1 << irq;
+        unsigned int enabled;
+        a_clf();
+        enabled = ioc_readb(IOC_IRQMASKA);
+        enabled |= bit;
+        ioc_writeb(enabled, IOC_IRQMASKA);
+        a_stf();
+}
+/* irqchip for the bank A interrupts (installed on IRQs 0-7 by arc_init_irq). */
+static struct irqchip arc_a_chip = {
+        .ack    = arc_ack_irq_a,
+        .mask   = arc_mask_irq_a,
+        .unmask = arc_unmask_irq_a,
+};
+/*
+ * Mask an IRQ in bank B by clearing its bit in the IRQ mask B register.
+ * Only the low three bits of the IRQ number select the bit.
+ */
+static void arc_mask_irq_b(unsigned int irq)
+{
+        const unsigned int bit = 1 << (irq & 7);
+        unsigned int enabled = ioc_readb(IOC_IRQMASKB);
+        enabled &= ~bit;
+        ioc_writeb(enabled, IOC_IRQMASKB);
+}
+/*
+ * Unmask an IRQ in bank B by setting its bit in the IRQ mask B register.
+ * Only the low three bits of the IRQ number select the bit.
+ */
+static void arc_unmask_irq_b(unsigned int irq)
+{
+        const unsigned int bit = 1 << (irq & 7);
+        unsigned int enabled = ioc_readb(IOC_IRQMASKB);
+        enabled |= bit;
+        ioc_writeb(enabled, IOC_IRQMASKB);
+}
+/*
+ * irqchip for the bank B interrupts (installed on IRQs 8-15 by
+ * arc_init_irq).  There is no separate acknowledge operation: .ack
+ * simply masks the interrupt.
+ */
+static struct irqchip arc_b_chip = {
+        .ack    = arc_mask_irq_b,
+        .mask   = arc_mask_irq_b,
+        .unmask = arc_unmask_irq_b,
+};
+/* FIXME - JMA none of these functions are used in arm26 currently
+static void arc_mask_irq_fiq(unsigned int irq)
+{
+        unsigned int val, mask;
+        mask = 1 << (irq & 7);
+        val = ioc_readb(IOC_FIQMASK);
+        ioc_writeb(val & ~mask, IOC_FIQMASK);
+}
+static void arc_unmask_irq_fiq(unsigned int irq)
+{
+        unsigned int val, mask;
+        mask = 1 << (irq & 7);
+        val = ioc_readb(IOC_FIQMASK);
+        ioc_writeb(val | mask, IOC_FIQMASK);
+}
+static struct irqchip arc_fiq_chip = {
+        .ack    = arc_mask_irq_fiq,
+        .mask   = arc_mask_irq_fiq,
+        .unmask = arc_unmask_irq_fiq,
+};
+*/
+/*
+ * Initialise the IOC interrupt controller state.
+ *
+ * All IRQ and FIQ sources are masked first.  IRQs 0-7 are then attached
+ * to arc_a_chip and IRQs 8-15 to arc_b_chip, each with the level-type
+ * handler do_level_IRQ and the computed flags.  Other IRQ numbers get
+ * no chip or handler here.  Finally the FIQ support is initialised.
+ */
+void __init arc_init_irq(void)
+{
+        unsigned int irq, flags;
+        /* Disable all IOC interrupt sources */
+        ioc_writeb(0, IOC_IRQMASKA);
+        ioc_writeb(0, IOC_IRQMASKB);
+        ioc_writeb(0, IOC_FIQMASK);
+        for (irq = 0; irq < NR_IRQS; irq++) {
+                flags = IRQF_VALID;
+                /* IRQs 7 and 8 are deliberately left out of probing */
+                if (irq <= 6 || (irq >= 9 && irq <= 15))
+                        flags |= IRQF_PROBE;
+                /* The keyboard TX interrupt is flagged IRQF_NOAUTOEN */
+                if (irq == IRQ_KEYBOARDTX)
+                        flags |= IRQF_NOAUTOEN;
+                switch (irq) {
+                case 0 ... 7:
+                        set_irq_chip(irq, &arc_a_chip);
+                        set_irq_handler(irq, do_level_IRQ);
+                        set_irq_flags(irq, flags);
+                        break;
+                case 8 ... 15:
+                        set_irq_chip(irq, &arc_b_chip);
+                        set_irq_handler(irq, do_level_IRQ);
+                        set_irq_flags(irq, flags);
+                        /*
+                         * Explicit break: without it this case would fall
+                         * into the FIQ case below if that were re-enabled.
+                         */
+                        break;
+/*              case 64 ... 72:
+                        set_irq_chip(irq, &arc_fiq_chip);
+                        set_irq_flags(irq, flags);
+                        break;
+*/
+                default:
+                        break;
+                }
+        }
+        irq_desc[IRQ_KEYBOARDTX].noautoenable = 1;
+        init_FIQ();
+}
diff --git a/arch/arm26/machine/latches.c b/arch/arm26/machine/latches.c
new file mode 100644
index 000000000000..94f05d2a3b2b
--- /dev/null
+++ b/arch/arm26/machine/latches.c
@@ -0,0 +1,72 @@
+/*
+ *  linux/arch/arm26/machine/latches.c
+ *
+ *  Copyright (C) David Alan Gilbert 1995/1996,2000
+ *  Copyright (C) Ian Molton 2003
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Support for the latches on the old Archimedes which control the floppy,
+ *  hard disc and printer
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <asm/io.h>
+#include <asm/hardware.h>
+#include <asm/mach-types.h>
+#include <asm/oldlatches.h>
+/*
+ * Software copies of the values last written to latch A and latch B.
+ * The update functions modify these copies and write them out; the
+ * latches themselves are never read back in this file.
+ */
+static unsigned char latch_a_copy;
+static unsigned char latch_b_copy;
+/*
+ * Update latch A: newval = (oldval & ~mask) | newdata.
+ * The bits selected by 'mask' are cleared in the software copy, 'newdata'
+ * is ORed in (it is not itself masked), and the result is written to the
+ * latch with interrupts disabled.  The new value is then logged.
+ * BUGs if called on anything other than an Archimedes.
+ */
+void oldlatch_aupdate(unsigned char mask,unsigned char newdata)
+{
+        unsigned long irqstate;
+        BUG_ON(!machine_is_archimedes());
+        /* FIXME: was local_save_flags */
+        local_irq_save(irqstate);
+        latch_a_copy &= ~mask;
+        latch_a_copy |= newdata;
+        __raw_writeb(latch_a_copy, LATCHA_BASE);
+        local_irq_restore(irqstate);
+        printk("Latch: A = 0x%02x\n", latch_a_copy);
+}
+/*
+ * Update latch B: newval = (oldval & ~mask) | newdata.
+ * The bits selected by 'mask' are cleared in the software copy, 'newdata'
+ * is ORed in (it is not itself masked), and the result is written to the
+ * latch with interrupts disabled.  The new value is then logged.
+ * BUGs if called on anything other than an Archimedes.
+ */
+void oldlatch_bupdate(unsigned char mask,unsigned char newdata)
+{
+        unsigned long irqstate;
+        BUG_ON(!machine_is_archimedes());
+        /* FIXME: was local_save_flags */
+        local_irq_save(irqstate);
+        latch_b_copy &= ~mask;
+        latch_b_copy |= newdata;
+        __raw_writeb(latch_b_copy, LATCHB_BASE);
+        local_irq_restore(irqstate);
+        printk("Latch: B = 0x%02x\n", latch_b_copy);
+}
+/*
+ * Put both latches into a known state at boot.  Does nothing on
+ * machines other than the Archimedes.  Always returns 0.
+ */
+static int __init oldlatch_init(void)
+{
+        if (!machine_is_archimedes())
+                return 0;
+        /* Latch A: every bit set */
+        oldlatch_aupdate(0xff, 0xff);
+        /* Latch B: only LATCHB_FDCRESET set, i.e. no FDC reset */
+        oldlatch_bupdate(0xff, LATCHB_FDCRESET);
+        return 0;
+}
+/* Run the latch setup as an arch-level initcall. */
+arch_initcall(oldlatch_init);
+/* The latch update functions are exported for use by modules. */
+EXPORT_SYMBOL(oldlatch_aupdate);
+EXPORT_SYMBOL(oldlatch_bupdate);
diff --git a/arch/arm26/mm/Makefile b/arch/arm26/mm/Makefile
new file mode 100644
index 000000000000..a8fb166d5c6d
--- /dev/null
+++ b/arch/arm26/mm/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for the linux arm26-specific parts of the memory manager.
+#
+obj-y           := init.o extable.o proc-funcs.o memc.o fault.o \
+                   small_page.o
diff --git a/arch/arm26/mm/extable.c b/arch/arm26/mm/extable.c
new file mode 100644
index 000000000000..2d9f5b5a78d6
--- /dev/null
+++ b/arch/arm26/mm/extable.c
@@ -0,0 +1,25 @@
+/*
+ *  linux/arch/arm26/mm/extable.c
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+/*
+ * Look up the faulting instruction in the exception tables and, if an
+ * entry exists, redirect execution to its fixup code.
+ *
+ * Returns 1 if a fixup was applied, 0 if no entry was found.
+ */
+int fixup_exception(struct pt_regs *regs)
+{
+        const struct exception_table_entry *entry;
+        entry = search_exception_tables(instruction_pointer(regs));
+        if (entry == NULL)
+                return 0;
+        /*
+         * PSR and PC share one register on ARM26, so the mode and
+         * interrupt bits are ORed into the new PC to make sure the
+         * kernel keeps running in SVC mode.
+         */
+        regs->ARM_pc = entry->fixup | PSR_I_BIT | MODE_SVC26;
+        return 1;
+}
diff --git a/arch/arm26/mm/fault.c b/arch/arm26/mm/fault.c
new file mode 100644
index 000000000000..dacca8bb7744
--- /dev/null
+++ b/arch/arm26/mm/fault.c
@@ -0,0 +1,318 @@
+/*
+ *  linux/arch/arm26/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Modifications for ARM processor (c) 1995-2001 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h> //FIXME this header may be bogusly included
+#include "fault.h"
+/*
+ * Bit flags carried in the fault status/mode word ('fsr' / 'mode')
+ * passed to the fault handlers in this file.
+ * NOTE(review): the instruction-class bits (0x80..0x08) are presumably
+ * set by the abort entry code; only PREFETCH, WRITE and FORCECOW are
+ * used in this file.
+ */
+#define FAULT_CODE_LDRSTRPOST   0x80
+#define FAULT_CODE_LDRSTRPRE    0x40
+#define FAULT_CODE_LDRSTRREG    0x20
+#define FAULT_CODE_LDMSTM       0x10
+#define FAULT_CODE_LDCSTC       0x08
+#define FAULT_CODE_PREFETCH     0x04
+#define FAULT_CODE_WRITE        0x02
+#define FAULT_CODE_FORCECOW     0x01
+/* Non-zero when the fault is a write or a copy-on-write is being forced */
+#define DO_COW(m)               ((m) & (FAULT_CODE_WRITE|FAULT_CODE_FORCECOW))
+/* True when the WRITE bit is clear, i.e. the access was a read */
+#define READ_FAULT(m)           (!((m) & FAULT_CODE_WRITE))
+/* NOTE(review): DEBUG is defined unconditionally - confirm this is intended */
+#define DEBUG
+/*
+ * This is useful to dump out the page tables associated with
+ * 'addr' in mm 'mm'.
+ *
+ * Prints the pgd pointer, the pgd entry covering 'addr' and, when the
+ * pmd is present and not bad, the pte.  A NULL 'mm' means init_mm.
+ */
+void show_pte(struct mm_struct *mm, unsigned long addr)
+{
+        pgd_t *pgd;
+        if (!mm)
+                mm = &init_mm;
+        printk(KERN_ALERT "pgd = %p\n", mm->pgd);
+        pgd = pgd_offset(mm, addr);
+        /* No newline here: the pmd/pte details below continue this line */
+        printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));
+        /* Single-pass block so that 'break' can skip the rest of the walk */
+        do {
+                pmd_t *pmd;
+                pte_t *pte;
+                pmd = pmd_offset(pgd, addr);
+                if (pmd_none(*pmd))
+                        break;
+                if (pmd_bad(*pmd)) {
+                        printk("(bad)");
+                        break;
+                }
+                /* We must not map this if we have highmem enabled */
+                /* FIXME */
+                pte = pte_offset_map(pmd, addr);
+                printk(", *pte=%08lx", pte_val(*pte));
+                pte_unmap(pte);
+        } while(0);
+        printk("\n");
+}
+/*
+ * Oops.  The kernel tried to access some page that wasn't present.
+ *
+ * If the faulting instruction has an exception-table fixup, apply it
+ * and return.  Otherwise report the fault, dump the page tables for
+ * 'addr', call die() and terminate the current task with SIGKILL.
+ */
+static void
+__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+                  struct pt_regs *regs)
+{
+        /*
+         * Are we prepared to handle this kernel fault?
+         */
+        if (fixup_exception(regs))
+                return;
+        /*
+         * No handler, we'll have to terminate things with extreme prejudice.
+         */
+        bust_spinlocks(1);
+        /* Addresses inside the first page are reported as NULL dereferences */
+        printk(KERN_ALERT
+                "Unable to handle kernel %s at virtual address %08lx\n",
+                (addr < PAGE_SIZE) ? "NULL pointer dereference" :
+                "paging request", addr);
+        show_pte(mm, addr);
+        die("Oops", regs, fsr);
+        bust_spinlocks(0);
+        do_exit(SIGKILL);
+}
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * User mode accesses just cause a SIGSEGV
+ *
+ * Records the fault details in tsk->thread and forces a SIGSEGV with
+ * si_code 'code' and si_addr 'addr' on the task.
+ */
+static void
+__do_user_fault(struct task_struct *tsk, unsigned long addr,
+                unsigned int fsr, int code, struct pt_regs *regs)
+{
+        struct siginfo si;
+#ifdef CONFIG_DEBUG_USER
+        printk("%s: unhandled page fault at 0x%08lx, code 0x%03x\n",
+               tsk->comm, addr, fsr);
+        show_pte(tsk->mm, addr);
+        show_regs(regs);
+        //dump_backtrace(regs, tsk); // FIXME ARM32 dropped this - why?
+        /*
+         * NOTE: with CONFIG_DEBUG_USER this spins forever after the dump,
+         * so the signal below is never delivered.
+         */
+        while(1); //FIXME - hack to stop debug going nutso
+#endif
+        tsk->thread.address = addr;
+        tsk->thread.error_code = fsr;
+        tsk->thread.trap_no = 14;
+        /* Only these four siginfo fields are filled in */
+        si.si_signo = SIGSEGV;
+        si.si_errno = 0;
+        si.si_code = code;
+        si.si_addr = (void *)addr;
+        force_sig_info(SIGSEGV, &si, tsk);
+}
+/*
+ * Try to resolve a fault at 'addr' against the VMAs of 'mm'.
+ *
+ * Return value:
+ *    2, 1, 0  the result of handle_mm_fault(); 2 and 1 also bump the
+ *             task's major/minor fault counters
+ *   -1        the VMA does not permit this type of access
+ *   -2        no usable VMA covers the address
+ *   -3        handle_mm_fault() returned any other value and the task
+ *             is not pid 1 (treated as out of memory)
+ *
+ * do_page_fault() calls this with mm->mmap_sem held for reading.
+ */
+static int
+__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+                struct task_struct *tsk)
+{
+        struct vm_area_struct *vma;
+        int fault, mask;
+        vma = find_vma(mm, addr);
+        fault = -2; /* bad map area */
+        if (!vma)
+                goto out;
+        /* addr lies below the VMA found: only valid if the stack can grow */
+        if (vma->vm_start > addr)
+                goto check_stack;
+        /*
+         * Ok, we have a good vm_area for this
+         * memory access, so we can handle it.
+         */
+good_area:
+        if (READ_FAULT(fsr)) /* read? */
+                mask = VM_READ|VM_EXEC;
+        else
+                mask = VM_WRITE;
+        fault = -1; /* bad access type */
+        if (!(vma->vm_flags & mask))
+                goto out;
+        /*
+         * If for any reason at all we couldn't handle
+         * the fault, make sure we exit gracefully rather
+         * than endlessly redo the fault.
+         */
+survive:
+        fault = handle_mm_fault(mm, vma, addr & PAGE_MASK, DO_COW(fsr));
+        /*
+         * Handle the "normal" cases first - successful and sigbus
+         */
+        switch (fault) {
+        case 2:
+                tsk->maj_flt++;
+                return fault;
+        case 1:
+                tsk->min_flt++;
+                /* fall through */
+        case 0:
+                return fault;
+        }
+        fault = -3; /* out of memory */
+        if (tsk->pid != 1)
+                goto out;
+        /*
+         * If we are out of memory for pid1,
+         * sleep for a while and retry
+         */
+        yield();
+        goto survive;
+check_stack:
+        /* expand_stack() returns 0 on success; 'fault' is still -2 on failure */
+        if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
+                goto good_area;
+out:
+        return fault;
+}
+/*
+ * Top-level page fault handler.
+ *
+ * Runs __do_page_fault() under mm->mmap_sem and acts on its result:
+ *   > 0    fault resolved
+ *   == 0   SIGBUS is sent to the task
+ *   -3     (user mode) the task is killed
+ *   other  (user mode) SIGSEGV, SEGV_ACCERR for -1, SEGV_MAPERR otherwise
+ * Faults taken in interrupt context, without an mm, or from kernel mode
+ * that could not be resolved go to __do_kernel_fault().
+ *
+ * Always returns 0.
+ */
+int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+        struct task_struct *tsk;
+        struct mm_struct *mm;
+        int fault;
+        tsk = current;
+        mm  = tsk->mm;
+        /*
+         * If we're in an interrupt or have no user
+         * context, we must not take the fault..
+         */
+        if (in_interrupt() || !mm)
+                goto no_context;
+        down_read(&mm->mmap_sem);
+        fault = __do_page_fault(mm, addr, fsr, tsk);
+        up_read(&mm->mmap_sem);
+        /*
+         * Handle the "normal" case first
+         */
+        if (fault > 0)
+                return 0;
+        /*
+         * We had some memory, but were unable to
+         * successfully fix up this page fault.
+         */
+        if (fault == 0){
+                goto do_sigbus;
+        }
+        /*
+         * If we are in kernel mode at this point, we
+         * have no context to handle this fault with.
+         * FIXME - is this test right?
+         */
+        if (!user_mode(regs)){
+                goto no_context;
+        }
+        if (fault == -3) {
+                /*
+                 * We ran out of memory, or some other thing happened to
+                 * us that made us unable to handle the page fault gracefully.
+                 */
+                printk("VM: killing process %s\n", tsk->comm);
+                do_exit(SIGKILL);
+        }
+        else{
+                /* -1: access not permitted by the VMA; otherwise: no mapping */
+                __do_user_fault(tsk, addr, fsr, fault == -1 ? SEGV_ACCERR : SEGV_MAPERR, regs);
+        }
+        return 0;
+/*
+ * handle_mm_fault() returned 0: the mapping exists but the fault
+ * could not be fixed up.
+ */
+do_sigbus:
+        /*
+         * Send a sigbus, regardless of whether we were in kernel
+         * or user mode.
+         */
+        tsk->thread.address = addr;  //FIXME - need other bits setting?
+        tsk->thread.error_code = fsr;
+        tsk->thread.trap_no = 14;
+        force_sig(SIGBUS, tsk);
+#ifdef CONFIG_DEBUG_USER
+        printk(KERN_DEBUG "%s: sigbus at 0x%08lx, pc=0x%08lx\n",
+                current->comm, addr, instruction_pointer(regs));
+#endif
+        /* Kernel mode? Handle exceptions or die */
+        if (user_mode(regs))
+                return 0;
+no_context:
+        __do_kernel_fault(mm, addr, fsr, regs);
+        return 0;
+}
+/*
+ * Data abort entry point.  On ARM2/3 an ldm can touch a range of
+ * addresses, so the abort is described by [min_addr, max_addr].
+ * The first address is always faulted in.  If the last address lies on
+ * a different page, that page is faulted in as well, and for it a
+ * copy-on-write is always forced.
+ */
+asmlinkage void
+do_DataAbort(unsigned long min_addr, unsigned long max_addr, int mode, struct pt_regs *regs)
+{
+        const unsigned long page_diff = (min_addr ^ max_addr) >> PAGE_SHIFT;
+        do_page_fault(min_addr, mode, regs);
+        /* Both ends on the same page: nothing more to do */
+        if (page_diff == 0)
+                return;
+        do_page_fault(max_addr, mode | FAULT_CODE_FORCECOW, regs);
+}
+/*
+ * Prefetch abort entry point: fault in the page holding the instruction
+ * at 'addr'.  Always returns 1.
+ */
+asmlinkage int
+do_PrefetchAbort(unsigned long addr, struct pt_regs *regs)
+{
+        /*
+         * Disabled sketch (pseudo-code, not compilable): detect an abort
+         * on a page that is already mapped.
+         */
+#if 0
+        if (the memc mapping for this page exists) {
+                printk ("Page in, but got abort (undefined instruction?)\n");
+                return 0;
+        }
+#endif
+        do_page_fault(addr, FAULT_CODE_PREFETCH, regs);
+        return 1;
+}
diff --git a/arch/arm26/mm/fault.h b/arch/arm26/mm/fault.h
new file mode 100644
index 000000000000..4442d00d86ac
--- /dev/null
+++ b/arch/arm26/mm/fault.h
@@ -0,0 +1,5 @@
+/* Dump the page table entries covering 'addr' in 'mm' (NULL means init_mm). */
+void show_pte(struct mm_struct *mm, unsigned long addr);
+/* Handle a page fault at 'addr'; 'fsr' carries the FAULT_CODE_* bits. */
+int do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
+/* NOTE(review): no caller or definition is visible in fault.c - confirm this is still needed */
+unsigned long search_extable(unsigned long addr); //FIXME - is it right?
diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c
new file mode 100644
index 000000000000..1f09a9d0fb83
--- /dev/null
+++ b/arch/arm26/mm/init.c
@@ -0,0 +1,412 @@
+/*
+ *  linux/arch/arm26/mm/init.c
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/bootmem.h>
+#include <linux/blkdev.h>
+#include <asm/segment.h>
+#include <asm/mach-types.h>
+#include <asm/dma.h>
+#include <asm/hardware.h>
+#include <asm/setup.h>
+#include <asm/tlb.h>
+#include <asm/map.h>
+/* Bytes occupied by PTRS_PER_PTE entries of type pte_t. */
+#define TABLE_SIZE      (PTRS_PER_PTE * sizeof(pte_t))
+struct mmu_gather mmu_gathers[NR_CPUS];
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern char _stext, _text, _etext, _end, __init_begin, __init_end;
+#ifdef CONFIG_XIP_KERNEL
+extern char _endtext, _sdata;
+#endif
+extern unsigned long phys_initrd_start;
+extern unsigned long phys_initrd_size;
+/*
+ * The sole use of this is to pass memory configuration
+ * data from paging_init to mem_init.
+ */
+static struct meminfo meminfo __initdata = { 0, };
+/*
+ * empty_zero_page is a special page that is used for
+ * zero-initialized data and COW.
+ */
+struct page *empty_zero_page;
+/*
+ * Print a memory summary: the free-area report, the amount of free
+ * swap, and a census of node 0's pages (total, free, reserved, slab,
+ * shared, swap cached).
+ */
+void show_mem(void)
+{
+        int nr_total = 0, nr_reserved = 0, nr_free = 0;
+        int nr_cached = 0, nr_slab = 0, nr_shared = 0;
+        struct page *pg, *last;
+        printk("Mem-info:\n");
+        show_free_areas();
+        printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+        pg   = NODE_MEM_MAP(0);
+        last = pg + NODE_DATA(0)->node_spanned_pages;
+        /* The first page is always examined, even for an empty span */
+        do {
+                nr_total++;
+                if (PageReserved(pg)) {
+                        nr_reserved++;
+                } else if (PageSwapCache(pg)) {
+                        nr_cached++;
+                } else if (PageSlab(pg)) {
+                        nr_slab++;
+                } else if (page_count(pg) == 0) {
+                        nr_free++;
+                } else {
+                        /* every reference beyond the first counts as a share */
+                        nr_shared += page_count(pg) - 1;
+                }
+        } while (++pg < last);
+        printk("%d pages of RAM\n", nr_total);
+        printk("%d free pages\n", nr_free);
+        printk("%d reserved pages\n", nr_reserved);
+        printk("%d slab pages\n", nr_slab);
+        printk("%d pages shared\n", nr_shared);
+        printk("%d pages swap cached\n", nr_cached);
+}
+/* Per-node summary filled in by find_memend_and_nodes(). */
+struct node_info {
+        unsigned int start;             /* first pfn of the bank */
+        unsigned int end;               /* pfn just past the end of the bank */
+        int bootmap_pages;              /* pages needed for the bootmem bitmap */
+};
+/* Address -> page frame number, rounding down */
+#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
+/* Address -> page frame number, rounding up to the next page boundary */
+#define PFN_UP(x)       (PAGE_ALIGN(x) >> PAGE_SHIFT)
+/* Byte count -> number of whole pages (same expression as PFN_DOWN) */
+#define PFN_SIZE(x)     ((x) >> PAGE_SHIFT)
+/* Number of pages touched by the byte range [s, e) */
+#define PFN_RANGE(s,e)  PFN_SIZE(PAGE_ALIGN((unsigned long)(e)) - \
+                                (((unsigned long)(s)) & PAGE_MASK))
+/*
+ * Choose the page frame at which the bootmem bitmap will be placed.
+ *
+ * The bitmap goes at the start of the bank, or at the first page after
+ * the kernel image (_end) if that is higher.  BUGs if the bank is not
+ * on node 0 or has no room for 'bootmap_pages' pages.
+ *
+ * FIXME: We really want to avoid allocating the bootmap bitmap
+ * over the top of the initrd.  Hopefully, this is located towards
+ * the start of a bank, so if we allocate the bootmap bitmap at
+ * the end, we won't clash.
+ */
+static unsigned int __init
+find_bootmap_pfn(struct meminfo *mi, unsigned int bootmap_pages)
+{
+        unsigned int start_pfn, bootmap_pfn;
+        unsigned int start, end;
+        /* First page frame after the kernel image */
+        start_pfn   = PFN_UP((unsigned long)&_end);
+        bootmap_pfn = 0;
+        /* ARM26 machines only have one node */
+        if (mi->bank->node != 0)
+                BUG();
+        start = PFN_UP(mi->bank->start);
+        end   = PFN_DOWN(mi->bank->size + mi->bank->start);
+        /* Never place the bitmap below the end of the kernel image */
+        if (start < start_pfn)
+                start = start_pfn;
+        if (end <= start)
+                BUG();
+        if (end - start >= bootmap_pages) 
+                bootmap_pfn = start;
+        else
+                BUG();
+        return bootmap_pfn;
+}
+/*
+ * Scan the memory info structure and pull out:
+ *  - the end of memory
+ *  - the number of nodes
+ *  - the pfn range of each node
+ *  - the number of bootmem bitmap pages
+ *
+ * Only the first bank is examined and only node 0 is brought online.
+ * Results go into *np, mi->end, max_low_pfn and max_pfn.
+ */
+static void __init
+find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
+{
+        unsigned int memend_pfn = 0;
+        nodes_clear(node_online_map);
+        node_set_online(0);
+        np->bootmap_pages = 0;
+        /* A bank with no memory is fatal */
+        if (mi->bank->size == 0) {
+                BUG();
+        }
+        /*
+         * Get the start and end pfns for this bank
+         */
+        np->start = PFN_UP(mi->bank->start);
+        np->end   = PFN_DOWN(mi->bank->start + mi->bank->size);
+        if (memend_pfn < np->end)
+                memend_pfn = np->end;
+        /*
+         * Calculate the number of pages we require to
+         * store the bootmem bitmaps.
+         */
+        np->bootmap_pages = bootmem_bootmap_pages(np->end - np->start);
+        /*
+         * This doesn't seem to be used by the Linux memory
+         * manager any more.  If we can get rid of it, we
+         * also get rid of some of the stuff above as well.
+         */
+        max_low_pfn = memend_pfn - PFN_DOWN(PHYS_OFFSET);
+        max_pfn = memend_pfn - PFN_DOWN(PHYS_OFFSET);
+        /* End of memory as a byte address */
+        mi->end = memend_pfn << PAGE_SHIFT;
+}
+/*
+ * Initialise the bootmem allocator for all nodes.  This is called
+ * early during the architecture specific initialisation.
+ *
+ * Node 0's first bank is handed to bootmem, after which the kernel
+ * image, the bootmem bitmap, the screen memory region and (if present)
+ * the initrd are reserved again.
+ */
+void __init bootmem_init(struct meminfo *mi)
+{
+        struct node_info node_info;
+        unsigned int bootmap_pfn;
+        pg_data_t *pgdat = NODE_DATA(0);
+        find_memend_and_nodes(mi, &node_info);
+        bootmap_pfn   = find_bootmap_pfn(mi, node_info.bootmap_pages);
+        /*
+         * Note that node 0 must always have some pages.
+         */
+        if (node_info.end == 0)
+                BUG();
+        /*
+         * Initialise the bootmem allocator.
+         */
+        init_bootmem_node(pgdat, bootmap_pfn, node_info.start, node_info.end);
+        /*
+         * Register all available RAM in this node with the bootmem allocator.
+         */
+        free_bootmem_node(pgdat, mi->bank->start, mi->bank->size);
+        /*
+         * Register the kernel text and data with bootmem.
+         * Note: with XIP we don't register .text since
+         * it's in ROM.
+         */
+#ifdef CONFIG_XIP_KERNEL
+        reserve_bootmem_node(pgdat, __pa(&_sdata), &_end - &_sdata);
+#else
+        reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
+#endif
+        /*
+         * And don't forget to reserve the allocator bitmap,
+         * which will be freed later.
+         */
+        reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT,
+                             node_info.bootmap_pages << PAGE_SHIFT);
+        /*
+         * These should likewise go elsewhere.  They pre-reserve
+         * the screen memory region at the start of main system
+         * memory. FIXME - screen RAM is not 512K!
+         */
+        reserve_bootmem_node(pgdat, 0x02000000, 0x00080000);
+#ifdef CONFIG_BLK_DEV_INITRD
+        initrd_start = phys_initrd_start;
+        initrd_end = initrd_start + phys_initrd_size;
+        /* Archimedes machines only have one node, so initrd is in node 0 */
+#ifdef CONFIG_XIP_KERNEL
+        /* Only reserve initrd space if it is in RAM */
+        if(initrd_start && initrd_start < 0x03000000){
+#else
+        if(initrd_start){
+#endif
+                reserve_bootmem_node(pgdat, __pa(initrd_start),
+                                             initrd_end - initrd_start);
+        }
+#endif   /* CONFIG_BLK_DEV_INITRD */
+}
+/*
+ * paging_init() sets up the page tables, initialises the zone memory
+ * maps, and sets up the zero page.
+ *
+ * A copy of *mi is saved in the file-scope 'meminfo' for mem_init().
+ */
+void __init paging_init(struct meminfo *mi)
+{
+        void *zero_page;
+        unsigned long zone_size[MAX_NR_ZONES];
+        unsigned long zhole_size[MAX_NR_ZONES];
+        struct bootmem_data *bdata;
+        pg_data_t *pgdat;
+        int i;
+        memcpy(&meminfo, mi, sizeof(meminfo));
+        /*
+         * allocate the zero page.  Note that we count on this going ok.
+         */
+        zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
+        /*
+         * initialise the page tables.
+         */
+        memtable_init(mi);
+        flush_tlb_all();
+        /*
+         * initialise the zones in node 0 (archimedes have only 1 node)
+         */
+        for (i = 0; i < MAX_NR_ZONES; i++) {
+                zone_size[i]  = 0;
+                zhole_size[i] = 0;
+        }
+        pgdat = NODE_DATA(0);
+        bdata = pgdat->bdata;
+        /* All of node 0's memory goes into zone 0; there are no holes */
+        zone_size[0] = bdata->node_low_pfn -
+                        (bdata->node_boot_start >> PAGE_SHIFT);
+        if (!zone_size[0])
+                BUG();
+        pgdat->node_mem_map = NULL;
+        free_area_init_node(0, pgdat, zone_size,
+                        bdata->node_boot_start >> PAGE_SHIFT, zhole_size);
+        /*
+         * finish off the zero page now that
+         * the mem_map is initialised
+         */
+        memzero(zero_page, PAGE_SIZE);
+        empty_zero_page = virt_to_page(zero_page);
+}
+/*
+ * Release the pages in the virtual address range [addr, end) to the
+ * page allocator, counting each one in totalram_pages.  If 's' is
+ * non-NULL and the range is at least 1K, the amount freed is logged
+ * under that name.
+ */
+static inline void free_area(unsigned long addr, unsigned long end, char *s)
+{
+        const unsigned int kbytes = (end - addr) >> 10;
+        unsigned long cur = addr;
+        while (cur < end) {
+                struct page *pg = virt_to_page(cur);
+                /* Make the page look like an ordinary allocated page, then free it */
+                ClearPageReserved(pg);
+                set_page_count(pg, 1);
+                free_page(cur);
+                totalram_pages++;
+                cur += PAGE_SIZE;
+        }
+        if (kbytes && s)
+                printk(KERN_INFO "Freeing %s memory: %dK\n", s, kbytes);
+}
+/*
+ * mem_init() marks the free areas in the mem_map and tells us how much
+ * memory is free.  This is done after various parts of the system have
+ * claimed their memory after the kernel image.
+ */
+void __init mem_init(void)
+{
+        /*
+         * Despite their names these hold sizes in bytes; they are
+         * converted to K (>> 10) only when printed.
+         */
+        unsigned int codepages, datapages, initpages;
+        pg_data_t *pgdat = NODE_DATA(0);
+        extern int sysctl_overcommit_memory;
+        /* Note: data pages includes BSS */
+#ifdef CONFIG_XIP_KERNEL
+        codepages = &_endtext - &_text;
+        datapages = &_end - &_sdata;
+#else
+        codepages = &_etext - &_text;
+        datapages = &_end - &_etext;
+#endif
+        initpages = &__init_end - &__init_begin;
+        high_memory = (void *)__va(meminfo.end);
+        max_mapnr   = virt_to_page(high_memory) - mem_map;
+        /* this will put all unused low memory onto the freelists */
+        if (pgdat->node_spanned_pages != 0)
+                totalram_pages += free_all_bootmem_node(pgdat);
+        /* Only the first bank contributes to the physical page count */
+        num_physpages = meminfo.bank[0].size >> PAGE_SHIFT;
+        printk(KERN_INFO "Memory: %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
+        printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
+                "%dK data, %dK init)\n",
+                (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+                codepages >> 10, datapages >> 10, initpages >> 10);
+        /*
+         * Turn on overcommit on tiny machines
+         * (pages of 16K or more and at most 128 of them)
+         */
+        if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
+                sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
+                printk("Turning on overcommit\n");
+        }
+}
+/*
+ * Free the memory between __init_begin and __init_end.
+ * Does nothing on XIP kernels.
+ */
+void free_initmem(void){
+#ifndef CONFIG_XIP_KERNEL
+        free_area((unsigned long)(&__init_begin),
+                  (unsigned long)(&__init_end),
+                  "init");
+#endif
+}
+#ifdef CONFIG_BLK_DEV_INITRD
+/* Non-zero when "keepinitrd" was given on the kernel command line */
+static int keep_initrd;
+/*
+ * Free the initrd memory in [start, end) unless the user asked to keep
+ * it.  On XIP kernels it is also kept when 'start' is at or above
+ * 0x03000000, i.e. not in RAM.
+ */
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_XIP_KERNEL
+        /* Only bin initrd if it is in RAM... */
+        if(!keep_initrd && start < 0x03000000)
+#else
+        if (!keep_initrd)
+#endif
+                free_area(start, end, "initrd");
+}
+/*
+ * Handler for the "keepinitrd" kernel command line option: records
+ * that the initrd memory must not be freed.  The option's argument
+ * string is ignored.  Returns 1.
+ */
+static int __init keepinitrd_setup(char *__unused)
+{
+        keep_initrd = 1;
+        return 1;
+}
+__setup("keepinitrd", keepinitrd_setup);
+#endif
diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c
new file mode 100644
index 000000000000..8e8a2bb2487d
--- /dev/null
+++ b/arch/arm26/mm/memc.c
@@ -0,0 +1,202 @@
+/*
+ *  linux/arch/arm26/mm/memc.c
+ *
+ *  Copyright (C) 1998-2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Page table sludge for older ARM processor architectures.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/memory.h>
+#include <asm/hardware.h>
+#include <asm/map.h>
+#define MEMC_TABLE_SIZE (256*sizeof(unsigned long))
+kmem_cache_t *pte_cache, *pgd_cache;
+int page_nr;
+/*
+ * Grab one object from pgd_cache.  Each object holds a MEMC table
+ * (MEMC_TABLE_SIZE bytes) immediately followed by the page directory,
+ * so a single pointer reaches both: the returned pgd pointer addresses
+ * the directory, and the MEMC table lies directly below it.
+ * Returns NULL if the slab allocation fails.
+ */
+static inline pgd_t *alloc_pgd_table(void)
+{
+        char *memc_tbl = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+        if (!memc_tbl)
+                return NULL;
+        return (pgd_t *)(memc_tbl + MEMC_TABLE_SIZE);
+}
+/*
+ * Release a page directory.  This is the counterpart of get_pgd_slow
+ * (not of alloc_pgd_table): the pgd pointer is stepped back over the
+ * MEMC table to recover the start of the pgd_cache object before it is
+ * handed back to the slab.
+ */
+void free_pgd_slow(pgd_t *pgd)
+{
+        char *slab_obj = (char *)pgd - MEMC_TABLE_SIZE;
+        kmem_cache_free(pgd_cache, slab_obj);
+}
+/*
+ * Create and populate the page directory for a new address space.
+ *
+ * The fresh pgd has nothing mapped (all pages !present) apart from:
+ *
+ * o The machine vectors at virtual address 0x0
+ * o The vmalloc region at the top of address space
+ *
+ * Returns the new pgd, or NULL if any allocation fails (everything
+ * allocated so far is released again).
+ */
+#define FIRST_KERNEL_PGD_NR     (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD)
+pgd_t *get_pgd_slow(struct mm_struct *mm)
+{
+        pgd_t *fresh_pgd, *kern_pgd;
+        pmd_t *fresh_pmd, *kern_pmd;
+        pte_t *fresh_pte, *kern_pte;
+        fresh_pgd = alloc_pgd_table();
+        if (!fresh_pgd)
+                return NULL;
+        /*
+         * The lock is only taken because pmd_alloc and pte_lock expect it.
+         * FIXME: I bet we could avoid taking it pretty much altogether
+         */
+        spin_lock(&mm->page_table_lock);
+        /*
+         * On ARM the first page holds the machine vectors, so it must
+         * always be allocated.
+         */
+        fresh_pmd = pmd_alloc(mm, fresh_pgd, 0);
+        if (!fresh_pmd)
+                goto fail_unlock;
+        fresh_pte = pte_alloc_kernel(mm, fresh_pmd, 0);
+        if (!fresh_pte)
+                goto fail_free_pmd;
+        /* copy the init task's first pte (the vectors page) */
+        kern_pgd = pgd_offset(&init_mm, 0);
+        kern_pmd = pmd_offset(kern_pgd, 0);
+        kern_pte = pte_offset(kern_pmd, 0);
+        set_pte(fresh_pte, *kern_pte);
+        /*
+         * The user entries were already zeroed when the table was created
+         * (see the cache_ctor functions below).  The kernel (vmalloc) area
+         * at the end of the address space is copied from the init thread,
+         * just like the pte above.
+         */
+        memcpy(fresh_pgd + FIRST_KERNEL_PGD_NR, kern_pgd + FIRST_KERNEL_PGD_NR,
+                (PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
+        spin_unlock(&mm->page_table_lock);
+        /* update MEMC tables */
+        cpu_memc_update_all(fresh_pgd);
+        return fresh_pgd;
+fail_free_pmd:
+        spin_unlock(&mm->page_table_lock);
+        pmd_free(fresh_pmd);
+        goto fail_free_pgd;
+fail_unlock:
+        spin_unlock(&mm->page_table_lock);
+fail_free_pgd:
+        free_pgd_slow(fresh_pgd);
+        return NULL;
+}
+/*
+ * Intentionally empty: no special code is required here, so the
+ * 'mode' argument is ignored.
+ */
+void setup_mm_for_reboot(char mode)
+{
+}
+/*
+ * This contains the code to setup the memory map on an ARM2/ARM250/ARM3
+ *  o swapper_pg_dir = 0x0207d000
+ *  o kernel proper starts at 0x02080000
+ *  o create (allocate) a pte to contain the machine vectors
+ *  o populate the pte (points to 0x02078000) (FIXME - is it zeroed?)
+ *  o populate the init tasks page directory (pgd) with the new pte
+ *  o zero the rest of the init tasks pgdir (FIXME - what about vmalloc?!)
+ *
+ * The 'mi' argument is not used by this implementation.
+ */
+void __init memtable_init(struct meminfo *mi)
+{
+        pte_t *pte;
+        int i;
+        /* page_nr is also referenced by the MEMC table code in proc-funcs.S */
+        page_nr = max_low_pfn;
+        pte = alloc_bootmem_low_pages(PTRS_PER_PTE * sizeof(pte_t));
+        /* first entry: read-only mapping of PAGE_OFFSET + SCREEN_SIZE */
+        pte[0] = mk_pte_phys(PAGE_OFFSET + SCREEN_SIZE, PAGE_READONLY);
+        pmd_populate(&init_mm, pmd_offset(swapper_pg_dir, 0), pte);
+        /* slot 0 was populated above; clear every remaining pgd slot */
+        for (i = 1; i < PTRS_PER_PGD; i++)
+                pgd_val(swapper_pg_dir[i]) = 0;
+}
+/*
+ * Empty stub: 'io_desc' is ignored and no mappings are created.
+ */
+void __init iotable_init(struct map_desc *io_desc)
+{
+        /* nothing to do */
+}
+/*
+ * We never have holes in the memmap, so this is an empty stub and
+ * 'mi' is ignored.
+ */
+void __init create_memmap_holes(struct meminfo *mi)
+{
+}
+/*
+ * Constructor for pte_cache objects: zero all PTRS_PER_PTE entries.
+ * The 'cache' and 'flags' arguments are unused.
+ */
+static void pte_cache_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
+{
+        memzero(pte, PTRS_PER_PTE * sizeof(pte_t));
+}
+/*
+ * Constructor for pgd_cache objects: skip the MEMC table at the start of
+ * the object and zero USER_PTRS_PER_PGD directory entries after it.
+ * The 'cache' and 'flags' arguments are unused.
+ */
+static void pgd_cache_ctor(void *pgd, kmem_cache_t *cache, unsigned long flags)
+{
+        char *first_entry = (char *)pgd + MEMC_TABLE_SIZE;
+        memzero(first_entry, USER_PTRS_PER_PGD * sizeof(pgd_t));
+}
+/*
+ * Create the two slab caches used for page tables:
+ *  o "pte-cache": one full pte table per object
+ *  o "pgd-cache": a MEMC table followed by a full page directory
+ * Failure to create either cache is fatal (BUG).
+ */
+void __init pgtable_cache_init(void)
+{
+        const size_t pte_obj_size = sizeof(pte_t) * PTRS_PER_PTE;
+        const size_t pgd_obj_size = MEMC_TABLE_SIZE + sizeof(pgd_t) * PTRS_PER_PGD;
+        pte_cache = kmem_cache_create("pte-cache", pte_obj_size,
+                                0, 0, pte_cache_ctor, NULL);
+        if (!pte_cache)
+                BUG();
+        pgd_cache = kmem_cache_create("pgd-cache", pgd_obj_size,
+                                0, 0, pgd_cache_ctor, NULL);
+        if (!pgd_cache)
+                BUG();
+}
diff --git a/arch/arm26/mm/proc-funcs.S b/arch/arm26/mm/proc-funcs.S
new file mode 100644
index 000000000000..c3d4cd3f457e
--- /dev/null
+++ b/arch/arm26/mm/proc-funcs.S
@@ -0,0 +1,359 @@
+/*
+ *  linux/arch/arm26/mm/proc-funcs.S
+ *
+ *  Copyright (C) 1997-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  MMU functions for ARM2,3
+ *
+ *  These are the low level assembler for performing cache
+ *  and memory functions on ARM2, ARM250 and ARM3 processors.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm_offsets.h>
+#include <asm/procinfo.h>
+#include <asm/ptrace.h>
+/*
+ * MEMC workhorse code.  It's a horse which thinks it's a pig.
+ */
+/*
+ * Function: cpu_memc_update_entry(pgd_t *pgd, unsigned long phys_pte, unsigned long addr)
+ * Params  : pgd        Page tables/MEMC mapping
+ *         : phys_pte   physical address, or PTE
+ *         : addr       virtual address
+ * Notes   : A not-present entry is treated as old+clean and mapped at
+ *           virtual address 0x01f00000.  The MEMC word is looked up in
+ *           memc_phys_table_32, has bits 0x200/0x300 cleared according to
+ *           the PTE flag tests, and is combined with the virtual page
+ *           number.  It is then stored in the table that sits 256 words
+ *           below 'pgd' (indexed by the word's low byte) and finally
+ *           written with a byte store to the address formed by the word
+ *           itself.
+ *           Clobbers r0-r3 and ip.
+ */
+ENTRY(cpu_memc_update_entry)
+                tst     r1, #PAGE_PRESENT               @ is the page present
+                orreq   r1, r1, #PAGE_OLD | PAGE_CLEAN
+                moveq   r2, #0x01f00000
+                mov     r3, r1, lsr #13                 @ convert to physical page nr
+                and     r3, r3, #0x3fc
+                adr     ip, memc_phys_table_32
+                ldr     r3, [ip, r3]
+                tst     r1, #PAGE_OLD | PAGE_NOT_USER
+                biceq   r3, r3, #0x200
+                tsteq   r1, #PAGE_READONLY | PAGE_CLEAN
+                biceq   r3, r3, #0x300
+                mov     r2, r2, lsr #15                 @ virtual -> nr
+                orr     r3, r3, r2, lsl #15
+                and     r2, r2, #0x300
+                orr     r3, r3, r2, lsl #2
+                and     r2, r3, #255
+                sub     r0, r0, #256 * 4
+                str     r3, [r0, r2, lsl #2]
+                strb    r3, [r3]
+                movs    pc, lr
+/*
+ * Build the MEMC word for one page table entry and store it in the
+ * MEMC table (indexed by the word's low byte).  Unlike
+ * cpu_memc_update_entry, nothing is written to the hardware here.
+ *
+ * Params  : r0 = preserved
+ *         : r1 = memc table base (preserved)
+ *         : r2 = page table entry
+ *         : r3 = preserved
+ *         : r4 = unused
+ *         : r5 = memc physical address translation table
+ *         : ip = virtual address (preserved)
+ * Note    : r2 and r4 are used as scratch and are overwritten on return.
+ */
+update_pte:
+                mov     r4, r2, lsr #13
+                and     r4, r4, #0x3fc
+                ldr     r4, [r5, r4]                    @ convert to MEMC page
+                tst     r2, #PAGE_OLD | PAGE_NOT_USER   @ check for MEMC read
+                biceq   r4, r4, #0x200
+                tsteq   r2, #PAGE_READONLY | PAGE_CLEAN @ check for MEMC write
+                biceq   r4, r4, #0x300
+                orr     r4, r4, ip
+                and     r2, ip, #0x01800000
+                orr     r4, r4, r2, lsr #13
+                and     r2, r4, #255
+                str     r4, [r1, r2, lsl #2]
+                movs    pc, lr
+/*
+ * Walk one page table, calling update_pte for every present entry.
+ * The loop body is unrolled four times; each entry advances the virtual
+ * address by 32768, and the walk stops once bits 15-19 of ip are all
+ * zero again (mask 32768 * 31), i.e. after ip has crossed 1MB.
+ * The low two bits of the incoming table pointer are masked off.
+ *
+ * Params  : r0 = preserved
+ *         : r1 = memc table base (preserved)
+ *         : r2 = page table base
+ *         : r3 = preserved
+ *         : r4 = unused
+ *         : r5 = memc physical address translation table
+ *         : ip = virtual address (updated)
+ */
+update_pte_table:
+                stmfd   sp!, {r0, lr}
+                bic     r0, r2, #3
+1:              ldr     r2, [r0], #4                    @ get entry
+                tst     r2, #PAGE_PRESENT               @ page present
+                blne    update_pte                      @ process pte
+                add     ip, ip, #32768                  @ increment virt addr
+                ldr     r2, [r0], #4                    @ get entry
+                tst     r2, #PAGE_PRESENT               @ page present
+                blne    update_pte                      @ process pte
+                add     ip, ip, #32768                  @ increment virt addr
+                ldr     r2, [r0], #4                    @ get entry
+                tst     r2, #PAGE_PRESENT               @ page present
+                blne    update_pte                      @ process pte
+                add     ip, ip, #32768                  @ increment virt addr
+                ldr     r2, [r0], #4                    @ get entry
+                tst     r2, #PAGE_PRESENT               @ page present
+                blne    update_pte                      @ process pte
+                add     ip, ip, #32768                  @ increment virt addr
+                tst     ip, #32768 * 31                 @ finished?
+                bne     1b
+                ldmfd   sp!, {r0, pc}^
+/*
+ * Function: cpu_memc_update_all(pgd_t *pgd)
+ * Params  : pgd        Page tables/MEMC mapping
+ * Notes   : this is optimised for 32k pages
+ *
+ * Resets the MEMC table with clear_tables, then walks the page directory
+ * two entries at a time.  A not-present entry just advances the virtual
+ * address by 1MB (1048576); a present one is expanded by
+ * update_pte_table.  The walk ends when the virtual address reaches 32MB.
+ */
+ENTRY(cpu_memc_update_all)
+                stmfd   sp!, {r4, r5, lr}
+                bl      clear_tables
+                sub     r1, r0, #256 * 4                @ start of MEMC tables
+                adr     r5, memc_phys_table_32          @ Convert to logical page number
+                mov     ip, #0                          @ virtual address
+1:              ldmia   r0!, {r2, r3}                   @ load two pgd entries
+                tst     r2, #PAGE_PRESENT               @ is pgd entry present?
+                addeq   ip, ip, #1048576        @FIXME - PAGE_PRESENT is for PTEs technically...
+                blne    update_pte_table
+                mov     r2, r3
+                tst     r2, #PAGE_PRESENT               @ is pgd entry present?
+                addeq   ip, ip, #1048576
+                blne    update_pte_table
+                teq     ip, #32 * 1048576
+                bne     1b
+                ldmfd   sp!, {r4, r5, pc}^
+/*
+ * Build the table to map from physical page number to memc page number
+ *
+ * The eight nested .irp loops each take two values, so 256 words are
+ * emitted.  Every word is 0x03800300 plus the sum of the values chosen
+ * by the loops; loop bN contributes its non-zero value when bit N of the
+ * table index is set (b7 is the outermost, slowest-varying loop).
+ */
+                .type   memc_phys_table_32, #object
+memc_phys_table_32:
+                .irp    b7, 0x00, 0x80
+                .irp    b6, 0x00, 0x02
+                .irp    b5, 0x00, 0x04
+                .irp    b4, 0x00, 0x01
+                .irp    b3, 0x00, 0x40
+                .irp    b2, 0x00, 0x20
+                .irp    b1, 0x00, 0x10
+                .irp    b0, 0x00, 0x08
+                .long   0x03800300 + \b7 + \b6 + \b5 + \b4 + \b3 + \b2 + \b1 + \b0
+                .endr
+                .endr
+                .endr
+                .endr
+                .endr
+                .endr
+                .endr
+                .endr
+                .size   memc_phys_table_32, . - memc_phys_table_32
+/*
+ * helper for cpu_memc_update_all, this clears out all
+ * mappings, setting them close to the top of memory,
+ * and inaccessible (0x01f00000).
+ * Params  : r0 = page table pointer
+ *
+ * The number of table entries is read from page_nr, through the pointer
+ * word placed just before _arm3_set_pgd.  Entry i is set to
+ * 0x03f00f00 + i.  Clobbers r1-r5 and ip; r0 is left untouched.
+ * NOTE(review): the value stored (0x03f00f00 + index) does not match the
+ * 0x01f00000 quoted above - confirm which is intended.
+ * NOTE(review): eight entries are written per pass and the exit test is
+ * an exact compare, so the loop relies on page_nr being a non-zero
+ * multiple of eight - confirm this is guaranteed.
+ */
+clear_tables:   ldr     r1, _arm3_set_pgd - 4
+                ldr     r2, [r1]
+                sub     r1, r0, #256 * 4                @ start of MEMC tables
+                add     r2, r1, r2, lsl #2              @ end of tables
+                mov     r3, #0x03f00000                 @ Default mapping (null mapping)
+                orr     r3, r3, #0x00000f00
+                orr     r4, r3, #1
+                orr     r5, r3, #2
+                orr     ip, r3, #3
+1:              stmia   r1!, {r3, r4, r5, ip}
+                add     r3, r3, #4
+                add     r4, r4, #4
+                add     r5, r5, #4
+                add     ip, ip, #4
+                stmia   r1!, {r3, r4, r5, ip}
+                add     r3, r3, #4
+                add     r4, r4, #4
+                add     r5, r5, #4
+                add     ip, ip, #4
+                teq     r1, r2
+                bne     1b
+                mov     pc, lr
+/*
+ * Function: *_set_pgd(pgd_t *pgd)
+ * Params  : pgd        New page tables/MEMC mapping
+ * Purpose : update MEMC hardware with new mapping
+ *
+ * The ARM3 entry point flushes the cache and then falls through into the
+ * ARM2 code.  That code walks page_nr words of the MEMC table located
+ * 256 words below 'pgd' and performs a byte store of each word to the
+ * address formed by the word itself.  Eight words are handled per pass
+ * with an exact end compare, so page_nr is expected to be a non-zero
+ * multiple of eight (NOTE(review): confirm).
+ * The .word below holds the address of page_nr; clear_tables also reads
+ * it via "_arm3_set_pgd - 4".
+ */
+                .word   page_nr   @ extern - defined in mm/memc.c
+_arm3_set_pgd:  mcr     p15, 0, r1, c1, c0, 0           @ flush cache
+_arm2_set_pgd:  stmfd   sp!, {lr}
+                ldr     r1, _arm3_set_pgd - 4
+                ldr     r2, [r1]
+                sub     r0, r0, #256 * 4                @ start of MEMC tables
+                add     r1, r0, r2, lsl #2              @ end of tables
+1:              ldmia   r0!, {r2, r3, ip, lr}
+                strb    r2, [r2]
+                strb    r3, [r3]
+                strb    ip, [ip]
+                strb    lr, [lr]
+                ldmia   r0!, {r2, r3, ip, lr}
+                strb    r2, [r2]
+                strb    r3, [r3]
+                strb    ip, [ip]
+                strb    lr, [lr]
+                teq     r0, r1
+                bne     1b
+                ldmfd   sp!, {pc}^
+/*
+ * Function: *_proc_init (void)
+ * Purpose : Initialise the cache control registers
+ *
+ * The ARM3 version writes 0x001fffff to coprocessor registers c3 and c4,
+ * 0 to c5 and c1, and 3 to c2, then falls through into the ARM2 version,
+ * which simply returns.  r0 is clobbered on the ARM3 path.
+ */
+_arm3_proc_init:
+                mov     r0, #0x001f0000
+                orr     r0, r0, #0x0000ff00
+                orr     r0, r0, #0x000000ff
+                mcr     p15, 0, r0, c3, c0              @ ARM3 Cacheable
+                mcr     p15, 0, r0, c4, c0              @ ARM3 Updateable
+                mov     r0, #0
+                mcr     p15, 0, r0, c5, c0              @ ARM3 Disruptive
+                mcr     p15, 0, r0, c1, c0              @ ARM3 Flush
+                mov     r0, #3
+                mcr     p15, 0, r0, c2, c0              @ ARM3 Control
+_arm2_proc_init:
+                movs    pc, lr
+/*
+ * Function: *_proc_fin (void)
+ * Purpose : Finalise processor (disable caches)
+ *
+ * The ARM3 version writes 2 to the control register (c2) and falls
+ * through into the ARM2 version, which returns to the caller with
+ * PSR_I_BIT and PSR_F_BIT set in the restored pc/PSR word.
+ */
+_arm3_proc_fin: mov     r0, #2
+                mcr     p15, 0, r0, c2, c0
+_arm2_proc_fin: orrs    pc, lr, #PSR_I_BIT|PSR_F_BIT
+/*
+ * Function: *_xchg_1 (int new, volatile void *ptr)
+ * Params  : new        New value to store at...
+ *         : ptr        pointer to byte-wide location
+ * Purpose : Performs an exchange operation
+ * Returns : Original byte data at 'ptr'
+ *
+ * The ARM2 version sets PSR_I_BIT (via teqp) before a separate byte load
+ * and store; the flags saved in lr are restored by the "movs pc, lr"
+ * return.  The ARM3 version uses a single swpb instruction instead.
+ */
+_arm2_xchg_1:   mov     r2, pc
+                orr     r2, r2, #PSR_I_BIT
+                teqp    r2, #0
+                ldrb    r2, [r1]
+                strb    r0, [r1]
+                mov     r0, r2
+                movs    pc, lr
+_arm3_xchg_1:   swpb    r0, r0, [r1]
+                movs    pc, lr
+/*
+ * Function: *_xchg_4 (int new, volatile void *ptr)
+ * Params  : new        New value to store at...
+ *         : ptr        pointer to word-wide location
+ * Purpose : Performs an exchange operation
+ * Returns : Original word data at 'ptr'
+ *
+ * Word-sized twin of *_xchg_1: the ARM2 version sets PSR_I_BIT (via teqp)
+ * around a separate load and store, the ARM3 version uses a single swp.
+ */
+_arm2_xchg_4:   mov     r2, pc
+                orr     r2, r2, #PSR_I_BIT
+                teqp    r2, #0
+                ldr     r2, [r1]
+                str     r0, [r1]
+                mov     r0, r2
+                movs    pc, lr
+_arm3_xchg_4:   swp     r0, r0, [r1]
+                movs    pc, lr
+/*
+ * check_bugs entry shared by all three processor tables: it only
+ * returns to the caller with the FIQ disable bit cleared.
+ * The strings after it are the vendor and CPU names referenced by the
+ * cpu_*_info records.
+ */
+_arm2_3_check_bugs:
+                bics    pc, lr, #PSR_F_BIT              @ Clear FIQ disable bit
+armvlsi_name:   .asciz  "ARM/VLSI"
+_arm2_name:     .asciz  "ARM 2"
+_arm250_name:   .asciz  "ARM 250"
+_arm3_name:     .asciz  "ARM 3"
+                .section ".init.text", #alloc, #execinstr
+/*
+ * Purpose : Function pointers used to access above functions - all calls
+ *           come through these
+ *
+ * Each *_processor_functions table lists, in order: check_bugs,
+ * proc_init, proc_fin, set_pgd, xchg_1, xchg_4.  The ARM250 table reuses
+ * the ARM2 init/fin/set_pgd routines but the ARM3 (swp based) xchg
+ * routines.  Each cpu_*_info record is a pair of name string pointers
+ * (vendor, CPU).
+ */
+                .globl  arm2_processor_functions
+arm2_processor_functions:
+                .word   _arm2_3_check_bugs
+                .word   _arm2_proc_init
+                .word   _arm2_proc_fin
+                .word   _arm2_set_pgd
+                .word   _arm2_xchg_1
+                .word   _arm2_xchg_4
+cpu_arm2_info:
+                .long   armvlsi_name
+                .long   _arm2_name
+                .globl  arm250_processor_functions
+arm250_processor_functions:
+                .word   _arm2_3_check_bugs
+                .word   _arm2_proc_init
+                .word   _arm2_proc_fin
+                .word   _arm2_set_pgd
+                .word   _arm3_xchg_1
+                .word   _arm3_xchg_4
+cpu_arm250_info:
+                .long   armvlsi_name
+                .long   _arm250_name
+                .globl  arm3_processor_functions
+arm3_processor_functions:
+                .word   _arm2_3_check_bugs
+                .word   _arm3_proc_init
+                .word   _arm3_proc_fin
+                .word   _arm3_set_pgd
+                .word   _arm3_xchg_1
+                .word   _arm3_xchg_4
+cpu_arm3_info:
+                .long   armvlsi_name
+                .long   _arm3_name
+arm2_arch_name: .asciz  "armv1"
+arm3_arch_name: .asciz  "armv2"
+arm2_elf_name:  .asciz  "v1"
+arm3_elf_name:  .asciz  "v2"
+                .align
+/*
+ * Three seven-word records, one each for ARM2, ARM250 and ARM3.
+ * Words 3-7 are: arch name, ELF name, a zero word, the cpu_*_info record
+ * and the *_processor_functions table.  Words 1-2 are presumably the
+ * processor ID value and the mask applied before comparing - confirm
+ * against the structure declared in <asm/procinfo.h>.
+ * Note that the ARM250 record uses the arm3 ("armv2"/"v2") name strings.
+ */
+                .section ".proc.info", #alloc, #execinstr
+                .long   0x41560200
+                .long   0xfffffff0
+                .long   arm2_arch_name
+                .long   arm2_elf_name
+                .long   0
+                .long   cpu_arm2_info
+                .long   arm2_processor_functions
+                .long   0x41560250
+                .long   0xfffffff0
+                .long   arm3_arch_name
+                .long   arm3_elf_name
+                .long   0
+                .long   cpu_arm250_info
+                .long   arm250_processor_functions
+                .long   0x41560300
+                .long   0xfffffff0
+                .long   arm3_arch_name
+                .long   arm3_elf_name
+                .long   0
+                .long   cpu_arm3_info
+                .long   arm3_processor_functions
diff --git a/arch/arm26/mm/small_page.c b/arch/arm26/mm/small_page.c
new file mode 100644
index 000000000000..77be86cca789
--- /dev/null
+++ b/arch/arm26/mm/small_page.c
@@ -0,0 +1,194 @@
+/*
+ *  linux/arch/arm26/mm/small_page.c
+ *
+ *  Copyright (C) 1996  Russell King
+ *  Copyright (C) 2003  Ian Molton
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Changelog:
+ *   26/01/1996 RMK     Cleaned up various areas to make little more generic
+ *   07/02/1999 RMK     Support added for 16K and 32K page sizes
+ *                      containing 8K blocks
+ *   23/05/2004 IM      Fixed to use struct page->lru (thanks wli)
+ *
+ */
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/bitops.h>
+#include <asm/pgtable.h>
+#define PEDANTIC
+/*
+ * Requirement:
+ *  We need to be able to allocate naturally aligned memory of finer
+ *  granularity than the page size.  This is typically used for the
+ *  second level page tables on 32-bit ARMs.
+ *
+ * FIXME - this comment is *out of date*
+ * Theory:
+ *  We "misuse" the Linux memory management system.  We use alloc_page
+ *  to allocate a page and then mark it as reserved.  The Linux memory
+ *  management system will then ignore the "offset", "next_hash" and
+ *  "pprev_hash" entries in the mem_map for this page.
+ *
+ *  We then use a bitstring in the "offset" field to mark which segments
+ *  of the page are in use, and manipulate this as required during the
+ *  allocation and freeing of these small pages.
+ *
+ *  We also maintain a queue of pages being used for this purpose using
+ *  the "next_hash" and "pprev_hash" entries of mem_map;
+ */
+/*
+ * Describes one small-block size carved out of a full page.
+ * 'queue' links the container pages of this size via page->lru.
+ */
+struct order {
+        struct list_head queue;
+        unsigned int mask;              /* (1 << shift) - 1             */
+        unsigned int shift;             /* (1 << shift) size of page    */
+        unsigned int block_mask;        /* nr_blocks - 1                */
+        unsigned int all_used;          /* (1 << nr_blocks) - 1         */
+};
+/*
+ * Supported block sizes for 32K pages:
+ *  orders[0]: 2K blocks (shift 11), 16 per page
+ *  orders[1]: 8K blocks (shift 13),  4 per page
+ * Any other PAGE_SIZE is rejected at compile time.
+ */
+static struct order orders[] = {
+#if PAGE_SIZE == 32768
+        { LIST_HEAD_INIT(orders[0].queue), 2047, 11, 15, 0x0000ffff },
+        { LIST_HEAD_INIT(orders[1].queue), 8191, 13,  3, 0x0000000f }
+#else
+#error unsupported page size (ARGH!)
+#endif
+};
+/*
+ * The per-page bitmap of allocated blocks is kept in page->index:
+ * bit N set means block N of the container page is in use.
+ */
+#define USED_MAP(pg)                    ((pg)->index)
+#define TEST_AND_CLEAR_USED(pg,off)     (test_and_clear_bit(off, &USED_MAP(pg)))
+#define SET_USED(pg,off)                (set_bit(off, &USED_MAP(pg)))
+/* taken by __get_small_page and __free_small_page around queue and bitmap updates */
+static DEFINE_SPINLOCK(small_page_lock);
+/*
+ * Allocate one small block of the size described by 'order'.
+ *
+ * A block is taken from the first container page on order->queue.  If the
+ * queue is empty, a new page is allocated with 'priority' (the lock is
+ * dropped around the allocation), marked Reserved and queued.  A page
+ * whose blocks are all in use is removed from the queue.
+ *
+ * Returns the virtual address of the block, or 0 if no page could be
+ * allocated.
+ */
+static unsigned long __get_small_page(int priority, struct order *order)
+{
+        unsigned long flags;
+        struct page *page;
+        int offset;
+        do {
+                spin_lock_irqsave(&small_page_lock, flags);
+                if (list_empty(&order->queue))
+                        goto need_new_page;
+                page = list_entry(order->queue.next, struct page, lru);
+again:
+#ifdef PEDANTIC
+                if (USED_MAP(page) & ~order->all_used)
+                        PAGE_BUG(page);
+#endif
+                /* claim the lowest free block of this container page */
+                offset = ffz(USED_MAP(page));
+                SET_USED(page, offset);
+                if (USED_MAP(page) == order->all_used)
+                        list_del_init(&page->lru);
+                spin_unlock_irqrestore(&small_page_lock, flags);
+                return (unsigned long) page_address(page) + (offset << order->shift);
+need_new_page:
+                spin_unlock_irqrestore(&small_page_lock, flags);
+                page = alloc_page(priority);
+                spin_lock_irqsave(&small_page_lock, flags);
+                if (list_empty(&order->queue)) {
+                        if (!page)
+                                goto no_page;
+                        SetPageReserved(page);
+                        USED_MAP(page) = 0;
+                        list_add(&page->lru, &order->queue);
+                        goto again;
+                }
+                spin_unlock_irqrestore(&small_page_lock, flags);
+                /*
+                 * Someone else refilled the queue while the lock was
+                 * dropped, so our page is not needed.  The allocation may
+                 * also have failed, in which case there is nothing to
+                 * give back - just retry from the queue.
+                 */
+                if (page)
+                        __free_page(page);
+        } while (1);
+no_page:
+        spin_unlock_irqrestore(&small_page_lock, flags);
+        return 0;
+}
+/*
+ * Return the small block at virtual address 'spage' to its container
+ * page.
+ *
+ * The address must be valid, aligned to the block size of 'order' and lie
+ * in a page marked Reserved; otherwise a diagnostic is printed (or, for
+ * an invalid address, nothing happens).  A container page that was full
+ * is put back on order->queue; one that becomes completely empty is
+ * unlinked, un-Reserved and freed.
+ */
+static void __free_small_page(unsigned long spage, struct order *order)
+{
+        unsigned long flags;
+        struct page *page;
+        if (virt_addr_valid(spage)) {
+                page = virt_to_page(spage);
+                /*
+                 * The container-page must be marked Reserved
+                 */
+                if (!PageReserved(page) || spage & order->mask)
+                        goto non_small;
+#ifdef PEDANTIC
+                if (USED_MAP(page) & ~order->all_used)
+                        PAGE_BUG(page);
+#endif
+                /* turn the address into the block index within the page */
+                spage = spage >> order->shift;
+                spage &= order->block_mask;
+                /*
+                 * the following must be atomic wrt get_page
+                 */
+                spin_lock_irqsave(&small_page_lock, flags);
+                if (USED_MAP(page) == order->all_used)
+                        list_add(&page->lru, &order->queue);
+                if (!TEST_AND_CLEAR_USED(page, spage))
+                        goto already_free;
+                if (USED_MAP(page) == 0)
+                        goto free_page;
+                spin_unlock_irqrestore(&small_page_lock, flags);
+        }
+        return;
+free_page:
+        /*
+         * unlink the page from the small page queue and free it
+         */
+        list_del_init(&page->lru);
+        spin_unlock_irqrestore(&small_page_lock, flags);
+        ClearPageReserved(page);
+        __free_page(page);
+        return;
+non_small:
+        printk("Trying to free non-small page from %p\n", __builtin_return_address(0));
+        return;
+already_free:
+        /* reached with small_page_lock held: drop it before reporting */
+        spin_unlock_irqrestore(&small_page_lock, flags);
+        printk("Trying to free free small page from %p\n", __builtin_return_address(0));
+}
+/*
+ * Allocate one 8K block (orders[1]); returns its virtual address, or 0
+ * on failure.
+ */
+unsigned long get_page_8k(int priority)
+{
+        struct order *order_8k = &orders[1];
+        return __get_small_page(priority, order_8k);
+}
+/*
+ * Free an 8K block (orders[1]) previously obtained from get_page_8k.
+ */
+void free_page_8k(unsigned long spage)
+{
+        struct order *order_8k = &orders[1];
+        __free_small_page(spage, order_8k);
+}
diff --git a/arch/arm26/nwfpe/ARM-gcc.h b/arch/arm26/nwfpe/ARM-gcc.h
new file mode 100644
index 000000000000..e6598470b076
--- /dev/null
+++ b/arch/arm26/nwfpe/ARM-gcc.h
@@ -0,0 +1,120 @@
+/*
+-------------------------------------------------------------------------------
+The macro `BITS64' can be defined to indicate that 64-bit integer types are
+supported by the compiler.
+-------------------------------------------------------------------------------
+*/
+#define BITS64
+/*
+-------------------------------------------------------------------------------
+Each of the following `typedef's defines the most convenient type that holds
+integers of at least as many bits as specified.  For example, `uint8' should
+be the most convenient type that can hold unsigned integers of as many as
+8 bits.  The `flag' type must be able to hold either a 0 or 1.  For most
+implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+to the same as `int'.
+-------------------------------------------------------------------------------
+*/
+/*
+ * NOTE(review): the text above suggests `int' for `flag', `uint8' and
+ * `int8', but this port uses char-sized types for them.  `uint16' is a
+ * plain (signed) int, which still holds every 16-bit unsigned value.
+ */
+typedef char flag;
+typedef unsigned char uint8;
+typedef signed char int8;
+typedef int uint16;
+typedef int int16;
+typedef unsigned int uint32;
+typedef signed int int32;
+#ifdef BITS64
+typedef unsigned long long int bits64;
+typedef signed long long int sbits64;
+#endif
+/*
+-------------------------------------------------------------------------------
+Each of the following `typedef's defines a type that holds integers
+of _exactly_ the number of bits specified.  For instance, for most
+implementations of C, `bits16' and `sbits16' should be `typedef'ed to
+`unsigned short int' and `signed short int' (or `short int'), respectively.
+-------------------------------------------------------------------------------
+*/
+typedef unsigned char bits8;
+typedef signed char sbits8;
+typedef unsigned short int bits16;
+typedef signed short int sbits16;
+typedef unsigned int bits32;
+typedef signed int sbits32;
+#ifdef BITS64
+typedef unsigned long long int uint64;
+typedef signed long long int int64;
+#endif
+#ifdef BITS64
+/*
+-------------------------------------------------------------------------------
+The `LIT64' macro takes as its argument a textual integer literal and if
+necessary ``marks'' the literal as having a 64-bit integer type.  For
+example, the Gnu C Compiler (`gcc') requires that 64-bit literals be
+appended with the letters `LL' standing for `long long', which is `gcc's
+name for the 64-bit integer type.  Some compilers may allow `LIT64' to be
+defined as the identity macro:  `#define LIT64( a ) a'.
+-------------------------------------------------------------------------------
+*/
+#define LIT64( a ) a##LL
+#endif
+/*
+-------------------------------------------------------------------------------
+The macro `INLINE' can be used before functions that should be inlined.  If
+a compiler does not support explicit inlining, this macro should be defined
+to be `static'.
+-------------------------------------------------------------------------------
+*/
+#define INLINE extern __inline__
+/* For use as a GCC soft-float library we need some special function names. */
+#ifdef __LIBFLOAT__
+/* Some 32-bit ops can be mapped straight across by just changing the name. */
+#define float32_add                     __addsf3
+#define float32_sub                     __subsf3
+#define float32_mul                     __mulsf3
+#define float32_div                     __divsf3
+#define int32_to_float32                __floatsisf
+#define float32_to_int32_round_to_zero  __fixsfsi
+#define float32_to_uint32_round_to_zero __fixunssfsi
+/* These ones go through the glue code.  To avoid namespace pollution
+   we rename the internal functions too.  */
+#define float32_eq                      ___float32_eq
+#define float32_le                      ___float32_le
+#define float32_lt                      ___float32_lt
+/* All the 64-bit ops have to go through the glue, so we pull the same
+   trick.  */
+#define float64_add                     ___float64_add
+#define float64_sub                     ___float64_sub
+#define float64_mul                     ___float64_mul
+#define float64_div                     ___float64_div
+#define int32_to_float64                ___int32_to_float64
+#define float64_to_int32_round_to_zero  ___float64_to_int32_round_to_zero
+#define float64_to_uint32_round_to_zero ___float64_to_uint32_round_to_zero
+#define float64_to_float32              ___float64_to_float32
+#define float32_to_float64              ___float32_to_float64
+#define float64_eq                      ___float64_eq
+#define float64_le                      ___float64_le
+#define float64_lt                      ___float64_lt
+#if 0
+#define float64_add                     __adddf3
+#define float64_sub                     __subdf3
+#define float64_mul                     __muldf3
+#define float64_div                     __divdf3
+#define int32_to_float64                __floatsidf
+#define float64_to_int32_round_to_zero  __fixdfsi
+#define float64_to_uint32_round_to_zero __fixunsdfsi
+#define float64_to_float32              __truncdfsf2
+#define float32_to_float64              __extendsfdf2
+#endif
+#endif
diff --git a/arch/arm26/nwfpe/ChangeLog b/arch/arm26/nwfpe/ChangeLog
new file mode 100644
index 000000000000..0c580f764baf
--- /dev/null
+++ b/arch/arm26/nwfpe/ChangeLog
@@ -0,0 +1,83 @@
+2002-01-19  Russell King <rmk@arm.linux.org.uk>
+        * fpa11.h - Add documentation
+                  - remove userRegisters pointer from this structure.
+                  - add new method to obtain integer register values.
+        * softfloat.c - Remove float128
+        * softfloat.h - Remove float128
+        * softfloat-specialize - Remove float128
+        * The FPA11 structure is not a kernel-specific data structure.
+          It is used by users of ptrace to examine the values of the
+          floating point registers.  Therefore, any changes to the
+          FPA11 structure (size or position of elements contained
+          within) have to be well thought out.
+        * Since 128-bit float requires the FPA11 structure to change
+          size, it has been removed.  128-bit float is currently unused,
+          and needs various things to be re-worked so that we won't
+          overflow the available space in the task structure.
+        * The changes are designed to break any patch that goes on top
+          of this code, so that the authors properly review their changes.
+1999-08-19  Scott Bambrough  <scottb@netwinder.org>
+        * fpmodule.c - Changed version number to 0.95
+        * fpa11.h - modified FPA11, FPREG structures
+        * fpa11.c - Changes due to FPA11, FPREG structure alterations.
+        * fpa11_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+        * fpa11_cpdt.c - Changes due to FPA11, FPREG structure alterations.
+        * fpa11_cprt.c - Changes due to FPA11, FPREG structure alterations.
+        * single_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+        * double_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+        * extended_cpdo.c - Changes due to FPA11, FPREG structure alterations.
+        * I discovered several bugs.  First and worst is that the kernel
+          passes in a pointer to the FPE's state area.  This is defined
+          as a struct user_fp (see user.h).  This pointer was cast to a
+          FPA11*.  Unfortunately FPA11 and user_fp are of different sizes;
+          user_fp is smaller.  This meant that the FPE scribbled on things
+          below its area, which is bad, as the area is in the thread_struct
+          embedded in the process task structure.  Thus we were scribbling
+          over one of the most important structures in the entire OS.
+        * user_fp and FPA11 have now been harmonized.  Most of the changes
+          in the above code were dereferencing problems due to moving the
+          register type out of FPREG, and getting rid of the union variable
+          fpvalue.
+        * Second I noticed resetFPA11 was not always being called for a
+          task.  This should happen on the first floating point exception
+          that occurs.  It is controlled by init_flag in FPA11.  The
+          comment in the code beside init_flag states the kernel guarantees
+          this to be zero.  Not so.  I found that the kernel recycles task
+          structures, and that recycled ones may not have init_flag zeroed.
+          I couldn't even find anything that guarantees it is zeroed when
+          the task structure is initially allocated.  In any case
+          I now initialize the entire FPE state in the thread structure to
+          zero when allocated and recycled.  See alloc_task_struct() and
+          flush_thread() in arch/arm/process.c.  The change to
+          alloc_task_struct() may not be necessary, but I left it in for
+          completeness (better safe than sorry).
+1998-11-23  Scott Bambrough  <scottb@netwinder.org>
+        * README.FPE - fix typo in description of lfm/sfm instructions
+        * NOTES - Added file to describe known bugs/problems 
+        * fpmodule.c - Changed version number to 0.94
+1998-11-20  Scott Bambrough  <scottb@netwinder.org>
+        * README.FPE - fix description of URD, NRM instructions
+        * TODO - remove URD, NRM instructions from TODO list
+        * single_cpdo.c - implement URD, NRM
+        * double_cpdo.c - implement URD, NRM
+        * extended_cpdo.c - implement URD, NRM
+1998-11-19  Scott Bambrough  <scottb@netwinder.org>
+        * ChangeLog - Added this file to track changes made.
+        * fpa11.c - added code to initialize register types to typeNone
+        * fpa11_cpdt.c - fixed bug in storeExtended (typeExtended changed to
+          typeDouble in switch statement)
diff --git a/arch/arm26/nwfpe/Makefile b/arch/arm26/nwfpe/Makefile
new file mode 100644
index 000000000000..b39d34dff054
--- /dev/null
+++ b/arch/arm26/nwfpe/Makefile
@@ -0,0 +1,15 @@
+#
+# Copyright (C) 1998, 1999, 2001 Philip Blundell
+#
+# Build description for the NetWinder floating point emulator (nwfpe).
+#
+# Start with empty object lists.
+obj-y                   :=
+obj-m                   :=
+obj-n                   :=
+# nwfpe.o is appended to the list selected by the value of
+# CONFIG_FPE_NWFPE (obj-y, obj-m, obj-n, or obj- when unset).
+obj-$(CONFIG_FPE_NWFPE) += nwfpe.o
+# Objects listed for nwfpe (presumably linked together into nwfpe.o by
+# the kernel build system - confirm against the kbuild documentation).
+nwfpe-objs              := fpa11.o fpa11_cpdo.o fpa11_cpdt.o fpa11_cprt.o \
+                           fpmodule.o fpopcode.o softfloat.o \
+                           single_cpdo.o double_cpdo.o extended_cpdo.o \
+                           entry.o
diff --git a/arch/arm26/nwfpe/double_cpdo.c b/arch/arm26/nwfpe/double_cpdo.c
new file mode 100644
index 000000000000..7f4fef0216c7
--- /dev/null
+++ b/arch/arm26/nwfpe/double_cpdo.c
@@ -0,0 +1,288 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+/*
+ * Prototypes for the double precision transcendental helpers.  Their
+ * definitions at the end of this file, and every call to them in
+ * DoubleCPDO(), are currently compiled out with #if 0.
+ */
+float64 float64_exp(float64 Fm);
+float64 float64_ln(float64 Fm);
+float64 float64_sin(float64 rFm);
+float64 float64_cos(float64 rFm);
+float64 float64_arcsin(float64 rFm);
+float64 float64_arctan(float64 rFm);
+float64 float64_log(float64 rFm);
+float64 float64_tan(float64 rFm);
+float64 float64_arccos(float64 rFm);
+float64 float64_pow(float64 rFn,float64 rFm);
+float64 float64_pol(float64 rFn,float64 rFm);
+/*
+ * DoubleCPDO - emulate one arithmetic (CPDO) instruction in double
+ * precision.
+ *
+ * opcode: the instruction word to emulate.
+ *
+ * Operand Fm is either one of the built-in constants or a register;
+ * operand Fn is only fetched for dyadic instructions.  Register operands
+ * tagged as single precision are widened to double.  A register operand
+ * with any other tag than single or double (including extended) cannot
+ * be handled here and makes the function fail.
+ *
+ * Returns 1 when the instruction was emulated, in which case register Fd
+ * is tagged typeDouble, or 0 when the operand types or the opcode are not
+ * supported.
+ */
+unsigned int DoubleCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   float64 rFm, rFn = 0; //FIXME - should be zero?
+   unsigned int Fd, Fm, Fn, nRc = 1;
+   //printk("DoubleCPDO(0x%08x)\n",opcode);
+
+   /* Fetch operand Fm: a constant or a register. */
+   Fm = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+   {
+     rFm = getDoubleConstant(Fm);
+   }
+   else
+   {
+     switch (fpa11->fType[Fm])
+     {
+        case typeSingle:
+          rFm = float32_to_float64(fpa11->fpreg[Fm].fSingle);
+        break;
+        case typeDouble:
+          rFm = fpa11->fpreg[Fm].fDouble;
+        break;
+        case typeExtended:
+            // !! patb
+            //printk("not implemented! why not?\n");
+            //!! ScottB
+            // should never get here, if extended involved
+            // then other operand should be promoted then
+            // ExtendedCPDO called.
+            //
+            // If we do get here, fail instead of carrying on with an
+            // uninitialised rFm; this matches the treatment of Fn below.
+            return 0;
+        default: return 0;
+     }
+   }
+   /* Fetch operand Fn, which only dyadic instructions have. */
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      Fn = getFn(opcode);
+      switch (fpa11->fType[Fn])
+      {
+        case typeSingle:
+          rFn = float32_to_float64(fpa11->fpreg[Fn].fSingle);
+        break;
+        case typeDouble:
+          rFn = fpa11->fpreg[Fn].fDouble;
+        break;
+
+        default: return 0;
+      }
+   }
+   Fd = getFd(opcode);
+   /* !! this switch isn't optimized; better (opcode & MASK_ARITHMETIC_OPCODE)>>24, sort of */
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_add(rFn,rFm);
+      break;
+      case MUF_CODE:
+      case FML_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_mul(rFn,rFm);
+      break;
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_sub(rFn,rFm);
+      break;
+      case RSF_CODE:
+         /* reverse subtract: operands swapped */
+         fpa11->fpreg[Fd].fDouble = float64_sub(rFm,rFn);
+      break;
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_div(rFn,rFm);
+      break;
+      case RDF_CODE:
+      case FRD_CODE:
+         /* reverse divide: operands swapped */
+         fpa11->fpreg[Fd].fDouble = float64_div(rFm,rFn);
+      break;
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_pow(rFn,rFm);
+      break;
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_pow(rFm,rFn);
+      break;
+#endif
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_rem(rFn,rFm);
+      break;
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_pol(rFn,rFm);
+      break;
+#endif
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fDouble = rFm;
+      break;
+      case MNF_CODE:
+      {
+         /* Flip the top bit of the second 32-bit word of rFm.
+            NOTE(review): assumes that word holds the sign bit, which
+            depends on the word order of float64 - confirm. */
+         unsigned int *p = (unsigned int*)&rFm;
+         p[1] ^= 0x80000000;
+         fpa11->fpreg[Fd].fDouble = rFm;
+      }
+      break;
+      case ABS_CODE:
+      {
+         /* Clear the top bit of the second 32-bit word of rFm; same
+            word order assumption as MNF above. */
+         unsigned int *p = (unsigned int*)&rFm;
+         p[1] &= 0x7fffffff;
+         fpa11->fpreg[Fd].fDouble = rFm;
+      }
+      break;
+      case RND_CODE:
+      case URD_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_round_to_int(rFm);
+      break;
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_sqrt(rFm);
+      break;
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_log(rFm);
+      break;
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_ln(rFm);
+      break;
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_exp(rFm);
+      break;
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_sin(rFm);
+      break;
+      case COS_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_cos(rFm);
+      break;
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_tan(rFm);
+      break;
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_arcsin(rFm);
+      break;
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_arccos(rFm);
+      break;
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fDouble = float64_arctan(rFm);
+      break;
+#endif
+      case NRM_CODE:
+         /* NOTE(review): nothing is stored in Fd here, yet Fd is still
+            tagged typeDouble below - confirm this is intended. */
+      break;
+
+      default:
+      {
+        /* unknown or unsupported arithmetic opcode */
+        nRc = 0;
+      }
+   }
+   /* Only a successfully emulated instruction changes the tag of Fd. */
+   if (0 != nRc) fpa11->fType[Fd] = typeDouble;
+   return nRc;
+}
+/*
+ * Disabled placeholders for the double precision transcendental
+ * operations.  Nothing in this #if 0 region is compiled.  The series
+ * evaluations were never written: exp, ln, sin, cos and arccos simply
+ * return their argument, and arcsin/arctan (disabled a second time by the
+ * nested #if 0) have empty bodies.
+ */
+#if 0
+float64 float64_exp(float64 rFm)
+{
+  return rFm;
+//series
+}
+float64 float64_ln(float64 rFm)
+{
+  return rFm;
+//series
+}
+float64 float64_sin(float64 rFm)
+{
+  return rFm;
+//series
+}
+float64 float64_cos(float64 rFm)
+{
+   return rFm;
+   //series
+}
+#if 0
+float64 float64_arcsin(float64 rFm)
+{
+//series
+}
+float64 float64_arctan(float64 rFm)
+{
+  //series
+}
+#endif
+/* ln(rFm) divided by built-in constant number 7 */
+float64 float64_log(float64 rFm)
+{
+  return float64_div(float64_ln(rFm),getDoubleConstant(7));
+}
+/* sin(rFm) / cos(rFm) */
+float64 float64_tan(float64 rFm)
+{
+  return float64_div(float64_sin(rFm),float64_cos(rFm));
+}
+float64 float64_arccos(float64 rFm)
+{
+return rFm;
+   //return float64_sub(halfPi,float64_arcsin(rFm));
+}
+/* exp(rFm * ln(rFn)) */
+float64 float64_pow(float64 rFn,float64 rFm)
+{
+  return float64_exp(float64_mul(rFm,float64_ln(rFn))); 
+}
+/* arctan(rFn / rFm) */
+float64 float64_pol(float64 rFn,float64 rFm)
+{
+  return float64_arctan(float64_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm26/nwfpe/entry.S b/arch/arm26/nwfpe/entry.S
new file mode 100644
index 000000000000..7d6dfaad80c2
--- /dev/null
+++ b/arch/arm26/nwfpe/entry.S
@@ -0,0 +1,114 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998
+    (c) Philip Blundell 1998-1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include <asm/asm_offsets.h>
+/* This is the kernel's entry point into the floating point emulator.
+It is called from the kernel with code similar to this:
+        mov     fp, #0
+        teqp    pc, #PSR_I_BIT | MODE_SVC
+        ldr     r4, .LC2
+        ldr     pc, [r4]                @ Call FP module USR entry point
+The kernel expects the emulator to return via one of two possible
+points of return it passes to the emulator.  The emulator, if
+successful in its emulation, jumps to ret_from_exception and the
+kernel takes care of returning control from the trap to the user code.
+If the emulator is unable to emulate the instruction, it returns to
+fpundefinstr and the kernel halts the user program with a core dump.
+This routine does four things:
+1) It saves SP into the sl register, which the C code reads through the
+register variable user_registers (see fpa11.h).  The kernel has
+created a struct pt_regs on the stack and saved the user registers
+into it.  See /usr/include/asm/proc/ptrace.h for details.  The
+emulator code uses user_registers as the base of an array of words from
+which the contents of the registers can be extracted.
+2) It locates the FP emulator work area within the TSS structure and
+points `fpa11' to it.
+3) It calls EmulateAll to emulate a floating point instruction.
+EmulateAll returns 1 if the emulation was successful, or 0 if not.
+4) If an instruction has been emulated successfully, it looks ahead at
+the next instruction.  If it is a floating point instruction, it
+executes the instruction, without returning to user space.  In this
+way it repeatedly looks ahead and executes floating point instructions
+until it encounters a non floating point instruction, at which time it
+returns via ret_from_exception.
+This is done to reduce the effect of the trap overhead on each
+floating point instruction.  GCC attempts to group floating point
+instructions to allow the emulator to spread the cost of the trap over
+several floating point instructions.  */
+        @ nwfpe_enter: the kernel's entry point into the emulator.
+        @ Emulates the trapping instruction, then keeps emulating the
+        @ instructions that follow for as long as they look like
+        @ floating point instructions.
+        .globl  nwfpe_enter
+nwfpe_enter:
+        @ sl is what the C code reads as user_registers (see fpa11.h)
+        mov     sl, sp
+        bl      FPA11_CheckInit         @ check to see if we are initialised
+        ldr     r5, [sp, #60]           @ get contents of PC
+        @ clear bits 31-26 and 1-0 so that r5 can be used as an address
+        bic     r5, r5, #0xfc000003
+        ldr     r0, [r5, #-4]           @ get actual instruction into r0
+        bl      EmulateAll              @ emulate the instruction
+        @ label 1 is also the return address used for the look-ahead
+        @ call to EmulateAll further down
+1:      cmp     r0, #0                  @ was emulation successful
+        beq     fpundefinstr            @ no, return failure
+next:
+.Lx1:   ldrt    r6, [r5], #4            @ get the next instruction and
+                                        @ increment PC
+        @ bits 27-24 equal to 0xC, 0xD or 0xE are taken to mean a
+        @ floating point instruction
+        and     r2, r6, #0x0F000000     @ test for FP insns
+        teq     r2, #0x0C000000
+        teqne   r2, #0x0D000000
+        teqne   r2, #0x0E000000
+        bne     ret_from_exception      @ return ok if not a fp insn
+        @ merge the advanced address in r5 with the bits of the saved PC
+        @ that were masked off above, and store the result back
+        ldr     r9, [sp, #60]           @ get new condition codes
+        and     r9, r9, #0xfc000003
+        orr     r7, r5, r9
+        str     r7, [sp, #60]           @ update PC copy in regs
+        mov     r0, r6                  @ save a copy
+        mov     r1, r9                  @ fetch the condition codes
+        bl      checkCondition          @ check the condition
+        cmp     r0, #0                  @ r0 = 0 ==> condition failed
+        @ if condition code failed to match, next insn
+        beq     next                    @ get the next instruction;
+            
+        mov     r0, r6                  @ prepare for EmulateAll()
+        @ hand-built call: EmulateAll returns to label 1 above.  The low
+        @ two bits of lr are set as well - presumably the processor mode
+        @ bits of a 26-bit return address; TODO confirm.
+        adr     lr, 1b
+        orr     lr, lr, #3
+        b       EmulateAll              @ if r0 != 0, goto EmulateAll
+.Lret:  b       ret_from_exception      @ let the user eat segfaults
+        
+        @ We need to be prepared for the instruction at .Lx1 to fault.
+        @ Emit the appropriate exception gunk to fix things up.
+        @ The table entry is the address of the load at .Lx1 followed by
+        @ an instruction word encoding the distance to .Lret.
+        @ NOTE(review): how the fault handler uses that word is not
+        @ visible here - confirm against the exception fixup code.
+        .section __ex_table,"a"
+        .align  3
+        .long   .Lx1
+        ldr     lr, [lr, $(.Lret - .Lx1)/4]
+        .previous
diff --git a/arch/arm26/nwfpe/extended_cpdo.c b/arch/arm26/nwfpe/extended_cpdo.c
new file mode 100644
index 000000000000..331407596d91
--- /dev/null
+++ b/arch/arm26/nwfpe/extended_cpdo.c
@@ -0,0 +1,273 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+/*
+ * Prototypes for the extended precision transcendental helpers.  Their
+ * definitions at the end of this file, and every call to them in
+ * ExtendedCPDO(), are currently compiled out with #if 0.
+ */
+floatx80 floatx80_exp(floatx80 Fm);
+floatx80 floatx80_ln(floatx80 Fm);
+floatx80 floatx80_sin(floatx80 rFm);
+floatx80 floatx80_cos(floatx80 rFm);
+floatx80 floatx80_arcsin(floatx80 rFm);
+floatx80 floatx80_arctan(floatx80 rFm);
+floatx80 floatx80_log(floatx80 rFm);
+floatx80 floatx80_tan(floatx80 rFm);
+floatx80 floatx80_arccos(floatx80 rFm);
+floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm);
+floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm);
+/*
+ * Read floating point register 'reg' as an extended precision value,
+ * widening single and double contents.  Returns 1 with the value stored
+ * in *value, or 0 (leaving *value untouched) when the register's type
+ * tag is none of single, double or extended.
+ */
+static unsigned int fetchExtendedOperand(FPA11 *fpa11, const unsigned int reg,
+                                         floatx80 *value)
+{
+   switch (fpa11->fType[reg])
+   {
+      case typeSingle:
+         *value = float32_to_floatx80(fpa11->fpreg[reg].fSingle);
+         return 1;
+      case typeDouble:
+         *value = float64_to_floatx80(fpa11->fpreg[reg].fDouble);
+         return 1;
+      case typeExtended:
+         *value = fpa11->fpreg[reg].fExtended;
+         return 1;
+      default:
+         return 0;
+   }
+}
+/*
+ * ExtendedCPDO - emulate one arithmetic (CPDO) instruction in extended
+ * precision.
+ *
+ * opcode: the instruction word to emulate.
+ *
+ * Returns 1 when the instruction was emulated, in which case register Fd
+ * is tagged typeExtended, or 0 when an operand register has an unknown
+ * type tag or the opcode is not supported.
+ */
+unsigned int ExtendedCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   floatx80 opM, opN;
+   floatx80 *result;
+   unsigned int dst, srcM;
+   //printk("ExtendedCPDO(0x%08x)\n",opcode);
+
+   /* operand Fm: built-in constant or register */
+   srcM = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+      opM = getExtendedConstant(srcM);
+   else if (!fetchExtendedOperand(fpa11, srcM, &opM))
+      return 0;
+
+   /* operand Fn: only present on dyadic instructions */
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      if (!fetchExtendedOperand(fpa11, getFn(opcode), &opN))
+         return 0;
+   }
+
+   dst = getFd(opcode);
+   result = &fpa11->fpreg[dst].fExtended;
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         *result = floatx80_add(opN,opM);
+         break;
+      case MUF_CODE:
+      case FML_CODE:
+         *result = floatx80_mul(opN,opM);
+         break;
+      case SUF_CODE:
+         *result = floatx80_sub(opN,opM);
+         break;
+      case RSF_CODE:
+         *result = floatx80_sub(opM,opN);
+         break;
+      case DVF_CODE:
+      case FDV_CODE:
+         *result = floatx80_div(opN,opM);
+         break;
+      case RDF_CODE:
+      case FRD_CODE:
+         *result = floatx80_div(opM,opN);
+         break;
+#if 0
+      case POW_CODE:
+         *result = floatx80_pow(opN,opM);
+         break;
+      case RPW_CODE:
+         *result = floatx80_pow(opM,opN);
+         break;
+#endif
+      case RMF_CODE:
+         *result = floatx80_rem(opN,opM);
+         break;
+#if 0
+      case POL_CODE:
+         *result = floatx80_pol(opN,opM);
+         break;
+#endif
+      /* monadic opcodes */
+      case MVF_CODE:
+         *result = opM;
+         break;
+      case MNF_CODE:
+         /* toggle bit 15 of the high part */
+         opM.high ^= 0x8000;
+         *result = opM;
+         break;
+      case ABS_CODE:
+         /* clear bit 15 of the high part */
+         opM.high &= 0x7fff;
+         *result = opM;
+         break;
+      case RND_CODE:
+      case URD_CODE:
+         *result = floatx80_round_to_int(opM);
+         break;
+      case SQT_CODE:
+         *result = floatx80_sqrt(opM);
+         break;
+#if 0
+      case LOG_CODE:
+         *result = floatx80_log(opM);
+         break;
+      case LGN_CODE:
+         *result = floatx80_ln(opM);
+         break;
+      case EXP_CODE:
+         *result = floatx80_exp(opM);
+         break;
+      case SIN_CODE:
+         *result = floatx80_sin(opM);
+         break;
+      case COS_CODE:
+         *result = floatx80_cos(opM);
+         break;
+      case TAN_CODE:
+         *result = floatx80_tan(opM);
+         break;
+      case ASN_CODE:
+         *result = floatx80_arcsin(opM);
+         break;
+      case ACS_CODE:
+         *result = floatx80_arccos(opM);
+         break;
+      case ATN_CODE:
+         *result = floatx80_arctan(opM);
+         break;
+#endif
+      case NRM_CODE:
+         /* stores nothing; Fd is still retagged below */
+         break;
+
+      default:
+         /* unsupported opcode: leave Fd and its tag alone */
+         return 0;
+   }
+
+   fpa11->fType[dst] = typeExtended;
+   return 1;
+}
+/*
+ * Disabled placeholders for the extended precision transcendental
+ * operations.  Nothing in this #if 0 region is compiled.  The series
+ * based functions (exp, ln, sin, cos, arcsin, arctan) and arccos have
+ * no implementation at all - their bodies contain only comments.
+ */
+#if 0
+floatx80 floatx80_exp(floatx80 Fm)
+{
+//series
+}
+floatx80 floatx80_ln(floatx80 Fm)
+{
+//series
+}
+floatx80 floatx80_sin(floatx80 rFm)
+{
+//series
+}
+floatx80 floatx80_cos(floatx80 rFm)
+{
+//series
+}
+floatx80 floatx80_arcsin(floatx80 rFm)
+{
+//series
+}
+floatx80 floatx80_arctan(floatx80 rFm)
+{
+  //series
+}
+/* ln(rFm) divided by built-in constant number 7 */
+floatx80 floatx80_log(floatx80 rFm)
+{
+  return floatx80_div(floatx80_ln(rFm),getExtendedConstant(7));
+}
+/* sin(rFm) / cos(rFm) */
+floatx80 floatx80_tan(floatx80 rFm)
+{
+  return floatx80_div(floatx80_sin(rFm),floatx80_cos(rFm));
+}
+floatx80 floatx80_arccos(floatx80 rFm)
+{
+   //return floatx80_sub(halfPi,floatx80_arcsin(rFm));
+}
+/* exp(rFm * ln(rFn)) */
+floatx80 floatx80_pow(floatx80 rFn,floatx80 rFm)
+{
+  return floatx80_exp(floatx80_mul(rFm,floatx80_ln(rFn))); 
+}
+/* arctan(rFn / rFm) */
+floatx80 floatx80_pol(floatx80 rFn,floatx80 rFm)
+{
+  return floatx80_arctan(floatx80_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm26/nwfpe/fpa11.c b/arch/arm26/nwfpe/fpa11.c
new file mode 100644
index 000000000000..e954540a9464
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11.c
@@ -0,0 +1,221 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "fpopcode.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+#include <linux/compiler.h>
+#include <asm/system.h>
+/*
+ * Forward declarations of the per-class emulators dispatched to by
+ * EmulateAll(): CPDO for arithmetic opcodes, CPDT for load/store opcodes
+ * and CPRT for conversion, register transfer and comparison opcodes.
+ * Each takes the instruction word; EmulateAll() passes their return
+ * value straight back to its caller.
+ */
+unsigned int EmulateCPDO(const unsigned int);
+unsigned int EmulateCPDT(const unsigned int);
+unsigned int EmulateCPRT(const unsigned int);
+/*
+ * Put the current thread's emulated FPA11 into its reset state.  Called
+ * to initialize and reset the emulator.
+ */
+void resetFPA11(void)
+{
+  FPA11 *state = GET_FPA11();
+  unsigned int reg;
+
+  /* no register holds a value yet: tag every one of them typeNone */
+  for (reg = 0; reg < sizeof(state->fType); reg++)
+    state->fType[reg] = typeNone;
+
+  /* FPSR: system id FP_EMULATOR plus the AC bit, everything else clear */
+  state->fpsr = FP_EMULATOR | BIT_AC;
+
+#if MAINTAIN_FPCR
+  /* FPCR: SB, AB and DA bits set, everything else clear */
+  state->fpcr = MASK_RESET;
+#endif
+}
+/*
+ * Select the softfloat rounding mode from the rounding mode bits of
+ * 'opcode'.  Any encoding that is not recognised is treated as round to
+ * nearest.  When MAINTAIN_FPCR is enabled the selected mode is also
+ * written to the rounding mode field of the FPCR.
+ */
+void SetRoundingMode(const unsigned int opcode)
+{
+   const unsigned int requested = opcode & MASK_ROUNDING_MODE;
+
+   if (ROUND_TO_PLUS_INFINITY == requested)
+      float_rounding_mode = float_round_up;
+   else if (ROUND_TO_MINUS_INFINITY == requested)
+      float_rounding_mode = float_round_down;
+   else if (ROUND_TO_ZERO == requested)
+      float_rounding_mode = float_round_to_zero;
+   else
+      float_rounding_mode = float_round_nearest_even;
+
+#if MAINTAIN_FPCR
+   {
+      FPA11 *fpa11 = GET_FPA11();
+      FPCR modeBits = ROUND_TO_NEAREST;
+
+      if (ROUND_TO_PLUS_INFINITY == requested
+          || ROUND_TO_MINUS_INFINITY == requested
+          || ROUND_TO_ZERO == requested)
+         modeBits = requested;
+
+      /* replace the old rounding mode field with the new one */
+      fpa11->fpcr = (fpa11->fpcr & ~MASK_ROUNDING_MODE) | modeBits;
+   }
+#endif
+}
+/*
+ * Select the extended precision rounding width (32, 64 or 80 bits) from
+ * the rounding precision bits of 'opcode'; an unrecognised encoding
+ * selects 80.  When MAINTAIN_FPCR is enabled the precision field of the
+ * FPCR is cleared and, for a recognised encoding, set to that encoding.
+ */
+void SetRoundingPrecision(const unsigned int opcode)
+{
+   const unsigned int requested = opcode & MASK_ROUNDING_PRECISION;
+
+   if (ROUND_SINGLE == requested)
+      floatx80_rounding_precision = 32;
+   else if (ROUND_DOUBLE == requested)
+      floatx80_rounding_precision = 64;
+   else
+      floatx80_rounding_precision = 80;   /* ROUND_EXTENDED or unknown */
+
+#if MAINTAIN_FPCR
+   {
+      FPA11 *fpa11 = GET_FPA11();
+
+      fpa11->fpcr &= ~MASK_ROUNDING_PRECISION;
+      /* an unrecognised encoding leaves the field cleared */
+      if (ROUND_SINGLE == requested
+          || ROUND_DOUBLE == requested
+          || ROUND_EXTENDED == requested)
+         fpa11->fpcr |= requested;
+   }
+#endif
+}
+/*
+ * Initialise the current thread's emulator state the first time it is
+ * needed: while initflag is still 0, reset the FPA11, select round to
+ * nearest and extended rounding precision, then set initflag to 1 so
+ * that later calls return immediately.
+ */
+void FPA11_CheckInit(void)
+{
+  FPA11 *state = GET_FPA11();
+
+  if (likely(state->initflag != 0))
+    return;
+
+  resetFPA11();
+  SetRoundingMode(ROUND_TO_NEAREST);
+  SetRoundingPrecision(ROUND_EXTENDED);
+  state->initflag = 1;
+}
+/*
+ * Emulate the instruction in the opcode.
+ *
+ * Only instructions for coprocessor 1 or 2 (the FPA11) are handled; they
+ * are dispatched on bits 27-25 and bit 4 to the CPRT, CPDO or CPDT
+ * emulator.
+ *
+ * Returns the result of the selected emulator, or 0 when the opcode is
+ * not an FPA11 instruction.  An opcode addressed to another coprocessor
+ * must be reported as a failure too: returning success for it would make
+ * the caller treat an instruction that was never executed as emulated.
+ */
+unsigned int EmulateAll(unsigned int opcode)
+{
+  unsigned int code;
+
+  /* bits 11-8: coprocessor number */
+  code = opcode & 0x00000f00;
+  if (code != 0x00000100 && code != 0x00000200)
+  {
+    /* Not for coprocessor 1 or 2 (FPA11), so not ours.  Return FALSE. */
+    return 0;
+  }
+
+  code = opcode & 0x0e000000;
+  if (code == 0x0e000000)
+  {
+    if (opcode & 0x00000010)
+    {
+      /* Emulate conversion opcodes. */
+      /* Emulate register transfer opcodes. */
+      /* Emulate comparison opcodes. */
+      return EmulateCPRT(opcode);
+    }
+
+    /* Emulate monadic arithmetic opcodes. */
+    /* Emulate dyadic arithmetic opcodes. */
+    return EmulateCPDO(opcode);
+  }
+
+  if (code == 0x0c000000)
+  {
+    /* Emulate load/store opcodes. */
+    /* Emulate load/store multiple opcodes. */
+    return EmulateCPDT(opcode);
+  }
+
+  /* Invalid instruction detected.  Return FALSE. */
+  return 0;
+}
+/*
+ * Disabled alternative dispatcher; nothing in this #if 0 region is
+ * compiled.  It switches on bits 27-24 of the opcode and, for load/store
+ * instructions, on bit 20 and the coprocessor number.
+ * NOTE(review): for 0xe opcodes it sends "opcode & 0x10" to EmulateCPDO
+ * and the rest to EmulateCPRT, the opposite of what EmulateAll() does -
+ * check before ever enabling this.
+ */
+#if 0
+unsigned int EmulateAll1(unsigned int opcode)
+{
+  switch ((opcode >> 24) & 0xf)
+  {
+     case 0xc:
+     case 0xd:
+       if ((opcode >> 20) & 0x1)
+       {
+          switch ((opcode >> 8) & 0xf)
+          {
+             case 0x1: return PerformLDF(opcode); break;
+             case 0x2: return PerformLFM(opcode); break;
+             default: return 0;
+          }
+       }
+       else
+       {
+          switch ((opcode >> 8) & 0xf)
+          {
+             case 0x1: return PerformSTF(opcode); break;
+             case 0x2: return PerformSFM(opcode); break;
+             default: return 0;
+          }
+      }
+     break;
+     
+     case 0xe: 
+       if (opcode & 0x10)
+         return EmulateCPDO(opcode);
+       else
+         return EmulateCPRT(opcode);
+     break;
+  
+     default: return 0;
+  }
+}
+#endif
diff --git a/arch/arm26/nwfpe/fpa11.h b/arch/arm26/nwfpe/fpa11.h
new file mode 100644
index 000000000000..be09902a211d
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11.h
@@ -0,0 +1,87 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#ifndef __FPA11_H__
+#define __FPA11_H__
+/* The emulator state of the current thread: the fpstate member of its
+   thread_info, viewed as an FPA11. */
+#define GET_FPA11() ((FPA11 *)(&current_thread_info()->fpstate))
+/*
+ * The process's registers are always at the very top of the 8K
+ * stack+task struct.  Use the same method as 'current' uses to
+ * reach them.
+ */
+/* user_registers is bound to the sl register, which nwfpe_enter (entry.S)
+   loads with the stack pointer before calling into the C code. */
+register unsigned int *user_registers asm("sl");
+#define GET_USERREG() (user_registers)
+#include <linux/thread_info.h>
+/* includes */
+#include "fpsr.h"               /* FP control and status register definitions */
+#include "softfloat.h"
+/* Type tags stored in FPA11.fType[], one per floating point register. */
+#define         typeNone                0x00
+#define         typeSingle              0x01
+#define         typeDouble              0x02
+#define         typeExtended            0x03
+/*
+ * One floating point register.  Which member currently holds the value
+ * is recorded separately, in the matching FPA11.fType[] entry.
+ *
+ * This must be no more and no less than 12 bytes.
+ */
+typedef union tagFPREG {
+   floatx80 fExtended;   /* used when the register is tagged typeExtended */
+   float64  fDouble;     /* used when the register is tagged typeDouble */
+   float32  fSingle;     /* used when the register is tagged typeSingle */
+} FPREG;
+/*
+ * FPA11 device model.
+ *
+ * This structure is exported to user space.  Do not re-order.
+ * Only add new stuff to the end, and do not change the size of
+ * any element.  Elements of this structure are used by user
+ * space, and must match struct user_fp in include/asm-arm/user.h.
+ * We include the byte offsets below for documentation purposes.
+ *
+ * The size of this structure and FPREG are checked by fpmodule.c
+ * on initialisation.  If the rules have been broken, NWFPE will
+ * not initialise.
+ */
+typedef struct tagFPA11 {
+/*   0 */  FPREG fpreg[8];              /* 8 floating point registers */
+/*  96 */  FPSR fpsr;                   /* floating point status register */
+/* 100 */  FPCR fpcr;                   /* floating point control register;
+                                           resetFPA11() and the SetRounding*()
+                                           helpers only update it when
+                                           MAINTAIN_FPCR is enabled */
+/* 104 */  unsigned char fType[8];      /* type of floating point value held in
+                                           floating point registers.  One of
+                                           typeNone, typeSingle, typeDouble or
+                                           typeExtended; fType[i] describes
+                                           fpreg[i]. */
+/* 112 */  int initflag;                /* this is special.  The kernel guarantees
+                                           to set it to 0 when a thread is launched,
+                                           so we can use it to detect whether this
+                                           instance of the emulator needs to be
+                                           initialised.  FPA11_CheckInit() tests it
+                                           and sets it to 1 once the emulator has
+                                           been reset. */
+} FPA11;
+extern void resetFPA11(void);
+extern void SetRoundingMode(const unsigned int);
+extern void SetRoundingPrecision(const unsigned int);
+#endif
diff --git a/arch/arm26/nwfpe/fpa11.inl b/arch/arm26/nwfpe/fpa11.inl
new file mode 100644
index 000000000000..1c45cba2de66
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11.inl
@@ -0,0 +1,51 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+/* Accessors for the floating point status register */
+
+/* Return the status register of the current FPA11 state. */
+extern __inline__ unsigned int readFPSR(void)
+{
+  FPA11 *state = GET_FPA11();
+
+  return state->fpsr;
+}
+/*
+ * Update the floating point status register from reg.  The bits
+ * covered by MASK_SYSID keep their present value because the sysid
+ * byte is read-only; every other bit is taken from reg.
+ */
+extern __inline__ void writeFPSR(FPSR reg)
+{
+  FPA11 *state = GET_FPA11();
+  FPSR sysid = state->fpsr & MASK_SYSID;
+  FPSR rest = reg & ~MASK_SYSID;
+
+  state->fpsr = sysid | rest;
+}
+/* Accessors for the floating point control register */
+
+/*
+ * Return the control register of the current FPA11 state with the
+ * MASK_RFC bits (SB, AB and DA) cleared.
+ */
+extern __inline__ FPCR readFPCR(void)
+{
+  FPA11 *state = GET_FPA11();
+  FPCR visible = state->fpcr & ~MASK_RFC;
+
+  return visible;
+}
+/*
+ * Update the floating point control register from reg.  Only the bits
+ * selected by MASK_WFC (SB, AB and DA) are replaced; the rest of the
+ * register is left as it was.
+ */
+extern __inline__ void writeFPCR(FPCR reg)
+{
+  FPA11 *state = GET_FPA11();
+
+  state->fpcr = (state->fpcr & ~MASK_WFC) | (reg & MASK_WFC);
+}
diff --git a/arch/arm26/nwfpe/fpa11_cpdo.c b/arch/arm26/nwfpe/fpa11_cpdo.c
new file mode 100644
index 000000000000..343a6b9fd520
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11_cpdo.c
@@ -0,0 +1,117 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "fpopcode.h"
+/* Per-precision CPDO handlers selected by EmulateCPDO(); a zero return
+   is treated there as "operation failed". */
+unsigned int SingleCPDO(const unsigned int opcode);
+unsigned int DoubleCPDO(const unsigned int opcode);
+unsigned int ExtendedCPDO(const unsigned int opcode);
+/*
+ * Emulate a CPDO instruction.
+ *
+ * The operation is carried out in the widest precision found among the
+ * source operands (or the destination size for monadic instructions),
+ * and the result left in Fd is then converted, if necessary, to the
+ * destination size encoded in the opcode.
+ *
+ * Returns 0 when the destination size is typeNone or the working
+ * precision is not single, double or extended; otherwise returns the
+ * value produced by the per-precision handler.
+ */
+unsigned int EmulateCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   unsigned int Fd, nType, nDest, nRc = 1;
+   
+   //printk("EmulateCPDO(0x%08x)\n",opcode);
+   /* Get the destination size.  If not valid let Linux perform
+      an invalid instruction trap. */
+   nDest = getDestinationSize(opcode);
+   if (typeNone == nDest) return 0;
+   
+   SetRoundingMode(opcode);
+     
+   /* Compare the size of the operands in Fn and Fm.
+      Choose the largest size and perform operations in that size,
+      in order to make use of all the precision of the operands. 
+      If Fm is a constant, we just grab a constant of a size 
+      matching the size of the operand in Fn. */
+   if (MONADIC_INSTRUCTION(opcode))
+     nType = nDest;     /* start from the destination size instead of Fn's type */
+   else
+     nType = fpa11->fType[getFn(opcode)];
+   
+   if (!CONSTANT_FM(opcode))
+   {
+     register unsigned int Fm = getFm(opcode);
+     if (nType < fpa11->fType[Fm])
+     {
+        /* Fm holds a wider value: widen the working precision */
+        nType = fpa11->fType[Fm];
+     }
+   }
+   /* Dispatch on the working precision chosen above. */
+   switch (nType)
+   {
+      case typeSingle   : nRc = SingleCPDO(opcode);   break;
+      case typeDouble   : nRc = DoubleCPDO(opcode);   break;
+      case typeExtended : nRc = ExtendedCPDO(opcode); break;
+      default           : nRc = 0;
+   }
+   /* If the operation succeeded, check to see if the result in the
+      destination register is the correct size.  If not force it
+      to be. */
+   Fd = getFd(opcode);
+   nType = fpa11->fType[Fd];    /* nType is reused: type now held in Fd */
+   if ((0 != nRc) && (nDest != nType))
+   {
+     switch (nDest)
+     {
+       case typeSingle:
+       {
+         if (typeDouble == nType)
+           fpa11->fpreg[Fd].fSingle = 
+              float64_to_float32(fpa11->fpreg[Fd].fDouble);
+         else
+           fpa11->fpreg[Fd].fSingle = 
+              floatx80_to_float32(fpa11->fpreg[Fd].fExtended);
+       }
+       break;
+          
+       case typeDouble:
+       {
+         if (typeSingle == nType)
+           fpa11->fpreg[Fd].fDouble = 
+              float32_to_float64(fpa11->fpreg[Fd].fSingle);
+         else
+           fpa11->fpreg[Fd].fDouble = 
+              floatx80_to_float64(fpa11->fpreg[Fd].fExtended);
+       }
+       break;
+          
+       case typeExtended:
+       {
+         if (typeSingle == nType)
+           fpa11->fpreg[Fd].fExtended = 
+              float32_to_floatx80(fpa11->fpreg[Fd].fSingle);
+         else
+           fpa11->fpreg[Fd].fExtended = 
+              float64_to_floatx80(fpa11->fpreg[Fd].fDouble);
+       }
+       break;
+     }
+     
+     /* record the new representation of Fd */
+     fpa11->fType[Fd] = nDest;
+   }
+   
+   return nRc;
+}
diff --git a/arch/arm26/nwfpe/fpa11_cpdt.c b/arch/arm26/nwfpe/fpa11_cpdt.c
new file mode 100644
index 000000000000..e12db7c51a76
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11_cpdt.c
@@ -0,0 +1,368 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    (c) Philip Blundell, 1998
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+#include <asm/uaccess.h>
+/*
+ * Load a single precision value from user memory at pMem into floating
+ * point register Fn and tag the register as typeSingle.  The result of
+ * get_user() is not checked.
+ */
+static inline
+void loadSingle(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *state = GET_FPA11();
+
+   state->fType[Fn] = typeSingle;
+   get_user(state->fpreg[Fn].fSingle, pMem);
+}
+/*
+ * Load a double precision value from the two user words at pMem into
+ * floating point register Fn and tag the register as typeDouble.  The
+ * two words are swapped on the way in: memory word 0 (sign & exponent)
+ * ends up in the second word of the register.
+ */
+static inline
+void loadDouble(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *state = GET_FPA11();
+   unsigned int *words = (unsigned int*)&state->fpreg[Fn].fDouble;
+
+   state->fType[Fn] = typeDouble;
+   get_user(words[0], &pMem[1]);
+   get_user(words[1], &pMem[0]);   /* sign & exponent */
+}
+/*
+ * Load an extended precision value from the three user words at pMem
+ * into floating point register Fn and tag the register as
+ * typeExtended.  Memory words 1 and 2 are swapped on the way in.
+ */
+static inline
+void loadExtended(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *state = GET_FPA11();
+   unsigned int *words = (unsigned int*)&state->fpreg[Fn].fExtended;
+
+   state->fType[Fn] = typeExtended;
+   get_user(words[0], &pMem[0]);   /* sign & exponent */
+   get_user(words[1], &pMem[2]);   /* ls bits */
+   get_user(words[2], &pMem[1]);   /* ms bits */
+}
+/*
+ * Load one register of an LFM transfer from the three user words at
+ * pMem into floating point register Fn.
+ *
+ * Bits 14-15 of word 0 give the type tag that is stored in fType[Fn].
+ * For single/double values words 2 and 1 become the first two words of
+ * the register and the third is cleared; for extended values word 0
+ * (masked with 0x80003fff, which drops the tag bits) becomes the first
+ * register word.  For any other tag value only fType[Fn] is updated.
+ */
+static inline
+void loadMultiple(const unsigned int Fn,const unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   register unsigned int *p;
+   unsigned long x;
+   p = (unsigned int*)&(fpa11->fpreg[Fn]);
+   get_user(x, &pMem[0]);
+   fpa11->fType[Fn] = (x >> 14) & 0x00000003;   /* type tag */
+   switch (fpa11->fType[Fn])
+   {
+      case typeSingle:
+      case typeDouble:
+      {
+         get_user(p[0], &pMem[2]);  /* Single */
+         get_user(p[1], &pMem[1]);  /* double msw */
+         p[2] = 0;        /* empty */
+      }
+      break;
+      case typeExtended:
+      {
+         get_user(p[1], &pMem[2]);
+         get_user(p[2], &pMem[1]);  /* msw */
+         p[0] = (x & 0x80003fff);   /* word 0 without the tag bits */
+      }
+      break;
+   }
+}
+/*
+ * Store floating point register Fn to user memory at pMem as a single
+ * precision value.  A register currently holding a double or extended
+ * value is converted first; any other type tag is stored as-is from
+ * the single precision view.
+ */
+static inline
+void storeSingle(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *state = GET_FPA11();
+   const unsigned int held = state->fType[Fn];
+   union
+   {
+     float32 f;
+     unsigned int i[1];
+   } out;
+
+   if (typeDouble == held)
+      out.f = float64_to_float32(state->fpreg[Fn].fDouble);
+   else if (typeExtended == held)
+      out.f = floatx80_to_float32(state->fpreg[Fn].fExtended);
+   else
+      out.f = state->fpreg[Fn].fSingle;
+
+   put_user(out.i[0], pMem);
+}
+/*
+ * Store floating point register Fn to the two user words at pMem as a
+ * double precision value.  A register currently holding a single or
+ * extended value is converted first; any other type tag is stored
+ * as-is from the double precision view.  The most significant word is
+ * written first in memory.
+ */
+static inline
+void storeDouble(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *state = GET_FPA11();
+   const unsigned int held = state->fType[Fn];
+   union
+   {
+     float64 f;
+     unsigned int i[2];
+   } out;
+
+   if (typeSingle == held)
+      out.f = float32_to_float64(state->fpreg[Fn].fSingle);
+   else if (typeExtended == held)
+      out.f = floatx80_to_float64(state->fpreg[Fn].fExtended);
+   else
+      out.f = state->fpreg[Fn].fDouble;
+
+   put_user(out.i[1], &pMem[0]);        /* msw */
+   put_user(out.i[0], &pMem[1]);        /* lsw */
+}
+/*
+ * Store floating point register Fn to the three user words at pMem as
+ * an extended precision value.  A register currently holding a single
+ * or double value is converted first; any other type tag is stored
+ * as-is from the extended precision view.  Words 1 and 2 are swapped
+ * on the way out.
+ */
+static inline
+void storeExtended(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *state = GET_FPA11();
+   const unsigned int held = state->fType[Fn];
+   union
+   {
+     floatx80 f;
+     unsigned int i[3];
+   } out;
+
+   if (typeSingle == held)
+      out.f = float32_to_floatx80(state->fpreg[Fn].fSingle);
+   else if (typeDouble == held)
+      out.f = float64_to_floatx80(state->fpreg[Fn].fDouble);
+   else
+      out.f = state->fpreg[Fn].fExtended;
+
+   put_user(out.i[0], &pMem[0]); /* sign & exp */
+   put_user(out.i[1], &pMem[2]);
+   put_user(out.i[2], &pMem[1]); /* msw */
+}
+/*
+ * Store floating point register Fn as one register of an SFM transfer
+ * into the three user words at pMem, without converting the value.
+ *
+ * The register's type tag is written into bits 14-15 of word 0.  For
+ * single/double values word 0 holds only the tag; for extended values
+ * it also holds the first register word masked with 0x80003fff.  If
+ * the register has any other type tag nothing is written.
+ */
+static inline
+void storeMultiple(const unsigned int Fn,unsigned int *pMem)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   register unsigned int nType, *p;
+   p = (unsigned int*)&(fpa11->fpreg[Fn]);
+   nType = fpa11->fType[Fn];
+   switch (nType)
+   {
+      case typeSingle:
+      case typeDouble:
+      {
+         put_user(p[0], &pMem[2]); /* single */
+         put_user(p[1], &pMem[1]); /* double msw */
+         put_user(nType << 14, &pMem[0]);       /* tag only */
+      }
+      break;
+      case typeExtended:
+      {
+         put_user(p[2], &pMem[1]); /* msw */
+         put_user(p[1], &pMem[2]);
+         put_user((p[0] & 0x80003fff) | (nType << 14), &pMem[0]);
+      }
+      break;
+   }
+}
+/*
+ * Emulate LDF: load floating point register Fd from user memory.
+ *
+ * The address is formed from base register Rn and the offset in the
+ * opcode.  When write-back is requested (and Rn is not the PC) the
+ * final address is written to Rn, whether or not the load was done.
+ *
+ * Returns 1 on success, 0 if the transfer length is not single, double
+ * or extended.
+ */
+unsigned int PerformLDF(const unsigned int opcode)
+{
+   const unsigned int Rn = getRn(opcode);
+   unsigned int *base, *updated, *target;
+   unsigned int rc = 1;
+   unsigned int wb = WRITE_BACK(opcode);
+
+   //printk("PerformLDF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+   base = (unsigned int*)readRegister(Rn);
+   if (REG_PC == Rn)
+   {
+     /* PC-relative: address from two words past the PC, never write back */
+     base += 2;
+     wb = 0;
+   }
+
+   /* apply the offset upwards or downwards */
+   updated = base;
+   if (BIT_UP_SET(opcode))
+     updated += getOffset(opcode);
+   else
+     updated -= getOffset(opcode);
+
+   /* pre-indexed uses the adjusted address, otherwise the plain base */
+   target = (PREINDEXED(opcode)) ? updated : base;
+
+   switch (opcode & MASK_TRANSFER_LENGTH)
+   {
+      case TRANSFER_SINGLE:
+         loadSingle(getFd(opcode),target);
+         break;
+      case TRANSFER_DOUBLE:
+         loadDouble(getFd(opcode),target);
+         break;
+      case TRANSFER_EXTENDED:
+         loadExtended(getFd(opcode),target);
+         break;
+      default:
+         rc = 0;
+   }
+
+   if (wb)
+     writeRegister(Rn,(unsigned int)updated);
+   return rc;
+}
+/*
+ * Emulate STF: store floating point register Fd to user memory.
+ *
+ * Rounding is forced to ROUND_TO_NEAREST for any conversion done by
+ * the store.  The address is formed from base register Rn and the
+ * offset in the opcode.  When write-back is requested (and Rn is not
+ * the PC) the final address is written to Rn, whether or not the
+ * store was done.
+ *
+ * Returns 1 on success, 0 if the transfer length is not single, double
+ * or extended.
+ */
+unsigned int PerformSTF(const unsigned int opcode)
+{
+   const unsigned int Rn = getRn(opcode);
+   unsigned int *base, *updated, *target;
+   unsigned int rc = 1;
+   unsigned int wb = WRITE_BACK(opcode);
+
+   //printk("PerformSTF(0x%08x), Fd = 0x%08x\n",opcode,getFd(opcode));
+   SetRoundingMode(ROUND_TO_NEAREST);
+   base = (unsigned int*)readRegister(Rn);
+   if (REG_PC == Rn)
+   {
+     /* PC-relative: address from two words past the PC, never write back */
+     base += 2;
+     wb = 0;
+   }
+
+   /* apply the offset upwards or downwards */
+   updated = base;
+   if (BIT_UP_SET(opcode))
+     updated += getOffset(opcode);
+   else
+     updated -= getOffset(opcode);
+
+   /* pre-indexed uses the adjusted address, otherwise the plain base */
+   target = (PREINDEXED(opcode)) ? updated : base;
+
+   switch (opcode & MASK_TRANSFER_LENGTH)
+   {
+      case TRANSFER_SINGLE:
+         storeSingle(getFd(opcode),target);
+         break;
+      case TRANSFER_DOUBLE:
+         storeDouble(getFd(opcode),target);
+         break;
+      case TRANSFER_EXTENDED:
+         storeExtended(getFd(opcode),target);
+         break;
+      default:
+         rc = 0;
+   }
+
+   if (wb)
+     writeRegister(Rn,(unsigned int)updated);
+   return rc;
+}
+/*
+ * Emulate LFM: load getRegisterCount(opcode) consecutive floating
+ * point registers, starting at Fd and wrapping from register 7 back to
+ * register 0, from successive three-word slots in user memory.
+ *
+ * Addressing and write-back follow the same rules as the single
+ * register transfers.  Always returns 1.
+ */
+unsigned int PerformLFM(const unsigned int opcode)
+{
+   const unsigned int Rn = getRn(opcode);
+   unsigned int *base, *updated, *slot;
+   unsigned int remaining, reg;
+   unsigned int wb = WRITE_BACK(opcode);
+
+   base = (unsigned int*)readRegister(Rn);
+   if (REG_PC == Rn)
+   {
+     /* PC-relative: address from two words past the PC, never write back */
+     base += 2;
+     wb = 0;
+   }
+
+   /* apply the offset upwards or downwards */
+   updated = base;
+   if (BIT_UP_SET(opcode))
+     updated += getOffset(opcode);
+   else
+     updated -= getOffset(opcode);
+
+   /* pre-indexed uses the adjusted address, otherwise the plain base */
+   slot = (PREINDEXED(opcode)) ? updated : base;
+
+   reg = getFd(opcode);
+   remaining = getRegisterCount(opcode);
+   while (remaining > 0)
+   {
+     loadMultiple(reg,slot);
+     slot += 3;
+     reg++;
+     if (reg == 8)
+       reg = 0;
+     remaining--;
+   }
+
+   if (wb)
+     writeRegister(Rn,(unsigned int)updated);
+   return 1;
+}
+/*
+ * Emulate SFM: store getRegisterCount(opcode) consecutive floating
+ * point registers, starting at Fd and wrapping from register 7 back to
+ * register 0, into successive three-word slots in user memory.
+ *
+ * Addressing and write-back follow the same rules as the single
+ * register transfers.  Always returns 1.
+ */
+unsigned int PerformSFM(const unsigned int opcode)
+{
+   const unsigned int Rn = getRn(opcode);
+   unsigned int *base, *updated, *slot;
+   unsigned int remaining, reg;
+   unsigned int wb = WRITE_BACK(opcode);
+
+   base = (unsigned int*)readRegister(Rn);
+   if (REG_PC == Rn)
+   {
+     /* PC-relative: address from two words past the PC, never write back */
+     base += 2;
+     wb = 0;
+   }
+
+   /* apply the offset upwards or downwards */
+   updated = base;
+   if (BIT_UP_SET(opcode))
+     updated += getOffset(opcode);
+   else
+     updated -= getOffset(opcode);
+
+   /* pre-indexed uses the adjusted address, otherwise the plain base */
+   slot = (PREINDEXED(opcode)) ? updated : base;
+
+   reg = getFd(opcode);
+   remaining = getRegisterCount(opcode);
+   while (remaining > 0)
+   {
+     storeMultiple(reg,slot);
+     slot += 3;
+     reg++;
+     if (reg == 8)
+       reg = 0;
+     remaining--;
+   }
+
+   if (wb)
+     writeRegister(Rn,(unsigned int)updated);
+   return 1;
+}
+#if 1
+/*
+ * Emulate a CPDT instruction by handing the opcode to the matching
+ * LDF, LFM, STF or SFM routine.  Returns that routine's result, or 0
+ * when the opcode is none of the four.
+ */
+unsigned int EmulateCPDT(const unsigned int opcode)
+{
+  //printk("EmulateCPDT(0x%08x)\n",opcode);
+  if (LDF_OP(opcode))
+    return PerformLDF(opcode);
+
+  if (LFM_OP(opcode))
+    return PerformLFM(opcode);
+
+  if (STF_OP(opcode))
+    return PerformSTF(opcode);
+
+  if (SFM_OP(opcode))
+    return PerformSFM(opcode);
+
+  return 0;
+}
+#endif
diff --git a/arch/arm26/nwfpe/fpa11_cprt.c b/arch/arm26/nwfpe/fpa11_cprt.c
new file mode 100644
index 000000000000..a201076c1f14
--- /dev/null
+++ b/arch/arm26/nwfpe/fpa11_cprt.c
@@ -0,0 +1,289 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    (c) Philip Blundell, 1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "milieu.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpa11.inl"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+/* NaN predicates used by PerformComparison() to detect unordered operands */
+extern flag floatx80_is_nan(floatx80);
+extern flag float64_is_nan( float64);
+extern flag float32_is_nan( float32);
+void SetRoundingMode(const unsigned int opcode);
+/* forward declarations of the handlers dispatched from EmulateCPRT() */
+unsigned int PerformFLT(const unsigned int opcode);
+unsigned int PerformFIX(const unsigned int opcode);
+static unsigned int
+PerformComparison(const unsigned int opcode);
+/*
+ * Emulate a CPRT instruction.
+ *
+ * Comparisons (bit 0x800000 set) are handed straight to
+ * PerformComparison().  Otherwise bits 20-22 select FLT, FIX, WFS or
+ * RFS; the FPCR transfers (WFC/RFC) are compiled out.
+ *
+ * Returns 0 if the opcode is not handled, otherwise the handler's
+ * result (1 for WFS and RFS).
+ */
+unsigned int EmulateCPRT(const unsigned int opcode)
+{
+  unsigned int nRc = 1;
+  //printk("EmulateCPRT(0x%08x)\n",opcode);
+  if (opcode & 0x800000)
+  {
+     /* This is some variant of a comparison (PerformComparison will
+        sort out which one).  Since most of the other CPRT
+        instructions are oddball cases of some sort or other it makes
+        sense to pull this out into a fast path.  */
+     return PerformComparison(opcode);
+  }
+  /* Hint to GCC that we'd like a jump table rather than a load of CMPs */
+  switch ((opcode & 0x700000) >> 20)
+  {
+    case  FLT_CODE >> 20: nRc = PerformFLT(opcode); break;
+    case  FIX_CODE >> 20: nRc = PerformFIX(opcode); break;
+    
+    case  WFS_CODE >> 20: writeFPSR(readRegister(getRd(opcode))); break;
+    case  RFS_CODE >> 20: writeRegister(getRd(opcode),readFPSR()); break;
+#if 0    /* We currently have no use for the FPCR, so there's no point
+            in emulating it. */
+    /* WFC must not fall through into RFC if this is ever enabled */
+    case  WFC_CODE >> 20: writeFPCR(readRegister(getRd(opcode))); break;
+    case  RFC_CODE >> 20: writeRegister(getRd(opcode),readFPCR()); break;
+#endif
+    default: nRc = 0;
+  }
+  
+  return nRc;
+}
+/*
+ * Emulate FLT: convert the integer in ARM register Rd to a floating
+ * point value in register Fn.  The rounding precision field of the
+ * opcode selects single, double or extended, and the register's type
+ * tag is set to match.
+ *
+ * Returns 1 on success, 0 if the precision field is none of the three.
+ */
+unsigned int PerformFLT(const unsigned int opcode)
+{
+   FPA11 *state = GET_FPA11();
+   const unsigned int Fn = getFn(opcode);
+   unsigned int rc = 1;
+
+   SetRoundingMode(opcode);
+   switch (opcode & MASK_ROUNDING_PRECISION)
+   {
+      case ROUND_SINGLE:
+        state->fType[Fn] = typeSingle;
+        state->fpreg[Fn].fSingle =
+           int32_to_float32(readRegister(getRd(opcode)));
+        break;
+
+      case ROUND_DOUBLE:
+        state->fType[Fn] = typeDouble;
+        state->fpreg[Fn].fDouble =
+           int32_to_float64(readRegister(getRd(opcode)));
+        break;
+
+      case ROUND_EXTENDED:
+        state->fType[Fn] = typeExtended;
+        state->fpreg[Fn].fExtended =
+           int32_to_floatx80(readRegister(getRd(opcode)));
+        break;
+
+      default:
+        rc = 0;
+   }
+
+   return rc;
+}
+/*
+ * Emulate FIX: convert the floating point value in register Fm to a
+ * 32-bit integer and write it to ARM register Rd.  The conversion used
+ * follows the type currently held in Fm.
+ *
+ * Returns 1 on success, 0 if Fm holds no single, double or extended
+ * value.
+ */
+unsigned int PerformFIX(const unsigned int opcode)
+{
+   FPA11 *state = GET_FPA11();
+   const unsigned int Fm = getFm(opcode);
+   unsigned int rc = 1;
+
+   SetRoundingMode(opcode);
+   switch (state->fType[Fm])
+   {
+      case typeSingle:
+         writeRegister(getRd(opcode),
+                       float32_to_int32(state->fpreg[Fm].fSingle));
+         break;
+
+      case typeDouble:
+         writeRegister(getRd(opcode),
+                       float64_to_int32(state->fpreg[Fm].fDouble));
+         break;
+
+      case typeExtended:
+         writeRegister(getRd(opcode),
+                       floatx80_to_int32(state->fpreg[Fm].fExtended));
+         break;
+
+      default:
+         rc = 0;
+   }
+
+   return rc;
+}
+   
+/*
+ * Compare two extended precision values and write the resulting
+ * condition codes with writeConditionCodes().  Always returns 1.
+ */
+static unsigned int __inline__
+PerformComparisonOperation(floatx80 Fn, floatx80 Fm)
+{
+   unsigned int cc = 0;
+
+   /* negative: Fn is less than Fm */
+   if (floatx80_lt(Fn,Fm))
+      cc |= CC_NEGATIVE;
+
+   /* zero: the operands compare equal */
+   if (floatx80_eq(Fn,Fm))
+      cc |= CC_ZERO;
+
+   /* carry: Fm is less than Fn, i.e. Fn is strictly greater */
+   if (floatx80_lt(Fm,Fn))
+      cc |= CC_CARRY;
+
+   writeConditionCodes(cc);
+   return 1;
+}
+/* This instruction sets the flags N, Z, C, V in the FPSR. */
+   
+/*
+ * Emulate an FPA comparison between Fn and Fm (a register or a
+ * constant).
+ *
+ * Both operands are widened to extended precision and passed to
+ * PerformComparisonOperation().  If either operand is a NaN the
+ * "unordered" path at the end of the function sets the flags instead.
+ *
+ * Returns 1 when the comparison was handled, 0 when a register operand
+ * holds no single, double or extended value.
+ */
+static unsigned int PerformComparison(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   unsigned int Fn, Fm;
+   floatx80 rFn, rFm;
+   int e_flag = opcode & 0x400000;      /* non-zero if CxFE */
+   int n_flag = opcode & 0x200000;      /* non-zero if CNxx */
+   unsigned int flags = 0;
+   //printk("PerformComparison(0x%08x)\n",opcode);
+   Fn = getFn(opcode);
+   Fm = getFm(opcode);
+   /* Check for unordered condition and convert all operands to 80-bit
+      format.
+      ?? Might be some mileage in avoiding this conversion if possible.
+      Eg, if both operands are 32-bit, detect this and do a 32-bit
+      comparison (cheaper than an 80-bit one).  */
+   switch (fpa11->fType[Fn])
+   {
+      case typeSingle: 
+        //printk("single.\n");
+        if (float32_is_nan(fpa11->fpreg[Fn].fSingle))
+           goto unordered;
+        rFn = float32_to_floatx80(fpa11->fpreg[Fn].fSingle);
+      break;
+      case typeDouble: 
+        //printk("double.\n");
+        if (float64_is_nan(fpa11->fpreg[Fn].fDouble))
+           goto unordered;
+        rFn = float64_to_floatx80(fpa11->fpreg[Fn].fDouble);
+      break;
+      
+      case typeExtended: 
+        //printk("extended.\n");
+        if (floatx80_is_nan(fpa11->fpreg[Fn].fExtended))
+           goto unordered;
+        rFn = fpa11->fpreg[Fn].fExtended;
+      break;
+      
+      default: return 0;
+   }
+   if (CONSTANT_FM(opcode))
+   {
+     //printk("Fm is a constant: #%d.\n",Fm);
+     rFm = getExtendedConstant(Fm);
+     if (floatx80_is_nan(rFm))
+        goto unordered;
+   }
+   else
+   {
+     //printk("Fm = r%d which contains a ",Fm);
+      switch (fpa11->fType[Fm])
+      {
+         case typeSingle: 
+           //printk("single.\n");
+           if (float32_is_nan(fpa11->fpreg[Fm].fSingle))
+              goto unordered;
+           rFm = float32_to_floatx80(fpa11->fpreg[Fm].fSingle);
+         break;
+         case typeDouble: 
+           //printk("double.\n");
+           if (float64_is_nan(fpa11->fpreg[Fm].fDouble))
+              goto unordered;
+           rFm = float64_to_floatx80(fpa11->fpreg[Fm].fDouble);
+         break;
+      
+         case typeExtended: 
+           //printk("extended.\n");
+           if (floatx80_is_nan(fpa11->fpreg[Fm].fExtended))
+              goto unordered;
+           rFm = fpa11->fpreg[Fm].fExtended;
+         break;
+      
+         default: return 0;
+      }
+   }
+   if (n_flag)
+   {
+      /* CNxx: toggle bit 15 of rFm.high (presumably the floatx80 sign
+         bit, so that Fn is compared against -Fm -- confirm against
+         softfloat.h) */
+      rFm.high ^= 0x8000;
+   }
+   return PerformComparisonOperation(rFn,rFm);
+ unordered:
+   /* ?? The FPA data sheet is pretty vague about this, in particular
+      about whether the non-E comparisons can ever raise exceptions.
+      This implementation is based on a combination of what it says in
+      the data sheet, observation of how the Acorn emulator actually
+      behaves (and how programs expect it to) and guesswork.  */
+   /* Unordered result: overflow set, zero and negative clear, carry
+      set only when the AC bit of the FPSR is set.  The E variants
+      additionally raise the invalid operation exception. */
+   flags |= CC_OVERFLOW;
+   flags &= ~(CC_ZERO | CC_NEGATIVE);
+   if (BIT_AC & readFPSR()) flags |= CC_CARRY;
+   if (e_flag) float_raise(float_flag_invalid);
+   writeConditionCodes(flags);
+   return 1;
+}
diff --git a/arch/arm26/nwfpe/fpmodule.c b/arch/arm26/nwfpe/fpmodule.c
new file mode 100644
index 000000000000..528fa710aa34
--- /dev/null
+++ b/arch/arm26/nwfpe/fpmodule.c
@@ -0,0 +1,182 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    (c) Philip Blundell, 1998-1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/config.h>
+/* XXX */
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+/* XXX */
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpmodule.h"
+#include "fpa11.inl"
+/* kernel symbols required for signal handling */
+typedef struct task_struct*     PTASK;
+#ifdef MODULE
+void fp_send_sig(unsigned long sig, PTASK p, int priv);
+#if LINUX_VERSION_CODE > 0x20115
+MODULE_AUTHOR("Scott Bambrough <scottb@rebel.com>");
+MODULE_DESCRIPTION("NWFPE floating point emulator");
+#endif
+#else
+/* not built as a module: map the emulator's names onto the kernel's */
+#define fp_send_sig     send_sig
+#define kern_fp_enter   fp_enter
+extern char fpe_type[];         /* compared against "nwfpe" in fpe_init() */
+#endif
+/* kernel function prototypes required */
+void fp_setup(void);
+/* external declarations for saved kernel symbols */
+extern void (*kern_fp_enter)(void);
+/* Original value of fp_enter from kernel before patched by fpe_init. */ 
+static void (*orig_fp_enter)(void);
+/* forward declarations */
+extern void nwfpe_enter(void);
+#ifdef MODULE
+/*
+ * Module unload check.  Unloading is only allowed (return 0) while
+ * kern_fp_enter still points at nwfpe_enter; otherwise 1 is returned.
+ */
+static int fpe_unload(void)
+{
+  if (kern_fp_enter == nwfpe_enter)
+    return 0;
+
+  return 1;
+}
+#endif
+/*
+ * Initialise the emulator and hook it into the kernel.
+ *
+ * Refuses to initialise (-EINVAL) if FPA11 does not fit in union
+ * fp_state or FPREG is not exactly 12 bytes.  When built in, returns 0
+ * without installing anything if fpe_type names an emulator other than
+ * "nwfpe".  Otherwise the current kern_fp_enter handler is saved in
+ * orig_fp_enter and replaced by nwfpe_enter.
+ */
+static int __init fpe_init(void)
+{
+  if (sizeof(FPA11) > sizeof(union fp_state)) {
+    printk(KERN_ERR "nwfpe: bad structure size\n");
+    return -EINVAL;
+  }
+  if (sizeof(FPREG) != 12) {
+    printk(KERN_ERR "nwfpe: bad register size\n");
+    return -EINVAL;
+  }
+#ifdef MODULE
+  /* the module structure must provide a can_unload hook */
+  if (!mod_member_present(&__this_module, can_unload))
+    return -EINVAL;
+  __this_module.can_unload = fpe_unload;
+#else
+  /* a different emulator was selected: leave the handler alone */
+  if (fpe_type[0] && strcmp(fpe_type, "nwfpe"))
+    return 0;
+#endif
+  /* Display title, version and copyright information. */
+  printk(KERN_WARNING "NetWinder Floating Point Emulator V0.95 "
+         "(c) 1998-1999 Rebel.com\n");
+  /* Save pointer to the old FP handler and then patch ourselves in */
+  orig_fp_enter = kern_fp_enter;
+  kern_fp_enter = nwfpe_enter;
+  return 0;
+}
+/* Unhook the emulator: put back the FP handler saved by fpe_init(). */
+static void __exit fpe_exit(void)
+{
+  /* Restore the values we saved earlier. */
+  kern_fp_enter = orig_fp_enter;
+}
+/*
+ScottB:  November 4, 1998
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c.  Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+[1/1/99: Not quite true any more unfortunately.  There is Linux-specific
+code to access data in user space in some other source files at the 
+moment (grep for get_user / put_user calls).  --philb]
+float_exception_flags is a global variable in SoftFloat.
+This function is called by the SoftFloat routines to raise a floating
+point exception.  We check the trap enable byte in the FPSR, and raise
+a SIGFPE exception if necessary.  If not the relevant bits in the 
+cumulative exceptions flag byte are set and we return.
+
+flags: the exception bits being raised, tested against the BIT_IXC,
+BIT_UFC, BIT_OFC, BIT_DZC and BIT_IOC masks.
+*/
+void float_raise(signed char flags)
+{
+  register unsigned int fpsr, cumulativeTraps;
+  
+#ifdef CONFIG_DEBUG_USER
+  printk(KERN_DEBUG "NWFPE: %s[%d] takes exception %08x at %p from %08x\n",
+         current->comm, current->pid, flags,
+         __builtin_return_address(0), GET_USERREG()[15]);
+#endif
+  /* Keep SoftFloat exception flags up to date.  */
+  float_exception_flags |= flags;
+  /* Read fpsr and initialize the cumulativeTraps.  */
+  fpsr = readFPSR();
+  cumulativeTraps = 0;
+  
+  /* For each type of exception, the cumulative trap exception bit is only
+     set if the corresponding trap enable bit is not set.  */
+  if ((!(fpsr & BIT_IXE)) && (flags & BIT_IXC))
+     cumulativeTraps |= BIT_IXC;  
+  if ((!(fpsr & BIT_UFE)) && (flags & BIT_UFC))
+     cumulativeTraps |= BIT_UFC;  
+  if ((!(fpsr & BIT_OFE)) && (flags & BIT_OFC))
+     cumulativeTraps |= BIT_OFC;  
+  if ((!(fpsr & BIT_DZE)) && (flags & BIT_DZC))
+     cumulativeTraps |= BIT_DZC;  
+  if ((!(fpsr & BIT_IOE)) && (flags & BIT_IOC))
+     cumulativeTraps |= BIT_IOC;  
+  /* Set the cumulative exceptions flags.  */
+  if (cumulativeTraps)
+    writeFPSR(fpsr | cumulativeTraps);
+  /* Raise an exception if necessary.  */
+  /* NOTE(review): presumably each trap enable bit sits 16 bits above
+     its exception flag bit, so flags << 16 lines up with the enable
+     bits of the fpsr -- confirm against the FPSR bit definitions. */
+  if (fpsr & (flags << 16))
+    fp_send_sig(SIGFPE, current, 1);
+}
+module_init(fpe_init);
+module_exit(fpe_exit);
diff --git a/arch/arm26/nwfpe/fpmodule.h b/arch/arm26/nwfpe/fpmodule.h
new file mode 100644
index 000000000000..ef71aab46a32
--- /dev/null
+++ b/arch/arm26/nwfpe/fpmodule.h
@@ -0,0 +1,47 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#ifndef __FPMODULE_H__
+#define __FPMODULE_H__
+#include <linux/config.h>
+/* Indices into the saved user register array accessed by
+   readRegister() and writeRegister(). */
+#define REG_ORIG_R0     16
+#define REG_CPSR        15      /* same slot as REG_PC */
+#define REG_PC          15
+#define REG_LR          14
+#define REG_SP          13
+#define REG_IP          12
+#define REG_FP          11
+#define REG_R10         10
+#define REG_R9          9
+#define REG_R8          8
+#define REG_R7          7
+#define REG_R6          6
+#define REG_R5          5
+#define REG_R4          4
+#define REG_R3          3
+#define REG_R2          2
+#define REG_R1          1
+#define REG_R0          0
+#endif
diff --git a/arch/arm26/nwfpe/fpmodule.inl b/arch/arm26/nwfpe/fpmodule.inl
new file mode 100644
index 000000000000..ef228378ffaf
--- /dev/null
+++ b/arch/arm26/nwfpe/fpmodule.inl
@@ -0,0 +1,84 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+extern __inline__
+unsigned int readRegister(const unsigned int nReg)
+{
+  /*
+   * Return the saved value of user register nReg.
+   *
+   * When the invalid instruction trap is taken the CPU believes it has
+   * finished with the offending instruction, so the saved program counter
+   * already points 4 bytes past it.  A read of the PC is therefore wound
+   * back by 4; LDF/STF with Rn = PC depend on this because they use PC+8
+   * in their address calculations.
+   */
+  unsigned int *regs = GET_USERREG();
+  if (nReg != REG_PC)
+    return regs[nReg];
+  return regs[nReg] - 4;
+}
+extern __inline__
+void writeRegister(const unsigned int nReg, const unsigned int val)
+{
+  /* Store val in slot nReg of the saved user register array, unadjusted. */
+  unsigned int *regs = GET_USERREG();
+  *(regs + nReg) = val;
+}
+extern __inline__
+unsigned int readCPSR(void)
+{
+  /* The status word is fetched through the generic register accessor. */
+  const unsigned int psr = readRegister(REG_CPSR);
+  return psr;
+}
+extern __inline__
+void writeCPSR(const unsigned int val)
+{
+  /* Replaces the entire REG_CPSR slot with val, not just the flag bits. */
+  writeRegister(REG_CPSR, val);
+}
+extern __inline__
+unsigned int readConditionCodes(void)
+{
+  /*
+   * Normal builds return only the CC_MASK bits of the status word.
+   * Builds with __FPEM_TEST__ defined always report all flags clear.
+   */
+#ifndef __FPEM_TEST__
+   return readCPSR() & CC_MASK;
+#else
+   return 0;
+#endif
+}
+extern __inline__
+void writeConditionCodes(const unsigned int val)
+{
+  /*
+   * Replace the CC_MASK bits of the status slot with those of val and
+   * leave every other bit as it was.  The register array is accessed
+   * directly, rather than through readCPSR()/writeCPSR(), since the
+   * CPSR may be the PC register itself.
+   */
+  unsigned int *regs = GET_USERREG();
+  const unsigned int kept = regs[REG_CPSR] & ~CC_MASK;
+  const unsigned int flags = val & CC_MASK;
+  regs[REG_CPSR] = kept | flags;
+}
+extern __inline__
+unsigned int readMemoryInt(unsigned int *pMem)
+{
+  /* Plain dereference of pMem; no checking of the pointer is done here. */
+  return pMem[0];
+}
diff --git a/arch/arm26/nwfpe/fpopcode.c b/arch/arm26/nwfpe/fpopcode.c
new file mode 100644
index 000000000000..d81ddd188322
--- /dev/null
+++ b/arch/arm26/nwfpe/fpopcode.c
@@ -0,0 +1,148 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+#include "fpsr.h"
+#include "fpmodule.h"
+#include "fpmodule.inl"
+/* Extended precision constants, looked up by index via getExtendedConstant(). */
+const floatx80 floatx80Constant[] = {
+  [0] = { 0x0000, 0x0000000000000000ULL },      /* 0.0 */
+  [1] = { 0x3fff, 0x8000000000000000ULL },      /* 1.0 */
+  [2] = { 0x4000, 0x8000000000000000ULL },      /* 2.0 */
+  [3] = { 0x4000, 0xc000000000000000ULL },      /* 3.0 */
+  [4] = { 0x4001, 0x8000000000000000ULL },      /* 4.0 */
+  [5] = { 0x4001, 0xa000000000000000ULL },      /* 5.0 */
+  [6] = { 0x3ffe, 0x8000000000000000ULL },      /* 0.5 */
+  [7] = { 0x4002, 0xa000000000000000ULL }       /* 10.0 */
+};
+/* Double precision constants, looked up by index via getDoubleConstant(). */
+const float64 float64Constant[] = {
+  [0] = 0x0000000000000000ULL,          /* 0.0 */
+  [1] = 0x3ff0000000000000ULL,          /* 1.0 */
+  [2] = 0x4000000000000000ULL,          /* 2.0 */
+  [3] = 0x4008000000000000ULL,          /* 3.0 */
+  [4] = 0x4010000000000000ULL,          /* 4.0 */
+  [5] = 0x4014000000000000ULL,          /* 5.0 */
+  [6] = 0x3fe0000000000000ULL,          /* 0.5 */
+  [7] = 0x4024000000000000ULL           /* 10.0 */
+};
+/* Single precision constants, looked up by index via getSingleConstant(). */
+const float32 float32Constant[] = {
+  [0] = 0x00000000,                     /* 0.0 */
+  [1] = 0x3f800000,                     /* 1.0 */
+  [2] = 0x40000000,                     /* 2.0 */
+  [3] = 0x40400000,                     /* 3.0 */
+  [4] = 0x40800000,                     /* 4.0 */
+  [5] = 0x40a00000,                     /* 5.0 */
+  [6] = 0x3f000000,                     /* 0.5 */
+  [7] = 0x41200000                      /* 10.0 */
+};
+unsigned int getTransferLength(const unsigned int opcode)
+{
+  /*
+   * Decode the transfer length bits of a data transfer opcode:
+   * 1 for single, 2 for double, 3 for extended precision, and 0 when
+   * both length bits are set.
+   */
+  const unsigned int lengthBits = opcode & MASK_TRANSFER_LENGTH;
+  if (lengthBits == TRANSFER_SINGLE)
+    return 1;
+  if (lengthBits == TRANSFER_DOUBLE)
+    return 2;
+  if (lengthBits == TRANSFER_EXTENDED)
+    return 3;
+  return 0;
+}
+unsigned int getRegisterCount(const unsigned int opcode)
+{
+  /*
+   * Decode the register count bits of a multiple transfer opcode.
+   * Both bits clear means 4 registers; the other encodings give 1, 2
+   * and 3.  Any value not listed yields 0.
+   */
+  const unsigned int countBits = opcode & MASK_REGISTER_COUNT;
+  if (countBits == 0x00000000)
+    return 4;
+  if (countBits == 0x00008000)
+    return 1;
+  if (countBits == 0x00400000)
+    return 2;
+  if (countBits == 0x00408000)
+    return 3;
+  return 0;
+}
+unsigned int getRoundingPrecision(const unsigned int opcode)
+{
+  /*
+   * Decode the rounding precision bits: 1 = single, 2 = double,
+   * 3 = extended, 0 for the remaining (both bits set) encoding.
+   */
+  const unsigned int precisionBits = opcode & MASK_ROUNDING_PRECISION;
+  if (precisionBits == ROUND_SINGLE)
+    return 1;
+  if (precisionBits == ROUND_DOUBLE)
+    return 2;
+  if (precisionBits == ROUND_EXTENDED)
+    return 3;
+  return 0;
+}
+unsigned int getDestinationSize(const unsigned int opcode)
+{
+  /*
+   * Map the destination size bits of an arithmetic opcode onto the
+   * register type tags; the both-bits-set encoding maps to typeNone.
+   */
+  switch (opcode & MASK_DESTINATION_SIZE)
+  {
+    case 0x00000000: return typeSingle;
+    case 0x00000080: return typeDouble;
+    case 0x00080000: return typeExtended;
+  }
+  return typeNone;
+}
+/* condition code lookup table
+ index into the table is test code: EQ, NE, ... LT, GT, AL, NV
+ bit position in short is condition code: NZCV
+ i.e. bit n of an entry is set when the condition holds for the flag
+ nibble n, where N = 8, Z = 4, C = 2 and V = 1 (same order as the
+ CC_NEGATIVE/CC_ZERO/CC_CARRY/CC_OVERFLOW bits shifted down by 28). */
+static const unsigned short aCC[16] = {
+    0xF0F0, // EQ == Z set
+    0x0F0F, // NE
+    0xCCCC, // CS == C set
+    0x3333, // CC
+    0xFF00, // MI == N set
+    0x00FF, // PL
+    0xAAAA, // VS == V set
+    0x5555, // VC
+    0x0C0C, // HI == C set && Z clear
+    0xF3F3, // LS == C clear || Z set
+    0xAA55, // GE == (N==V)
+    0x55AA, // LT == (N!=V)
+    0x0A05, // GT == (!Z && (N==V))
+    0xF5FA, // LE == (Z || (N!=V))
+    0xFFFF, // AL always
+    0 // NV
+};
+unsigned int checkCondition(const unsigned int opcode, const unsigned int ccodes)
+{
+  /*
+   * Returns 1 when the condition in the top four bits of opcode is
+   * satisfied by the flags in the top four bits of ccodes, else 0.
+   * The condition selects a row of aCC; the flag nibble selects the bit.
+   */
+  const unsigned int cond = opcode >> 28;
+  const unsigned int flagNibble = ccodes >> 28;
+  return (aCC[cond] >> flagNibble) & 1;
+}
diff --git a/arch/arm26/nwfpe/fpopcode.h b/arch/arm26/nwfpe/fpopcode.h
new file mode 100644
index 000000000000..13c7419262ab
--- /dev/null
+++ b/arch/arm26/nwfpe/fpopcode.h
@@ -0,0 +1,390 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#ifndef __FPOPCODE_H__
+#define __FPOPCODE_H__
+/*
+ARM Floating Point Instruction Classes
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+|c o n d|1 1 0 P|U|u|W|L|   Rn  |v|  Fd |0|0|0|1|  o f f s e t  | CPDT
+|c o n d|1 1 0 P|U|w|W|L|   Rn  |x|  Fd |0|0|0|1|  o f f s e t  | CPDT
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+|c o n d|1 1 1 0|a|b|c|d|e|  Fn |j|  Fd |0|0|0|1|f|g|h|0|i|  Fm | CPDO
+|c o n d|1 1 1 0|a|b|c|L|e|  Fn |   Rd  |0|0|0|1|f|g|h|1|i|  Fm | CPRT
+|c o n d|1 1 1 0|a|b|c|1|e|  Fn |1|1|1|1|0|0|0|1|f|g|h|1|i|  Fm | comparisons
+| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | 
+CPDT            data transfer instructions
+                LDF, STF, LFM, SFM
+                
+CPDO            dyadic arithmetic instructions
+                ADF, MUF, SUF, RSF, DVF, RDF,
+                POW, RPW, RMF, FML, FDV, FRD, POL
+CPDO            monadic arithmetic instructions
+                MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP,
+                SIN, COS, TAN, ASN, ACS, ATN, URD, NRM
+                
+CPRT            joint arithmetic/data transfer instructions
+                FIX (arithmetic followed by load/store)
+                FLT (load/store followed by arithmetic)
+                CMF, CNF CMFE, CNFE (comparisons)
+                WFS, RFS (write/read floating point status register)
+                WFC, RFC (write/read floating point control register)
+cond            condition codes
+P               pre/post index bit: 0 = postindex, 1 = preindex
+U               up/down bit: 0 = stack grows down, 1 = stack grows up
+W               write back bit: 1 = update base register (Rn)
+L               load/store bit: 0 = store, 1 = load
+Rn              base register
+Rd              destination/source register             
+Fd              floating point destination register
+Fn              floating point source register
+Fm              floating point source register or floating point constant
+uv              transfer length (TABLE 1)
+wx              register count (TABLE 2)
+abcd            arithmetic opcode (TABLES 3 & 4)
+ef              destination size (rounding precision) (TABLE 5)
+gh              rounding mode (TABLE 6)
+j               dyadic/monadic bit: 0 = dyadic, 1 = monadic
+i               constant bit: 1 = constant (TABLE 6)
+*/
+/*
+TABLE 1
+-------------------------+---+---+---------+---------+
+|  Precision              | u | v | FPSR.EP | length  |
+-------------------------+---+---+---------+---------+
+| Single                  | 0 � 0 |    x    | 1 words |
+| Double                  | 1 � 1 |    x    | 2 words |
+| Extended                | 1 � 1 |    x    | 3 words |
+| Packed decimal          | 1 � 1 |    0    | 3 words |
+| Expanded packed decimal | 1 � 1 |    1    | 4 words |
+-------------------------+---+---+---------+---------+
+Note: x = don't care
+*/
+/*
+TABLE 2
+---+---+---------------------------------+
+| w | x | Number of registers to transfer |
+---+---+---------------------------------+
+| 0 � 1 |  1                              |
+| 1 � 0 |  2                              |
+| 1 � 1 |  3                              |
+| 0 � 0 |  4                              |
+---+---+---------------------------------+
+*/
+/*
+TABLE 3: Dyadic Floating Point Opcodes
+---+---+---+---+----------+-----------------------+-----------------------+
+| a | b | c | d | Mnemonic | Description           | Operation             |
+---+---+---+---+----------+-----------------------+-----------------------+
+| 0 | 0 | 0 | 0 | ADF      | Add                   | Fd := Fn + Fm         |
+| 0 | 0 | 0 | 1 | MUF      | Multiply              | Fd := Fn * Fm         |
+| 0 | 0 | 1 | 0 | SUF      | Subtract              | Fd := Fn - Fm         |
+| 0 | 0 | 1 | 1 | RSF      | Reverse subtract      | Fd := Fm - Fn         |
+| 0 | 1 | 0 | 0 | DVF      | Divide                | Fd := Fn / Fm         |
+| 0 | 1 | 0 | 1 | RDF      | Reverse divide        | Fd := Fm / Fn         |
+| 0 | 1 | 1 | 0 | POW      | Power                 | Fd := Fn ^ Fm         |
+| 0 | 1 | 1 | 1 | RPW      | Reverse power         | Fd := Fm ^ Fn         |
+| 1 | 0 | 0 | 0 | RMF      | Remainder             | Fd := IEEE rem(Fn/Fm) |
+| 1 | 0 | 0 | 1 | FML      | Fast Multiply         | Fd := Fn * Fm         |
+| 1 | 0 | 1 | 0 | FDV      | Fast Divide           | Fd := Fn / Fm         |
+| 1 | 0 | 1 | 1 | FRD      | Fast reverse divide   | Fd := Fm / Fn         |
+| 1 | 1 | 0 | 0 | POL      | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm)  |
+| 1 | 1 | 0 | 1 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 0 |          | undefined instruction | trap                  |
+| 1 | 1 | 1 | 1 |          | undefined instruction | trap                  |
+---+---+---+---+----------+-----------------------+-----------------------+
+Note: POW, RPW, POL are deprecated, and are available for backwards
+      compatibility only.
+*/
+/*
+TABLE 4: Monadic Floating Point Opcodes
+---+---+---+---+----------+-----------------------+-----------------------+
+| a | b | c | d | Mnemonic | Description           | Operation             |
+---+---+---+---+----------+-----------------------+-----------------------+
+| 0 | 0 | 0 | 0 | MVF      | Move                  | Fd := Fm              |
+| 0 | 0 | 0 | 1 | MNF      | Move negated          | Fd := - Fm            |
+| 0 | 0 | 1 | 0 | ABS      | Absolute value        | Fd := abs(Fm)         |
+| 0 | 0 | 1 | 1 | RND      | Round to integer      | Fd := int(Fm)         |
+| 0 | 1 | 0 | 0 | SQT      | Square root           | Fd := sqrt(Fm)        |
+| 0 | 1 | 0 | 1 | LOG      | Log base 10           | Fd := log10(Fm)       |
+| 0 | 1 | 1 | 0 | LGN      | Log base e            | Fd := ln(Fm)          |
+| 0 | 1 | 1 | 1 | EXP      | Exponent              | Fd := e ^ Fm          |
+| 1 | 0 | 0 | 0 | SIN      | Sine                  | Fd := sin(Fm)         |
+| 1 | 0 | 0 | 1 | COS      | Cosine                | Fd := cos(Fm)         |
+| 1 | 0 | 1 | 0 | TAN      | Tangent               | Fd := tan(Fm)         |
+| 1 | 0 | 1 | 1 | ASN      | Arc Sine              | Fd := arcsin(Fm)      |
+| 1 | 1 | 0 | 0 | ACS      | Arc Cosine            | Fd := arccos(Fm)      |
+| 1 | 1 | 0 | 1 | ATN      | Arc Tangent           | Fd := arctan(Fm)      |
+| 1 | 1 | 1 | 0 | URD      | Unnormalized round    | Fd := int(Fm)         |
+| 1 | 1 | 1 | 1 | NRM      | Normalize             | Fd := norm(Fm)        |
+---+---+---+---+----------+-----------------------+-----------------------+
+Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are
+      available for backwards compatibility only.
+*/
+/*
+TABLE 5
+-------------------------+---+---+
+|  Rounding Precision     | e | f |
+-------------------------+---+---+
+| IEEE Single precision   | 0 � 0 |
+| IEEE Double precision   | 0 � 1 |
+| IEEE Extended precision | 1 � 0 |
+| undefined (trap)        | 1 � 1 |
+-------------------------+---+---+
+*/
+/*
+TABLE 6
++---------------------------------+---+---+
+|  Rounding Mode                  | g | h |
++---------------------------------+---+---+
+| Round to nearest (default)      | 0 | 0 |
+| Round toward plus infinity      | 0 | 1 |
+| Round toward negative infinity  | 1 | 0 |
+| Round toward zero               | 1 | 1 |
++---------------------------------+---+---+
+*/
+/*
+===
+=== Definitions for load and store instructions
+===
+*/
+/* bit masks (the P, U, W and L bits of a CPDT instruction) */
+#define BIT_PREINDEX    0x01000000
+#define BIT_UP          0x00800000
+#define BIT_WRITE_BACK  0x00200000
+#define BIT_LOAD        0x00100000
+/* masks for load/store */
+#define MASK_CPDT               0x0c000000  /* data transfer opcode */
+#define MASK_OFFSET             0x000000ff
+#define MASK_TRANSFER_LENGTH    0x00408000
+#define MASK_REGISTER_COUNT     MASK_TRANSFER_LENGTH
+#define MASK_COPROCESSOR        0x00000f00
+/* Tests for transfer length */
+#define TRANSFER_SINGLE         0x00000000
+#define TRANSFER_DOUBLE         0x00008000
+#define TRANSFER_EXTENDED       0x00400000
+#define TRANSFER_PACKED         MASK_TRANSFER_LENGTH
+/*
+ * All function-like macros below parenthesize their argument so that an
+ * expression such as (a | b) can be passed safely.
+ */
+/* Get the coprocessor number from the opcode. */
+#define getCoprocessorNumber(opcode)    (((opcode) & MASK_COPROCESSOR) >> 8)
+/* Get the offset from the opcode. */
+#define getOffset(opcode)               ((opcode) & MASK_OFFSET)
+/* Tests for specific data transfer load/store opcodes. */
+#define TEST_OPCODE(opcode,mask)        (((opcode) & (mask)) == (mask))
+#define LOAD_OP(opcode)   TEST_OPCODE((opcode),MASK_CPDT | BIT_LOAD)
+#define STORE_OP(opcode)  (((opcode) & (MASK_CPDT | BIT_LOAD)) == MASK_CPDT)
+/* Coprocessor number 1 selects LDF/STF, 2 selects LFM/SFM. */
+#define LDF_OP(opcode)  (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define LFM_OP(opcode)  (LOAD_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+#define STF_OP(opcode)  (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 1))
+#define SFM_OP(opcode)  (STORE_OP(opcode) && (getCoprocessorNumber(opcode) == 2))
+#define PREINDEXED(opcode)              (((opcode) & BIT_PREINDEX) != 0)
+#define POSTINDEXED(opcode)             (((opcode) & BIT_PREINDEX) == 0)
+#define BIT_UP_SET(opcode)              (((opcode) & BIT_UP) != 0)
+/* Tests BIT_UP: there is no separate "down" bit, down is BIT_UP clear. */
+#define BIT_UP_CLEAR(opcode)            (((opcode) & BIT_UP) == 0)
+#define WRITE_BACK(opcode)              (((opcode) & BIT_WRITE_BACK) != 0)
+#define LOAD(opcode)                    (((opcode) & BIT_LOAD) != 0)
+#define STORE(opcode)                   (((opcode) & BIT_LOAD) == 0)
+/*
+===
+=== Definitions for arithmetic instructions
+===
+*/
+/* bit masks */
+#define BIT_MONADIC     0x00008000
+#define BIT_CONSTANT    0x00000008
+/* Arguments are parenthesized so compound expressions can be passed. */
+#define CONSTANT_FM(opcode)             (((opcode) & BIT_CONSTANT) != 0)
+#define MONADIC_INSTRUCTION(opcode)     (((opcode) & BIT_MONADIC) != 0)
+/* instruction identification masks */
+#define MASK_CPDO               0x0e000000  /* arithmetic opcode */
+#define MASK_ARITHMETIC_OPCODE  0x00f08000
+#define MASK_DESTINATION_SIZE   0x00080080
+/* dyadic arithmetic opcodes (values of MASK_ARITHMETIC_OPCODE with
+   BIT_MONADIC clear). */
+#define ADF_CODE        0x00000000
+#define MUF_CODE        0x00100000
+#define SUF_CODE        0x00200000
+#define RSF_CODE        0x00300000
+#define DVF_CODE        0x00400000
+#define RDF_CODE        0x00500000
+#define POW_CODE        0x00600000
+#define RPW_CODE        0x00700000
+#define RMF_CODE        0x00800000
+#define FML_CODE        0x00900000
+#define FDV_CODE        0x00a00000
+#define FRD_CODE        0x00b00000
+#define POL_CODE        0x00c00000
+/* 0x00d00000 is an invalid dyadic arithmetic opcode */
+/* 0x00e00000 is an invalid dyadic arithmetic opcode */
+/* 0x00f00000 is an invalid dyadic arithmetic opcode */
+/* monadic arithmetic opcodes (same field, with BIT_MONADIC set). */
+#define MVF_CODE        0x00008000
+#define MNF_CODE        0x00108000
+#define ABS_CODE        0x00208000
+#define RND_CODE        0x00308000
+#define SQT_CODE        0x00408000
+#define LOG_CODE        0x00508000
+#define LGN_CODE        0x00608000
+#define EXP_CODE        0x00708000
+#define SIN_CODE        0x00808000
+#define COS_CODE        0x00908000
+#define TAN_CODE        0x00a08000
+#define ASN_CODE        0x00b08000
+#define ACS_CODE        0x00c08000
+#define ATN_CODE        0x00d08000
+#define URD_CODE        0x00e08000
+#define NRM_CODE        0x00f08000
+/*
+===
+=== Definitions for register transfer and comparison instructions
+===
+*/
+/* MASK_CPRT selects the instruction class bits plus bit 4, which is set
+   for register transfer instructions (see the CPRT layout above). */
+#define MASK_CPRT               0x0e000010  /* register transfer opcode */
+/* The *_CODE values below are values of the MASK_CPRT_CODE field. */
+#define MASK_CPRT_CODE          0x00f00000
+#define FLT_CODE                0x00000000
+#define FIX_CODE                0x00100000
+#define WFS_CODE                0x00200000
+#define RFS_CODE                0x00300000
+#define WFC_CODE                0x00400000
+#define RFC_CODE                0x00500000
+/* comparisons */
+#define CMF_CODE                0x00900000
+#define CNF_CODE                0x00b00000
+#define CMFE_CODE               0x00d00000
+#define CNFE_CODE               0x00f00000
+/*
+===
+=== Common definitions
+===
+*/
+/* register masks */
+#define MASK_Rd         0x0000f000
+#define MASK_Rn         0x000f0000
+#define MASK_Fd         0x00007000
+#define MASK_Fm         0x00000007
+#define MASK_Fn         0x00070000
+/* condition code masks */
+#define CC_MASK         0xf0000000
+/* individual flag bits within CC_MASK */
+#define CC_NEGATIVE     0x80000000
+#define CC_ZERO         0x40000000
+#define CC_CARRY        0x20000000
+#define CC_OVERFLOW     0x10000000
+/* values of the condition field held in the top four opcode bits */
+#define CC_EQ           0x00000000
+#define CC_NE           0x10000000
+#define CC_CS           0x20000000
+#define CC_HS           CC_CS
+#define CC_CC           0x30000000
+#define CC_LO           CC_CC
+#define CC_MI           0x40000000
+#define CC_PL           0x50000000
+#define CC_VS           0x60000000
+#define CC_VC           0x70000000
+#define CC_HI           0x80000000
+#define CC_LS           0x90000000
+#define CC_GE           0xa0000000
+#define CC_LT           0xb0000000
+#define CC_GT           0xc0000000
+#define CC_LE           0xd0000000
+#define CC_AL           0xe0000000
+#define CC_NV           0xf0000000
+/* rounding masks/values */
+#define MASK_ROUNDING_MODE      0x00000060
+#define ROUND_TO_NEAREST        0x00000000
+#define ROUND_TO_PLUS_INFINITY  0x00000020
+#define ROUND_TO_MINUS_INFINITY 0x00000040
+#define ROUND_TO_ZERO           0x00000060
+/* same bits as MASK_DESTINATION_SIZE */
+#define MASK_ROUNDING_PRECISION 0x00080080
+#define ROUND_SINGLE            0x00000000
+#define ROUND_DOUBLE            0x00000080
+#define ROUND_EXTENDED          0x00080000
+/*
+ * Field extraction helpers.  Each macro parenthesizes its argument so that
+ * a compound expression (e.g. a | b) is masked/shifted as a whole.
+ */
+/* Get the condition code from the opcode. */
+#define getCondition(opcode)            ((opcode) >> 28)
+/* Get the source register from the opcode. */
+#define getRn(opcode)                   (((opcode) & MASK_Rn) >> 16)
+/* Get the destination floating point register from the opcode. */
+#define getFd(opcode)                   (((opcode) & MASK_Fd) >> 12)
+/* Get the first source floating point register from the opcode. */
+#define getFn(opcode)           (((opcode) & MASK_Fn) >> 16)
+/* Get the second source floating point register from the opcode. */
+#define getFm(opcode)           ((opcode) & MASK_Fm)
+/* Get the destination register from the opcode. */
+#define getRd(opcode)           (((opcode) & MASK_Rd) >> 12)
+/* Get the rounding mode from the opcode. */
+#define getRoundingMode(opcode)         (((opcode) & MASK_ROUNDING_MODE) >> 5)
+static inline const floatx80 getExtendedConstant(const unsigned int nIndex)
+{
+   /* Table is defined in fpopcode.c; nIndex is used without a range check. */
+   extern const floatx80 floatx80Constant[];
+   return *(floatx80Constant + nIndex);
+}
+static inline const float64 getDoubleConstant(const unsigned int nIndex)
+{
+   /* Table is defined in fpopcode.c; nIndex is used without a range check. */
+   extern const float64 float64Constant[];
+   return *(float64Constant + nIndex);
+}
+static inline const float32 getSingleConstant(const unsigned int nIndex)
+{
+   /* Table is defined in fpopcode.c; nIndex is used without a range check. */
+   extern const float32 float32Constant[];
+   return *(float32Constant + nIndex);
+}
+/* Opcode field decoders implemented in fpopcode.c. */
+extern unsigned int getRegisterCount(const unsigned int opcode);
+extern unsigned int getDestinationSize(const unsigned int opcode);
+#endif
diff --git a/arch/arm26/nwfpe/fpsr.h b/arch/arm26/nwfpe/fpsr.h
new file mode 100644
index 000000000000..6dafb0f5243c
--- /dev/null
+++ b/arch/arm26/nwfpe/fpsr.h
@@ -0,0 +1,108 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.com, 1998-1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#ifndef __FPSR_H__
+#define __FPSR_H__
+/*
+The FPSR is a 32 bit register consisting of 4 parts, each exactly
+one byte.
+        SYSTEM ID
+        EXCEPTION TRAP ENABLE BYTE
+        SYSTEM CONTROL BYTE
+        CUMULATIVE EXCEPTION FLAGS BYTE
+        
+The FPCR is a 32 bit register consisting of bit flags.
+*/
+/* SYSTEM ID
+------------
+Note: the system id byte is read only  */
+typedef unsigned int FPSR;  /* type for floating point status register */
+typedef unsigned int FPCR;  /* type for floating point control register */
+#define MASK_SYSID              0xff000000
+#define BIT_HARDWARE            0x80000000
+#define FP_EMULATOR             0x01000000      /* System ID for emulator */ 
+#define FP_ACCELERATOR          0x81000000      /* System ID for FPA11 */
+/* EXCEPTION TRAP ENABLE BYTE
+-----------------------------
+Each enable bit is the matching cumulative exception flag (below)
+shifted left by 16. */
+#define MASK_TRAP_ENABLE        0x00ff0000
+#define MASK_TRAP_ENABLE_STRICT 0x001f0000
+#define BIT_IXE         0x00100000   /* inexact exception enable */
+#define BIT_UFE         0x00080000   /* underflow exception enable */
+#define BIT_OFE         0x00040000   /* overflow exception enable */
+#define BIT_DZE         0x00020000   /* divide by zero exception enable */
+#define BIT_IOE         0x00010000   /* invalid operation exception enable */
+/* SYSTEM CONTROL BYTE
+---------------------- */
+#define MASK_SYSTEM_CONTROL     0x0000ff00
+#define MASK_TRAP_STRICT        0x00001f00
+#define BIT_AC  0x00001000      /* use alternative C-flag definition
+                                   for compares */
+#define BIT_EP  0x00000800      /* use expanded packed decimal format */
+#define BIT_SO  0x00000400      /* select synchronous operation of FPA */
+#define BIT_NE  0x00000200      /* NaN exception bit */
+#define BIT_ND  0x00000100      /* no denormalized numbers bit */
+/* CUMULATIVE EXCEPTION FLAGS BYTE
+---------------------------------- */
+#define MASK_EXCEPTION_FLAGS            0x000000ff
+#define MASK_EXCEPTION_FLAGS_STRICT     0x0000001f
+#define BIT_IXC         0x00000010      /* inexact exception flag */
+#define BIT_UFC         0x00000008      /* underflow exception flag */
+#define BIT_OFC         0x00000004      /* overflow exception flag */
+#define BIT_DZC         0x00000002      /* divide by zero exception flag */
+#define BIT_IOC         0x00000001      /* invalid operation exception flag */
+/* Floating Point Control Register
+----------------------------------*/
+#define BIT_RU          0x80000000      /* rounded up bit */
+#define BIT_IE          0x10000000      /* inexact bit */
+#define BIT_MO          0x08000000      /* mantissa overflow bit */
+#define BIT_EO          0x04000000      /* exponent overflow bit */
+#define BIT_SB          0x00000800      /* store bounce */
+#define BIT_AB          0x00000400      /* arithmetic bounce */
+#define BIT_RE          0x00000200      /* rounding exception */
+#define BIT_DA          0x00000100      /* disable FPA */
+#define MASK_OP         0x00f08010      /* AU operation code */
+#define MASK_PR         0x00080080      /* AU precision */
+#define MASK_S1         0x00070000      /* AU source register 1 */
+#define MASK_S2         0x00000007      /* AU source register 2 */
+#define MASK_DS         0x00007000      /* AU destination register */
+#define MASK_RM         0x00000060      /* AU rounding mode */
+#define MASK_ALU        0x9cfff2ff      /* only ALU can write these bits */
+#define MASK_RESET      0x00000d00      /* bits set on reset, all others cleared */
+#define MASK_WFC        MASK_RESET
+/* Parenthesized so the complement stays a single operand wherever the
+   macro is expanded. */
+#define MASK_RFC        (~MASK_RESET)
+#endif
+#endif
diff --git a/arch/arm26/nwfpe/milieu.h b/arch/arm26/nwfpe/milieu.h
new file mode 100644
index 000000000000..a3892ab2dca4
--- /dev/null
+++ b/arch/arm26/nwfpe/milieu.h
@@ -0,0 +1,48 @@
+/*
+===============================================================================
+This C header file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+===============================================================================
+*/
+/*
+-------------------------------------------------------------------------------
+Include common integer types and flags.
+-------------------------------------------------------------------------------
+*/
+#include "ARM-gcc.h"
+/*
+-------------------------------------------------------------------------------
+Symbolic Boolean literals.
+-------------------------------------------------------------------------------
+*/
+/* Anonymous enum: FALSE and TRUE are plain int constants, not a type. */
+enum {
+    FALSE = 0,
+    TRUE  = 1
+};
diff --git a/arch/arm26/nwfpe/single_cpdo.c b/arch/arm26/nwfpe/single_cpdo.c
new file mode 100644
index 000000000000..5cdcddbb8999
--- /dev/null
+++ b/arch/arm26/nwfpe/single_cpdo.c
@@ -0,0 +1,255 @@
+/*
+    NetWinder Floating Point Emulator
+    (c) Rebel.COM, 1998,1999
+    Direct questions, comments to Scott Bambrough <scottb@netwinder.org>
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+#include "fpa11.h"
+#include "softfloat.h"
+#include "fpopcode.h"
+/*
+    Prototypes for the transcendental single-precision helpers.  The only
+    definitions of these functions in this file, and every opcode case that
+    would call them, are enclosed in `#if 0' blocks, so none of them is
+    currently compiled or used.
+*/
+float32 float32_exp(float32 Fm);
+float32 float32_ln(float32 Fm);
+float32 float32_sin(float32 rFm);
+float32 float32_cos(float32 rFm);
+float32 float32_arcsin(float32 rFm);
+float32 float32_arctan(float32 rFm);
+float32 float32_log(float32 rFm);
+float32 float32_tan(float32 rFm);
+float32 float32_arccos(float32 rFm);
+float32 float32_pow(float32 rFn,float32 rFm);
+float32 float32_pol(float32 rFn,float32 rFm);
+/*
+    Execute the arithmetic operation encoded in `opcode' on single-precision
+    operands and store the result in destination register Fd.
+    Returns 1 when the opcode was handled (Fd is then tagged typeSingle) and
+    0 when a source register does not currently hold a single-precision
+    value or the opcode is not one of the cases handled below.
+*/
+unsigned int SingleCPDO(const unsigned int opcode)
+{
+   FPA11 *fpa11 = GET_FPA11();
+   float32 rFm, rFn = 0; //FIXME - should be zero?
+   unsigned int Fd, Fm, Fn, nRc = 1;
+   /* Second operand: either an immediate constant or register Fm. */
+   Fm = getFm(opcode);
+   if (CONSTANT_FM(opcode))
+   {
+     rFm = getSingleConstant(Fm);
+   }
+   else
+   {  
+     switch (fpa11->fType[Fm])
+     {
+        case typeSingle:
+          rFm = fpa11->fpreg[Fm].fSingle;
+        break;
+        
+        /* Register holds something other than a single: not handled here. */
+        default: return 0;
+     }
+   }
+   /* Only dyadic instructions have a first operand Fn; for monadic ones
+      rFn keeps its initial value of 0 and is not used by the cases below. */
+   if (!MONADIC_INSTRUCTION(opcode))
+   {
+      Fn = getFn(opcode);
+      switch (fpa11->fType[Fn])
+      {
+        case typeSingle:
+          rFn = fpa11->fpreg[Fn].fSingle;
+        break;
+        default: return 0;
+      }
+   }
+   Fd = getFd(opcode);
+   switch (opcode & MASK_ARITHMETIC_OPCODE)
+   {
+      /* dyadic opcodes */
+      case ADF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_add(rFn,rFm);
+      break;
+      /* MUF and FML share one implementation. */
+      case MUF_CODE:
+      case FML_CODE:
+        fpa11->fpreg[Fd].fSingle = float32_mul(rFn,rFm);
+      break;
+      case SUF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sub(rFn,rFm);
+      break;
+      /* RSF is SUF with the operands swapped. */
+      case RSF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sub(rFm,rFn);
+      break;
+      /* DVF and FDV share one implementation. */
+      case DVF_CODE:
+      case FDV_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_div(rFn,rFm);
+      break;
+      /* RDF and FRD: division with the operands swapped. */
+      case RDF_CODE:
+      case FRD_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_div(rFm,rFn);
+      break;
+#if 0
+      case POW_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_pow(rFn,rFm);
+      break;
+      case RPW_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_pow(rFm,rFn);
+      break;
+#endif
+      case RMF_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_rem(rFn,rFm);
+      break;
+#if 0
+      case POL_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_pol(rFn,rFm);
+      break;
+#endif
+      /* monadic opcodes */
+      case MVF_CODE:
+         fpa11->fpreg[Fd].fSingle = rFm;
+      break;
+      case MNF_CODE:
+         /* Negate by toggling bit 31, the float32 sign bit. */
+         rFm ^= 0x80000000;
+         fpa11->fpreg[Fd].fSingle = rFm;
+      break;
+      case ABS_CODE:
+         /* Absolute value by clearing bit 31, the float32 sign bit. */
+         rFm &= 0x7fffffff;
+         fpa11->fpreg[Fd].fSingle = rFm;
+      break;
+      /* RND and URD are both implemented as round-to-integer. */
+      case RND_CODE:
+      case URD_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_round_to_int(rFm);
+      break;
+      case SQT_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sqrt(rFm);
+      break;
+#if 0
+      case LOG_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_log(rFm);
+      break;
+      case LGN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_ln(rFm);
+      break;
+      case EXP_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_exp(rFm);
+      break;
+      case SIN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_sin(rFm);
+      break;
+      case COS_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_cos(rFm);
+      break;
+      case TAN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_tan(rFm);
+      break;
+      case ASN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_arcsin(rFm);
+      break;
+      case ACS_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_arccos(rFm);
+      break;
+      case ATN_CODE:
+         fpa11->fpreg[Fd].fSingle = float32_arctan(rFm);
+      break;
+#endif
+      /* NRM stores no value:  the contents of Fd are left as they were, and
+         only its type tag is set to typeSingle after the switch.
+         NOTE(review): confirm this is intended when Fd differs from Fm. */
+      case NRM_CODE:
+      break;
+      
+      /* Any other opcode (including those compiled out above) is reported
+         as unhandled. */
+      default:
+      {
+        nRc = 0;
+      }
+   }
+   /* Tag the destination as single precision only if a case matched. */
+   if (0 != nRc) fpa11->fType[Fd] = typeSingle;
+   return nRc;
+}
+#if 0
+/*
+    Placeholder transcendental routines, excluded from compilation.  The
+    "series" functions have empty bodies (no return statement) and would
+    have to be implemented before this block could be enabled.
+*/
+float32 float32_exp(float32 Fm)
+{
+//series
+}
+float32 float32_ln(float32 Fm)
+{
+//series
+}
+float32 float32_sin(float32 rFm)
+{
+//series
+}
+float32 float32_cos(float32 rFm)
+{
+//series
+}
+float32 float32_arcsin(float32 rFm)
+{
+//series
+}
+float32 float32_arctan(float32 rFm)
+{
+  //series
+}
+/* The only statement is commented out, so this body is empty as well. */
+float32 float32_arccos(float32 rFm)
+{
+   //return float32_sub(halfPi,float32_arcsin(rFm));
+}
+/* ln(rFm) divided by constant number 7.
+   NOTE(review): confirm getSingleConstant(7) is the intended divisor; a
+   base-10 logarithm would need ln(10) here. */
+float32 float32_log(float32 rFm)
+{
+  return float32_div(float32_ln(rFm),getSingleConstant(7));
+}
+/* sin(rFm) / cos(rFm). */
+float32 float32_tan(float32 rFm)
+{
+  return float32_div(float32_sin(rFm),float32_cos(rFm));
+}
+/* exp(rFm * ln(rFn)). */
+float32 float32_pow(float32 rFn,float32 rFm)
+{
+  return float32_exp(float32_mul(rFm,float32_ln(rFn))); 
+}
+/* arctan(rFn / rFm). */
+float32 float32_pol(float32 rFn,float32 rFm)
+{
+  return float32_arctan(float32_div(rFn,rFm)); 
+}
+#endif
diff --git a/arch/arm26/nwfpe/softfloat-macros b/arch/arm26/nwfpe/softfloat-macros
new file mode 100644
index 000000000000..5469989f2c5e
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat-macros
@@ -0,0 +1,740 @@
+/*
+===============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+===============================================================================
+*/
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'.  If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1.  The value of `count'
+can be arbitrarily large; in particular, if `count' is greater than 32, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
+{
+    bits32 shiftedOff;
+    /* Nothing to shift:  pass the value through untouched. */
+    if ( count == 0 ) {
+        *zPtr = a;
+        return;
+    }
+    /* Every bit falls off the end, so only the sticky bit survives. */
+    if ( 32 <= count ) {
+        *zPtr = ( a != 0 );
+        return;
+    }
+    /* Isolate the low-order bits that the right shift discards; if any of
+       them is set, bit 0 of the result is forced to 1. */
+    shiftedOff = a<<( ( - count ) & 31 );
+    *zPtr = ( a>>count ) | ( shiftedOff != 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Shifts `a' right by the number of bits given in `count'.  If any nonzero
+bits are shifted off, they are ``jammed'' into the least significant bit of
+the result by setting the least significant bit to 1.  The value of `count'
+can be arbitrarily large; in particular, if `count' is greater than 64, the
+result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+The result is stored in the location pointed to by `zPtr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
+{
+    bits64 shiftedOff, result;
+ __asm__("@shift64RightJamming -- start");   
+    if ( count == 0 ) {
+        /* No shift, hence nothing to jam. */
+        result = a;
+    }
+    else {
+        if ( count < 64 ) {
+            /* Bits discarded by the shift, moved to the top of the word. */
+            shiftedOff = a<<( ( - count ) & 63 );
+            result = a>>count;
+        }
+        else {
+            /* The whole operand is shifted off. */
+            shiftedOff = a;
+            result = 0;
+        }
+        /* Jam:  any lost nonzero bit sets the least significant bit. */
+        result |= ( shiftedOff != 0 );
+    }
+ __asm__("@shift64RightJamming -- end");   
+    *zPtr = result;
+}
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+_plus_ the number of bits given in `count'.  The shifted result is at most
+64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+bits shifted off form a second 64-bit result as follows:  The _last_ bit
+shifted off is the most-significant bit of the extra result, and the other
+63 bits of the extra result are all zero if and only if _all_but_the_last_
+bits shifted off were all zero.  This extra result is stored in the location
+pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0' and `a1' are considered to form a
+fixed-point value with binary point between `a0' and `a1'.  This fixed-point
+value is shifted right by the number of bits given in `count', and the
+integer part of the result is returned at the location pointed to by
+`z0Ptr'.  The fractional part of the result may be slightly corrupted as
+described above, and is returned at the location pointed to by `z1Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift64ExtraRightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 integerPart = 0;
+    bits64 extra;
+    if ( count == 0 ) {
+        /* Unshifted:  both words pass straight through. */
+        integerPart = a0;
+        extra = a1;
+    }
+    else if ( count < 64 ) {
+        /* The low bits of `a0' become the top of the extra word; `a1'
+           survives only as a sticky bit. */
+        integerPart = a0>>count;
+        extra = ( a0<<( ( - count ) & 63 ) ) | ( a1 != 0 );
+    }
+    else if ( count == 64 ) {
+        /* `a0' moves wholesale into the extra word. */
+        extra = a0 | ( a1 != 0 );
+    }
+    else {
+        /* Everything is shifted beyond the extra word. */
+        extra = ( ( a0 | a1 ) != 0 );
+    }
+    *z1Ptr = extra;
+    *z0Ptr = integerPart;
+}
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' can be arbitrarily large; in particular, if `count' is greater
+than 128, the result will be 0.  The result is broken into two 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128Right(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 z0, z1;
+    /* Left-shift amount that recovers the bits crossing the 64-bit word
+       boundary; equals 64 - count for 0 < count < 64. */
+    int8 negCount = ( - count ) & 63;
+    if ( count == 0 ) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if ( count < 64 ) {
+        z1 = ( a0<<negCount ) | ( a1>>count );
+        z0 = a0>>count;
+    }
+    else {
+        /* 64 <= count:  the high word is shifted out entirely and the low
+           word receives whatever is left of `a0'.  The bound has to be 128;
+           a test of `count < 64' can never succeed in this branch and
+           zeroed the result for every count from 64 through 127. */
+        z1 = ( count < 128 ) ? ( a0>>( count & 63 ) ) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+number of bits given in `count'.  If any nonzero bits are shifted off, they
+are ``jammed'' into the least significant bit of the result by setting the
+least significant bit to 1.  The value of `count' can be arbitrarily large;
+in particular, if `count' is greater than 128, the result will be either 0
+or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+nonzero.  The result is broken into two 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128RightJamming(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 high = 0;
+    bits64 low;
+    int8 backShift = ( - count ) & 63;
+    if ( count == 0 ) {
+        /* Unshifted:  nothing is lost, nothing is jammed. */
+        high = a0;
+        low = a1;
+    }
+    else if ( count < 64 ) {
+        /* Bits cross from `a0' into the low word; the bits dropped from
+           `a1' are jammed into bit 0. */
+        high = a0>>count;
+        low = ( a0<<backShift ) | ( a1>>count ) | ( ( a1<<backShift ) != 0 );
+    }
+    else if ( count == 64 ) {
+        /* Whole-word move; `a1' survives only as the sticky bit. */
+        low = a0 | ( a1 != 0 );
+    }
+    else if ( count < 128 ) {
+        /* Part of `a0' remains; the rest of `a0' and all of `a1' are lost. */
+        low = ( a0>>( count & 63 ) ) | ( ( ( a0<<backShift ) | a1 ) != 0 );
+    }
+    else {
+        /* Everything is shifted off. */
+        low = ( ( a0 | a1 ) != 0 );
+    }
+    *z1Ptr = low;
+    *z0Ptr = high;
+}
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+by 64 _plus_ the number of bits given in `count'.  The shifted result is
+at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+off form a third 64-bit result as follows:  The _last_ bit shifted off is
+the most-significant bit of the extra result, and the other 63 bits of the
+extra result are all zero if and only if _all_but_the_last_ bits shifted off
+were all zero.  This extra result is stored in the location pointed to by
+`z2Ptr'.  The value of `count' can be arbitrarily large.
+    (This routine makes more sense if `a0', `a1', and `a2' are considered
+to form a fixed-point value with binary point between `a1' and `a2'.  This
+fixed-point value is shifted right by the number of bits given in `count',
+and the integer part of the result is returned at the locations pointed to
+by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+corrupted as described above, and is returned at the location pointed to by
+`z2Ptr'.)
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shift128ExtraRightJamming(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 z0, z1, z2;
+    /* Complementary left-shift amount (64 - count, modulo 64). */
+    int8 negCount = ( - count ) & 63;
+    if ( count == 0 ) {
+        /* No shift:  all three words pass through unchanged. */
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if ( count < 64 ) {
+            /* The bits dropped from `a1' form the top of the extra word. */
+            z2 = a1<<negCount;
+            z1 = ( a0<<negCount ) | ( a1>>count );
+            z0 = a0>>count;
+        }
+        else {
+            if ( count == 64 ) {
+                /* Exact one-word shift. */
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                /* Beyond 64 bits `a1' is not copied into any result word;
+                   it is merged into `a2' so that it still feeds the sticky
+                   bit computed at the end. */
+                a2 |= a1;
+                if ( count < 128 ) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>( count & 63 );
+                }
+                else {
+                    /* At exactly 128 `a0' becomes the extra word; past
+                       that it too is reduced to a single sticky bit. */
+                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        /* Jam:  record any nonzero bits below the extra word in its bit 0. */
+        z2 |= ( a2 != 0 );
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+/*
+-------------------------------------------------------------------------------
+Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+number of bits given in `count'.  Any bits shifted off are lost.  The value
+of `count' must be less than 64.  The result is broken into two 64-bit
+pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift128Left(
+     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 high = a0;
+    /* A zero count must skip the cross-word term:  its shift amount would
+       be masked to 0 and `a1' would be OR-ed whole into the high word. */
+    if ( count != 0 ) {
+        high = ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+    }
+    *z1Ptr = a1<<count;
+    *z0Ptr = high;
+}
+/*
+-------------------------------------------------------------------------------
+Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+by the number of bits given in `count'.  Any bits shifted off are lost.
+The value of `count' must be less than 64.  The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ shortShift192Left(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     int16 count,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 high = a0<<count;
+    bits64 middle = a1<<count;
+    if ( count > 0 ) {
+        /* Carry the bits that leave the top of each lower word into the
+           bottom of the word above it. */
+        int8 backShift = ( ( - count ) & 63 );
+        high |= a1>>backShift;
+        middle |= a2>>backShift;
+    }
+    *z2Ptr = a2<<count;
+    *z1Ptr = middle;
+    *z0Ptr = high;
+}
+/*
+-------------------------------------------------------------------------------
+Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+any carry out is lost.  The result is broken into two 64-bit pieces which
+are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 lowSum = a1 + b1;
+    /* The low-word addition wrapped exactly when the sum is below an addend. */
+    bits64 carry = ( lowSum < a1 );
+    *z1Ptr = lowSum;
+    *z0Ptr = a0 + b0 + carry;
+}
+/*
+-------------------------------------------------------------------------------
+Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+modulo 2^192, so any carry out is lost.  The result is broken into three
+64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+`z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ add192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 low = a2 + b2;
+    bits64 middle = a1 + b1;
+    bits64 high = a0 + b0;
+    /* Carry out of the middle-word addition. */
+    if ( middle < a1 ) {
+        ++high;
+    }
+    /* Carry out of the low-word addition; it may ripple through the middle
+       word into the high word. */
+    if ( low < a2 ) {
+        ++middle;
+        if ( middle == 0 ) {
+            ++high;
+        }
+    }
+    *z2Ptr = low;
+    *z1Ptr = middle;
+    *z0Ptr = high;
+}
+/*
+-------------------------------------------------------------------------------
+Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+2^128, so any borrow out (carry out) is lost.  The result is broken into two
+64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+`z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub128(
+     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    bits64 high = a0 - b0;
+    /* The low-word subtraction borrows when its minuend is the smaller. */
+    if ( a1 < b1 ) {
+        --high;
+    }
+    *z1Ptr = a1 - b1;
+    *z0Ptr = high;
+}
+/*
+-------------------------------------------------------------------------------
+Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+result is broken into three 64-bit pieces which are stored at the locations
+pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ sub192(
+     bits64 a0,
+     bits64 a1,
+     bits64 a2,
+     bits64 b0,
+     bits64 b1,
+     bits64 b2,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 low = a2 - b2;
+    bits64 middle = a1 - b1;
+    bits64 high = a0 - b0;
+    /* Borrow taken by the middle-word subtraction. */
+    if ( a1 < b1 ) {
+        --high;
+    }
+    /* Borrow taken by the low-word subtraction; when the middle word is
+       already zero the borrow ripples on into the high word. */
+    if ( a2 < b2 ) {
+        if ( middle == 0 ) {
+            --high;
+        }
+        --middle;
+    }
+    *z2Ptr = low;
+    *z1Ptr = middle;
+    *z0Ptr = high;
+}
+/*
+-------------------------------------------------------------------------------
+Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+into two 64-bit pieces which are stored at the locations pointed to by
+`z0Ptr' and `z1Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
+{
+    /* Split both operands into 32-bit halves. */
+    bits32 aLo = a, aHi = a>>32;
+    bits32 bLo = b, bHi = b>>32;
+    /* Four 32x32->64 partial products. */
+    bits64 lowProduct = ( (bits64) aLo ) * bLo;
+    bits64 crossA = ( (bits64) aLo ) * bHi;
+    bits64 crossB = ( (bits64) aHi ) * bLo;
+    bits64 highProduct = ( (bits64) aHi ) * bHi;
+    /* Sum the two cross terms; a wrap-around is worth 2^32 in the high
+       word because the cross terms carry a weight of 2^32. */
+    bits64 cross = crossA + crossB;
+    if ( cross < crossB ) {
+        highProduct += ( (bits64) 1 )<<32;
+    }
+    highProduct += cross>>32;
+    /* Fold the low half of the cross sum into the low word, with carry. */
+    cross <<= 32;
+    lowProduct += cross;
+    if ( lowProduct < cross ) {
+        ++highProduct;
+    }
+    *z1Ptr = lowProduct;
+    *z0Ptr = highProduct;
+}
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' by `b' to
+obtain a 192-bit product.  The product is broken into three 64-bit pieces
+which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+`z2Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128By64To192(
+     bits64 a0,
+     bits64 a1,
+     bits64 b,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr
+ )
+{
+    bits64 top, upperLow, middle, bottom;
+    /* High word times `b':  top:upperLow. */
+    mul64To128( a0, b, &top, &upperLow );
+    /* Low word times `b':  middle:bottom. */
+    mul64To128( a1, b, &middle, &bottom );
+    /* The two products overlap in the middle word; add them there and let
+       the carry propagate into the top word. */
+    add128( top, upperLow, 0, middle, &top, &middle );
+    *z2Ptr = bottom;
+    *z1Ptr = middle;
+    *z0Ptr = top;
+}
+/*
+-------------------------------------------------------------------------------
+Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+product.  The product is broken into four 64-bit pieces which are stored at
+the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+-------------------------------------------------------------------------------
+*/
+INLINE void
+ mul128To256(
+     bits64 a0,
+     bits64 a1,
+     bits64 b0,
+     bits64 b1,
+     bits64 *z0Ptr,
+     bits64 *z1Ptr,
+     bits64 *z2Ptr,
+     bits64 *z3Ptr
+ )
+{
+    bits64 z0, z1, z2, z3;
+    bits64 more1, more2;
+    /* a1*b1 supplies the two least significant words (z2:z3). */
+    mul64To128( a1, b1, &z2, &z3 );
+    /* a1*b0 is one word higher:  its low half is added into z2 and the
+       carry goes into its high half, which becomes z1. */
+    mul64To128( a1, b0, &z1, &more2 );
+    add128( z1, more2, 0, z2, &z1, &z2 );
+    /* a0*b0 is two words higher:  its low half is added into z1 and the
+       carry goes into its high half, which becomes z0. */
+    mul64To128( a0, b0, &z0, &more1 );
+    add128( z0, more1, 0, z1, &z0, &z1 );
+    /* a0*b1 has the same weight as a1*b0:  the low half goes into z2, then
+       the high half plus carry is added into z0:z1. */
+    mul64To128( a0, b1, &more1, &more2 );
+    add128( more1, more2, 0, z2, &more1, &z2 );
+    add128( z0, z1, 0, more1, &z0, &z1 );
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the 64-bit integer quotient obtained by dividing
+`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+toward zero, the approximation returned lies between q and q + 2 inclusive.
+If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+unsigned integer is returned.
+-------------------------------------------------------------------------------
+*/
+static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
+{
+    bits64 b0, b1;
+    bits64 rem0, rem1, term0, term1;
+    bits64 z;
+    /* The quotient needs more than 64 bits:  return the maximum value. */
+    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+    /* b0 is the upper 32 bits of the divisor. */
+    b0 = b>>32;
+    /* Estimate the upper 32 quotient bits from a0 / b0, capped at
+       0xFFFFFFFF in the upper half. */
+    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+    /* Remainder after subtracting b * z from the dividend. */
+    mul64To128( b, z, &term0, &term1 );
+    sub128( a0, a1, term0, term1, &rem0, &rem1 );
+    /* A negative remainder means the estimate was too large:  step the
+       upper quotient half down by one and add b * 2^32 (held as b0:b1)
+       back until the remainder is non-negative. */
+    while ( ( (sbits64) rem0 ) < 0 ) {
+        z -= LIT64( 0x100000000 );
+        b1 = b<<32;
+        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+    }
+    /* Keep the middle 64 bits of the remainder and estimate the lower 32
+       quotient bits from them in the same way. */
+    rem0 = ( rem0<<32 ) | ( rem1>>32 );
+    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Returns an approximation to the square root of the 32-bit significand given
+by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+`aExp' (the least significant bit) is 1, the integer returned approximates
+2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+case, the approximation returned lies strictly within +/-2 of the exact
+value.
+-------------------------------------------------------------------------------
+*/
+static bits32 estimateSqrt32( int16 aExp, bits32 a )
+{
+    /* Correction terms subtracted from the first estimate, selected by
+       `index' below; one table per exponent parity. */
+    static const bits16 sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const bits16 sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    int8 index;
+    bits32 z;
+    /* Table index:  bits 27..30 of `a', the four bits below the top bit. */
+    index = ( a>>27 ) & 15;
+    if ( aExp & 1 ) {
+        /* Odd exponent:  first estimate from the top bits of `a' less the
+           table correction, then a refinement that combines a / z with z
+           (presumably a Newton-Raphson style step -- TODO confirm). */
+        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
+        z = ( ( a / z )<<14 ) + ( z<<15 );
+        a >>= 1;
+    }
+    else {
+        /* Even exponent:  same scheme with the other table. */
+        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
+        z = a / z + z;
+        /* Cap the estimate at 0xFFFF8000 where z<<15 would not fit. */
+        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+        /* If the estimate does not exceed `a', return `a' shifted right
+           one place as a signed value (the top bit is replicated). */
+        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
+    }
+    /* Final refinement:  ( a * 2^31 ) / z  +  z / 2. */
+    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'.  If `a' is zero, 32 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros32( bits32 a )
+{
+    /* Leading-zero count of every 8-bit value, indexed by that value. */
+    static const int8 countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,   /* 0x00 - 0x0F */
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   /* 0x10 - 0x1F */
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   /* 0x20 - 0x2F */
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   /* 0x30 - 0x3F */
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x40 - 0x4F */
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x50 - 0x5F */
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x60 - 0x6F */
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   /* 0x70 - 0x7F */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x80 - 0x8F */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0x90 - 0x9F */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0xA0 - 0xAF */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0xB0 - 0xBF */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0xC0 - 0xCF */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0xD0 - 0xDF */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   /* 0xE0 - 0xEF */
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    /* 0xF0 - 0xFF */
+    };
+    int8 zeros = 0;
+    /* Upper half empty:  account for 16 zeros and bring the lower half up. */
+    if ( a < 0x10000 ) {
+        zeros = 16;
+        a <<= 16;
+    }
+    /* Top byte empty:  account for 8 more zeros and bring the next byte up. */
+    if ( a < 0x1000000 ) {
+        zeros += 8;
+        a <<= 8;
+    }
+    /* The table resolves the zeros within the top byte. */
+    return zeros + countLeadingZerosHigh[ a>>24 ];
+}
+/*
+-------------------------------------------------------------------------------
+Returns the number of leading 0 bits before the most-significant 1 bit
+of `a'.  If `a' is zero, 64 is returned.
+-------------------------------------------------------------------------------
+*/
+static int8 countLeadingZeros64( bits64 a )
+{
+    /* Upper word empty:  32 zeros plus whatever the lower word contributes. */
+    if ( ( a>>32 ) == 0 ) {
+        return 32 + countLeadingZeros32( a );
+    }
+    /* Otherwise the answer is determined by the upper word alone. */
+    return countLeadingZeros32( a>>32 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* Differing high words settle it; otherwise compare the low words. */
+    if ( a0 != b0 ) return 0;
+    return ( a1 == b1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* The high words decide unless they are equal. */
+    if ( a0 != b0 ) return ( a0 < b0 );
+    return ( a1 <= b1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* The high words decide unless they are equal. */
+    if ( a0 != b0 ) return ( a0 < b0 );
+    return ( a1 < b1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+Otherwise, returns 0.
+-------------------------------------------------------------------------------
+*/
+INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
+{
+    /* Unequal exactly when the values are not equal in both words. */
+    return ! ( ( a0 == b0 ) && ( a1 == b1 ) );
+}
diff --git a/arch/arm26/nwfpe/softfloat-specialize b/arch/arm26/nwfpe/softfloat-specialize
new file mode 100644
index 000000000000..acf409144763
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat-specialize
@@ -0,0 +1,366 @@
+/*
+===============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+===============================================================================
+*/
+/*
+-------------------------------------------------------------------------------
+Underflow tininess-detection mode, statically initialized to default value.
+(The declaration in `softfloat.h' must match the `int8' type here.)
+The rounding routines `roundAndPackFloat32' and `roundAndPackFloat64' in
+`softfloat.c' compare this variable against `float_tininess_before_rounding'
+when deciding whether a result is tiny.
+-------------------------------------------------------------------------------
+*/
+int8 float_detect_tininess = float_tininess_after_rounding;
+/*
+-------------------------------------------------------------------------------
+Raises the exceptions specified by `flags'.  Floating-point traps can be
+defined here if desired.  It is currently not possible for such a trap to
+substitute a result value.  If traps are not implemented, this routine
+should be simply `float_exception_flags |= flags;'.
+ScottB:  November 4, 1998
+Moved this function out of softfloat-specialize into fpmodule.c.
+This effectively isolates all the changes required for integrating with the
+Linux kernel into fpmodule.c.  Porting to NetBSD should only require modifying
+fpmodule.c to integrate with the NetBSD kernel (I hope!).
+The original definition is retained below, inside this comment, for reference
+only; it is not compiled.
+-------------------------------------------------------------------------------
+void float_raise( int8 flags )
+{
+    float_exception_flags |= flags;
+}
+*/
+/*
+-------------------------------------------------------------------------------
+Internal canonical NaN format.  Used as the intermediate representation when
+a NaN is converted from one floating-point format to another (see the
+`...ToCommonNaN' and `commonNaNTo...' routines below).
+-------------------------------------------------------------------------------
+*/
+typedef struct {
+    /* Sign of the NaN, taken from the sign bit of the source value. */
+    flag sign;
+    /* `high' receives the source fraction bits shifted up so that the most
+       significant fraction bit lands in bit 63.  `low' is set to zero by
+       every conversion in this file. */
+    bits64 high, low;
+} commonNaNT;
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated single-precision NaN: all 32 bits set,
+i.e. the sign, exponent, and fraction fields are all ones.
+-------------------------------------------------------------------------------
+*/
+#define float32_default_nan 0xFFFFFFFF
+/*
+-------------------------------------------------------------------------------
+Tests whether the single-precision floating-point value `a' is a NaN of
+either kind.  Returns 1 if it is, 0 otherwise.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_nan( float32 a )
+{
+    bits32 signless;
+    /* Shift the sign bit out; what remains is larger than the pattern of an
+       infinity (0xFF000000) only if the exponent is all ones and the
+       fraction is nonzero. */
+    signless = a<<1;
+    return ( signless > 0xFF000000 );
+}
+/*
+-------------------------------------------------------------------------------
+Tests whether the single-precision floating-point value `a' is a signaling
+NaN.  Returns 1 if it is, 0 otherwise.
+-------------------------------------------------------------------------------
+*/
+flag float32_is_signaling_nan( float32 a )
+{
+    /* Exponent all ones with the top fraction bit (bit 22) clear ... */
+    if ( ( ( a>>22 ) & 0x1FF ) != 0x1FE ) return 0;
+    /* ... and at least one of the remaining fraction bits set. */
+    return ( ( a & 0x003FFFFF ) != 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Converts the single-precision floating-point NaN `a' to the canonical NaN
+format and returns the result.  The invalid exception is raised if `a' is a
+signaling NaN.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float32ToCommonNaN( float32 a )
+{
+    commonNaNT result;
+    /* Left-justify the fraction: bit 22 of `a' ends up in bit 63. */
+    result.high = ( (bits64) a )<<41;
+    result.low = 0;
+    result.sign = a>>31;
+    if ( float32_is_signaling_nan( a ) ) {
+        float_raise( float_flag_invalid );
+    }
+    return result;
+}
+/*
+-------------------------------------------------------------------------------
+Converts the canonical NaN `a' to the single-precision floating-point format
+and returns the result.  The result always has its exponent field and top
+fraction bit set (0x7FC00000).
+-------------------------------------------------------------------------------
+*/
+static float32 commonNaNToFloat32( commonNaNT a )
+{
+    bits32 signBit;
+    signBit = ( (bits32) a.sign )<<31;
+    /* Bring the left-justified fraction back down into bits 22..0. */
+    return signBit | 0x7FC00000 | ( a.high>>41 );
+}
+/*
+-------------------------------------------------------------------------------
+Selects the NaN result for an operation on the single-precision values `a'
+and `b', at least one of which is a NaN.  The invalid exception is raised if
+either operand is a signaling NaN.  The value returned always has fraction
+bit 22 set.
+-------------------------------------------------------------------------------
+*/
+static float32 propagateFloat32NaN( float32 a, float32 b )
+{
+    flag aNaN, aSignaling, bNaN, bSignaling;
+    float32 quietA, quietB;
+    aNaN = float32_is_nan( a );
+    aSignaling = float32_is_signaling_nan( a );
+    bNaN = float32_is_nan( b );
+    bSignaling = float32_is_signaling_nan( b );
+    quietA = a | 0x00400000;
+    quietB = b | 0x00400000;
+    if ( aSignaling | bSignaling ) float_raise( float_flag_invalid );
+    /* `b' wins when `a' is not a NaN at all, or when `a' is signaling and
+       `b' is also a NaN; in every other case `a' is returned. */
+    if ( ! aNaN ) return quietB;
+    if ( aSignaling & bNaN ) return quietB;
+    return quietA;
+}
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated double-precision NaN: all 64 bits set,
+i.e. the sign, exponent, and fraction fields are all ones.
+-------------------------------------------------------------------------------
+*/
+#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
+/*
+-------------------------------------------------------------------------------
+Tests whether the double-precision floating-point value `a' is a NaN of
+either kind.  Returns 1 if it is, 0 otherwise.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_nan( float64 a )
+{
+    bits64 signless;
+    /* With the sign bit shifted out, only a NaN compares above the pattern
+       of an infinity (exponent all ones, fraction zero). */
+    signless = a<<1;
+    return ( signless > LIT64( 0xFFE0000000000000 ) );
+}
+/*
+-------------------------------------------------------------------------------
+Tests whether the double-precision floating-point value `a' is a signaling
+NaN.  Returns 1 if it is, 0 otherwise.
+-------------------------------------------------------------------------------
+*/
+flag float64_is_signaling_nan( float64 a )
+{
+    /* Exponent all ones with the top fraction bit (bit 51) clear ... */
+    if ( ( ( a>>51 ) & 0xFFF ) != 0xFFE ) return 0;
+    /* ... and at least one of the remaining fraction bits set. */
+    return ( ( a & LIT64( 0x0007FFFFFFFFFFFF ) ) != 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Converts the double-precision floating-point NaN `a' to the canonical NaN
+format and returns the result.  The invalid exception is raised if `a' is a
+signaling NaN.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT float64ToCommonNaN( float64 a )
+{
+    commonNaNT result;
+    /* Left-justify the fraction: bit 51 of `a' ends up in bit 63. */
+    result.high = a<<12;
+    result.low = 0;
+    result.sign = a>>63;
+    if ( float64_is_signaling_nan( a ) ) {
+        float_raise( float_flag_invalid );
+    }
+    return result;
+}
+/*
+-------------------------------------------------------------------------------
+Converts the canonical NaN `a' to the double-precision floating-point format
+and returns the result.  The result always has its exponent field and top
+fraction bit set (0x7FF8000000000000).
+-------------------------------------------------------------------------------
+*/
+static float64 commonNaNToFloat64( commonNaNT a )
+{
+    bits64 z;
+    /* Bring the left-justified fraction back down into bits 51..0. */
+    z = LIT64( 0x7FF8000000000000 ) | ( a.high>>12 );
+    z |= ( (bits64) a.sign )<<63;
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Selects the NaN result for an operation on the double-precision values `a'
+and `b', at least one of which is a NaN.  The invalid exception is raised if
+either operand is a signaling NaN.  The value returned always has fraction
+bit 51 set.
+-------------------------------------------------------------------------------
+*/
+static float64 propagateFloat64NaN( float64 a, float64 b )
+{
+    flag aNaN, aSignaling, bNaN, bSignaling;
+    float64 quietA, quietB;
+    aNaN = float64_is_nan( a );
+    aSignaling = float64_is_signaling_nan( a );
+    bNaN = float64_is_nan( b );
+    bSignaling = float64_is_signaling_nan( b );
+    quietA = a | LIT64( 0x0008000000000000 );
+    quietB = b | LIT64( 0x0008000000000000 );
+    if ( aSignaling | bSignaling ) float_raise( float_flag_invalid );
+    /* `b' wins when `a' is not a NaN at all, or when `a' is signaling and
+       `b' is also a NaN; in every other case `a' is returned. */
+    if ( ! aNaN ) return quietB;
+    if ( aSignaling & bNaN ) return quietB;
+    return quietA;
+}
+#ifdef FLOATX80
+/*
+-------------------------------------------------------------------------------
+The pattern for a default generated extended double-precision NaN.  The
+`high' and `low' values hold the most- and least-significant bits,
+respectively.  Every bit is set in both words: `high' has the sign bit and
+all 15 exponent bits set, and `low' is an all-ones significand.
+-------------------------------------------------------------------------------
+*/
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
+/*
+-------------------------------------------------------------------------------
+Tests whether the extended double-precision floating-point value `a' is a
+NaN of either kind.  Returns 1 if it is, 0 otherwise.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_nan( floatx80 a )
+{
+    /* The exponent field must be all ones ... */
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;
+    /* ... and the significand, ignoring its top bit, must be nonzero. */
+    return ( (bits64) ( a.low<<1 ) != 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Tests whether the extended double-precision floating-point value `a' is a
+signaling NaN.  Returns 1 if it is, 0 otherwise.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_is_signaling_nan( floatx80 a )
+{
+    bits64 lowSansQuiet;
+    /* The exponent field must be all ones. */
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;
+    /* Bit 62 of the significand must already be clear ... */
+    lowSansQuiet = a.low & ~ LIT64( 0x4000000000000000 );
+    if ( a.low != lowSansQuiet ) return 0;
+    /* ... and some bit below the top bit must still be set. */
+    return ( (bits64) ( lowSansQuiet<<1 ) != 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Converts the extended double-precision floating-point NaN `a' to the
+canonical NaN format and returns the result.  The invalid exception is
+raised if `a' is a signaling NaN.
+-------------------------------------------------------------------------------
+*/
+static commonNaNT floatx80ToCommonNaN( floatx80 a )
+{
+    commonNaNT result;
+    /* Drop the top significand bit so that bit 62 ends up in bit 63. */
+    result.high = a.low<<1;
+    result.low = 0;
+    result.sign = a.high>>15;
+    if ( floatx80_is_signaling_nan( a ) ) {
+        float_raise( float_flag_invalid );
+    }
+    return result;
+}
+/*
+-------------------------------------------------------------------------------
+Converts the canonical NaN `a' to the extended double-precision floating-
+point format and returns the result.  The result always has an all-ones
+exponent and the top two significand bits set.
+-------------------------------------------------------------------------------
+*/
+static floatx80 commonNaNToFloatx80( commonNaNT a )
+{
+    floatx80 result;
+    result.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
+    result.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
+    return result;
+}
+/*
+-------------------------------------------------------------------------------
+Selects the NaN result for an operation on the extended double-precision
+values `a' and `b', at least one of which is a NaN.  The invalid exception
+is raised if either operand is a signaling NaN.  The value returned always
+has the top two bits of its significand set.
+-------------------------------------------------------------------------------
+*/
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
+{
+    flag aNaN, aSignaling, bNaN, bSignaling;
+    aNaN = floatx80_is_nan( a );
+    aSignaling = floatx80_is_signaling_nan( a );
+    bNaN = floatx80_is_nan( b );
+    bSignaling = floatx80_is_signaling_nan( b );
+    a.low |= LIT64( 0xC000000000000000 );
+    b.low |= LIT64( 0xC000000000000000 );
+    if ( aSignaling | bSignaling ) float_raise( float_flag_invalid );
+    /* `b' wins when `a' is not a NaN at all, or when `a' is signaling and
+       `b' is also a NaN; in every other case `a' is returned. */
+    if ( ! aNaN ) return b;
+    if ( aSignaling & bNaN ) return b;
+    return a;
+}
+#endif
diff --git a/arch/arm26/nwfpe/softfloat.c b/arch/arm26/nwfpe/softfloat.c
new file mode 100644
index 000000000000..26c1b916e527
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat.c
@@ -0,0 +1,3439 @@
+/*
+===============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+===============================================================================
+*/
+#include "fpa11.h"
+#include "milieu.h"
+#include "softfloat.h"
+/*
+-------------------------------------------------------------------------------
+Floating-point rounding mode, extended double-precision rounding precision,
+and exception flags.
+-------------------------------------------------------------------------------
+*/
+/* Current rounding mode; read by the roundAndPack... routines below. */
+int8 float_rounding_mode = float_round_nearest_even;
+/* Rounding precision for extended double-precision results, initially 80. */
+int8 floatx80_rounding_precision = 80;
+/* Accumulated exception flags; the `float_flag_...' bits are ORed in here. */
+int8 float_exception_flags;
+/*
+-------------------------------------------------------------------------------
+Primitive arithmetic functions, including multi-word arithmetic, and
+division and square root approximations.  (Can be specialized to target if
+desired.)
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-macros"
+/*
+-------------------------------------------------------------------------------
+Functions and definitions to determine:  (1) whether tininess for underflow
+is detected before or after rounding by default, (2) what (if anything)
+happens when exceptions are raised, (3) how signaling NaNs are distinguished
+from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+are propagated from function inputs to output.  These details are target-
+specific.
+-------------------------------------------------------------------------------
+*/
+#include "softfloat-specialize"
+/*
+-------------------------------------------------------------------------------
+Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+and 7, and returns the properly rounded 32-bit integer corresponding to the
+input.  If `zSign' is nonzero, the input is negated before being converted
+to an integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point
+input is simply rounded to an integer, with the inexact exception raised if
+the input cannot be represented exactly as an integer.  If the fixed-point
+input is too large, however, the invalid exception is raised and the largest
+positive or negative integer is returned.
+-------------------------------------------------------------------------------
+*/
+static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    int32 z;
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    /* Seven fraction bits are discarded below; 0x40 is half of one unit in
+       the last retained place, the increment for round-to-nearest. */
+    roundIncrement = 0x40;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            /* Directed rounding: 0x7F bumps the magnitude whenever any
+               discarded bit is set, unless the direction calls for
+               truncating this sign. */
+            roundIncrement = 0x7F;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = absZ & 0x7F;
+    absZ = ( absZ + roundIncrement )>>7;
+    /* On an exact tie (discarded bits == 0x40) in nearest-even mode, clear
+       bit 0 so the result is even. */
+    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+    z = absZ;
+    if ( zSign ) z = - z;
+    /* Overflow: the magnitude does not fit in 32 bits, or the sign of the
+       nonzero result disagrees with the requested sign. */
+    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+        float_exception_flags |= float_flag_invalid;
+        return zSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Isolates the fraction field (the low 23 bits) of the single-precision
+floating-point value `a' and returns it.
+-------------------------------------------------------------------------------
+*/
+INLINE bits32 extractFloat32Frac( float32 a )
+{
+    bits32 fracMask;
+    fracMask = 0x007FFFFF;
+    return a & fracMask;
+}
+/*
+-------------------------------------------------------------------------------
+Isolates the 8-bit exponent field (bits 30..23) of the single-precision
+floating-point value `a' and returns it.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat32Exp( float32 a )
+{
+    int16 expField;
+    expField = ( a>>23 ) & 0xFF;
+    return expField;
+}
+/*
+-------------------------------------------------------------------------------
+Isolates the sign bit (bit 31) of the single-precision floating-point value
+`a' and returns it.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat32Sign( float32 a )
+{
+    flag signBit;
+    signBit = a>>31;
+    return signBit;
+}
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal single-precision significand `aSig'.  The
+significand is shifted left until its leading 1 sits in bit 23, and the
+matching exponent is computed.  The results are written through `zExpPtr'
+and `zSigPtr'.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
+{
+    int8 normShift;
+    /* Leave eight zero bits above the leading 1. */
+    normShift = countLeadingZeros32( aSig ) - 8;
+    *zExpPtr = 1 - normShift;
+    *zSigPtr = aSig<<normShift;
+}
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+single-precision floating-point value, returning the result.  After being
+shifted into the proper positions, the three fields are simply added
+together to form the result.  This means that any integer portion of `zSig'
+will be added into the exponent.  Since a properly normalized significand
+will have an integer portion equal to 1, the `zExp' input should be 1 less
+than the desired result exponent whenever `zSig' is a complete, normalized
+significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    /* The fields are combined with `+' rather than `|' on purpose: a carry
+       out of `zSig' has to propagate into the exponent field. */
+    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
+}
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper single-precision floating-
+point value corresponding to the abstract input.  Ordinarily, the abstract
+value is simply rounded and packed into the single-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal single-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 30
+and 29, which is 7 bits to the left of the usual location.  This shifted
+significand must be normalized or smaller.  If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding.  In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int8 roundIncrement, roundBits;
+    flag isTiny;
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    /* Seven extra low bits are discarded below; 0x40 is half of one unit in
+       the last retained place, the increment for round-to-nearest. */
+    roundIncrement = 0x40;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            /* Directed rounding: 0x7F bumps the magnitude whenever any
+               discarded bit is set, unless the direction calls for
+               truncating this sign. */
+            roundIncrement = 0x7F;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x7F;
+    /* NOTE(review): the (bits16) cast presumably makes a negative `zExp'
+       compare as a large value, so this single test catches both exponent
+       extremes -- confirm that `bits16' is unsigned. */
+    if ( 0xFD <= (bits16) zExp ) {
+        /* Overflow: the exponent is too large, or rounding would carry out
+           of the significand at the largest exponent. */
+        if (    ( 0xFD < zExp )
+             || (    ( zExp == 0xFD )
+                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact );
+            /* Infinity pattern; subtracting 1 when the rounding mode
+               truncates this sign gives the pattern just below it
+               (exponent 0xFE, fraction all ones). */
+            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {
+            /* Subnormal result.  It counts as tiny unless tininess is
+               detected after rounding, `zExp' is exactly -1, and rounding
+               carries into bit 31. */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < 0x80000000 );
+            /* presumably shifted-out bits are kept as a sticky bit by
+               shift32RightJamming -- defined in softfloat-macros, not
+               visible here */
+            shift32RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x7F;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>7;
+    /* On an exact tie (discarded bits == 0x40) in nearest-even mode, clear
+       bit 0 so the result is even. */
+    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+    /* A significand that rounded to zero is packed with a zero exponent. */
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat32( zSign, zExp, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Variant of `roundAndPackFloat32' for a significand `zSig' that is not yet
+normalized.  The significand is shifted left until its leading 1 sits in
+bit 30, the exponent is reduced to match, and the result is passed on to
+`roundAndPackFloat32'.  In all cases, `zExp' must be 1 less than the
+``true'' floating-point exponent.
+-------------------------------------------------------------------------------
+*/
+static float32
+ normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+{
+    int8 normShift;
+    int16 adjustedExp;
+    bits32 normalizedSig;
+    normShift = countLeadingZeros32( zSig ) - 1;
+    adjustedExp = zExp - normShift;
+    normalizedSig = zSig<<normShift;
+    return roundAndPackFloat32( zSign, adjustedExp, normalizedSig );
+}
+/*
+-------------------------------------------------------------------------------
+Isolates the fraction field (the low 52 bits) of the double-precision
+floating-point value `a' and returns it.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloat64Frac( float64 a )
+{
+    bits64 fracMask;
+    fracMask = LIT64( 0x000FFFFFFFFFFFFF );
+    return a & fracMask;
+}
+/*
+-------------------------------------------------------------------------------
+Isolates the 11-bit exponent field (bits 62..52) of the double-precision
+floating-point value `a' and returns it.
+-------------------------------------------------------------------------------
+*/
+INLINE int16 extractFloat64Exp( float64 a )
+{
+    int16 expField;
+    expField = ( a>>52 ) & 0x7FF;
+    return expField;
+}
+/*
+-------------------------------------------------------------------------------
+Isolates the sign bit (bit 63) of the double-precision floating-point value
+`a' and returns it.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloat64Sign( float64 a )
+{
+    flag signBit;
+    signBit = a>>63;
+    return signBit;
+}
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal double-precision significand `aSig'.  The
+significand is shifted left until its leading 1 sits in bit 52, and the
+matching exponent is computed.  The results are written through `zExpPtr'
+and `zSigPtr'.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
+{
+    int8 normShift;
+    /* Leave eleven zero bits above the leading 1. */
+    normShift = countLeadingZeros64( aSig ) - 11;
+    *zExpPtr = 1 - normShift;
+    *zSigPtr = aSig<<normShift;
+}
+/*
+-------------------------------------------------------------------------------
+Builds a double-precision floating-point value from the sign `zSign',
+exponent `zExp', and significand `zSig'.  Each field is shifted into place
+and the three are added, not ORed, so any integer portion of `zSig' carries
+into the exponent.  Because a normalized significand has an integer portion
+of 1, `zExp' should be 1 less than the desired result exponent whenever
+`zSig' is a complete, normalized significand.
+-------------------------------------------------------------------------------
+*/
+INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    bits64 z;
+    z = zSig;
+    z += ( (bits64) zExp )<<52;
+    z += ( (bits64) zSign )<<63;
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and significand `zSig', and returns the proper double-precision floating-
+point value corresponding to the abstract input.  Ordinarily, the abstract
+value is simply rounded and packed into the double-precision format, with
+the inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal double-
+precision floating-point number.
+    The input significand `zSig' has its binary point between bits 62
+and 61, which is 10 bits to the left of the usual location.  This shifted
+significand must be normalized or smaller.  If `zSig' is not normalized,
+`zExp' must be 0; in that case, the result returned is a subnormal number,
+and it must not require rounding.  In the usual case that `zSig' is
+normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+The handling of underflow and overflow follows the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    int8 roundingMode;
+    flag roundNearestEven;
+    int16 roundIncrement, roundBits;
+    flag isTiny;
+    roundingMode = float_rounding_mode;
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    /* Ten extra low bits are discarded below; 0x200 is half of one unit in
+       the last retained place, the increment for round-to-nearest. */
+    roundIncrement = 0x200;
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            /* Directed rounding: 0x3FF bumps the magnitude whenever any
+               discarded bit is set, unless the direction calls for
+               truncating this sign. */
+            roundIncrement = 0x3FF;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x3FF;
+    /* NOTE(review): the (bits16) cast presumably makes a negative `zExp'
+       compare as a large value, so this single test catches both exponent
+       extremes -- confirm that `bits16' is unsigned. */
+    if ( 0x7FD <= (bits16) zExp ) {
+        /* Overflow: the exponent is too large, or rounding would carry out
+           of the significand at the largest exponent. */
+        if (    ( 0x7FD < zExp )
+             || (    ( zExp == 0x7FD )
+                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
+           ) {
+            float_raise( float_flag_overflow | float_flag_inexact );
+            /* Infinity pattern; subtracting 1 when the rounding mode
+               truncates this sign gives the pattern just below it
+               (exponent 0x7FE, fraction all ones). */
+            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
+        }
+        if ( zExp < 0 ) {
+            /* Subnormal result.  It counts as tiny unless tininess is
+               detected after rounding, `zExp' is exactly -1, and rounding
+               carries into bit 63. */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < -1 )
+                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+            /* presumably shifted-out bits are kept as a sticky bit by
+               shift64RightJamming -- defined in softfloat-macros, not
+               visible here */
+            shift64RightJamming( zSig, - zExp, &zSig );
+            zExp = 0;
+            roundBits = zSig & 0x3FF;
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig = ( zSig + roundIncrement )>>10;
+    /* On an exact tie (discarded bits == 0x200) in nearest-even mode, clear
+       bit 0 so the result is even. */
+    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+    /* A significand that rounded to zero is packed with a zero exponent. */
+    if ( zSig == 0 ) zExp = 0;
+    return packFloat64( zSign, zExp, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Variant of `roundAndPackFloat64' for a significand `zSig' that is not yet
+normalized.  The significand is shifted left until its leading 1 sits in
+bit 62, the exponent is reduced to match, and the result is passed on to
+`roundAndPackFloat64'.  In all cases, `zExp' must be 1 less than the
+``true'' floating-point exponent.
+-------------------------------------------------------------------------------
+*/
+static float64
+ normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+{
+    int8 normShift;
+    int16 adjustedExp;
+    bits64 normalizedSig;
+    normShift = countLeadingZeros64( zSig ) - 1;
+    adjustedExp = zExp - normShift;
+    normalizedSig = zSig<<normShift;
+    return roundAndPackFloat64( zSign, adjustedExp, normalizedSig );
+}
+#ifdef FLOATX80
+/*
+-------------------------------------------------------------------------------
+Returns the significand of the extended double-precision floating-point
+value `a', which is stored whole in the `low' word.
+-------------------------------------------------------------------------------
+*/
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+    bits64 significand;
+    significand = a.low;
+    return significand;
+}
+/*
+-------------------------------------------------------------------------------
+Isolates the 15-bit exponent field (the low 15 bits of the `high' word) of
+the extended double-precision floating-point value `a' and returns it.
+-------------------------------------------------------------------------------
+*/
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+    int32 expField;
+    expField = a.high & 0x7FFF;
+    return expField;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the sign bit of the extended double-precision floating-point value
+`a'.
+-------------------------------------------------------------------------------
+*/
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+    /* The sign is whatever remains of `high' above its 15 exponent bits. */
+    flag signBit = a.high>>15;
+    return signBit;
+}
+/*
+-------------------------------------------------------------------------------
+Normalizes the subnormal extended double-precision floating-point value
+represented by the denormalized significand `aSig'.  The normalized exponent
+and significand are stored at the locations pointed to by `zExpPtr' and
+`zSigPtr', respectively.
+-------------------------------------------------------------------------------
+*/
+static void
+ normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+    /* Number of positions the significand must move left so that its
+       leading 1 reaches the top bit. */
+    int8 lead = countLeadingZeros64( aSig );
+    *zExpPtr = 1 - lead;
+    *zSigPtr = aSig<<lead;
+}
+/*
+-------------------------------------------------------------------------------
+Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+extended double-precision floating-point value, returning the result.
+-------------------------------------------------------------------------------
+*/
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+    floatx80 packed;
+    /* Sign goes into bit 15 of `high'; the exponent is added below it. */
+    packed.high = ( ( (bits16) zSign )<<15 ) + zExp;
+    packed.low = zSig;
+    return packed;
+}
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+and extended significand formed by the concatenation of `zSig0' and `zSig1',
+and returns the proper extended double-precision floating-point value
+corresponding to the abstract input.  Ordinarily, the abstract value is
+rounded and packed into the extended double-precision format, with the
+inexact exception raised if the abstract input cannot be represented
+exactly.  If the abstract value is too large, however, the overflow and
+inexact exceptions are raised and an infinity or maximal finite value is
+returned.  If the abstract value is too small, the input value is rounded to
+a subnormal number, and the underflow and inexact exceptions are raised if
+the abstract input cannot be represented exactly as a subnormal extended
+double-precision floating-point number.
+    If `roundingPrecision' is 32 or 64, the result is rounded to the same
+number of bits as single or double precision, respectively.  Otherwise, the
+result is rounded to the full precision of the extended double-precision
+format.
+    The input significand must be normalized or smaller.  If the input
+significand is not normalized, `zExp' must be 0; in that case, the result
+returned is a subnormal number, and it must not require rounding.  The
+handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80
+ roundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment, isTiny;
+    int64 roundIncrement, roundMask, roundBits;
+    roundingMode = float_rounding_mode;  /* read the global rounding mode once */
+    roundNearestEven = ( roundingMode == float_round_nearest_even );
+    if ( roundingPrecision == 80 ) goto precision80;
+    if ( roundingPrecision == 64 ) {  /* low 11 bits of zSig0 get rounded away */
+        roundIncrement = LIT64( 0x0000000000000400 );
+        roundMask = LIT64( 0x00000000000007FF );
+    }
+    else if ( roundingPrecision == 32 ) {  /* low 40 bits of zSig0 get rounded away */
+        roundIncrement = LIT64( 0x0000008000000000 );
+        roundMask = LIT64( 0x000000FFFFFFFFFF );
+    }
+    else {  /* any other value is treated as full precision */
+        goto precision80;
+    }
+    zSig0 |= ( zSig1 != 0 );  /* fold zSig1 into a sticky bit in zSig0 */
+    if ( ! roundNearestEven ) {  /* directed modes: add nothing, or add the whole mask */
+        if ( roundingMode == float_round_to_zero ) {
+            roundIncrement = 0;
+        }
+        else {
+            roundIncrement = roundMask;
+            if ( zSign ) {
+                if ( roundingMode == float_round_up ) roundIncrement = 0;
+            }
+            else {
+                if ( roundingMode == float_round_down ) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* unsigned compare: true for zExp <= 0 and for zExp >= 0x7FFE */
+        if (    ( 0x7FFE < zExp )
+             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+           ) {  /* exponent too large, or rounding would carry out at the largest exponent */
+            goto overflow;
+        }
+        if ( zExp <= 0 ) {  /* result is subnormal (or rounds up to the smallest normal) */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ( zSig0 <= zSig0 + roundIncrement );
+            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+            zExp = 0;
+            roundBits = zSig0 & roundMask;  /* recompute: the shift changed the discarded bits */
+            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+            if ( roundBits ) float_exception_flags |= float_flag_inexact;
+            zSig0 += roundIncrement;
+            if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* top bit set after rounding: exponent field becomes 1 */
+            roundIncrement = roundMask + 1;  /* now the weight of the lowest retained bit */
+            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {  /* exact tie: also clear the lowest retained bit */
+                roundMask |= roundIncrement;
+            }
+            zSig0 &= ~ roundMask;
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    zSig0 += roundIncrement;
+    if ( zSig0 < roundIncrement ) {  /* the addition wrapped: carry out of the 64-bit significand */
+        ++zExp;
+        zSig0 = LIT64( 0x8000000000000000 );
+    }
+    roundIncrement = roundMask + 1;  /* now the weight of the lowest retained bit */
+    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {  /* exact tie: also clear the lowest retained bit */
+        roundMask |= roundIncrement;
+    }
+    zSig0 &= ~ roundMask;
+    if ( zSig0 == 0 ) zExp = 0;
+    return packFloatx80( zSign, zExp, zSig0 );
+ precision80:  /* full 64-bit significand; zSig1 holds the bits to be rounded off */
+    increment = ( (sbits64) zSig1 < 0 );  /* nearest-even default: top bit of zSig1 */
+    if ( ! roundNearestEven ) {
+        if ( roundingMode == float_round_to_zero ) {
+            increment = 0;
+        }
+        else {
+            if ( zSign ) {
+                increment = ( roundingMode == float_round_down ) && zSig1;
+            }
+            else {
+                increment = ( roundingMode == float_round_up ) && zSig1;
+            }
+        }
+    }
+    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* unsigned compare: true for zExp <= 0 and for zExp >= 0x7FFE */
+        if (    ( 0x7FFE < zExp )
+             || (    ( zExp == 0x7FFE )
+                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+                  && increment
+                )
+           ) {
+            roundMask = 0;  /* so that ~roundMask below is an all-ones significand */
+ overflow:  /* also entered by goto from the reduced-precision path, with its own roundMask */
+            float_raise( float_flag_overflow | float_flag_inexact );
+            if (    ( roundingMode == float_round_to_zero )
+                 || ( zSign && ( roundingMode == float_round_up ) )
+                 || ( ! zSign && ( roundingMode == float_round_down ) )
+               ) {  /* these modes return the value with exponent 0x7FFE instead of infinity */
+                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+            }
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( zExp <= 0 ) {  /* result is subnormal (or rounds up to the smallest normal) */
+            isTiny =
+                   ( float_detect_tininess == float_tininess_before_rounding )
+                || ( zExp < 0 )
+                || ! increment
+                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+            zExp = 0;
+            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
+            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+            if ( roundNearestEven ) {  /* re-derive the increment from the shifted zSig1 */
+                increment = ( (sbits64) zSig1 < 0 );
+            }
+            else {
+                if ( zSign ) {
+                    increment = ( roundingMode == float_round_down ) && zSig1;
+                }
+                else {
+                    increment = ( roundingMode == float_round_up ) && zSig1;
+                }
+            }
+            if ( increment ) {
+                ++zSig0;
+                zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );  /* exact tie in nearest-even mode: clear bit 0 */
+                if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* top bit set after rounding: exponent field becomes 1 */
+            }
+            return packFloatx80( zSign, zExp, zSig0 );
+        }
+    }
+    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+    if ( increment ) {
+        ++zSig0;
+        if ( zSig0 == 0 ) {  /* significand wrapped to zero: carry into the exponent */
+            ++zExp;
+            zSig0 = LIT64( 0x8000000000000000 );
+        }
+        else {
+            zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );  /* exact tie in nearest-even mode: clear bit 0 */
+        }
+    }
+    else {
+        if ( zSig0 == 0 ) zExp = 0;
+    }
+    
+    return packFloatx80( zSign, zExp, zSig0 );
+}
+/*
+-------------------------------------------------------------------------------
+Takes an abstract floating-point value having sign `zSign', exponent
+`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+and returns the proper extended double-precision floating-point value
+corresponding to the abstract input.  This routine is just like
+`roundAndPackFloatx80' except that the input significand does not have to be
+normalized.
+-------------------------------------------------------------------------------
+*/
+static floatx80
+ normalizeRoundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+    int8 lead;
+    if ( ! zSig0 ) {
+        /* Upper word is empty: move the lower word up by a whole 64 bits. */
+        zExp -= 64;
+        zSig0 = zSig1;
+        zSig1 = 0;
+    }
+    /* Shift the 128-bit significand left until the top bit of zSig0 is set,
+       adjusting the exponent by the same amount. */
+    lead = countLeadingZeros64( zSig0 );
+    zExp -= lead;
+    shortShift128Left( zSig0, zSig1, lead, &zSig0, &zSig1 );
+    return
+        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+}
+#endif
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the single-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 int32_to_float32( int32 a )
+{
+    flag zSign;
+    if ( a == 0 ) return 0;
+    /* The most negative value cannot be negated, so it is packed directly
+       as sign 1, exponent 0x9E, zero fraction. */
+    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+    zSign = ( a < 0 );
+    if ( zSign ) a = - a;
+    return normalizeRoundAndPackFloat32( zSign, 0x9C, a );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a' to
+the double-precision floating-point format.  The conversion is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 int32_to_float64( int32 a )
+{
+    flag aSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+    if ( a == 0 ) return 0;
+    aSign = ( a < 0 );
+    /* Form the magnitude in unsigned arithmetic.  Negating the most
+       negative int32 as a signed value overflows, which is undefined
+       behaviour in C; converting to unsigned first and negating there is
+       well defined and gives 0x80000000 for that input. */
+    absA = a;
+    if ( aSign ) absA = - absA;
+    /* Move the leading 1 of the magnitude up to bit 52 of the 64-bit
+       significand; no rounding is needed, so the result is packed
+       directly. */
+    shiftCount = countLeadingZeros32( absA ) + 21;
+    zSig = absA;
+    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
+}
+#ifdef FLOATX80
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the 32-bit two's complement integer `a'
+to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 int32_to_floatx80( int32 a )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+    zSign = ( a < 0 );
+    /* Form the magnitude in unsigned arithmetic.  Negating the most
+       negative int32 as a signed value overflows, which is undefined
+       behaviour in C; converting to unsigned first and negating there is
+       well defined and gives 0x80000000 for that input. */
+    absA = a;
+    if ( zSign ) absA = - absA;
+    /* Move the leading 1 of the magnitude up to bit 63 of the significand;
+       the value is exact, so it is packed without rounding. */
+    shiftCount = countLeadingZeros32( absA ) + 32;
+    zSig = absA;
+    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+}
+#endif
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float32_to_int32( float32 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    bits64 zSig;
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    /* A NaN is converted as if it were positive, so that the result is the
+       largest positive integer as documented.  The single-precision
+       exponent field is 8 bits wide, so the all-ones value is 0xFF (0x7FF
+       is the double-precision value and could never match here). */
+    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+    /* Restore the implicit leading bit for non-zero exponents. */
+    if ( aExp ) aSig |= 0x00800000;
+    shiftCount = 0xAF - aExp;
+    /* Place the significand in the upper half of a 64-bit value, then shift
+       right (keeping a sticky bit) into the fixed-point position that
+       roundAndPackInt32 is handed. */
+    zSig = aSig;
+    zSig <<= 32;
+    if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
+    return roundAndPackInt32( aSign, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*/
+int32 float32_to_int32_round_to_zero( float32 a )
+{
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits32 aSig;
+    int32 z;
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    shiftCount = aExp - 0x9E;
+    if ( shiftCount >= 0 ) {
+        /* Exponent field of 0x9E or more: the only input accepted without
+           raising invalid is the bit pattern 0xCF000000. */
+        if ( a == 0xCF000000 ) return 0x80000000;
+        float_raise( float_flag_invalid );
+        /* Negative non-NaN inputs saturate low; everything else (positive
+           values and all NaNs) saturates high. */
+        if ( aSign && ! ( ( aExp == 0xFF ) && aSig ) ) return 0x80000000;
+        return 0x7FFFFFFF;
+    }
+    if ( aExp <= 0x7E ) {
+        /* Exponent field of 0x7E or less: result is 0, inexact unless the
+           input was a zero. */
+        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    aSig = ( aSig | 0x00800000 )<<8;
+    z = aSig>>( - shiftCount );
+    /* Any bits shifted out on the right make the conversion inexact. */
+    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    if ( aSign ) z = - z;
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the double-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float32_to_float64( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+    bits64 zSig;
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    if ( aExp == 0xFF ) {
+        /* Infinity keeps its sign; a NaN goes through the common NaN form. */
+        if ( ! aSig ) return packFloat64( aSign, 0x7FF, 0 );
+        return commonNaNToFloat64( float32ToCommonNaN( a ) );
+    }
+    if ( ! aExp ) {
+        /* Zero stays zero; a subnormal is normalized first. */
+        if ( ! aSig ) return packFloat64( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        aExp -= 1;
+    }
+    /* Widen the fraction and re-bias the exponent. */
+    zSig = aSig;
+    zSig <<= 29;
+    return packFloat64( aSign, aExp + 0x380, zSig );
+}
+#ifdef FLOATX80
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the single-precision floating-point value
+`a' to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float32_to_floatx80( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 aSig;
+    bits64 zSig;
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    if ( aExp == 0xFF ) {
+        /* Infinity keeps its sign; a NaN goes through the common NaN form. */
+        if ( ! aSig ) {
+            return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+    }
+    if ( ! aExp ) {
+        /* Zero stays zero; a subnormal is normalized first. */
+        if ( ! aSig ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    /* The extended format stores the leading bit explicitly: set it, then
+       move the 24-bit significand to the top of the 64-bit field. */
+    zSig = aSig | 0x00800000;
+    zSig <<= 40;
+    return packFloatx80( aSign, aExp + 0x3F80, zSig );
+}
+#endif
+/*
+-------------------------------------------------------------------------------
+Rounds the single-precision floating-point value `a' to an integer, and
+returns the result as a single-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 a )
+{
+    flag aSign;
+    int16 aExp;
+    bits32 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float32 z;
+    aExp = extractFloat32Exp( a );
+    if ( aExp >= 0x96 ) {
+        /* Nothing to round off at this exponent; only a NaN needs special
+           treatment. */
+        if ( ( aExp != 0xFF ) || ! extractFloat32Frac( a ) ) return a;
+        return propagateFloat32NaN( a, a );
+    }
+    if ( aExp <= 0x7E ) {
+        /* Zeros of either sign are returned unchanged. */
+        if ( (bits32) ( a<<1 ) == 0 ) return a;
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloat32Sign( a );
+        if ( float_rounding_mode == float_round_nearest_even ) {
+            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+                return packFloat32( aSign, 0x7F, 0 );
+            }
+        }
+        else if ( float_rounding_mode == float_round_down ) {
+            return aSign ? 0xBF800000 : 0;
+        }
+        else if ( float_rounding_mode == float_round_up ) {
+            return aSign ? 0x80000000 : 0x3F800000;
+        }
+        return packFloat32( aSign, 0, 0 );
+    }
+    /* lastBitMask selects the lowest bit that is kept; roundBitsMask covers
+       every bit below it. */
+    lastBitMask = ( (bits32) 1 )<<( 0x96 - aExp );
+    roundBitsMask = lastBitMask - 1;
+    roundingMode = float_rounding_mode;
+    z = a;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;
+        /* Exact tie: clear the lowest kept bit as well. */
+        if ( ! ( z & roundBitsMask ) ) z &= ~ lastBitMask;
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+            z += roundBitsMask;
+        }
+    }
+    z &= ~ roundBitsMask;
+    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the single-precision
+floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.  The
+addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 6;  /* make room below the fraction; the implicit bit is then 0x20000000 */
+    bSig <<= 6;
+    if ( 0 < expDiff ) {  /* a has the larger exponent */
+        if ( aExp == 0xFF ) {  /* a is an infinity or a NaN */
+            if ( aSig ) return propagateFloat32NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {  /* b has no implicit bit; shift by one place less */
+            --expDiff;
+        }
+        else {
+            bSig |= 0x20000000;
+        }
+        shift32RightJamming( bSig, expDiff, &bSig );  /* align b to a's exponent */
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {  /* b has the larger exponent */
+        if ( bExp == 0xFF ) {  /* b is an infinity or a NaN */
+            if ( bSig ) return propagateFloat32NaN( a, b );
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        if ( aExp == 0 ) {  /* a has no implicit bit; shift by one place less */
+            ++expDiff;
+        }
+        else {
+            aSig |= 0x20000000;
+        }
+        shift32RightJamming( aSig, - expDiff, &aSig );  /* align a to b's exponent */
+        zExp = bExp;
+    }
+    else {  /* equal exponents */
+        if ( aExp == 0xFF ) {
+            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+            return a;
+        }
+        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );  /* both exponent fields zero: pack the plain sum, no rounding */
+        zSig = 0x40000000 + aSig + bSig;  /* 0x40000000 is the two implicit bits added together */
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= 0x20000000;  /* implicit bit on the significand that was not set above */
+    zSig = ( aSig + bSig )<<1;  /* first try the sum one place higher ... */
+    --zExp;
+    if ( (sbits32) zSig < 0 ) {  /* ... top bit set: undo the extra shift */
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat32( zSign, zExp, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the single-
+precision floating-point values `a' and `b'.  If `zSign' is true, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+{
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    int16 expDiff;
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 7;  /* make room below the fraction; the implicit bit is then 0x40000000 */
+    bSig <<= 7;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0xFF ) {  /* equal exponents, both all ones */
+        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
+        float_raise( float_flag_invalid );  /* neither is a NaN, so this is infinity minus infinity */
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {  /* both exponent fields zero: continue with exponent 1 */
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );  /* equal magnitudes: zero, negative only when rounding down */
+ bExpBigger:
+    if ( bExp == 0xFF ) {
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign ^ 1, 0xFF, 0 );  /* b is infinite: infinity with the sign flipped */
+    }
+    if ( aExp == 0 ) {  /* a has no implicit bit; shift by one place less */
+        ++expDiff;
+    }
+    else {
+        aSig |= 0x40000000;
+    }
+    shift32RightJamming( aSig, - expDiff, &aSig );  /* align a to b's exponent */
+    bSig |= 0x40000000;
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;  /* b has the larger magnitude, so the result sign flips */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0xFF ) {
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {  /* b has no implicit bit; shift by one place less */
+        --expDiff;
+    }
+    else {
+        bSig |= 0x40000000;
+    }
+    shift32RightJamming( bSig, expDiff, &bSig );  /* align b to a's exponent */
+    aSig |= 0x40000000;
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;  /* exponent is decremented before being handed to normalizeRoundAndPackFloat32 */
+    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the single-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_add( float32 a, float32 b )
+{
+    flag aSign = extractFloat32Sign( a );
+    flag bSign = extractFloat32Sign( b );
+    /* Opposite signs: the sum is a difference of magnitudes. */
+    if ( aSign != bSign ) return subFloat32Sigs( a, b, aSign );
+    return addFloat32Sigs( a, b, aSign );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sub( float32 a, float32 b )
+{
+    flag aSign = extractFloat32Sign( a );
+    flag bSign = extractFloat32Sign( b );
+    /* Opposite signs: the difference is a sum of magnitudes. */
+    if ( aSign != bSign ) return addFloat32Sigs( a, b, aSign );
+    return subFloat32Sigs( a, b, aSign );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the single-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_mul( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig;
+    bits64 zSig64;
+    bits32 zSig;
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    bSign = extractFloat32Sign( b );
+    bExp = extractFloat32Exp( b );
+    bSig = extractFloat32Frac( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0xFF ) {
+        /* a is a NaN or an infinity */
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        if ( ! ( bExp | bSig ) ) goto invalid;    /* infinity times zero */
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( bExp == 0xFF ) {
+        /* b is a NaN or an infinity */
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        if ( ! ( aExp | aSig ) ) goto invalid;    /* zero times infinity */
+        return packFloat32( zSign, 0xFF, 0 );
+    }
+    if ( ! aExp ) {
+        if ( ! aSig ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( ! bExp ) {
+        if ( ! bSig ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x7F;
+    aSig = ( aSig | 0x00800000 )<<7;
+    bSig = ( bSig | 0x00800000 )<<8;
+    /* Full 64-bit product, then keep the upper 32 bits with a sticky bit. */
+    zSig64 = ( (bits64) aSig ) * bSig;
+    shift64RightJamming( zSig64, 32, &zSig64 );
+    zSig = zSig64;
+    if ( (sbits32) ( zSig<<1 ) >= 0 ) {
+        /* The product came out one position low; shift it up. */
+        zSig <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig );
+ invalid:
+    float_raise( float_flag_invalid );
+    return float32_default_nan;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the single-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_div( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits32 aSig, bSig, zSig;
+    bits64 dividend;
+    aSign = extractFloat32Sign( a );
+    aExp = extractFloat32Exp( a );
+    aSig = extractFloat32Frac( a );
+    bSign = extractFloat32Sign( b );
+    bExp = extractFloat32Exp( b );
+    bSig = extractFloat32Frac( b );
+    zSign = aSign ^ bSign;
+    if ( aExp == 0xFF ) {
+        /* a is a NaN or an infinity */
+        if ( aSig ) return propagateFloat32NaN( a, b );
+        if ( bExp != 0xFF ) return packFloat32( zSign, 0xFF, 0 );
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        goto invalid;    /* infinity divided by infinity */
+    }
+    if ( bExp == 0xFF ) {
+        /* b is a NaN or an infinity; finite / infinity gives a signed zero */
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return packFloat32( zSign, 0, 0 );
+    }
+    if ( ! bExp ) {
+        if ( ! bSig ) {
+            if ( ! ( aExp | aSig ) ) goto invalid;    /* zero divided by zero */
+            float_raise( float_flag_divbyzero );
+            return packFloat32( zSign, 0xFF, 0 );
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( ! aExp ) {
+        if ( ! aSig ) return packFloat32( zSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x7D;
+    aSig = ( aSig | 0x00800000 )<<7;
+    bSig = ( bSig | 0x00800000 )<<8;
+    if ( bSig <= ( aSig + aSig ) ) {
+        /* Halve the dividend so the quotient fits in 32 bits. */
+        aSig >>= 1;
+        ++zExp;
+    }
+    dividend = ( (bits64) aSig )<<32;
+    zSig = dividend / bSig;
+    if ( ( zSig & 0x3F ) == 0 ) {
+        /* Low quotient bits are all zero: set a sticky bit if the division
+           left a remainder. */
+        zSig |= ( ( (bits64) bSig ) * zSig != dividend );
+    }
+    return roundAndPackFloat32( zSign, zExp, zSig );
+ invalid:
+    float_raise( float_flag_invalid );
+    return float32_default_nan;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the single-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_rem( float32 a, float32 b )
+{
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits32 aSig, bSig;
+    bits32 q;
+    bits64 aSig64, bSig64, q64;
+    bits32 alternateASig;
+    sbits32 sigMean;
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    bSig = extractFloat32Frac( b );
+    bExp = extractFloat32Exp( b );
+    bSign = extractFloat32Sign( b );
+    if ( aExp == 0xFF ) {  /* a is a NaN or an infinity */
+        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+            return propagateFloat32NaN( a, b );
+        }
+        float_raise( float_flag_invalid );  /* remainder of an infinity is invalid */
+        return float32_default_nan;
+    }
+    if ( bExp == 0xFF ) {  /* b is a NaN or an infinity */
+        if ( bSig ) return propagateFloat32NaN( a, b );
+        return a;  /* finite a with infinite b: a is returned unchanged */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {  /* b is zero: invalid */
+            float_raise( float_flag_invalid );
+            return float32_default_nan;
+        }
+        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;  /* a is zero: returned unchanged */
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig |= 0x00800000;  /* restore the implicit leading bits */
+    bSig |= 0x00800000;
+    if ( expDiff < 32 ) {  /* quotient bits fit in 32 bits: single division step */
+        aSig <<= 8;
+        bSig <<= 8;
+        if ( expDiff < 0 ) {
+            if ( expDiff < -1 ) return a;  /* a's exponent is at least 2 below b's: a is returned as the remainder */
+            aSig >>= 1;
+        }
+        q = ( bSig <= aSig );
+        if ( q ) aSig -= bSig;
+        if ( 0 < expDiff ) {
+            q = ( ( (bits64) aSig )<<32 ) / bSig;
+            q >>= 32 - expDiff;
+            bSig >>= 2;
+            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+        }
+        else {
+            aSig >>= 2;
+            bSig >>= 2;
+        }
+    }
+    else {  /* large exponent gap: reduce in 62-bit steps using 64-bit quotient estimates */
+        if ( bSig <= aSig ) aSig -= bSig;
+        aSig64 = ( (bits64) aSig )<<40;
+        bSig64 = ( (bits64) bSig )<<40;
+        expDiff -= 64;
+        while ( 0 < expDiff ) {
+            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+            q64 = ( 2 < q64 ) ? q64 - 2 : 0;  /* lower the estimate by 2, clamping at 0; presumably guards against an over-estimate - confirm against estimateDiv128To64 */
+            aSig64 = - ( ( bSig * q64 )<<38 );
+            expDiff -= 62;
+        }
+        expDiff += 64;
+        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+        q = q64>>( 64 - expDiff );
+        bSig <<= 6;
+        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    do {  /* keep subtracting b until the remainder goes negative, remembering the last non-negative value */
+        alternateASig = aSig;
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits32) aSig );
+    sigMean = aSig + alternateASig;  /* sign tells which of the two candidates is smaller in magnitude */
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {  /* pick the non-negative candidate when it is closer, or on a tie when q is odd */
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits32) aSig < 0 );
+    if ( zSign ) aSig = - aSig;  /* work with the magnitude; the sign is folded into aSign below */
+    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the single-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float32_sqrt( float32 a )
+{   /* Square root of `a'.  NaN, infinity, negative inputs and zero are
+     * handled first; otherwise the result significand is built from the
+     * estimateSqrt32 estimate, corrected exactly when its low bits show
+     * that the rounding decision could be affected. */
+    flag aSign;
+    int16 aExp, zExp;
+    bits32 aSig, zSig;
+    bits64 rem, term;
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    if ( aExp == 0xFF ) {                /* exponent field all ones: NaN or infinity */
+        if ( aSig ) return propagateFloat32NaN( a, 0 );   /* nonzero fraction: NaN */
+        if ( ! aSign ) return a;         /* +infinity is returned unchanged */
+        float_raise( float_flag_invalid );   /* -infinity */
+        return float32_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;   /* -0 is returned unchanged */
+        float_raise( float_flag_invalid );      /* any other negative input */
+        return float32_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;       /* +0 */
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );   /* subnormal input */
+    }
+    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;   /* halve the exponent with the 0x7F bias removed, then add 0x7E */
+    aSig = ( aSig | 0x00800000 )<<8;     /* set bit 23, then move it up to bit 31 */
+    zSig = estimateSqrt32( aExp, aSig ) + 2;
+    if ( ( zSig & 0x7F ) <= 5 ) {        /* low 7 bits are small: correct the estimate exactly */
+        if ( zSig < 2 ) {                /* only reachable if the "+ 2" above wrapped around */
+            zSig = 0xFFFFFFFF;
+        }
+        else {
+            aSig >>= aExp & 1;           /* shift right by one when aExp is odd */
+            term = ( (bits64) zSig ) * zSig;
+            rem = ( ( (bits64) aSig )<<32 ) - term;   /* aSig * 2^32 - zSig^2 */
+            while ( (sbits64) rem < 0 ) {   /* estimate too large: step zSig down */
+                --zSig;
+                rem += ( ( (bits64) zSig )<<1 ) | 1;   /* (z+1)^2 - z^2 = 2z + 1 */
+            }
+            zSig |= ( rem != 0 );        /* record a nonzero remainder in the low bit */
+        }
+    }
+    shift32RightJamming( zSig, 1, &zSig );
+    return roundAndPackFloat32( 0, zExp, zSig );   /* sign argument is always 0 here */
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq( float32 a, float32 b )
+{
+    flag sawNaN;
+    /* An operand is a NaN when its exponent field is 0xFF and its
+       fraction field is nonzero. */
+    sawNaN =    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+             || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
+    if ( sawNaN ) {
+        /* Only a signaling NaN raises the invalid exception here. */
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* Differing encodings are still equal when ( a | b ) has no bit set
+       below the sign bit. */
+    return ( (bits32) ( ( a | b )<<1 ) == 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le( float32 a, float32 b )
+{
+    flag signA, signB, sawNaN;
+    /* Any NaN operand (exponent 0xFF, nonzero fraction) raises invalid. */
+    sawNaN =    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+             || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
+    if ( sawNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    signA = extractFloat32Sign( a );
+    signB = extractFloat32Sign( b );
+    if ( signA != signB ) {
+        /* Signs differ: true when `a' carries the sign bit, or when
+           ( a | b ) has no bit set below the sign bit. */
+        if ( signA ) return 1;
+        return ( (bits32) ( ( a | b )<<1 ) == 0 );
+    }
+    if ( a == b ) return 1;
+    /* Same sign: the raw unsigned comparison is inverted when the sign
+       bit is set. */
+    return ( signA ^ ( a < b ) ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt( float32 a, float32 b )
+{
+    flag signA, signB, sawNaN;
+    /* Any NaN operand (exponent 0xFF, nonzero fraction) raises invalid. */
+    sawNaN =    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+             || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
+    if ( sawNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    signA = extractFloat32Sign( a );
+    signB = extractFloat32Sign( b );
+    if ( signA != signB ) {
+        /* Signs differ: true only when `a' carries the sign bit and
+           ( a | b ) has some bit set below the sign bit. */
+        if ( ! signA ) return 0;
+        return ( (bits32) ( ( a | b )<<1 ) != 0 );
+    }
+    if ( a == b ) return 0;
+    /* Same sign: the raw unsigned comparison is inverted when the sign
+       bit is set. */
+    return ( signA ^ ( a < b ) ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The invalid exception is raised
+if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_eq_signaling( float32 a, float32 b )
+{
+    flag sawNaN;
+    /* Any NaN operand (exponent 0xFF, nonzero fraction) raises invalid,
+       whether or not it is a signaling NaN. */
+    sawNaN =    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+             || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
+    if ( sawNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* Differing encodings are still equal when ( a | b ) has no bit set
+       below the sign bit. */
+    return ( (bits32) ( ( a | b )<<1 ) == 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_le_quiet( float32 a, float32 b )
+{
+    flag signA, signB, sawNaN;
+    /* An operand is a NaN when its exponent field is 0xFF and its
+       fraction field is nonzero. */
+    sawNaN =    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+             || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
+    if ( sawNaN ) {
+        /* Only a signaling NaN raises the invalid exception here. */
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signA = extractFloat32Sign( a );
+    signB = extractFloat32Sign( b );
+    if ( signA != signB ) {
+        /* Signs differ: true when `a' carries the sign bit, or when
+           ( a | b ) has no bit set below the sign bit. */
+        if ( signA ) return 1;
+        return ( (bits32) ( ( a | b )<<1 ) == 0 );
+    }
+    if ( a == b ) return 1;
+    /* Same sign: the raw unsigned comparison is inverted when the sign
+       bit is set. */
+    return ( signA ^ ( a < b ) ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the single-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float32_lt_quiet( float32 a, float32 b )
+{
+    flag signA, signB, sawNaN;
+    /* An operand is a NaN when its exponent field is 0xFF and its
+       fraction field is nonzero. */
+    sawNaN =    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+             || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
+    if ( sawNaN ) {
+        /* Only a signaling NaN raises the invalid exception here. */
+        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signA = extractFloat32Sign( a );
+    signB = extractFloat32Sign( b );
+    if ( signA != signB ) {
+        /* Signs differ: true only when `a' carries the sign bit and
+           ( a | b ) has some bit set below the sign bit. */
+        if ( ! signA ) return 0;
+        return ( (bits32) ( ( a | b )<<1 ) != 0 );
+    }
+    if ( a == b ) return 0;
+    /* Same sign: the raw unsigned comparison is inverted when the sign
+       bit is set. */
+    return ( signA ^ ( a < b ) ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32( float64 a )
+{
+    flag sign;
+    int16 exponent, rightShift;
+    bits64 significand;
+    /* Split the operand into its fraction, exponent and sign fields. */
+    significand = extractFloat64Frac( a );
+    exponent = extractFloat64Exp( a );
+    sign = extractFloat64Sign( a );
+    /* A NaN (exponent 0x7FF, nonzero fraction) is passed on as positive. */
+    if ( ( exponent == 0x7FF ) && ( significand != 0 ) ) {
+        sign = 0;
+    }
+    /* Nonzero exponent field: set bit 52, just above the fraction field. */
+    if ( exponent != 0 ) {
+        significand |= LIT64( 0x0010000000000000 );
+    }
+    rightShift = 0x42C - exponent;
+    if ( rightShift > 0 ) {
+        shift64RightJamming( significand, rightShift, &significand );
+    }
+    /* Rounding and range handling are left to roundAndPackInt32. */
+    return roundAndPackInt32( sign, significand );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest integer with the same sign as `a' is
+returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_int32_round_to_zero( float64 a )
+{   /* Truncating conversion.  Inputs whose exponent is too large (this
+     * includes NaN and infinity) set the invalid flag and return
+     * 0x80000000 or 0x7FFFFFFF; discarded fraction bits set the inexact
+     * flag. */
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    shiftCount = 0x433 - aExp;           /* right shift applied to the significand below */
+    if ( shiftCount < 21 ) {             /* aExp > 0x41E: too large, or NaN/infinity */
+        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;   /* NaN takes the positive result */
+        goto invalid;
+    }
+    else if ( 52 < shiftCount ) {        /* aExp < 0x3FF: every significand bit would be shifted out */
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;   /* nonzero input truncated to 0 */
+        return 0;
+    }
+    aSig |= LIT64( 0x0010000000000000 ); /* set bit 52, just above the fraction field */
+    savedASig = aSig;
+    aSig >>= shiftCount;                 /* drop the fraction bits */
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {           /* sign of z disagrees with the sign of `a' */
+ invalid:
+        float_exception_flags |= float_flag_invalid;
+        return aSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {   /* the right shift lost nonzero bits */
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement unsigned integer format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic---which means in particular that the conversion is rounded
+according to the current rounding mode.  If `a' is a NaN, the largest
+positive integer is returned.  Otherwise, if the conversion overflows, the
+largest positive integer is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_uint32( float64 a )
+{   /* NOTE(review): the sign of `a' is never read here (aSign is fixed at
+     * 0), and the result is produced by roundAndPackInt32 and returned as
+     * int32.  Confirm that this matches the unsigned conversion that
+     * callers expect, in particular for negative inputs and for values
+     * above 0x7FFFFFFF. */
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = 0; //extractFloat64Sign( a );
+    //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );   /* nonzero exponent field: set bit 52 */
+    shiftCount = 0x42C - aExp;
+    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+    return roundAndPackInt32( aSign, aSig );   /* always called with a zero sign argument */
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the 32-bit two's complement integer format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic, except that the conversion is always rounded toward zero.  If
+`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
+conversion overflows, the largest positive integer is returned.
+-------------------------------------------------------------------------------
+*/
+int32 float64_to_uint32_round_to_zero( float64 a )
+{   /* NOTE(review): the statements below are the same as those of
+     * float64_to_int32_round_to_zero: the sign of `a' is honoured, so a
+     * negative input yields a negative int32 and overflow of a negative
+     * input returns 0x80000000.  Confirm whether unsigned semantics were
+     * intended. */
+    flag aSign;
+    int16 aExp, shiftCount;
+    bits64 aSig, savedASig;
+    int32 z;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    shiftCount = 0x433 - aExp;           /* right shift applied to the significand below */
+    if ( shiftCount < 21 ) {             /* aExp > 0x41E: too large, or NaN/infinity */
+        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;   /* NaN takes the positive result */
+        goto invalid;
+    }
+    else if ( 52 < shiftCount ) {        /* aExp < 0x3FF: every significand bit would be shifted out */
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;   /* nonzero input truncated to 0 */
+        return 0;
+    }
+    aSig |= LIT64( 0x0010000000000000 ); /* set bit 52, just above the fraction field */
+    savedASig = aSig;
+    aSig >>= shiftCount;                 /* drop the fraction bits */
+    z = aSig;
+    if ( aSign ) z = - z;
+    if ( ( z < 0 ) ^ aSign ) {           /* sign of z disagrees with the sign of `a' */
+ invalid:
+        float_exception_flags |= float_flag_invalid;
+        return aSign ? 0x80000000 : 0x7FFFFFFF;
+    }
+    if ( ( aSig<<shiftCount ) != savedASig ) {   /* the right shift lost nonzero bits */
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the single-precision floating-point format.  The conversion is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 float64_to_float32( float64 a )
+{
+    flag sign;
+    int16 exponent;
+    bits64 wideSig;
+    bits32 narrowSig;
+    /* Split the operand into its fraction, exponent and sign fields. */
+    wideSig = extractFloat64Frac( a );
+    exponent = extractFloat64Exp( a );
+    sign = extractFloat64Sign( a );
+    if ( exponent == 0x7FF ) {
+        /* Nonzero fraction: a NaN, converted through the common NaN form. */
+        if ( wideSig != 0 ) {
+            return commonNaNToFloat32( float64ToCommonNaN( a ) );
+        }
+        /* Otherwise an infinity of the same sign. */
+        return packFloat32( sign, 0xFF, 0 );
+    }
+    /* Narrow the fraction by 22 bits before storing it in 32 bits. */
+    shift64RightJamming( wideSig, 22, &wideSig );
+    narrowSig = wideSig;
+    if ( ( exponent != 0 ) || ( narrowSig != 0 ) ) {
+        narrowSig |= 0x40000000;
+        exponent -= 0x381;
+    }
+    return roundAndPackFloat32( sign, exponent, narrowSig );
+}
+#ifdef FLOATX80
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the double-precision floating-point value
+`a' to the extended double-precision floating-point format.  The conversion
+is performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 float64_to_floatx80( float64 a )
+{
+    flag sign;
+    int16 exponent;
+    bits64 fraction, wideSig;
+    /* Split the operand into its fraction, exponent and sign fields. */
+    fraction = extractFloat64Frac( a );
+    exponent = extractFloat64Exp( a );
+    sign = extractFloat64Sign( a );
+    if ( exponent == 0x7FF ) {
+        /* Nonzero fraction: a NaN, converted through the common NaN form. */
+        if ( fraction != 0 ) {
+            return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+        }
+        /* Otherwise an infinity of the same sign. */
+        return packFloatx80( sign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( exponent == 0 ) {
+        /* Zero keeps its sign; a subnormal is normalized first. */
+        if ( fraction == 0 ) {
+            return packFloatx80( sign, 0, 0 );
+        }
+        normalizeFloat64Subnormal( fraction, &exponent, &fraction );
+    }
+    /* Set bit 52 above the fraction, then move it up to bit 63. */
+    wideSig = ( fraction | LIT64( 0x0010000000000000 ) )<<11;
+    return packFloatx80( sign, exponent + 0x3C00, wideSig );
+}
+#endif
+/*
+-------------------------------------------------------------------------------
+Rounds the double-precision floating-point value `a' to an integer, and
+returns the result as a double-precision floating-point value.  The
+operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_round_to_int( float64 a )
+{   /* Rounds `a' to an integral value under float_rounding_mode, working
+     * directly on the raw encoding.  The inexact flag is set whenever the
+     * returned value differs from `a'. */
+    flag aSign;
+    int16 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    float64 z;
+    aExp = extractFloat64Exp( a );
+    if ( 0x433 <= aExp ) {               /* no bits below the units place, or NaN/infinity */
+        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {   /* NaN */
+            return propagateFloat64NaN( a, a );
+        }
+        return a;
+    }
+    if ( aExp <= 0x3FE ) {               /* exponent below 0x3FF: magnitude under 1 */
+        if ( (bits64) ( a<<1 ) == 0 ) return a;   /* no bit set below the sign bit: +0 or -0 */
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloat64Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {   /* exponent 0x3FE with a nonzero fraction */
+                return packFloat64( aSign, 0x3FF, 0 );   /* exponent 0x3FF, zero fraction, sign of `a' */
+            }
+            break;
+         case float_round_down:
+            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;   /* negative input: sign set, exponent 0x3FF; else all zero bits */
+         case float_round_up:
+            return
+            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );   /* negative input: sign bit only; else exponent 0x3FF */
+        }
+        return packFloat64( aSign, 0, 0 );   /* zero with the sign of `a' */
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x433 - aExp;        /* lowest bit that is kept */
+    roundBitsMask = lastBitMask - 1;     /* all bits below it */
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z += lastBitMask>>1;             /* add half of the lowest kept bit */
+        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;   /* no low bits left after adding: clear the lowest kept bit */
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {   /* sign clear with round_up, or sign set with any other mode reaching here */
+            z += roundBitsMask;          /* carry into the kept bits if any low bit is set */
+        }
+    }
+    z &= ~ roundBitsMask;                /* clear the discarded bits */
+    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the double-precision
+floating-point values `a' and `b'.  If `zSign' is true, the sum is negated
+before being returned.  `zSign' is ignored if the result is a NaN.  The
+addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+{   /* Adds the magnitudes of `a' and `b' and hands the sum, with sign
+     * zSign, to roundAndPackFloat64.  The operand with the smaller
+     * exponent is shifted right (with jamming) to align it first. */
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 9;                          /* fraction now occupies bits 9..60 */
+    bSig <<= 9;
+    if ( 0 < expDiff ) {                 /* a has the larger exponent */
+        if ( aExp == 0x7FF ) {           /* a is NaN or infinity */
+            if ( aSig ) return propagateFloat64NaN( a, b );
+            return a;
+        }
+        if ( bExp == 0 ) {               /* b has a zero exponent field: no leading bit to add */
+            --expDiff;
+        }
+        else {
+            bSig |= LIT64( 0x2000000000000000 );   /* set bit 61, just above the shifted fraction */
+        }
+        shift64RightJamming( bSig, expDiff, &bSig );   /* align b to a */
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {            /* b has the larger exponent */
+        if ( bExp == 0x7FF ) {           /* b is NaN or infinity */
+            if ( bSig ) return propagateFloat64NaN( a, b );
+            return packFloat64( zSign, 0x7FF, 0 );   /* infinity with sign zSign */
+        }
+        if ( aExp == 0 ) {               /* a has a zero exponent field: no leading bit to add */
+            ++expDiff;
+        }
+        else {
+            aSig |= LIT64( 0x2000000000000000 );   /* set bit 61, just above the shifted fraction */
+        }
+        shift64RightJamming( aSig, - expDiff, &aSig );   /* align a to b */
+        zExp = bExp;
+    }
+    else {                               /* equal exponents */
+        if ( aExp == 0x7FF ) {
+            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+            return a;
+        }
+        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );   /* both exponent fields zero: add fractions directly */
+        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;   /* bit 62 = two bit-61 leading ones */
+        zExp = aExp;
+        goto roundAndPack;
+    }
+    aSig |= LIT64( 0x2000000000000000 ); /* bit 61 for the larger-exponent operand, contributed to the sum through aSig */
+    zSig = ( aSig + bSig )<<1;
+    --zExp;
+    if ( (sbits64) zSig < 0 ) {          /* bit 62 of the sum was set: undo the shift and the decrement */
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+ roundAndPack:
+    return roundAndPackFloat64( zSign, zExp, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the double-
+precision floating-point values `a' and `b'.  If `zSign' is true, the
+difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+{   /* Subtracts the magnitude of `b' from that of `a'.  The smaller
+     * magnitude is always subtracted from the larger; zSign is flipped when
+     * `b' is the larger one.  The difference is handed to
+     * normalizeRoundAndPackFloat64. */
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    int16 expDiff;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    expDiff = aExp - bExp;
+    aSig <<= 10;                         /* fraction now occupies bits 10..61 */
+    bSig <<= 10;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FF ) {               /* equal exponents, both NaN or infinity */
+        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
+        float_raise( float_flag_invalid );   /* both fractions zero: both operands infinite */
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {                   /* both exponent fields zero: use exponent 1 for both */
+        aExp = 1;
+        bExp = 1;
+    }
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );   /* equal magnitudes: zero, sign set only in round-down mode */
+ bExpBigger:
+    if ( bExp == 0x7FF ) {               /* b is NaN or infinity */
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign ^ 1, 0x7FF, 0 );   /* infinity with the opposite of zSign */
+    }
+    if ( aExp == 0 ) {                   /* a has a zero exponent field: no leading bit to add */
+        ++expDiff;
+    }
+    else {
+        aSig |= LIT64( 0x4000000000000000 );   /* set bit 62, just above the shifted fraction */
+    }
+    shift64RightJamming( aSig, - expDiff, &aSig );   /* align a to b */
+    bSig |= LIT64( 0x4000000000000000 );
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;                          /* b was larger: flip the result sign */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FF ) {               /* a is NaN or infinity */
+        if ( aSig ) return propagateFloat64NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {                   /* b has a zero exponent field: no leading bit to add */
+        --expDiff;
+    }
+    else {
+        bSig |= LIT64( 0x4000000000000000 );   /* set bit 62, just above the shifted fraction */
+    }
+    shift64RightJamming( bSig, expDiff, &bSig );   /* align b to a */
+    aSig |= LIT64( 0x4000000000000000 );
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;                              /* exponent passed on is one below the larger operand's */
+    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the double-precision floating-point values `a'
+and `b'.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_add( float64 a, float64 b )
+{
+    flag signA, signB;
+    signA = extractFloat64Sign( a );
+    signB = extractFloat64Sign( b );
+    /* Opposite signs: the sum is a difference of magnitudes. */
+    if ( signA != signB ) {
+        return subFloat64Sigs( a, b, signA );
+    }
+    /* Same sign: add the magnitudes and keep that sign. */
+    return addFloat64Sigs( a, b, signA );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sub( float64 a, float64 b )
+{
+    flag signA, signB;
+    signA = extractFloat64Sign( a );
+    signB = extractFloat64Sign( b );
+    /* Opposite signs: the difference is a sum of magnitudes. */
+    if ( signA != signB ) {
+        return addFloat64Sigs( a, b, signA );
+    }
+    /* Same sign: subtract the magnitudes. */
+    return subFloat64Sigs( a, b, signA );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the double-precision floating-point values
+`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_mul( float64 a, float64 b )
+{   /* Product of `a' and `b'.  NaN, infinity and zero operands are handled
+     * first; otherwise the significands are multiplied to 128 bits and the
+     * result is handed to roundAndPackFloat64. */
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;               /* result sign */
+    if ( aExp == 0x7FF ) {               /* a is NaN or infinity */
+        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {   /* either operand is a NaN */
+            return propagateFloat64NaN( a, b );
+        }
+        if ( ( bExp | bSig ) == 0 ) {    /* infinity times zero */
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );   /* infinity */
+    }
+    if ( bExp == 0x7FF ) {               /* b is NaN or infinity, a is finite */
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) {    /* zero times infinity */
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );   /* infinity */
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );   /* a is zero */
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );   /* b is zero */
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FF;          /* sum of the exponent fields minus 0x3FF */
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;   /* set bit 52, then move it up to bit 62 */
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;   /* set bit 52, then move it up to bit 63 */
+    mul64To128( aSig, bSig, &zSig0, &zSig1 );   /* presumably zSig0 = high half, zSig1 = low half; confirm */
+    zSig0 |= ( zSig1 != 0 );             /* fold any nonzero bits of zSig1 into the low bit of zSig0 */
+    if ( 0 <= (sbits64) ( zSig0<<1 ) ) { /* bit 62 of zSig0 is clear: shift left one place */
+        zSig0 <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig0 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the double-precision floating-point value `a'
+by the corresponding value `b'.  The operation is performed according to
+the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_div( float64 a, float64 b )
+{   /* Quotient a / b.  NaN, infinity and zero operands are handled first;
+     * otherwise the quotient significand comes from estimateDiv128To64 and
+     * is corrected exactly when its low bits are close to zero. */
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig;
+    bits64 rem0, rem1;
+    bits64 term0, term1;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );
+    zSign = aSign ^ bSign;               /* result sign */
+    if ( aExp == 0x7FF ) {               /* a is NaN or infinity */
+        if ( aSig ) return propagateFloat64NaN( a, b );
+        if ( bExp == 0x7FF ) {
+            if ( bSig ) return propagateFloat64NaN( a, b );
+            float_raise( float_flag_invalid );   /* infinity / infinity */
+            return float64_default_nan;
+        }
+        return packFloat64( zSign, 0x7FF, 0 );   /* infinity / finite */
+    }
+    if ( bExp == 0x7FF ) {               /* b is NaN or infinity, a is finite */
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        return packFloat64( zSign, 0, 0 );   /* finite / infinity: zero */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {               /* b is zero */
+            if ( ( aExp | aSig ) == 0 ) {   /* zero / zero */
+                float_raise( float_flag_invalid );
+                return float64_default_nan;
+            }
+            float_raise( float_flag_divbyzero );   /* nonzero / zero */
+            return packFloat64( zSign, 0x7FF, 0 );
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );   /* a is zero */
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FD;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;   /* set bit 52, then move it up to bit 62 */
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;   /* set bit 52, then move it up to bit 63 */
+    if ( bSig <= ( aSig + aSig ) ) {     /* a's significand is not smaller than b's: halve aSig, bump the exponent */
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = estimateDiv128To64( aSig, 0, bSig );
+    if ( ( zSig & 0x1FF ) <= 2 ) {       /* low 9 bits are at most 2: correct the estimate exactly */
+        mul64To128( bSig, zSig, &term0, &term1 );
+        sub128( aSig, 0, term0, term1, &rem0, &rem1 );   /* remainder = aSig:0 - bSig * zSig */
+        while ( (sbits64) rem0 < 0 ) {   /* estimate too large: step zSig down */
+            --zSig;
+            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+        }
+        zSig |= ( rem1 != 0 );           /* record a nonzero remainder in the low bit */
+    }
+    return roundAndPackFloat64( zSign, zExp, zSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the double-precision floating-point value `a'
+with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_rem( float64 a, float64 b )
+{   /* Remainder of `a' with respect to `b'.  Special operands are handled
+     * first; then the quotient is developed in chunks using
+     * estimateDiv128To64, and finally the candidate remainder of smaller
+     * magnitude is chosen (a tie is decided by the low bit of q). */
+    flag aSign, bSign, zSign;
+    int16 aExp, bExp, expDiff;
+    bits64 aSig, bSig;
+    bits64 q, alternateASig;
+    sbits64 sigMean;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    bSig = extractFloat64Frac( b );
+    bExp = extractFloat64Exp( b );
+    bSign = extractFloat64Sign( b );     /* assigned here but not read again in this function */
+    if ( aExp == 0x7FF ) {               /* a is NaN or infinity */
+        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {   /* either operand is a NaN */
+            return propagateFloat64NaN( a, b );
+        }
+        float_raise( float_flag_invalid );   /* a is infinite */
+        return float64_default_nan;
+    }
+    if ( bExp == 0x7FF ) {               /* b is NaN or infinity, a is finite */
+        if ( bSig ) return propagateFloat64NaN( a, b );
+        return a;                        /* infinite b: a is returned unchanged */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {               /* b is zero */
+            float_raise( float_flag_invalid );
+            return float64_default_nan;
+        }
+        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return a;       /* a is zero */
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    expDiff = aExp - bExp;
+    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;   /* set bit 52, then move it up to bit 63 */
+    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+    if ( expDiff < 0 ) {
+        if ( expDiff < -1 ) return a;    /* a's exponent is at least two below b's: a is returned unchanged */
+        aSig >>= 1;                      /* expDiff == -1 */
+    }
+    q = ( bSig <= aSig );                /* 1 if bSig can be subtracted once */
+    if ( q ) aSig -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) {              /* exponent difference still above 64: consume 62 per pass */
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;       /* lower the estimate by 2, clamping at 0 */
+        aSig = - ( ( bSig>>2 ) * q );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig, 0, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;       /* lower the estimate by 2, clamping at 0 */
+        q >>= 64 - expDiff;              /* keep only the top expDiff bits of the estimate */
+        bSig >>= 2;
+        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+    }
+    else {
+        aSig >>= 2;
+        bSig >>= 2;
+    }
+    do {                                 /* subtract bSig until the remainder becomes negative */
+        alternateASig = aSig;            /* value before this subtraction */
+        ++q;
+        aSig -= bSig;
+    } while ( 0 <= (sbits64) aSig );
+    sigMean = aSig + alternateASig;      /* its sign tells which candidate has the smaller magnitude */
+    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {   /* alternate is smaller, or a tie with odd q */
+        aSig = alternateASig;
+    }
+    zSign = ( (sbits64) aSig < 0 );
+    if ( zSign ) aSig = - aSig;          /* keep the magnitude; the sign goes into the result sign below */
+    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the double-precision floating-point value `a'.
+The operation is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 float64_sqrt( float64 a )
+{   /* Square root of `a'.  NaN, infinity, negative inputs and zero are
+     * handled first; otherwise a 32-bit estimate from estimateSqrt32 is
+     * widened using estimateDiv128To64 and corrected exactly when its low
+     * bits show that the rounding decision could be affected. */
+    flag aSign;
+    int16 aExp, zExp;
+    bits64 aSig, zSig;
+    bits64 rem0, rem1, term0, term1; //, shiftedRem;
+    //float64 z;
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    if ( aExp == 0x7FF ) {               /* exponent field all ones: NaN or infinity */
+        if ( aSig ) return propagateFloat64NaN( a, a );   /* nonzero fraction: NaN */
+        if ( ! aSign ) return a;         /* +infinity is returned unchanged */
+        float_raise( float_flag_invalid );   /* -infinity */
+        return float64_default_nan;
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig ) == 0 ) return a;   /* -0 is returned unchanged */
+        float_raise( float_flag_invalid );      /* any other negative input */
+        return float64_default_nan;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return 0;       /* +0 */
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );   /* subnormal input */
+    }
+    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;   /* halve the exponent with the 0x3FF bias removed, then add 0x3FE */
+    aSig |= LIT64( 0x0010000000000000 ); /* set bit 52, just above the fraction field */
+    zSig = estimateSqrt32( aExp, aSig>>21 );   /* estimate from the top 32 bits (bit 52 moved to bit 31) */
+    zSig <<= 31;
+    aSig <<= 9 - ( aExp & 1 );           /* shift by 9, or by 8 when aExp is odd */
+    zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;   /* refine: quotient of aSig by the estimate, plus the estimate */
+    if ( ( zSig & 0x3FF ) <= 5 ) {       /* low 10 bits are small: correct the estimate exactly */
+        if ( zSig < 2 ) {                /* only reachable if the sum above wrapped around */
+            zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
+        }
+        else {
+            aSig <<= 2;
+            mul64To128( zSig, zSig, &term0, &term1 );
+            sub128( aSig, 0, term0, term1, &rem0, &rem1 );   /* remainder = aSig:0 - zSig^2 */
+            while ( (sbits64) rem0 < 0 ) {   /* estimate too large: step zSig down */
+                --zSig;
+                shortShift128Left( 0, zSig, 1, &term0, &term1 );
+                term1 |= 1;              /* term = 2 * zSig + 1 = (z+1)^2 - z^2 */
+                add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+            }
+            zSig |= ( ( rem0 | rem1 ) != 0 );   /* record a nonzero remainder in the low bit */
+        }
+    }
+    shift64RightJamming( zSig, 1, &zSig );
+    return roundAndPackFloat64( 0, zExp, zSig );   /* sign argument is always 0 here */
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq( float64 a, float64 b )
+{
+    flag sawNaN;
+    /* A NaN has the maximum exponent together with a non-zero fraction. */
+    sawNaN =
+        ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
+    if ( ! sawNaN ) {
+        sawNaN =
+               ( extractFloat64Exp( b ) == 0x7FF )
+            && ( extractFloat64Frac( b ) != 0 );
+    }
+    if ( sawNaN ) {
+        /* Unordered operands: only a signaling NaN raises invalid. */
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* +0 and -0 are equal: with the sign bits shifted out, both are zero. */
+    return (bits64) ( ( a | b )<<1 ) == 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_le( float64 a, float64 b )
+{
+    flag aNeg, bNeg;
+    /* Any NaN operand makes the comparison unordered and raises invalid. */
+    if ( ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 ) ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 ) ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNeg = extractFloat64Sign( a );
+    bNeg = extractFloat64Sign( b );
+    if ( aNeg != bNeg ) {
+        /* Signs differ: true when `a' is the negative one, or both are zeros. */
+        if ( aNeg ) return 1;
+        return (bits64) ( ( a | b )<<1 ) == 0;
+    }
+    if ( a == b ) return 1;
+    /* Same sign: compare the bit patterns, reversing the sense for negatives. */
+    return ( aNeg ^ ( a < b ) ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_lt( float64 a, float64 b )
+{
+    flag aNeg, bNeg;
+    /* Any NaN operand makes the comparison unordered and raises invalid. */
+    if ( ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 ) ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( ( extractFloat64Exp( b ) == 0x7FF ) && ( extractFloat64Frac( b ) != 0 ) ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNeg = extractFloat64Sign( a );
+    bNeg = extractFloat64Sign( b );
+    if ( aNeg != bNeg ) {
+        /* Signs differ: true only when `a' is negative and not both are zeros. */
+        if ( ! aNeg ) return 0;
+        return (bits64) ( ( a | b )<<1 ) != 0;
+    }
+    if ( a == b ) return 0;
+    /* Same sign: compare the bit patterns, reversing the sense for negatives. */
+    return ( aNeg ^ ( a < b ) ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is equal to the
+corresponding value `b', and 0 otherwise.  The invalid exception is raised
+if either operand is a NaN.  Otherwise, the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_eq_signaling( float64 a, float64 b )
+{
+    flag sawNaN;
+    /* A NaN has the maximum exponent together with a non-zero fraction. */
+    sawNaN =
+        ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
+    if ( ! sawNaN ) {
+        sawNaN =
+               ( extractFloat64Exp( b ) == 0x7FF )
+            && ( extractFloat64Frac( b ) != 0 );
+    }
+    if ( sawNaN ) {
+        /* Unlike float64_eq, every NaN (quiet or signaling) raises invalid. */
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a == b ) return 1;
+    /* +0 and -0 are equal: with the sign bits shifted out, both are zero. */
+    return (bits64) ( ( a | b )<<1 ) == 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than or
+equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
+cause an exception.  Otherwise, the comparison is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_le_quiet( float64 a, float64 b )
+{
+    flag aNeg, bNeg, sawNaN;
+    /* A NaN has the maximum exponent together with a non-zero fraction. */
+    sawNaN =
+        ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
+    if ( ! sawNaN ) {
+        sawNaN =
+               ( extractFloat64Exp( b ) == 0x7FF )
+            && ( extractFloat64Frac( b ) != 0 );
+    }
+    if ( sawNaN ) {
+        /* Quiet NaNs compare false silently; signaling NaNs raise invalid. */
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aNeg = extractFloat64Sign( a );
+    bNeg = extractFloat64Sign( b );
+    if ( aNeg != bNeg ) {
+        /* Signs differ: true when `a' is the negative one, or both are zeros. */
+        if ( aNeg ) return 1;
+        return (bits64) ( ( a | b )<<1 ) == 0;
+    }
+    if ( a == b ) return 1;
+    /* Same sign: compare the bit patterns, reversing the sense for negatives. */
+    return ( aNeg ^ ( a < b ) ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the double-precision floating-point value `a' is less than
+the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
+exception.  Otherwise, the comparison is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag float64_lt_quiet( float64 a, float64 b )
+{
+    flag aNeg, bNeg, sawNaN;
+    /* A NaN has the maximum exponent together with a non-zero fraction. */
+    sawNaN =
+        ( extractFloat64Exp( a ) == 0x7FF ) && ( extractFloat64Frac( a ) != 0 );
+    if ( ! sawNaN ) {
+        sawNaN =
+               ( extractFloat64Exp( b ) == 0x7FF )
+            && ( extractFloat64Frac( b ) != 0 );
+    }
+    if ( sawNaN ) {
+        /* Quiet NaNs compare false silently; signaling NaNs raise invalid. */
+        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    aNeg = extractFloat64Sign( a );
+    bNeg = extractFloat64Sign( b );
+    if ( aNeg != bNeg ) {
+        /* Signs differ: true only when `a' is negative and not both are zeros. */
+        if ( ! aNeg ) return 0;
+        return (bits64) ( ( a | b )<<1 ) != 0;
+    }
+    if ( a == b ) return 0;
+    /* Same sign: compare the bit patterns, reversing the sense for negatives. */
+    return ( aNeg ^ ( a < b ) ) != 0;
+}
+#ifdef FLOATX80
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the 32-bit two's complement integer format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic---which means in particular that the conversion
+is rounded according to the current rounding mode.  If `a' is a NaN, the
+largest positive integer is returned.  Otherwise, if the conversion
+overflows, the largest integer with the same sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 floatx80_to_int32( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int32 expo = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+    int32 shift;
+    /* A NaN is converted as if it were positive. */
+    if ( ( expo == 0x7FFF ) && ( (bits64) ( sig<<1 ) != 0 ) ) sign = 0;
+    /* Align the significand for rounding; always shift by at least one bit. */
+    shift = 0x4037 - expo;
+    if ( shift < 1 ) shift = 1;
+    shift64RightJamming( sig, shift, &sig );
+    return roundAndPackInt32( sign, sig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the 32-bit two's complement integer format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic, except that the conversion is always rounded
+toward zero.  If `a' is a NaN, the largest positive integer is returned.
+Otherwise, if the conversion overflows, the largest integer with the same
+sign as `a' is returned.
+-------------------------------------------------------------------------------
+*/
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+    flag sign;
+    int32 expo, shift;
+    bits64 sig, truncated;
+    int32 result;
+    sig = extractFloatx80Frac( a );
+    expo = extractFloatx80Exp( a );
+    sign = extractFloatx80Sign( a );
+    shift = 0x403E - expo;
+    if ( 63 < shift ) {
+        /* Every significand bit would be shifted out: the result is zero,
+           inexact unless the operand itself is zero. */
+        if ( expo || sig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    if ( shift < 32 ) {
+        /* Too large for 32 bits (or NaN/infinity); a NaN is treated as
+           positive. */
+        if ( ( expo == 0x7FFF ) && (bits64) ( sig<<1 ) ) sign = 0;
+        goto invalid;
+    }
+    truncated = sig>>shift;
+    result = truncated;
+    if ( sign ) result = - result;
+    /* A result whose sign disagrees with the operand's sign overflowed. */
+    if ( ( result < 0 ) ^ sign ) goto invalid;
+    /* Bits lost in the right shift mean the conversion was inexact. */
+    if ( ( truncated<<shift ) != sig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return result;
+ invalid:
+    float_exception_flags |= float_flag_invalid;
+    return sign ? 0x80000000 : 0x7FFFFFFF;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the single-precision floating-point format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float32 floatx80_to_float32( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int32 expo = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+    if ( expo == 0x7FFF ) {
+        /* Maximum exponent: a NaN when any bit below the top one is set,
+           otherwise an infinity. */
+        if ( (bits64) ( sig<<1 ) != 0 ) {
+            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+        }
+        return packFloat32( sign, 0xFF, 0 );
+    }
+    /* Narrow the significand, folding the discarded bits into a sticky bit. */
+    shift64RightJamming( sig, 33, &sig );
+    /* Adjust the exponent unless the value is a true zero. */
+    if ( ( expo != 0 ) || ( sig != 0 ) ) expo -= 0x3F81;
+    return roundAndPackFloat32( sign, expo, sig );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of converting the extended double-precision floating-
+point value `a' to the double-precision floating-point format.  The
+conversion is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+float64 floatx80_to_float64( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int32 expo = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+    bits64 narrowed;
+    if ( expo == 0x7FFF ) {
+        /* Maximum exponent: a NaN when any bit below the top one is set,
+           otherwise an infinity. */
+        if ( (bits64) ( sig<<1 ) != 0 ) {
+            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+        }
+        return packFloat64( sign, 0x7FF, 0 );
+    }
+    /* Drop one bit, folding it into a sticky bit. */
+    shift64RightJamming( sig, 1, &narrowed );
+    /* Adjust the exponent unless the operand is a true zero (the test uses
+       the unshifted significand). */
+    if ( ( expo != 0 ) || ( sig != 0 ) ) expo -= 0x3C01;
+    return roundAndPackFloat64( sign, expo, narrowed );
+}
+/*
+-------------------------------------------------------------------------------
+Rounds the extended double-precision floating-point value `a' to an integer,
+and returns the result as an extended double-precision floating-point
+value.  The operation is performed according to the IEC/IEEE Standard for
+Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+    flag aSign;
+    int32 aExp;
+    bits64 lastBitMask, roundBitsMask;
+    int8 roundingMode;
+    floatx80 z;
+    aExp = extractFloatx80Exp( a );
+    if ( 0x403E <= aExp ) { /* no fractional bits remain at this exponent (or NaN/infinity) */
+        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+            return propagateFloatx80NaN( a, a );
+        }
+        return a; /* already integral, or an infinity */
+    }
+    if ( aExp <= 0x3FFE ) { /* exponent below 0x3FFF: result is a zero or magnitude one */
+        if (    ( aExp == 0 )
+             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+            return a; /* zero exponent and no bits below the top one: returned as is */
+        }
+        float_exception_flags |= float_flag_inexact;
+        aSign = extractFloatx80Sign( a );
+        switch ( float_rounding_mode ) {
+         case float_round_nearest_even:
+            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
+               ) { /* exponent 0x3FFE with low bits set: rounds to magnitude one */
+                return
+                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+            }
+            break; /* otherwise falls through to the signed zero below */
+         case float_round_down:
+            return
+                  aSign ?
+                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+                : packFloatx80( 0, 0, 0 ); /* negative -> -1, positive -> +0 */
+         case float_round_up:
+            return
+                  aSign ? packFloatx80( 1, 0, 0 )
+                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); /* negative -> -0, positive -> +1 */
+        }
+        return packFloatx80( aSign, 0, 0 ); /* zero carrying the sign of the operand */
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x403E - aExp; /* the significand bit with weight one */
+    roundBitsMask = lastBitMask - 1; /* all bits below it, i.e. the fraction */
+    z = a;
+    roundingMode = float_rounding_mode;
+    if ( roundingMode == float_round_nearest_even ) {
+        z.low += lastBitMask>>1; /* add one half */
+        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; /* exact tie: force an even result */
+    }
+    else if ( roundingMode != float_round_to_zero ) {
+        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { /* true for negative when not rounding up, positive when rounding up */
+            z.low += roundBitsMask;
+        }
+    }
+    z.low &= ~ roundBitsMask; /* discard the fraction bits */
+    if ( z.low == 0 ) { /* the additions above wrapped the significand to zero */
+        ++z.high;
+        z.low = LIT64( 0x8000000000000000 );
+    }
+    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; /* value changed */
+    return z;
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the absolute values of the extended double-
+precision floating-point values `a' and `b'.  If `zSign' is true, the sum is
+negated before being returned.  `zSign' is ignored if the result is a NaN.
+The addition is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        /* `a' has the larger exponent: align `b' to it. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return a;
+        }
+        /* An exponent field of 0 is aligned as if it were 1. */
+        if ( bExp == 0 ) --expDiff;
+        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        /* `b' has the larger exponent: align `a' to it. */
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        /* An exponent field of 0 is aligned as if it were 1. */
+        if ( aExp == 0 ) ++expDiff;
+        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+        zExp = bExp;
+    }
+    else {
+        /* Equal exponents: no alignment shift is needed. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                return propagateFloatx80NaN( a, b );
+            }
+            return a;
+        }
+        zSig1 = 0;
+        zSig0 = aSig + bSig;
+        if ( aExp == 0 ) {
+            /* Both operands are zeros: return the signed zero directly
+               instead of normalizing a zero significand.
+               NOTE(review): normalizeFloatx80Subnormal is defined outside
+               this view; it presumably shifts left by the leading-zero
+               count, which would be a 64-bit shift for zero -- confirm. */
+            if ( zSig0 == 0 ) return packFloatx80( zSign, 0, 0 );
+            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            goto roundAndPack;
+        }
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    zSig0 = aSig + bSig;
+    /* Top bit still set: the sum did not carry out of 64 bits. */
+    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+    /* Shift right one place, set the top bit and bump the exponent. */
+    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= LIT64( 0x8000000000000000 );
+    ++zExp;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the absolute values of the extended
+double-precision floating-point values `a' and `b'.  If `zSign' is true,
+the difference is negated before being returned.  `zSign' is ignored if the
+result is a NaN.  The subtraction is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+    floatx80 z;
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) goto aExpBigger;
+    if ( expDiff < 0 ) goto bExpBigger;
+    if ( aExp == 0x7FFF ) { /* equal maximum exponents: NaNs and/or infinities */
+        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        float_raise( float_flag_invalid ); /* infinity minus infinity */
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) { /* both exponent fields are 0: continue as exponent 1 */
+        aExp = 1;
+        bExp = 1;
+    }
+    zSig1 = 0;
+    if ( bSig < aSig ) goto aBigger;
+    if ( aSig < bSig ) goto bBigger;
+    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); /* equal magnitudes: zero, negative only when rounding down */
+ bExpBigger:
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* finite minus infinity: infinity of opposite sign */
+    }
+    if ( aExp == 0 ) ++expDiff; /* exponent field 0 is aligned as exponent 1 */
+    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); /* |b| - |a| */
+    zExp = bExp;
+    zSign ^= 1; /* `b' dominates, so the result sign flips */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a; /* infinity minus finite */
+    }
+    if ( bExp == 0 ) --expDiff; /* exponent field 0 is aligned as exponent 1 */
+    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); /* |a| - |b| */
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of adding the extended double-precision floating-point
+values `a' and `b'.  The operation is performed according to the IEC/IEEE
+Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_add( floatx80 a, floatx80 b )
+{
+    flag signA = extractFloatx80Sign( a );
+    flag signB = extractFloatx80Sign( b );
+    /* Opposite signs: the sum is a difference of magnitudes. */
+    if ( signA != signB ) return subFloatx80Sigs( a, b, signA );
+    /* Same sign: add the magnitudes and keep that sign. */
+    return addFloatx80Sigs( a, b, signA );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of subtracting the extended double-precision floating-
+point values `a' and `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sub( floatx80 a, floatx80 b )
+{
+    flag signA = extractFloatx80Sign( a );
+    flag signB = extractFloatx80Sign( b );
+    /* Opposite signs: the difference is a sum of magnitudes. */
+    if ( signA != signB ) return addFloatx80Sigs( a, b, signA );
+    /* Same sign: subtract the magnitudes. */
+    return subFloatx80Sigs( a, b, signA );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of multiplying the extended double-precision floating-
+point values `a' and `b'.  The operation is performed according to the
+IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    floatx80 z;
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign; /* product sign is the XOR of the operand signs */
+    if ( aExp == 0x7FFF ) { /* `a' is a NaN or an infinity */
+        if (    (bits64) ( aSig<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        if ( ( bExp | bSig ) == 0 ) goto invalid; /* infinity times zero */
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* signed infinity */
+    }
+    if ( bExp == 0x7FFF ) { /* `b' is a NaN or an infinity, `a' is finite */
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( ( aExp | aSig ) == 0 ) { /* zero times infinity */
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* signed infinity */
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); /* zero operand: signed zero */
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); /* zero operand: signed zero */
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128( aSig, bSig, &zSig0, &zSig1 ); /* 128-bit product in zSig0:zSig1 */
+    if ( 0 < (sbits64) zSig0 ) { /* top bit clear: shift up one place to normalize */
+        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the result of dividing the extended double-precision floating-point
+value `a' by the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, term0, term1, term2;
+    floatx80 z;
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b );
+    zSign = aSign ^ bSign; /* quotient sign is the XOR of the operand signs */
+    if ( aExp == 0x7FFF ) { /* `a' is a NaN or an infinity */
+        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            goto invalid; /* infinity divided by infinity */
+        }
+        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* infinity / finite */
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return packFloatx80( zSign, 0, 0 ); /* finite / infinity: signed zero */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) { /* divisor is zero */
+            if ( ( aExp | aSig ) == 0 ) { /* zero divided by zero */
+ invalid:
+                float_raise( float_flag_invalid );
+                z.low = floatx80_default_nan_low;
+                z.high = floatx80_default_nan_high;
+                return z;
+            }
+            float_raise( float_flag_divbyzero );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* signed infinity */
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); /* zero dividend: signed zero */
+        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    if ( bSig <= aSig ) { /* halve the dividend so it is below the divisor; compensate in zExp */
+        shift128Right( aSig, 0, 1, &aSig, &rem1 );
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64( aSig, rem1, bSig ); /* estimate of the high 64 quotient bits */
+    mul64To128( bSig, zSig0, &term0, &term1 );
+    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+    while ( (sbits64) rem0 < 0 ) { /* estimate too large: step down until remainder >= 0 */
+        --zSig0;
+        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+    }
+    zSig1 = estimateDiv128To64( rem1, 0, bSig ); /* estimate of the low 64 quotient bits */
+    if ( (bits64) ( zSig1<<1 ) <= 8 ) { /* low bits near zero: compute the exact remainder */
+        mul64To128( bSig, zSig1, &term1, &term2 );
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        while ( (sbits64) rem1 < 0 ) {
+            --zSig1;
+            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+        }
+        zSig1 |= ( ( rem1 | rem2 ) != 0 ); /* sticky bit: remainder is non-zero */
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the remainder of the extended double-precision floating-point value
+`a' with respect to the corresponding value `b'.  The operation is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+    flag aSign, bSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    bSign = extractFloatx80Sign( b ); /* extracted but not used below */
+    if ( aExp == 0x7FFF ) { /* `a' is a NaN or an infinity */
+        if (    (bits64) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        goto invalid; /* remainder of an infinity */
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a; /* finite rem infinity is `a' itself */
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) { /* remainder with respect to zero */
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( (bits64) ( aSig0<<1 ) == 0 ) return a; /* no bits below the top one: returned as is */
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    bSig |= LIT64( 0x8000000000000000 ); /* force the top significand bit of the divisor */
+    zSign = aSign; /* the result starts with the sign of `a' */
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+    if ( expDiff < 0 ) {
+        if ( expDiff < -1 ) return a; /* `a' is small enough to be its own remainder */
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+        expDiff = 0;
+    }
+    q = ( bSig <= aSig0 ); /* first quotient bit */
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    while ( 0 < expDiff ) { /* reduce the exponent difference 62 bits per pass */
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0; /* lower the estimate by 2, presumably so it never overshoots */
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) { /* final partial step for the remaining expDiff bits */
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+        while ( le128( term0, term1, aSig0, aSig1 ) ) { /* correct the lowered estimate upward */
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        }
+    }
+    else {
+        term1 = 0;
+        term0 = bSig;
+    }
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); /* divisor minus remainder */
+    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              && ( q & 1 ) )
+       ) { /* the alternate is smaller, or a tie with an odd quotient: use it and flip the sign */
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+        zSign = ! zSign;
+    }
+    return
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns the square root of the extended double-precision floating-point
+value `a'.  The operation is performed according to the IEC/IEEE Standard
+for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_sqrt( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, zExp;
+    bits64 aSig0, aSig1, zSig0, zSig1;
+    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+    bits64 shiftedRem0, shiftedRem1;
+    floatx80 z;
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( aExp == 0x7FFF ) { /* NaN or infinity */
+        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+        if ( ! aSign ) return a; /* +infinity is returned unchanged */
+        goto invalid; /* -infinity */
+    }
+    if ( aSign ) {
+        if ( ( aExp | aSig0 ) == 0 ) return a; /* -0 is returned unchanged */
+ invalid:
+        float_raise( float_flag_invalid );
+        z.low = floatx80_default_nan_low;
+        z.high = floatx80_default_nan_high;
+        return z;
+    }
+    if ( aExp == 0 ) {
+        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); /* +0 */
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; /* halve the unbiased exponent, then re-bias */
+    zSig0 = estimateSqrt32( aExp, aSig0>>32 ); /* first estimate from the top 32 bits */
+    zSig0 <<= 31;
+    aSig1 = 0;
+    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 ); /* alignment depends on exponent parity */
+    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4; /* refine the estimate with one division step */
+    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF ); /* top bit clear: presumably the sum wrapped; saturate */
+    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
+    mul64To128( zSig0, zSig0, &term0, &term1 );
+    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); /* remainder = aSig - zSig0^2 */
+    while ( (sbits64) rem0 < 0 ) { /* estimate too large: step it down */
+        --zSig0;
+        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
+        term1 |= 1; /* term = 2*zSig0 + 1, the change in zSig0^2 for one step */
+        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
+    }
+    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
+    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 ); /* estimate of the low 64 result bits */
+    if ( (bits64) ( zSig1<<1 ) <= 10 ) { /* low bits near zero: compute the exact remainder */
+        if ( zSig1 == 0 ) zSig1 = 1;
+        mul64To128( zSig0, zSig1, &term1, &term2 );
+        shortShift128Left( term1, term2, 1, &term1, &term2 ); /* 2 * zSig0 * zSig1 */
+        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+        mul64To128( zSig1, zSig1, &term2, &term3 ); /* zSig1^2 */
+        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+        while ( (sbits64) rem1 < 0 ) { /* low part too large: step it down */
+            --zSig1;
+            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
+            term3 |= 1;
+            add192(
+                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
+        }
+        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); /* sticky bit: remainder is non-zero */
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+equal to the corresponding value `b', and 0 otherwise.  The comparison is
+performed according to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_eq( floatx80 a, floatx80 b )
+{
+    flag sawNaN;
+    /* A NaN has the maximum exponent and some bit below the top one set. */
+    sawNaN =
+           ( extractFloatx80Exp( a ) == 0x7FFF )
+        && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 );
+    if ( ! sawNaN ) {
+        sawNaN =
+               ( extractFloatx80Exp( b ) == 0x7FFF )
+            && ( (bits64) ( extractFloatx80Frac( b )<<1 ) != 0 );
+    }
+    if ( sawNaN ) {
+        /* Unordered operands: only a signaling NaN raises invalid. */
+        if ( floatx80_is_signaling_nan( a ) ) {
+            float_raise( float_flag_invalid );
+        }
+        else if ( floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* The high words differ: still equal when both values are zeros, i.e.
+       zero significands and zero exponents regardless of sign. */
+    return ( a.low == 0 ) && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 );
+}
+/*
+-------------------------------------------------------------------------------
+Returns 1 if the extended double-precision floating-point value `a' is
+less than or equal to the corresponding value `b', and 0 otherwise.  The
+comparison is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_le( floatx80 a, floatx80 b )
+{
+    flag aNeg, bNeg;
+    /* Any NaN operand makes the comparison unordered and raises invalid. */
+    if (    ( extractFloatx80Exp( a ) == 0x7FFF )
+         && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 ) ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if (    ( extractFloatx80Exp( b ) == 0x7FFF )
+         && ( (bits64) ( extractFloatx80Frac( b )<<1 ) != 0 ) ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNeg = extractFloatx80Sign( a );
+    bNeg = extractFloatx80Sign( b );
+    if ( aNeg != bNeg ) {
+        /* Signs differ: true when `a' is the negative one, or both are zeros. */
+        if ( aNeg ) return 1;
+        return
+            ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0;
+    }
+    /* Same sign: compare as 128-bit pairs, swapping operands for negatives. */
+    if ( aNeg ) return le128( b.high, b.low, a.high, a.low );
+    return le128( a.high, a.low, b.high, b.low );
+}
+/*
+-------------------------------------------------------------------------------
+Less-than test for extended double-precision values.  Returns 1 if `a' is
+less than `b', and 0 otherwise.  If either operand is a NaN, the invalid
+exception is raised and the result is 0.  The comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_lt( floatx80 a, floatx80 b )
+{
+    flag signA, signB, unordered;
+
+    /* NaN test: exponent field is 0x7FFF and the fraction is nonzero once
+       its top bit has been shifted out. */
+    unordered =
+           ( extractFloatx80Exp( a ) == 0x7FFF )
+        && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 );
+    if ( ! unordered ) {
+        unordered =
+               ( extractFloatx80Exp( b ) == 0x7FFF )
+            && ( (bits64) ( extractFloatx80Frac( b )<<1 ) != 0 );
+    }
+    if ( unordered ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    signA = extractFloatx80Sign( a );
+    signB = extractFloatx80Sign( b );
+    if ( signA == signB ) {
+        /* Same sign: compare the raw words, with the operands swapped when
+           the sign is set. */
+        if ( signA ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Signs differ: true only when `a' carries the sign and the operands
+       are not both zero. */
+    if ( ! signA ) return 0;
+    return ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0;
+}
+/*
+-------------------------------------------------------------------------------
+Signaling equality test for extended double-precision values.  Returns 1 if
+`a' is equal to `b', and 0 otherwise.  If either operand is a NaN (quiet or
+signaling), the invalid exception is raised and the result is 0.  Otherwise
+the comparison is performed according to the IEC/IEEE Standard for Binary
+Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_eq_signaling( floatx80 a, floatx80 b )
+{
+    flag unordered;
+
+    /* NaN test: exponent field is 0x7FFF and the fraction is nonzero once
+       its top bit has been shifted out. */
+    unordered =
+           ( extractFloatx80Exp( a ) == 0x7FFF )
+        && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 );
+    if ( ! unordered ) {
+        unordered =
+               ( extractFloatx80Exp( b ) == 0x7FFF )
+            && ( (bits64) ( extractFloatx80Frac( b )<<1 ) != 0 );
+    }
+    if ( unordered ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* The `high' words differ: the operands are still equal when both are
+       zero once the top bit of `high' is ignored. */
+    if ( a.low != 0 ) return 0;
+    return (bits16) ( ( a.high | b.high )<<1 ) == 0;
+}
+/*
+-------------------------------------------------------------------------------
+Quiet less-than-or-equal test for extended double-precision values.  Returns
+1 if `a' is less than or equal to `b', and 0 otherwise.  When either operand
+is a NaN the result is 0, and the invalid exception is raised only if at
+least one operand is a signaling NaN.  Otherwise the comparison is performed
+according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_le_quiet( floatx80 a, floatx80 b )
+{
+    flag signA, signB, unordered;
+
+    /* NaN test: exponent field is 0x7FFF and the fraction is nonzero once
+       its top bit has been shifted out. */
+    unordered =
+           ( extractFloatx80Exp( a ) == 0x7FFF )
+        && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 );
+    if ( ! unordered ) {
+        unordered =
+               ( extractFloatx80Exp( b ) == 0x7FFF )
+            && ( (bits64) ( extractFloatx80Frac( b )<<1 ) != 0 );
+    }
+    if ( unordered ) {
+        if ( floatx80_is_signaling_nan( a ) ) {
+            float_raise( float_flag_invalid );
+        }
+        else if ( floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signA = extractFloatx80Sign( a );
+    signB = extractFloatx80Sign( b );
+    if ( signA == signB ) {
+        /* Same sign: compare the raw words, with the operands swapped when
+           the sign is set. */
+        if ( signA ) return le128( b.high, b.low, a.high, a.low );
+        return le128( a.high, a.low, b.high, b.low );
+    }
+    /* Signs differ: true when `a' carries the sign, or when both operands
+       are zero. */
+    if ( signA ) return 1;
+    return ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0;
+}
+/*
+-------------------------------------------------------------------------------
+Quiet less-than test for extended double-precision values.  Returns 1 if
+`a' is less than `b', and 0 otherwise.  When either operand is a NaN the
+result is 0, and the invalid exception is raised only if at least one
+operand is a signaling NaN.  Otherwise the comparison is performed according
+to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
+-------------------------------------------------------------------------------
+*/
+flag floatx80_lt_quiet( floatx80 a, floatx80 b )
+{
+    flag signA, signB, unordered;
+
+    /* NaN test: exponent field is 0x7FFF and the fraction is nonzero once
+       its top bit has been shifted out. */
+    unordered =
+           ( extractFloatx80Exp( a ) == 0x7FFF )
+        && ( (bits64) ( extractFloatx80Frac( a )<<1 ) != 0 );
+    if ( ! unordered ) {
+        unordered =
+               ( extractFloatx80Exp( b ) == 0x7FFF )
+            && ( (bits64) ( extractFloatx80Frac( b )<<1 ) != 0 );
+    }
+    if ( unordered ) {
+        if ( floatx80_is_signaling_nan( a ) ) {
+            float_raise( float_flag_invalid );
+        }
+        else if ( floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signA = extractFloatx80Sign( a );
+    signB = extractFloatx80Sign( b );
+    if ( signA == signB ) {
+        /* Same sign: compare the raw words, with the operands swapped when
+           the sign is set. */
+        if ( signA ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Signs differ: true only when `a' carries the sign and the operands
+       are not both zero. */
+    if ( ! signA ) return 0;
+    return ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0;
+}
+#endif
diff --git a/arch/arm26/nwfpe/softfloat.h b/arch/arm26/nwfpe/softfloat.h
new file mode 100644
index 000000000000..22c2193a4997
--- /dev/null
+++ b/arch/arm26/nwfpe/softfloat.h
@@ -0,0 +1,232 @@
+/*
+===============================================================================
+This C header file is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2.
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
+arithmetic/softfloat.html'.
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) they include prominent notice that the work is derivative, and (2) they
+include prominent notice akin to these three paragraphs for those parts of
+this code that are retained.
+===============================================================================
+*/
+#ifndef __SOFTFLOAT_H__
+#define __SOFTFLOAT_H__
+/*
+-------------------------------------------------------------------------------
+The macro `FLOATX80' must be defined to enable the extended double-precision
+floating-point format `floatx80'.  If this macro is not defined, the
+`floatx80' type will not be defined, and none of the functions that either
+input or output the `floatx80' type will be defined.
+
+NOTE: in this header the macro is defined unconditionally, so every
+`#ifdef FLOATX80' section below is always compiled.  Also, contrary to the
+paragraph above, the `floatx80' struct in this header is declared outside
+any `#ifdef FLOATX80' guard; only the function prototypes and the
+`floatx80_rounding_precision' declaration are conditional.
+-------------------------------------------------------------------------------
+*/
+#define FLOATX80
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point types.
+
+`float32' and `float64' are plain unsigned integer typedefs, and `floatx80'
+is a struct made of a `high' and a `low' member.
+NOTE(review): presumably each type carries the raw bit pattern of the
+corresponding format, with `high' holding the sign and exponent and `low'
+the significand of an extended double; the actual widths depend on the
+target's `short', `long' and `long long' sizes -- confirm against the
+accessor helpers in softfloat.c.
+-------------------------------------------------------------------------------
+*/
+typedef unsigned long int float32;
+typedef unsigned long long float64;
+typedef struct {
+    unsigned short high;
+    unsigned long long low;
+} floatx80;
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point underflow tininess-detection mode.
+
+`float_detect_tininess' is only declared here; its definition lives in
+another translation unit.
+NOTE(review): presumably it holds one of the two `float_tininess_*' values
+enumerated below -- confirm where it is defined and consumed.
+-------------------------------------------------------------------------------
+*/
+extern signed char float_detect_tininess;
+enum {
+    float_tininess_after_rounding  = 0,
+    float_tininess_before_rounding = 1
+};
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point rounding mode.
+
+`float_rounding_mode' is only declared here; its definition lives in another
+translation unit.
+NOTE(review): presumably it holds one of the four `float_round_*' values
+enumerated below and is consulted by the rounding routines -- confirm in
+softfloat.c.
+-------------------------------------------------------------------------------
+*/
+extern signed char float_rounding_mode;
+enum {
+    float_round_nearest_even = 0,
+    float_round_to_zero      = 1,
+    float_round_down         = 2,
+    float_round_up           = 3
+};
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE floating-point exception flags.
+
+Each flag is a distinct single-bit mask (1, 2, 4, 8, 16).  The declaration
+and enumeration shown immediately below are still inside this comment: they
+record the ordering used before the change described in the ScottB note and
+are NOT compiled.  The live declaration follows the end of the comment.
+-------------------------------------------------------------------------------
+extern signed char float_exception_flags;
+enum {
+    float_flag_inexact   =  1,
+    float_flag_underflow =  2,
+    float_flag_overflow  =  4,
+    float_flag_divbyzero =  8,
+    float_flag_invalid   = 16
+};
+ScottB: November 4, 1998
+Changed the enumeration to match the bit order in the FPA11.
+*/
+extern signed char float_exception_flags;
+enum {
+    float_flag_invalid   =  1,
+    float_flag_divbyzero =  2,
+    float_flag_overflow  =  4,
+    float_flag_underflow =  8,
+    float_flag_inexact   = 16
+};
+/*
+-------------------------------------------------------------------------------
+Routine to raise any or all of the software IEC/IEEE floating-point
+exception flags.
+
+The single `signed char' argument selects the flags to raise; the floatx80
+comparison routines in softfloat.c call it with `float_flag_invalid'.
+NOTE(review): presumably several `float_flag_*' masks may be OR-ed together
+in one call -- confirm against the definition, which is not in this header.
+-------------------------------------------------------------------------------
+*/
+void float_raise( signed char );
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE integer-to-floating-point conversion routines.
+
+Each routine takes a `signed int' and returns a value of the floating-point
+type named in the function.  The `floatx80' variant is declared only when
+`FLOATX80' is defined.
+-------------------------------------------------------------------------------
+*/
+float32 int32_to_float32( signed int );
+float64 int32_to_float64( signed int );
+#ifdef FLOATX80
+floatx80 int32_to_floatx80( signed int );
+#endif
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE single-precision conversion routines.
+
+Each routine takes a `float32' and returns either a `signed int' or a value
+of another floating-point type.  The `floatx80' variant is declared only
+when `FLOATX80' is defined.
+NOTE(review): presumably `float32_to_int32' honours the current rounding
+mode while the `_round_to_zero' variant always truncates -- confirm in
+softfloat.c.
+-------------------------------------------------------------------------------
+*/
+signed int float32_to_int32( float32 );
+signed int float32_to_int32_round_to_zero( float32 );
+float64 float32_to_float64( float32 );
+#ifdef FLOATX80
+floatx80 float32_to_floatx80( float32 );
+#endif
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE single-precision operations.
+
+The arithmetic routines take and return `float32' values.  The comparison
+routines and `float32_is_signaling_nan' return `char'.
+NOTE(review): presumably the comparisons return 1 for true and 0 for false,
+and the `_quiet' / `_signaling' suffixes follow the same NaN-handling
+convention as the floatx80 comparisons in softfloat.c -- confirm against the
+float32 definitions.
+-------------------------------------------------------------------------------
+*/
+float32 float32_round_to_int( float32 );
+float32 float32_add( float32, float32 );
+float32 float32_sub( float32, float32 );
+float32 float32_mul( float32, float32 );
+float32 float32_div( float32, float32 );
+float32 float32_rem( float32, float32 );
+float32 float32_sqrt( float32 );
+char float32_eq( float32, float32 );
+char float32_le( float32, float32 );
+char float32_lt( float32, float32 );
+char float32_eq_signaling( float32, float32 );
+char float32_le_quiet( float32, float32 );
+char float32_lt_quiet( float32, float32 );
+char float32_is_signaling_nan( float32 );
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE double-precision conversion routines.
+
+Each routine takes a `float64' and returns either a `signed int' or a value
+of another floating-point type.  The `floatx80' variant is declared only
+when `FLOATX80' is defined.
+NOTE(review): presumably `float64_to_int32' honours the current rounding
+mode while the `_round_to_zero' variant always truncates -- confirm in
+softfloat.c.
+-------------------------------------------------------------------------------
+*/
+signed int float64_to_int32( float64 );
+signed int float64_to_int32_round_to_zero( float64 );
+float32 float64_to_float32( float64 );
+#ifdef FLOATX80
+floatx80 float64_to_floatx80( float64 );
+#endif
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE double-precision operations.
+
+The arithmetic routines take and return `float64' values.  The comparison
+routines and `float64_is_signaling_nan' return `char'.
+NOTE(review): presumably the comparisons return 1 for true and 0 for false,
+and the `_quiet' / `_signaling' suffixes follow the same NaN-handling
+convention as the floatx80 comparisons in softfloat.c -- confirm against the
+float64 definitions.
+-------------------------------------------------------------------------------
+*/
+float64 float64_round_to_int( float64 );
+float64 float64_add( float64, float64 );
+float64 float64_sub( float64, float64 );
+float64 float64_mul( float64, float64 );
+float64 float64_div( float64, float64 );
+float64 float64_rem( float64, float64 );
+float64 float64_sqrt( float64 );
+char float64_eq( float64, float64 );
+char float64_le( float64, float64 );
+char float64_lt( float64, float64 );
+char float64_eq_signaling( float64, float64 );
+char float64_le_quiet( float64, float64 );
+char float64_lt_quiet( float64, float64 );
+char float64_is_signaling_nan( float64 );
+#ifdef FLOATX80
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision conversion routines.
+
+Each routine takes a `floatx80' and returns either a `signed int' or a
+narrower floating-point type.  These prototypes sit inside the enclosing
+`#ifdef FLOATX80' section.
+NOTE(review): presumably `floatx80_to_int32' honours the current rounding
+mode while the `_round_to_zero' variant always truncates -- confirm in
+softfloat.c.
+-------------------------------------------------------------------------------
+*/
+signed int floatx80_to_int32( floatx80 );
+signed int floatx80_to_int32_round_to_zero( floatx80 );
+float32 floatx80_to_float32( floatx80 );
+float64 floatx80_to_float64( floatx80 );
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision rounding precision.  Valid
+values are 32, 64, and 80.
+
+Only declared here; the definition lives in another translation unit.
+softfloat.c passes this value as the first argument of
+`roundAndPackFloatx80' when packing an extended double-precision result.
+-------------------------------------------------------------------------------
+*/
+extern signed char floatx80_rounding_precision;
+/*
+-------------------------------------------------------------------------------
+Software IEC/IEEE extended double-precision operations.
+
+The arithmetic routines take and return `floatx80' values.  The comparison
+routines return `char': 1 when the relation holds and 0 otherwise, and
+always 0 when either operand is a NaN.  As implemented in softfloat.c:
+  - `floatx80_eq', `floatx80_le_quiet' and `floatx80_lt_quiet' raise the
+    invalid exception only when an operand is a signaling NaN;
+  - `floatx80_le', `floatx80_lt' and `floatx80_eq_signaling' raise the
+    invalid exception when either operand is any NaN.
+-------------------------------------------------------------------------------
+*/
+floatx80 floatx80_round_to_int( floatx80 );
+floatx80 floatx80_add( floatx80, floatx80 );
+floatx80 floatx80_sub( floatx80, floatx80 );
+floatx80 floatx80_mul( floatx80, floatx80 );
+floatx80 floatx80_div( floatx80, floatx80 );
+floatx80 floatx80_rem( floatx80, floatx80 );
+floatx80 floatx80_sqrt( floatx80 );
+char floatx80_eq( floatx80, floatx80 );
+char floatx80_le( floatx80, floatx80 );
+char floatx80_lt( floatx80, floatx80 );
+char floatx80_eq_signaling( floatx80, floatx80 );
+char floatx80_le_quiet( floatx80, floatx80 );
+char floatx80_lt_quiet( floatx80, floatx80 );
+char floatx80_is_signaling_nan( floatx80 );
+#endif
+#endif
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/arm26