From 14cf11af6cf608eb8c23e989ddb17a715ddce109 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 26 Sep 2005 16:04:21 +1000
Subject: powerpc: Merge enough to start building in arch/powerpc.

This creates the directory structure under arch/powerpc and a bunch
of Kconfig files.  It does a first-cut merge of arch/powerpc/mm,
arch/powerpc/lib and arch/powerpc/platforms/powermac.  This is enough
to build a 32-bit powermac kernel with ARCH=powerpc.

For now we are getting some unmerged files from arch/ppc/kernel and
arch/ppc/syslib, or arch/ppc64/kernel.  This makes some minor changes
to files in those directories and files outside arch/powerpc.

The boot directory is still not merged.  That's going to be interesting.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/Makefile     |   9 +
 arch/powerpc/lib/checksum.S   | 225 ++++++++++++++
 arch/powerpc/lib/checksum64.S | 229 ++++++++++++++
 arch/powerpc/lib/copy32.S     | 543 +++++++++++++++++++++++++++++++++
 arch/powerpc/lib/copypage.S   | 121 ++++++++
 arch/powerpc/lib/copyuser.S   | 576 +++++++++++++++++++++++++++++++++++
 arch/powerpc/lib/div64.S      |  58 ++++
 arch/powerpc/lib/e2a.c        | 108 +++++++
 arch/powerpc/lib/memcpy.S     | 172 +++++++++++
 arch/powerpc/lib/rheap.c      | 693 ++++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/lib/sstep.c      | 141 +++++++++
 arch/powerpc/lib/strcase.c    |  23 ++
 arch/powerpc/lib/string.S     | 203 +++++++++++++
 arch/powerpc/lib/usercopy.c   |  41 +++
 14 files changed, 3142 insertions(+)
 create mode 100644 arch/powerpc/lib/Makefile
 create mode 100644 arch/powerpc/lib/checksum.S
 create mode 100644 arch/powerpc/lib/checksum64.S
 create mode 100644 arch/powerpc/lib/copy32.S
 create mode 100644 arch/powerpc/lib/copypage.S
 create mode 100644 arch/powerpc/lib/copyuser.S
 create mode 100644 arch/powerpc/lib/div64.S
 create mode 100644 arch/powerpc/lib/e2a.c
 create mode 100644 arch/powerpc/lib/memcpy.S
 create mode 100644 arch/powerpc/lib/rheap.c
 create mode 100644 arch/powerpc/lib/sstep.c
 create mode 100644 arch/powerpc/lib/strcase.c
 create mode 100644 arch/powerpc/lib/string.S
 create mode 100644 arch/powerpc/lib/usercopy.c

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
new file mode 100644
index 000000000000..347f9798e433
--- /dev/null
+++ b/arch/powerpc/lib/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for ppc-specific library files..
+#
+
+obj-y			:= strcase.o string.o
+obj-$(CONFIG_PPC32)	+= div64.o copy32.o checksum.o
+obj-$(CONFIG_PPC64)	+= copypage.o copyuser.o memcpy.o usercopy.o \
+			   sstep.o checksum64.o
+obj-$(CONFIG_PPC_ISERIES) += e2a.o
diff --git a/arch/powerpc/lib/checksum.S b/arch/powerpc/lib/checksum.S
new file mode 100644
index 000000000000..7874e8a80455
--- /dev/null
+++ b/arch/powerpc/lib/checksum.S
@@ -0,0 +1,225 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+/*
+ * ip_fast_csum(buf, len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * csum_partial(buff, len, sum)
+ */
+_GLOBAL(csum_partial)
+	addic	r0,r5,0
+	subi	r3,r3,4
+	srwi.	r6,r4,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r5,r3,2		/* Align buffer to longword boundary */
+	beq+	1f
+	lhz	r5,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r4,r4,2
+	addc	r0,r0,r5
+	srwi.	r6,r4,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+2:	lwzu	r5,4(r3)	/* the bdnz has zero overhead, so it should */
+	adde	r0,r0,r5	/* be unnecessary to unroll this loop */
+	bdnz	2b
+	andi.	r4,r4,3
+3:	cmpwi	0,r4,2
+	blt+	4f
+	lhz	r5,4(r3)
+	addi	r3,r3,2
+	subi	r4,r4,2
+	adde	r0,r0,r5
+4:	cmpwi	0,r4,1
+	bne+	5f
+	lbz	r5,4(r3)
+	slwi	r5,r5,8		/* Upper byte of word */
+	adde	r0,r0,r5
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	srwi.	r6,r5,4		/* # groups of 4 words to do */
+	beq	10f
+	mtctr	r6
+71:	lwz	r6,4(r3)
+72:	lwz	r9,8(r3)
+73:	lwz	r10,12(r3)
+74:	lwzu	r11,16(r3)
+	adde	r0,r0,r6
+75:	stw	r6,4(r4)
+	adde	r0,r0,r9
+76:	stw	r9,8(r4)
+	adde	r0,r0,r10
+77:	stw	r10,12(r4)
+	adde	r0,r0,r11
+78:	stwu	r11,16(r4)
+	bdnz	71b
+10:	rlwinm.	r6,r5,30,30,31	/* # words left to do */
+	beq	13f
+	mtctr	r6
+82:	lwzu	r9,4(r3)
+92:	stwu	r9,4(r4)
+	adde	r0,r0,r9
+	bdnz	82b
+13:	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+src_error_4:
+	mfctr	r6		/* update # bytes remaining from ctr */
+	rlwimi	r5,r6,4,0,27
+	b	79f
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+79:	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+src_error:
+	cmpwi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+dst_error:
+	cmpwi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.long	81b,src_error_1
+	.long	91b,dst_error
+	.long	71b,src_error_4
+	.long	72b,src_error_4
+	.long	73b,src_error_4
+	.long	74b,src_error_4
+	.long	75b,dst_error
+	.long	76b,dst_error
+	.long	77b,dst_error
+	.long	78b,dst_error
+	.long	82b,src_error_2
+	.long	92b,dst_error
+	.long	83b,src_error_3
+	.long	93b,dst_error
+	.long	84b,src_error_3
+	.long	94b,dst_error
+	.long	95b,dst_error
+	.long	96b,dst_error
+	.long	97b,dst_error
diff --git a/arch/powerpc/lib/checksum64.S b/arch/powerpc/lib/checksum64.S
new file mode 100644
index 000000000000..ef96c6c58efc
--- /dev/null
+++ b/arch/powerpc/lib/checksum64.S
@@ -0,0 +1,229 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ *
+ * In practice len == 5, but this is not guaranteed.  So this code does not
+ * attempt to use doubleword instructions.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+        rldicl  r4,r0,32,0      /* fold two 32-bit halves together */
+        add     r0,r0,r4
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
+ * No real gain trying to do this specially for 64 bit, but
+ * the 32 bit addition may spill into the upper bits of
+ * the doubleword so we still must fold it down from 64.
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+        rldicl  r4,r0,32,0      /* fold 64 bit value */
+        add     r0,r4,r0
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * This code assumes at least halfword alignment, though the length
+ * can be any number of bytes.  The sum is accumulated in r5.
+ *
+ * csum_partial(r3=buff, r4=len, r5=sum)
+ */
+_GLOBAL(csum_partial)
+        subi	r3,r3,8		/* we'll offset by 8 for the loads */
+        srdi.	r6,r4,3         /* divide by 8 for doubleword count */
+        addic   r5,r5,0         /* clear carry */
+        beq	3f              /* if we're doing < 8 bytes */
+        andi.	r0,r3,2         /* aligned on a word boundary already? */
+        beq+	1f
+        lhz     r6,8(r3)        /* do 2 bytes to get aligned */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        addc    r5,r5,r6
+        srdi.   r6,r4,3         /* recompute number of doublewords */
+        beq     3f              /* any left? */
+1:      mtctr   r6
+2:      ldu     r6,8(r3)        /* main sum loop */
+        adde    r5,r5,r6
+        bdnz    2b
+        andi.	r4,r4,7         /* compute bytes left to sum after doublewords */
+3:	cmpwi	0,r4,4		/* is at least a full word left? */
+	blt	4f
+	lwz	r6,8(r3)	/* sum this word */
+	addi	r3,r3,4
+	subi	r4,r4,4
+	adde	r5,r5,r6
+4:	cmpwi	0,r4,2		/* is at least a halfword left? */
+        blt+	5f
+        lhz     r6,8(r3)        /* sum this halfword */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        adde    r5,r5,r6
+5:	cmpwi	0,r4,1		/* is at least a byte left? */
+        bne+    6f
+        lbz     r6,8(r3)        /* sum this byte */
+        slwi    r6,r6,8         /* this byte is assumed to be the upper byte of a halfword */
+        adde    r5,r5,r6
+6:      addze	r5,r5		/* add in final carry */
+	rldicl  r4,r5,32,0      /* fold two 32-bit halves together */
+        add     r3,r4,r5
+        srdi    r3,r3,32
+        blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * This code needs to be reworked to take advantage of 64 bit sum+copy.
+ * However, due to tokenring halfword alignment problems this will be very
+ * tricky.  For now we'll leave it until we instrument it somehow.
+ *
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
+92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
+	adde	r0,r0,r6
+	bdnz	82b
+	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
+        rldicl  r4,r3,32,0      /* fold 64 bit value */
+        add     r3,r4,r3
+        srdi    r3,r3,32
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+	.globl src_error_1
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+	.globl src_error_2
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+	.globl src_error_3
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+	.globl src_error
+src_error:
+	cmpdi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+	.globl dst_error
+dst_error:
+	cmpdi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.align  3
+	.llong	81b,src_error_1
+	.llong	91b,dst_error
+	.llong	82b,src_error_2
+	.llong	92b,dst_error
+	.llong	83b,src_error_3
+	.llong	93b,dst_error
+	.llong	84b,src_error_3
+	.llong	94b,dst_error
+	.llong	95b,dst_error
+	.llong	96b,dst_error
+	.llong	97b,dst_error
diff --git a/arch/powerpc/lib/copy32.S b/arch/powerpc/lib/copy32.S
new file mode 100644
index 000000000000..420a912198a2
--- /dev/null
+++ b/arch/powerpc/lib/copy32.S
@@ -0,0 +1,543 @@
+/*
+ * Memory copy functions for 32-bit PowerPC.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+#define COPY_16_BYTES		\
+	lwz	r7,4(r4);	\
+	lwz	r8,8(r4);	\
+	lwz	r9,12(r4);	\
+	lwzu	r10,16(r4);	\
+	stw	r7,4(r6);	\
+	stw	r8,8(r6);	\
+	stw	r9,12(r6);	\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n)	\
+8 ## n ## 0:			\
+	lwz	r7,4(r4);	\
+8 ## n ## 1:			\
+	lwz	r8,8(r4);	\
+8 ## n ## 2:			\
+	lwz	r9,12(r4);	\
+8 ## n ## 3:			\
+	lwzu	r10,16(r4);	\
+8 ## n ## 4:			\
+	stw	r7,4(r6);	\
+8 ## n ## 5:			\
+	stw	r8,8(r6);	\
+8 ## n ## 6:			\
+	stw	r9,12(r6);	\
+8 ## n ## 7:			\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n)			\
+9 ## n ## 0:					\
+	addi	r5,r5,-(16 * n);		\
+	b	104f;				\
+9 ## n ## 1:					\
+	addi	r5,r5,-(16 * n);		\
+	b	105f;				\
+.section __ex_table,"a";			\
+	.align	2;				\
+	.long	8 ## n ## 0b,9 ## n ## 0b;	\
+	.long	8 ## n ## 1b,9 ## n ## 0b;	\
+	.long	8 ## n ## 2b,9 ## n ## 0b;	\
+	.long	8 ## n ## 3b,9 ## n ## 0b;	\
+	.long	8 ## n ## 4b,9 ## n ## 1b;	\
+	.long	8 ## n ## 5b,9 ## n ## 1b;	\
+	.long	8 ## n ## 6b,9 ## n ## 1b;	\
+	.long	8 ## n ## 7b,9 ## n ## 1b;	\
+	.text
+
+	.text
+	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
+	.stabs	"copy32.S",N_SO,0,0,0f
+0:
+
+CACHELINE_BYTES = L1_CACHE_LINE_SIZE
+LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
+CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.  -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+	mr	r5,r4
+	li	r4,0
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
+	add	r8,r7,r5
+	srwi	r9,r8,LG_CACHELINE_BYTES
+	addic.	r9,r9,-1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0,r7,CACHELINE_MASK & ~3
+	srwi.	r0,r0,2
+	beq	3f
+	mtctr	r0
+4:	stwu	r4,4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7,4
+#if !defined(CONFIG_8xx)
+10:	dcbz	r7,r6
+#else
+10:	stw	r4, 4(r6)
+	stw	r4, 8(r6)
+	stw	r4, 12(r6)
+	stw	r4, 16(r6)
+#if CACHE_LINE_SIZE >= 32
+	stw	r4, 20(r6)
+	stw	r4, 24(r6)
+	stw	r4, 28(r6)
+	stw	r4, 32(r6)
+#endif /* CACHE_LINE_SIZE */
+#endif
+	addi	r6,r6,CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
+	addi	r5,r5,4
+2:	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+_GLOBAL(memset)
+	rlwimi	r4,r4,8,16,23
+	rlwimi	r4,r4,16,0,15
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic.  This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBAL(cacheable_memcpy)
+	add	r7,r3,r5		/* test if the src & dst overlap */
+	add	r8,r4,r5
+	cmplw	0,r4,r7
+	cmplw	1,r3,r8
+	crand	0,0,4			/* cr0.lt &= cr1.lt */
+	blt	memcpy			/* if regions overlap */
+
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	subf	r5,r0,r5
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+	stwu	r9,4(r6)
+	bdnz	72b
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	mtctr	r0
+	beq	63f
+53:
+#if !defined(CONFIG_8xx)
+	dcbz	r11,r6
+#endif
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	53b
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	blr
+
+_GLOBAL(memmove)
+	cmplw	0,r3,r4
+	bgt	backwards_memcpy
+	/* fall through */
+
+_GLOBAL(memcpy)
+	srwi.	r7,r5,3
+	addi	r6,r3,-4
+	addi	r4,r4,-4
+	beq	2f			/* if less than 8 bytes to do */
+	andi.	r0,r6,3			/* get dest word aligned */
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,4(r4)
+	lwzu	r8,8(r4)
+	stw	r7,4(r6)
+	stwu	r8,8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,4(r4)
+	addi	r5,r5,-4
+	stwu	r0,4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r4,r4,3
+	addi	r6,r6,3
+4:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
+	bdnz	4b
+	blr
+5:	subfic	r0,r0,4
+	mtctr	r0
+6:	lbz	r7,4(r4)
+	addi	r4,r4,1
+	stb	r7,4(r6)
+	addi	r6,r6,1
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(backwards_memcpy)
+	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
+	add	r6,r3,r5
+	add	r4,r4,r5
+	beq	2f
+	andi.	r0,r6,3
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,-4(r4)
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+4:	lbzu	r0,-1(r4)
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+5:	mtctr	r0
+6:	lbzu	r7,-1(r4)
+	stbu	r7,-1(r6)
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(__copy_tofrom_user)
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+71:	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	subf	r5,r0,r5
+	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+73:	stwu	r9,4(r6)
+	bdnz	72b
+
+	.section __ex_table,"a"
+	.align	2
+	.long	70b,100f
+	.long	71b,101f
+	.long	72b,102f
+	.long	73b,103f
+	.text
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	beq	63f
+
+#ifdef CONFIG_8xx
+	/* Don't use prefetch on 8xx */
+	mtctr	r0
+	li	r0,0
+53:	COPY_16_BYTES_WITHEX(0)
+	bdnz	53b
+
+#else /* not CONFIG_8xx */
+	/* Here we decide how far ahead to prefetch the source */
+	li	r3,4
+	cmpwi	r0,1
+	li	r7,0
+	ble	114f
+	li	r7,1
+#if MAX_COPY_PREFETCH > 1
+	/* Heuristically, for large transfers we prefetch
+	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
+	   we prefetch 1 cacheline ahead. */
+	cmpwi	r0,MAX_COPY_PREFETCH
+	ble	112f
+	li	r7,MAX_COPY_PREFETCH
+112:	mtctr	r7
+111:	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+	bdnz	111b
+#else
+	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114:	subf	r8,r7,r0
+	mr	r0,r7
+	mtctr	r8
+
+53:	dcbt	r3,r4
+54:	dcbz	r11,r6
+	.section __ex_table,"a"
+	.align	2
+	.long	54b,105f
+	.text
+/* the main body of the cacheline loop */
+	COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_WITHEX(2)
+	COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_WITHEX(4)
+	COPY_16_BYTES_WITHEX(5)
+	COPY_16_BYTES_WITHEX(6)
+	COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+	bdnz	53b
+	cmpwi	r0,0
+	li	r3,4
+	li	r7,0
+	bne	114b
+#endif /* CONFIG_8xx */
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+31:	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+41:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	li	r3,0
+	blr
+
+/* read fault, initial single-byte copy */
+100:	li	r9,0
+	b	90f
+/* write fault, initial single-byte copy */
+101:	li	r9,1
+90:	subf	r5,r8,r5
+	li	r3,0
+	b	99f
+/* read fault, initial word copy */
+102:	li	r9,0
+	b	91f
+/* write fault, initial word copy */
+103:	li	r9,1
+91:	li	r3,2
+	b	99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+	COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_EXCODE(2)
+	COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_EXCODE(4)
+	COPY_16_BYTES_EXCODE(5)
+	COPY_16_BYTES_EXCODE(6)
+	COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104:	li	r9,0
+	b	92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105:	li	r9,1
+92:	li	r3,LG_CACHELINE_BYTES
+	mfctr	r8
+	add	r0,r0,r8
+	b	106f
+/* read fault in final word loop */
+108:	li	r9,0
+	b	93f
+/* write fault in final word loop */
+109:	li	r9,1
+93:	andi.	r5,r5,3
+	li	r3,2
+	b	99f
+/* read fault in final byte loop */
+110:	li	r9,0
+	b	94f
+/* write fault in final byte loop */
+111:	li	r9,1
+94:	li	r5,0
+	li	r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
+ */
+99:	mfctr	r0
+106:	slw	r3,r0,r3
+	add.	r3,r3,r5
+	beq	120f			/* shouldn't happen */
+	cmpwi	0,r9,0
+	bne	120f
+/* for a read fault, first try to continue the copy one byte at a time */
+	mtctr	r3
+130:	lbz	r0,4(r4)
+131:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	130b
+/* then clear out the destination: r3 bytes starting at 4(r6) */
+132:	mfctr	r3
+	srwi.	r0,r3,2
+	li	r9,0
+	mtctr	r0
+	beq	113f
+112:	stwu	r9,4(r6)
+	bdnz	112b
+113:	andi.	r0,r3,3
+	mtctr	r0
+	beq	120f
+114:	stb	r9,4(r6)
+	addi	r6,r6,1
+	bdnz	114b
+120:	blr
+
+	.section __ex_table,"a"
+	.align	2
+	.long	30b,108b
+	.long	31b,109b
+	.long	40b,110b
+	.long	41b,111b
+	.long	130b,132b
+	.long	131b,120b
+	.long	112b,120b
+	.long	114b,120b
+	.text
diff --git a/arch/powerpc/lib/copypage.S b/arch/powerpc/lib/copypage.S
new file mode 100644
index 000000000000..733d61618bbf
--- /dev/null
+++ b/arch/powerpc/lib/copypage.S
@@ -0,0 +1,121 @@
+/*
+ * arch/ppc64/lib/copypage.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copy_page)
+	std	r31,-8(1)
+	std	r30,-16(1)
+	std	r29,-24(1)
+	std	r28,-32(1)
+	std	r27,-40(1)
+	std	r26,-48(1)
+	std	r25,-56(1)
+	std	r24,-64(1)
+	std	r23,-72(1)
+	std	r22,-80(1)
+	std	r21,-88(1)
+	std	r20,-96(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r12,5
+0:	addi	r5,r5,-24
+	mtctr	r12
+	ld	r22,640(4)
+	ld	r21,512(4)
+	ld	r20,384(4)
+	ld	r11,256(4)
+	ld	r9,128(4)
+	ld	r7,0(4)
+	ld	r25,648(4)
+	ld	r24,520(4)
+	ld	r23,392(4)
+	ld	r10,264(4)
+	ld	r8,136(4)
+	ldu	r6,8(4)
+	cmpwi	r5,24
+1:	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	ld	r28,648(4)
+	ld	r27,520(4)
+	ld	r26,392(4)
+	ld	r31,264(4)
+	ld	r30,136(4)
+	ld	r29,8(4)
+	std	r25,656(3)
+	std	r24,528(3)
+	std	r23,400(3)
+	std	r10,272(3)
+	std	r8,144(3)
+	std	r6,16(3)
+	ld	r22,656(4)
+	ld	r21,528(4)
+	ld	r20,400(4)
+	ld	r11,272(4)
+	ld	r9,144(4)
+	ld	r7,16(4)
+	std	r28,664(3)
+	std	r27,536(3)
+	std	r26,408(3)
+	std	r31,280(3)
+	std	r30,152(3)
+	stdu	r29,24(3)
+	ld	r25,664(4)
+	ld	r24,536(4)
+	ld	r23,408(4)
+	ld	r10,280(4)
+	ld	r8,152(4)
+	ldu	r6,24(4)
+	bdnz	1b
+	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	addi	r4,r4,640
+	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+	ld	r7,0(4)
+	ld	r8,8(4)
+	ldu	r9,16(4)
+3:	ld	r10,8(4)
+	std	r7,8(3)
+	ld	r7,16(4)
+	std	r8,16(3)
+	ld	r8,24(4)
+	std	r9,24(3)
+	ldu	r9,32(4)
+	stdu	r10,32(3)
+	bdnz	3b
+4:	ld	r10,8(4)
+	std	r7,8(3)
+	std	r8,16(3)
+	std	r9,24(3)
+	std	r10,32(3)
+9:	ld	r20,-96(1)
+	ld	r21,-88(1)
+	ld	r22,-80(1)
+	ld	r23,-72(1)
+	ld	r24,-64(1)
+	ld	r25,-56(1)
+	ld	r26,-48(1)
+	ld	r27,-40(1)
+	ld	r28,-32(1)
+	ld	r29,-24(1)
+	ld	r30,-16(1)
+	ld	r31,-8(1)
+	blr
diff --git a/arch/powerpc/lib/copyuser.S b/arch/powerpc/lib/copyuser.S
new file mode 100644
index 000000000000..a0b3fbbd6fb1
--- /dev/null
+++ b/arch/powerpc/lib/copyuser.S
@@ -0,0 +1,576 @@
+/*
+ * arch/ppc64/lib/copyuser.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+	.align	7
+_GLOBAL(__copy_tofrom_user)
+	/* first check for a whole page copy on a page boundary */
+	cmpldi	cr1,r5,16
+	cmpdi	cr6,r5,4096
+	or	r0,r3,r4
+	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
+	andi.	r0,r0,4095
+	std	r3,-24(r1)
+	crand	cr0*4+2,cr0*4+2,cr6*4+2
+	std	r4,-16(r1)
+	std	r5,-8(r1)
+	dcbt	0,r4
+	beq	.Lcopy_page
+	andi.	r6,r6,7
+	mtcrf	0x01,r5
+	blt	cr1,.Lshort_copy
+	bne	.Ldst_unaligned
+.Ldst_aligned:
+	andi.	r0,r4,7
+	addi	r3,r3,-16
+	bne	.Lsrc_unaligned
+	srdi	r7,r5,4
+20:	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7
+	bf	cr7*4+0,22f
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,72f
+21:	ld	r9,8(r4)
+70:	std	r8,8(r3)
+22:	ldu	r8,16(r4)
+71:	stdu	r9,16(r3)
+	bdnz	21b
+72:	std	r8,8(r3)
+	beq+	3f
+	addi	r3,r3,16
+23:	ld	r9,8(r4)
+.Ldo_tail:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+73:	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+74:	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+75:	stb	r9,0(r3)
+3:	li	r3,0
+	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4
+	srdi	r7,r5,4
+	sldi	r10,r0,3
+	cmpldi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64
+	add	r5,r5,r0
+	bt	cr7*4+0,28f
+
+24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
+25:	ld	r0,8(r4)
+	sld	r6,r9,r10
+26:	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,79f
+27:	ld	r0,8(r4)
+	b	2f
+
+28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
+29:	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+30:	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+31:	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,78f
+
+1:	or	r7,r7,r6
+32:	ld	r0,8(r4)
+76:	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+33:	ldu	r9,16(r4)
+	or	r12,r8,r12
+77:	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+78:	std	r12,8(r3)
+	or	r7,r7,r6
+79:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+80:	std	r12,24(r3)
+	bne	6f
+	li	r3,0
+	blr
+6:	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,.Ldo_tail
+34:	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+	b	.Ldo_tail
+
+.Ldst_unaligned:
+	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
+	subf	r5,r6,r5
+	li	r7,0
+	cmpldi	r1,r5,16
+	bf	cr7*4+3,1f
+35:	lbz	r0,0(r4)
+81:	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+36:	lhzx	r0,r7,r4
+82:	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+37:	lwzx	r0,r7,r4
+83:	stwx	r0,r7,r3
+3:	mtcrf	0x01,r5
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f
+38:	lwz	r0,0(r4)
+39:	lwz	r9,4(r4)
+	addi	r4,r4,8
+84:	stw	r0,0(r3)
+85:	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+40:	lwz	r0,0(r4)
+	addi	r4,r4,4
+86:	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+41:	lhz	r0,0(r4)
+	addi	r4,r4,2
+87:	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+42:	lbz	r0,0(r4)
+88:	stb	r0,0(r3)
+4:	li	r3,0
+	blr
+
+/*
+ * exception handlers follow
+ * we have to return the number of bytes not copied
+ * for an exception on a load, we set the rest of the destination to 0
+ */
+
+136:
+137:
+	add	r3,r3,r7
+	b	1f
+130:
+131:
+	addi	r3,r3,8
+120:
+122:
+124:
+125:
+126:
+127:
+128:
+129:
+133:
+	addi	r3,r3,8
+121:
+132:
+	addi	r3,r3,8
+123:
+134:
+135:
+138:
+139:
+140:
+141:
+142:
+
+/*
+ * here we have had a fault on a load and r3 points to the first
+ * unmodified byte of the destination
+ */
+1:	ld	r6,-24(r1)
+	ld	r4,-16(r1)
+	ld	r5,-8(r1)
+	subf	r6,r6,r3
+	add	r4,r4,r6
+	subf	r5,r6,r5	/* #bytes left to go */
+
+/*
+ * first see if we can copy any more bytes before hitting another exception
+ */
+	mtctr	r5
+43:	lbz	r0,0(r4)
+	addi	r4,r4,1
+89:	stb	r0,0(r3)
+	addi	r3,r3,1
+	bdnz	43b
+	li	r3,0		/* huh? all copied successfully this time? */
+	blr
+
+/*
+ * here we have trapped again, need to clear ctr bytes starting at r3
+ */
+143:	mfctr	r5
+	li	r0,0
+	mr	r4,r3
+	mr	r3,r5		/* return the number of bytes not copied */
+1:	andi.	r9,r4,7
+	beq	3f
+90:	stb	r0,0(r4)
+	addic.	r5,r5,-1
+	addi	r4,r4,1
+	bne	1b
+	blr
+3:	cmpldi	cr1,r5,8
+	srdi	r9,r5,3
+	andi.	r5,r5,7
+	blt	cr1,93f
+	mtctr	r9
+91:	std	r0,0(r4)
+	addi	r4,r4,8
+	bdnz	91b
+93:	beqlr
+	mtctr	r5	
+92:	stb	r0,0(r4)
+	addi	r4,r4,1
+	bdnz	92b
+	blr
+
+/*
+ * exception handlers for stores: we just need to work
+ * out how many bytes weren't copied
+ */
+182:
+183:
+	add	r3,r3,r7
+	b	1f
+180:
+	addi	r3,r3,8
+171:
+177:
+	addi	r3,r3,8
+170:
+172:
+176:
+178:
+	addi	r3,r3,4
+185:
+	addi	r3,r3,4
+173:
+174:
+175:
+179:
+181:
+184:
+186:
+187:
+188:
+189:	
+1:
+	ld	r6,-24(r1)
+	ld	r5,-8(r1)
+	add	r6,r6,r5
+	subf	r3,r3,r6	/* #bytes not copied */
+190:
+191:
+192:
+	blr			/* #bytes not copied in r3 */
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,120b
+	.llong	21b,121b
+	.llong	70b,170b
+	.llong	22b,122b
+	.llong	71b,171b
+	.llong	72b,172b
+	.llong	23b,123b
+	.llong	73b,173b
+	.llong	74b,174b
+	.llong	75b,175b
+	.llong	24b,124b
+	.llong	25b,125b
+	.llong	26b,126b
+	.llong	27b,127b
+	.llong	28b,128b
+	.llong	29b,129b
+	.llong	30b,130b
+	.llong	31b,131b
+	.llong	32b,132b
+	.llong	76b,176b
+	.llong	33b,133b
+	.llong	77b,177b
+	.llong	78b,178b
+	.llong	79b,179b
+	.llong	80b,180b
+	.llong	34b,134b
+	.llong	35b,135b
+	.llong	81b,181b
+	.llong	36b,136b
+	.llong	82b,182b
+	.llong	37b,137b
+	.llong	83b,183b
+	.llong	38b,138b
+	.llong	39b,139b
+	.llong	84b,184b
+	.llong	85b,185b
+	.llong	40b,140b
+	.llong	86b,186b
+	.llong	41b,141b
+	.llong	87b,187b
+	.llong	42b,142b
+	.llong	88b,188b
+	.llong	43b,143b
+	.llong	89b,189b
+	.llong	90b,190b
+	.llong	91b,191b
+	.llong	92b,192b
+	
+	.text
+
+/*
+ * Routine to copy a whole page of data, optimized for POWER4.
+ * On POWER4 it is more than 50% faster than the simple loop
+ * above (following the .Ldst_aligned label) but it runs slightly
+ * slower on POWER3.
+ */
+.Lcopy_page:
+	std	r31,-32(1)
+	std	r30,-40(1)
+	std	r29,-48(1)
+	std	r28,-56(1)
+	std	r27,-64(1)
+	std	r26,-72(1)
+	std	r25,-80(1)
+	std	r24,-88(1)
+	std	r23,-96(1)
+	std	r22,-104(1)
+	std	r21,-112(1)
+	std	r20,-120(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r0,5
+0:	addi	r5,r5,-24
+	mtctr	r0
+20:	ld	r22,640(4)
+21:	ld	r21,512(4)
+22:	ld	r20,384(4)
+23:	ld	r11,256(4)
+24:	ld	r9,128(4)
+25:	ld	r7,0(4)
+26:	ld	r25,648(4)
+27:	ld	r24,520(4)
+28:	ld	r23,392(4)
+29:	ld	r10,264(4)
+30:	ld	r8,136(4)
+31:	ldu	r6,8(4)
+	cmpwi	r5,24
+1:
+32:	std	r22,648(3)
+33:	std	r21,520(3)
+34:	std	r20,392(3)
+35:	std	r11,264(3)
+36:	std	r9,136(3)
+37:	std	r7,8(3)
+38:	ld	r28,648(4)
+39:	ld	r27,520(4)
+40:	ld	r26,392(4)
+41:	ld	r31,264(4)
+42:	ld	r30,136(4)
+43:	ld	r29,8(4)
+44:	std	r25,656(3)
+45:	std	r24,528(3)
+46:	std	r23,400(3)
+47:	std	r10,272(3)
+48:	std	r8,144(3)
+49:	std	r6,16(3)
+50:	ld	r22,656(4)
+51:	ld	r21,528(4)
+52:	ld	r20,400(4)
+53:	ld	r11,272(4)
+54:	ld	r9,144(4)
+55:	ld	r7,16(4)
+56:	std	r28,664(3)
+57:	std	r27,536(3)
+58:	std	r26,408(3)
+59:	std	r31,280(3)
+60:	std	r30,152(3)
+61:	stdu	r29,24(3)
+62:	ld	r25,664(4)
+63:	ld	r24,536(4)
+64:	ld	r23,408(4)
+65:	ld	r10,280(4)
+66:	ld	r8,152(4)
+67:	ldu	r6,24(4)
+	bdnz	1b
+68:	std	r22,648(3)
+69:	std	r21,520(3)
+70:	std	r20,392(3)
+71:	std	r11,264(3)
+72:	std	r9,136(3)
+73:	std	r7,8(3)
+74:	addi	r4,r4,640
+75:	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+76:	ld	r7,0(4)
+77:	ld	r8,8(4)
+78:	ldu	r9,16(4)
+3:
+79:	ld	r10,8(4)
+80:	std	r7,8(3)
+81:	ld	r7,16(4)
+82:	std	r8,16(3)
+83:	ld	r8,24(4)
+84:	std	r9,24(3)
+85:	ldu	r9,32(4)
+86:	stdu	r10,32(3)
+	bdnz	3b
+4:
+87:	ld	r10,8(4)
+88:	std	r7,8(3)
+89:	std	r8,16(3)
+90:	std	r9,24(3)
+91:	std	r10,32(3)
+9:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	li	r3,0
+	blr
+
+/*
+ * on an exception, reset to the beginning and jump back into the
+ * standard __copy_tofrom_user
+ */
+100:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	ld	r3,-24(r1)
+	ld	r4,-16(r1)
+	li	r5,4096
+	b	.Ldst_aligned
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,100b
+	.llong	21b,100b
+	.llong	22b,100b
+	.llong	23b,100b
+	.llong	24b,100b
+	.llong	25b,100b
+	.llong	26b,100b
+	.llong	27b,100b
+	.llong	28b,100b
+	.llong	29b,100b
+	.llong	30b,100b
+	.llong	31b,100b
+	.llong	32b,100b
+	.llong	33b,100b
+	.llong	34b,100b
+	.llong	35b,100b
+	.llong	36b,100b
+	.llong	37b,100b
+	.llong	38b,100b
+	.llong	39b,100b
+	.llong	40b,100b
+	.llong	41b,100b
+	.llong	42b,100b
+	.llong	43b,100b
+	.llong	44b,100b
+	.llong	45b,100b
+	.llong	46b,100b
+	.llong	47b,100b
+	.llong	48b,100b
+	.llong	49b,100b
+	.llong	50b,100b
+	.llong	51b,100b
+	.llong	52b,100b
+	.llong	53b,100b
+	.llong	54b,100b
+	.llong	55b,100b
+	.llong	56b,100b
+	.llong	57b,100b
+	.llong	58b,100b
+	.llong	59b,100b
+	.llong	60b,100b
+	.llong	61b,100b
+	.llong	62b,100b
+	.llong	63b,100b
+	.llong	64b,100b
+	.llong	65b,100b
+	.llong	66b,100b
+	.llong	67b,100b
+	.llong	68b,100b
+	.llong	69b,100b
+	.llong	70b,100b
+	.llong	71b,100b
+	.llong	72b,100b
+	.llong	73b,100b
+	.llong	74b,100b
+	.llong	75b,100b
+	.llong	76b,100b
+	.llong	77b,100b
+	.llong	78b,100b
+	.llong	79b,100b
+	.llong	80b,100b
+	.llong	81b,100b
+	.llong	82b,100b
+	.llong	83b,100b
+	.llong	84b,100b
+	.llong	85b,100b
+	.llong	86b,100b
+	.llong	87b,100b
+	.llong	88b,100b
+	.llong	89b,100b
+	.llong	90b,100b
+	.llong	91b,100b
diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S
new file mode 100644
index 000000000000..3527569e9926
--- /dev/null
+++ b/arch/powerpc/lib/div64.S
@@ -0,0 +1,58 @@
+/*
+ * Divide a 64-bit unsigned number by a 32-bit unsigned number.
+ * This routine assumes that the top 32 bits of the dividend are
+ * non-zero to start with.
+ * On entry, r3 points to the dividend, which get overwritten with
+ * the 64-bit quotient, and r4 contains the divisor.
+ * On exit, r3 contains the remainder.
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+_GLOBAL(__div64_32)
+	lwz	r5,0(r3)	# get the dividend into r5/r6
+	lwz	r6,4(r3)
+	cmplw	r5,r4
+	li	r7,0
+	li	r8,0
+	blt	1f
+	divwu	r7,r5,r4	# if dividend.hi >= divisor,
+	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
+	subf.	r5,r0,r5	# dividend.hi %= divisor
+	beq	3f
+1:	mr	r11,r5		# here dividend.hi != 0
+	andis.	r0,r5,0xc000
+	bne	2f
+	cntlzw	r0,r5		# we are shifting the dividend right
+	li	r10,-1		# to make it < 2^32, and shifting
+	srw	r10,r10,r0	# the divisor right the same amount,
+	add	r9,r4,r10	# rounding up (so the estimate cannot
+	andc	r11,r6,r10	# ever be too large, only too small)
+	andc	r9,r9,r10
+	or	r11,r5,r11
+	rotlw	r9,r9,r0
+	rotlw	r11,r11,r0
+	divwu	r11,r11,r9	# then we divide the shifted quantities
+2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
+	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
+	subfc	r6,r10,r6	# take the product from the divisor,
+	add	r8,r8,r11	# and add the estimate to the accumulated
+	subfe.	r5,r9,r5	# quotient
+	bne	1b
+3:	cmplw	r6,r4
+	blt	4f
+	divwu	r0,r6,r4	# perform the remaining 32-bit division
+	mullw	r10,r0,r4	# and get the remainder
+	add	r8,r8,r0
+	subf	r6,r10,r6
+4:	stw	r7,0(r3)	# return the quotient in *r3
+	stw	r8,4(r3)
+	mr	r3,r6		# return the remainder in r3
+	blr
diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c
new file mode 100644
index 000000000000..d2b834887920
--- /dev/null
+++ b/arch/powerpc/lib/e2a.c
@@ -0,0 +1,108 @@
+/*
+ *  arch/ppc64/lib/e2a.c
+ *
+ *  EBCDIC to ASCII conversion
+ *
+ * This function moved here from arch/ppc64/kernel/viopath.c
+ *
+ * (C) Copyright 2000-2004 IBM Corporation
+ *
+ * This program is free software;  you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) anyu later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/module.h>
+
+unsigned char e2a(unsigned char x)
+{
+	switch (x) {
+	case 0xF0:
+		return '0';
+	case 0xF1:
+		return '1';
+	case 0xF2:
+		return '2';
+	case 0xF3:
+		return '3';
+	case 0xF4:
+		return '4';
+	case 0xF5:
+		return '5';
+	case 0xF6:
+		return '6';
+	case 0xF7:
+		return '7';
+	case 0xF8:
+		return '8';
+	case 0xF9:
+		return '9';
+	case 0xC1:
+		return 'A';
+	case 0xC2:
+		return 'B';
+	case 0xC3:
+		return 'C';
+	case 0xC4:
+		return 'D';
+	case 0xC5:
+		return 'E';
+	case 0xC6:
+		return 'F';
+	case 0xC7:
+		return 'G';
+	case 0xC8:
+		return 'H';
+	case 0xC9:
+		return 'I';
+	case 0xD1:
+		return 'J';
+	case 0xD2:
+		return 'K';
+	case 0xD3:
+		return 'L';
+	case 0xD4:
+		return 'M';
+	case 0xD5:
+		return 'N';
+	case 0xD6:
+		return 'O';
+	case 0xD7:
+		return 'P';
+	case 0xD8:
+		return 'Q';
+	case 0xD9:
+		return 'R';
+	case 0xE2:
+		return 'S';
+	case 0xE3:
+		return 'T';
+	case 0xE4:
+		return 'U';
+	case 0xE5:
+		return 'V';
+	case 0xE6:
+		return 'W';
+	case 0xE7:
+		return 'X';
+	case 0xE8:
+		return 'Y';
+	case 0xE9:
+		return 'Z';
+	}
+	return ' ';
+}
+EXPORT_SYMBOL(e2a);
+
+
diff --git a/arch/powerpc/lib/memcpy.S b/arch/powerpc/lib/memcpy.S
new file mode 100644
index 000000000000..9ccacdf5bcb9
--- /dev/null
+++ b/arch/powerpc/lib/memcpy.S
@@ -0,0 +1,172 @@
+/*
+ * arch/ppc64/lib/memcpy.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+	.align	7
+_GLOBAL(memcpy)
+	mtcrf	0x01,r5
+	cmpldi	cr1,r5,16
+	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
+	andi.	r6,r6,7
+	dcbt	0,r4
+	blt	cr1,.Lshort_copy
+	bne	.Ldst_unaligned
+.Ldst_aligned:
+	andi.	r0,r4,7
+	addi	r3,r3,-16
+	bne	.Lsrc_unaligned
+	srdi	r7,r5,4
+	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7
+	bf	cr7*4+0,2f
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,3f
+1:	ld	r9,8(r4)
+	std	r8,8(r3)
+2:	ldu	r8,16(r4)
+	stdu	r9,16(r3)
+	bdnz	1b
+3:	std	r8,8(r3)
+	beqlr
+	addi	r3,r3,16
+	ld	r9,8(r4)
+.Ldo_tail:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+	stb	r9,0(r3)
+3:	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4
+	srdi	r7,r5,4
+	sldi	r10,r0,3
+	cmpdi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64
+	add	r5,r5,r0
+
+	bt	cr7*4+0,0f
+
+	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
+	ld	r0,8(r4)
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,4f
+	ld	r0,8(r4)
+	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
+	b	2f
+
+0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
+	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,3f
+
+	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
+1:	or	r7,r7,r6
+	ld	r0,8(r4)
+	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+3:	std	r12,8(r3)
+	or	r7,r7,r6
+4:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+	std	r12,24(r3)
+	beqlr
+	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,.Ldo_tail
+	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+	b	.Ldo_tail
+
+.Ldst_unaligned:
+	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
+	subf	r5,r6,r5
+	li	r7,0
+	cmpldi	r1,r5,16
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+	lhzx	r0,r7,r4
+	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+	lwzx	r0,r7,r4
+	stwx	r0,r7,r3
+3:	mtcrf	0x01,r5
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f
+	lwz	r0,0(r4)
+	lwz	r9,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+4:	blr
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
new file mode 100644
index 000000000000..42c5de2c898f
--- /dev/null
+++ b/arch/powerpc/lib/rheap.c
@@ -0,0 +1,693 @@
+/*
+ * arch/ppc/syslib/rheap.c
+ *
+ * A Remote Heap.  Remote means that we don't touch the memory that the
+ * heap points to. Normal heap implementations use the memory they manage
+ * to place their list. We cannot do that because the memory we manage may
+ * have special properties, for example it is uncachable or of different
+ * endianess.
+ *
+ * Author: Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2004 (c) INTRACOM S.A. Greece. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/rheap.h>
+
+/*
+ * Fixup a list_head, needed when copying lists.  If the pointers fall
+ * between s and e, apply the delta.  This assumes that
+ * sizeof(struct list_head *) == sizeof(unsigned long *).
+ */
+static inline void fixup(unsigned long s, unsigned long e, int d,
+			 struct list_head *l)
+{
+	unsigned long *pp;
+
+	pp = (unsigned long *)&l->next;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+
+	pp = (unsigned long *)&l->prev;
+	if (*pp >= s && *pp < e)
+		*pp += d;
+}
+
+/* Grow the allocated blocks */
+static int grow(rh_info_t * info, int max_blocks)
+{
+	rh_block_t *block, *blk;
+	int i, new_blocks;
+	int delta;
+	unsigned long blks, blke;
+
+	if (max_blocks <= info->max_blocks)
+		return -EINVAL;
+
+	new_blocks = max_blocks - info->max_blocks;
+
+	block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL);
+	if (block == NULL)
+		return -ENOMEM;
+
+	if (info->max_blocks > 0) {
+
+		/* copy old block area */
+		memcpy(block, info->block,
+		       sizeof(rh_block_t) * info->max_blocks);
+
+		delta = (char *)block - (char *)info->block;
+
+		/* and fixup list pointers */
+		blks = (unsigned long)info->block;
+		blke = (unsigned long)(info->block + info->max_blocks);
+
+		for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
+			fixup(blks, blke, delta, &blk->list);
+
+		fixup(blks, blke, delta, &info->empty_list);
+		fixup(blks, blke, delta, &info->free_list);
+		fixup(blks, blke, delta, &info->taken_list);
+
+		/* free the old allocated memory */
+		if ((info->flags & RHIF_STATIC_BLOCK) == 0)
+			kfree(info->block);
+	}
+
+	info->block = block;
+	info->empty_slots += new_blocks;
+	info->max_blocks = max_blocks;
+	info->flags &= ~RHIF_STATIC_BLOCK;
+
+	/* add all new blocks to the free list */
+	for (i = 0, blk = block + info->max_blocks; i < new_blocks; i++, blk++)
+		list_add(&blk->list, &info->empty_list);
+
+	return 0;
+}
+
+/*
+ * Assure at least the required amount of empty slots.  If this function
+ * causes a grow in the block area then all pointers kept to the block
+ * area are invalid!
+ */
+static int assure_empty(rh_info_t * info, int slots)
+{
+	int max_blocks;
+
+	/* This function is not meant to be used to grow uncontrollably */
+	if (slots >= 4)
+		return -EINVAL;
+
+	/* Enough space */
+	if (info->empty_slots >= slots)
+		return 0;
+
+	/* Next 16 sized block */
+	max_blocks = ((info->max_blocks + slots) + 15) & ~15;
+
+	return grow(info, max_blocks);
+}
+
+static rh_block_t *get_slot(rh_info_t * info)
+{
+	rh_block_t *blk;
+
+	/* If no more free slots, and failure to extend. */
+	/* XXX: You should have called assure_empty before */
+	if (info->empty_slots == 0) {
+		printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
+		return NULL;
+	}
+
+	/* Get empty slot to use */
+	blk = list_entry(info->empty_list.next, rh_block_t, list);
+	list_del_init(&blk->list);
+	info->empty_slots--;
+
+	/* Initialize */
+	blk->start = NULL;
+	blk->size = 0;
+	blk->owner = NULL;
+
+	return blk;
+}
+
+static inline void release_slot(rh_info_t * info, rh_block_t * blk)
+{
+	list_add(&blk->list, &info->empty_list);
+	info->empty_slots++;
+}
+
+static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
+{
+	rh_block_t *blk;
+	rh_block_t *before;
+	rh_block_t *after;
+	rh_block_t *next;
+	int size;
+	unsigned long s, e, bs, be;
+	struct list_head *l;
+
+	/* We assume that they are aligned properly */
+	size = blkn->size;
+	s = (unsigned long)blkn->start;
+	e = s + size;
+
+	/* Find the blocks immediately before and after the given one
+	 * (if any) */
+	before = NULL;
+	after = NULL;
+	next = NULL;
+
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+
+		bs = (unsigned long)blk->start;
+		be = bs + blk->size;
+
+		if (next == NULL && s >= bs)
+			next = blk;
+
+		if (be == s)
+			before = blk;
+
+		if (e == bs)
+			after = blk;
+
+		/* If both are not null, break now */
+		if (before != NULL && after != NULL)
+			break;
+	}
+
+	/* Now check if they are really adjacent */
+	if (before != NULL && s != (unsigned long)before->start + before->size)
+		before = NULL;
+
+	if (after != NULL && e != (unsigned long)after->start)
+		after = NULL;
+
+	/* No coalescing; list insert and return */
+	if (before == NULL && after == NULL) {
+
+		if (next != NULL)
+			list_add(&blkn->list, &next->list);
+		else
+			list_add(&blkn->list, &info->free_list);
+
+		return;
+	}
+
+	/* We don't need it anymore */
+	release_slot(info, blkn);
+
+	/* Grow the before block */
+	if (before != NULL && after == NULL) {
+		before->size += size;
+		return;
+	}
+
+	/* Grow the after block backwards */
+	if (before == NULL && after != NULL) {
+		after->start = (int8_t *)after->start - size;
+		after->size += size;
+		return;
+	}
+
+	/* Grow the before block, and release the after block */
+	before->size += size + after->size;
+	list_del(&after->list);
+	release_slot(info, after);
+}
+
+static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
+{
+	rh_block_t *blk;
+	struct list_head *l;
+
+	/* Find the block immediately before the given one (if any) */
+	list_for_each(l, &info->taken_list) {
+		blk = list_entry(l, rh_block_t, list);
+		if (blk->start > blkn->start) {
+			list_add_tail(&blkn->list, &blk->list);
+			return;
+		}
+	}
+
+	list_add_tail(&blkn->list, &info->taken_list);
+}
+
+/*
+ * Create a remote heap dynamically.  Note that no memory for the blocks
+ * are allocated.  It will upon the first allocation
+ */
+rh_info_t *rh_create(unsigned int alignment)
+{
+	rh_info_t *info;
+
+	/* Alignment must be a power of two */
+	if ((alignment & (alignment - 1)) != 0)
+		return ERR_PTR(-EINVAL);
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	info->alignment = alignment;
+
+	/* Initially everything as empty */
+	info->block = NULL;
+	info->max_blocks = 0;
+	info->empty_slots = 0;
+	info->flags = 0;
+
+	INIT_LIST_HEAD(&info->empty_list);
+	INIT_LIST_HEAD(&info->free_list);
+	INIT_LIST_HEAD(&info->taken_list);
+
+	return info;
+}
+
+/*
+ * Destroy a dynamically created remote heap.  Deallocate only if the areas
+ * are not static
+ */
+void rh_destroy(rh_info_t * info)
+{
+	if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL)
+		kfree(info->block);
+
+	if ((info->flags & RHIF_STATIC_INFO) == 0)
+		kfree(info);
+}
+
+/*
+ * Initialize in place a remote heap info block.  This is needed to support
+ * operation very early in the startup of the kernel, when it is not yet safe
+ * to call kmalloc.
+ */
+void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
+	     rh_block_t * block)
+{
+	int i;
+	rh_block_t *blk;
+
+	/* Alignment must be a power of two */
+	if ((alignment & (alignment - 1)) != 0)
+		return;
+
+	info->alignment = alignment;
+
+	/* Initially everything as empty */
+	info->block = block;
+	info->max_blocks = max_blocks;
+	info->empty_slots = max_blocks;
+	info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;
+
+	INIT_LIST_HEAD(&info->empty_list);
+	INIT_LIST_HEAD(&info->free_list);
+	INIT_LIST_HEAD(&info->taken_list);
+
+	/* Add all new blocks to the free list */
+	for (i = 0, blk = block; i < max_blocks; i++, blk++)
+		list_add(&blk->list, &info->empty_list);
+}
+
+/* Attach a free memory region, coalesces regions if adjuscent */
+int rh_attach_region(rh_info_t * info, void *start, int size)
+{
+	rh_block_t *blk;
+	unsigned long s, e, m;
+	int r;
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	/* Take final values */
+	start = (void *)s;
+	size = (int)(e - s);
+
+	/* Grow the blocks, if needed */
+	r = assure_empty(info, 1);
+	if (r < 0)
+		return r;
+
+	blk = get_slot(info);
+	blk->start = start;
+	blk->size = size;
+	blk->owner = NULL;
+
+	attach_free_block(info, blk);
+
+	return 0;
+}
+
+/* Detatch given address range, splits free block if needed. */
+void *rh_detach_region(rh_info_t * info, void *start, int size)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk;
+	unsigned long s, e, m, bs, be;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	if (assure_empty(info, 1) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = (unsigned long)blk->start;
+		be = (unsigned long)blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Delete from free list, release slot */
+		list_del(&blk->list);
+		release_slot(info, blk);
+		return (void *)s;
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start = (int8_t *)blk->start + size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* the back free fragment */
+		newblk = get_slot(info);
+		newblk->start = (void *)e;
+		newblk->size = be - e;
+
+		list_add(&newblk->list, &blk->list);
+	}
+
+	return (void *)s;
+}
+
+void *rh_alloc(rh_info_t * info, int size, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk;
+	rh_block_t *newblk;
+	void *start;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* Align to configured alignment */
+	size = (size + (info->alignment - 1)) & ~(info->alignment - 1);
+
+	if (assure_empty(info, 1) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		if (size <= blk->size)
+			break;
+		blk = NULL;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Just fits */
+	if (blk->size == size) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		blk->owner = owner;
+		start = blk->start;
+
+		attach_taken_block(info, blk);
+
+		return start;
+	}
+
+	newblk = get_slot(info);
+	newblk->start = blk->start;
+	newblk->size = size;
+	newblk->owner = owner;
+
+	/* blk still in free list, with updated start, size */
+	blk->start = (int8_t *)blk->start + size;
+	blk->size -= size;
+
+	start = newblk->start;
+
+	attach_taken_block(info, newblk);
+
+	return start;
+}
+
+/* allocate at precisely the given address */
+void *rh_alloc_fixed(rh_info_t * info, void *start, int size, const char *owner)
+{
+	struct list_head *l;
+	rh_block_t *blk, *newblk1, *newblk2;
+	unsigned long s, e, m, bs, be;
+
+	/* Validate size */
+	if (size <= 0)
+		return ERR_PTR(-EINVAL);
+
+	/* The region must be aligned */
+	s = (unsigned long)start;
+	e = s + size;
+	m = info->alignment - 1;
+
+	/* Round start up */
+	s = (s + m) & ~m;
+
+	/* Round end down */
+	e = e & ~m;
+
+	if (assure_empty(info, 2) < 0)
+		return ERR_PTR(-ENOMEM);
+
+	blk = NULL;
+	list_for_each(l, &info->free_list) {
+		blk = list_entry(l, rh_block_t, list);
+		/* The range must lie entirely inside one free block */
+		bs = (unsigned long)blk->start;
+		be = (unsigned long)blk->start + blk->size;
+		if (s >= bs && e <= be)
+			break;
+	}
+
+	if (blk == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* Perfect fit */
+	if (bs == s && be == e) {
+		/* Move from free list to taken list */
+		list_del(&blk->list);
+		blk->owner = owner;
+
+		start = blk->start;
+		attach_taken_block(info, blk);
+
+		return start;
+
+	}
+
+	/* blk still in free list, with updated start and/or size */
+	if (bs == s || be == e) {
+		if (bs == s)
+			blk->start = (int8_t *)blk->start + size;
+		blk->size -= size;
+
+	} else {
+		/* The front free fragment */
+		blk->size = s - bs;
+
+		/* The back free fragment */
+		newblk2 = get_slot(info);
+		newblk2->start = (void *)e;
+		newblk2->size = be - e;
+
+		list_add(&newblk2->list, &blk->list);
+	}
+
+	newblk1 = get_slot(info);
+	newblk1->start = (void *)s;
+	newblk1->size = e - s;
+	newblk1->owner = owner;
+
+	start = newblk1->start;
+	attach_taken_block(info, newblk1);
+
+	return start;
+}
+
+int rh_free(rh_info_t * info, void *start)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/* Linear search for block */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	if (blk == NULL || start > (blk->start + blk->size))
+		return -EINVAL;
+
+	/* Remove from taken list */
+	list_del(&blk->list);
+
+	/* Get size of freed block */
+	size = blk->size;
+	attach_free_block(info, blk);
+
+	return size;
+}
+
+int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
+{
+	rh_block_t *blk;
+	struct list_head *l;
+	struct list_head *h;
+	int nr;
+
+	switch (what) {
+
+	case RHGS_FREE:
+		h = &info->free_list;
+		break;
+
+	case RHGS_TAKEN:
+		h = &info->taken_list;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Linear search for block */
+	nr = 0;
+	list_for_each(l, h) {
+		blk = list_entry(l, rh_block_t, list);
+		if (stats != NULL && nr < max_stats) {
+			stats->start = blk->start;
+			stats->size = blk->size;
+			stats->owner = blk->owner;
+			stats++;
+		}
+		nr++;
+	}
+
+	return nr;
+}
+
+int rh_set_owner(rh_info_t * info, void *start, const char *owner)
+{
+	rh_block_t *blk, *blk2;
+	struct list_head *l;
+	int size;
+
+	/* Linear search for block */
+	blk = NULL;
+	list_for_each(l, &info->taken_list) {
+		blk2 = list_entry(l, rh_block_t, list);
+		if (start < blk2->start)
+			break;
+		blk = blk2;
+	}
+
+	if (blk == NULL || start > (blk->start + blk->size))
+		return -EINVAL;
+
+	blk->owner = owner;
+	size = blk->size;
+
+	return size;
+}
+
+void rh_dump(rh_info_t * info)
+{
+	static rh_stats_t st[32];	/* XXX maximum 32 blocks */
+	int maxnr;
+	int i, nr;
+
+	maxnr = sizeof(st) / sizeof(st[0]);
+
+	printk(KERN_INFO
+	       "info @0x%p (%d slots empty / %d max)\n",
+	       info, info->empty_slots, info->max_blocks);
+
+	printk(KERN_INFO "  Free:\n");
+	nr = rh_get_stats(info, RHGS_FREE, maxnr, st);
+	if (nr > maxnr)
+		nr = maxnr;
+	for (i = 0; i < nr; i++)
+		printk(KERN_INFO
+		       "    0x%p-0x%p (%u)\n",
+		       st[i].start, (int8_t *) st[i].start + st[i].size,
+		       st[i].size);
+	printk(KERN_INFO "\n");
+
+	printk(KERN_INFO "  Taken:\n");
+	nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st);
+	if (nr > maxnr)
+		nr = maxnr;
+	for (i = 0; i < nr; i++)
+		printk(KERN_INFO
+		       "    0x%p-0x%p (%u) %s\n",
+		       st[i].start, (int8_t *) st[i].start + st[i].size,
+		       st[i].size, st[i].owner != NULL ? st[i].owner : "");
+	printk(KERN_INFO "\n");
+}
+
+void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
+{
+	printk(KERN_INFO
+	       "blk @0x%p: 0x%p-0x%p (%u)\n",
+	       blk, blk->start, (int8_t *) blk->start + blk->size, blk->size);
+}
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
new file mode 100644
index 000000000000..e79123d1485c
--- /dev/null
+++ b/arch/powerpc/lib/sstep.c
@@ -0,0 +1,141 @@
+/*
+ * Single-step support.
+ *
+ * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <asm/sstep.h>
+#include <asm/processor.h>
+
+extern char system_call_common[];
+
+/* Bits in SRR1 that are copied from MSR */
+#define MSR_MASK	0xffffffff87c0ffff
+
+/*
+ * Determine whether a conditional branch instruction would branch.
+ */
+static int branch_taken(unsigned int instr, struct pt_regs *regs)
+{
+	unsigned int bo = (instr >> 21) & 0x1f;
+	unsigned int bi;
+
+	if ((bo & 4) == 0) {
+		/* decrement counter */
+		--regs->ctr;
+		if (((bo >> 1) & 1) ^ (regs->ctr == 0))
+			return 0;
+	}
+	if ((bo & 0x10) == 0) {
+		/* check bit from CR */
+		bi = (instr >> 16) & 0x1f;
+		if (((regs->ccr >> (31 - bi)) & 1) != ((bo >> 3) & 1))
+			return 0;
+	}
+	return 1;
+}
+
+/*
+ * Emulate instructions that cause a transfer of control.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int emulate_step(struct pt_regs *regs, unsigned int instr)
+{
+	unsigned int opcode, rd;
+	unsigned long int imm;
+
+	opcode = instr >> 26;
+	switch (opcode) {
+	case 16:	/* bc */
+		imm = (signed short)(instr & 0xfffc);
+		if ((instr & 2) == 0)
+			imm += regs->nip;
+		regs->nip += 4;
+		if ((regs->msr & MSR_SF) == 0)
+			regs->nip &= 0xffffffffUL;
+		if (instr & 1)
+			regs->link = regs->nip;
+		if (branch_taken(instr, regs))
+			regs->nip = imm;
+		return 1;
+	case 17:	/* sc */
+		/*
+		 * N.B. this uses knowledge about how the syscall
+		 * entry code works.  If that is changed, this will
+		 * need to be changed also.
+		 */
+		regs->gpr[9] = regs->gpr[13];
+		regs->gpr[11] = regs->nip + 4;
+		regs->gpr[12] = regs->msr & MSR_MASK;
+		regs->gpr[13] = (unsigned long) get_paca();
+		regs->nip = (unsigned long) &system_call_common;
+		regs->msr = MSR_KERNEL;
+		return 1;
+	case 18:	/* b */
+		imm = instr & 0x03fffffc;
+		if (imm & 0x02000000)
+			imm -= 0x04000000;
+		if ((instr & 2) == 0)
+			imm += regs->nip;
+		if (instr & 1) {
+			regs->link = regs->nip + 4;
+			if ((regs->msr & MSR_SF) == 0)
+				regs->link &= 0xffffffffUL;
+		}
+		if ((regs->msr & MSR_SF) == 0)
+			imm &= 0xffffffffUL;
+		regs->nip = imm;
+		return 1;
+	case 19:
+		switch (instr & 0x7fe) {
+		case 0x20:	/* bclr */
+		case 0x420:	/* bcctr */
+			imm = (instr & 0x400)? regs->ctr: regs->link;
+			regs->nip += 4;
+			if ((regs->msr & MSR_SF) == 0) {
+				regs->nip &= 0xffffffffUL;
+				imm &= 0xffffffffUL;
+			}
+			if (instr & 1)
+				regs->link = regs->nip;
+			if (branch_taken(instr, regs))
+				regs->nip = imm;
+			return 1;
+		case 0x24:	/* rfid, scary */
+			return -1;
+		}
+	case 31:
+		rd = (instr >> 21) & 0x1f;
+		switch (instr & 0x7fe) {
+		case 0xa6:	/* mfmsr */
+			regs->gpr[rd] = regs->msr & MSR_MASK;
+			regs->nip += 4;
+			if ((regs->msr & MSR_SF) == 0)
+				regs->nip &= 0xffffffffUL;
+			return 1;
+		case 0x164:	/* mtmsrd */
+			/* only MSR_EE and MSR_RI get changed if bit 15 set */
+			/* mtmsrd doesn't change MSR_HV and MSR_ME */
+			imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL;
+			imm = (regs->msr & MSR_MASK & ~imm)
+				| (regs->gpr[rd] & imm);
+			if ((imm & MSR_RI) == 0)
+				/* can't step mtmsrd that would clear MSR_RI */
+				return -1;
+			regs->msr = imm;
+			regs->nip += 4;
+			if ((imm & MSR_SF) == 0)
+				regs->nip &= 0xffffffffUL;
+			return 1;
+		}
+	}
+	return 0;
+}
diff --git a/arch/powerpc/lib/strcase.c b/arch/powerpc/lib/strcase.c
new file mode 100644
index 000000000000..36b521091bbc
--- /dev/null
+++ b/arch/powerpc/lib/strcase.c
@@ -0,0 +1,23 @@
+#include <linux/ctype.h>
+
+int strcasecmp(const char *s1, const char *s2)
+{
+	int c1, c2;
+
+	do {
+		c1 = tolower(*s1++);
+		c2 = tolower(*s2++);
+	} while (c1 == c2 && c1 != 0);
+	return c1 - c2;
+}
+
+int strncasecmp(const char *s1, const char *s2, int n)
+{
+	int c1, c2;
+
+	do {
+		c1 = tolower(*s1++);
+		c2 = tolower(*s2++);
+	} while ((--n > 0) && c1 == c2 && c1 != 0);
+	return c1 - c2;
+}
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
new file mode 100644
index 000000000000..15d40e9ef8b1
--- /dev/null
+++ b/arch/powerpc/lib/string.S
@@ -0,0 +1,203 @@
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+	.text
+	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
+	.stabs	"string.S",N_SO,0,0,0f
+0:
+
+	.section __ex_table,"a"
+#ifdef CONFIG_PPC64
+	.align	3
+#define EXTBL	.llong
+#else
+	.align	2
+#define EXTBL	.long
+#endif
+	.text
+	
+_GLOBAL(strcpy)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r5)
+	bne	1b
+	blr
+
+/* This clears out any unused part of the destination buffer,
+   just as the libc version does.  -- paulus */
+_GLOBAL(strncpy)
+	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r6)
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	bnelr			/* if we didn't hit a null char, we're done */
+	mfctr	r5
+	cmpwi	0,r5,0		/* any space left in destination buffer? */
+	beqlr			/* we know r0 == 0 here */
+2:	stbu	r0,1(r6)	/* clear it out if so */
+	bdnz	2b
+	blr
+
+_GLOBAL(strcat)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r0,1(r5)
+	cmpwi	0,r0,0
+	bne	1b
+	addi	r5,r5,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r5)
+	bne	1b
+	blr
+
+_GLOBAL(strcmp)
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r5)
+	cmpwi	1,r3,0
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	beqlr	1
+	beq	1b
+	blr
+
+_GLOBAL(strlen)
+	addi	r4,r3,-1
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	bne	1b
+	subf	r3,r3,r4
+	blr
+
+_GLOBAL(memcmp)
+	cmpwi	0,r5,0
+	ble-	2f
+	mtctr	r5
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r6)
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	bdnzt	2,1b
+	blr
+2:	li	r3,0
+	blr
+
+_GLOBAL(memchr)
+	cmpwi	0,r5,0
+	ble-	2f
+	mtctr	r5
+	addi	r3,r3,-1
+1:	lbzu	r0,1(r3)
+	cmpw	0,r0,r4
+	bdnzf	2,1b
+	beqlr
+2:	li	r3,0
+	blr
+
+_GLOBAL(__clear_user)
+	addi	r6,r3,-4
+	li	r3,0
+	li	r5,0
+	cmplwi	0,r4,4
+	blt	7f
+	/* clear a single word */
+11:	stwu	r5,4(r6)
+	beqlr
+	/* clear word sized chunks */
+	andi.	r0,r6,3
+	add	r4,r0,r4
+	subf	r6,r0,r6
+	srwi	r0,r4,2
+	andi.	r4,r4,3
+	mtctr	r0
+	bdz	7f
+1:	stwu	r5,4(r6)
+	bdnz	1b
+	/* clear byte sized chunks */
+7:	cmpwi	0,r4,0
+	beqlr
+	mtctr	r4
+	addi	r6,r6,3
+8:	stbu	r5,1(r6)
+	bdnz	8b
+	blr
+90:	mr	r3,r4
+	blr
+91:	mfctr	r3
+	slwi	r3,r3,2
+	add	r3,r3,r4
+	blr
+92:	mfctr	r3
+	blr
+
+	.section __ex_table,"a"
+	EXTBL	11b,90b
+	EXTBL	1b,91b
+	EXTBL	8b,92b
+	.text
+
+_GLOBAL(__strncpy_from_user)
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+	cmpwi	0,r5,0
+	beq	2f
+	mtctr	r5
+1:	lbzu	r0,1(r4)
+	cmpwi	0,r0,0
+	stbu	r0,1(r6)
+	bdnzf	2,1b		/* dec ctr, branch if ctr != 0 && !cr0.eq */
+	beq	3f
+2:	addi	r6,r6,1
+3:	subf	r3,r3,r6
+	blr
+99:	li	r3,-EFAULT
+	blr
+
+	.section __ex_table,"a"
+	EXTBL	1b,99b
+	.text
+
+/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */
+_GLOBAL(__strnlen_user)
+	addi	r7,r3,-1
+	subf	r6,r7,r5	/* top+1 - str */
+	cmplw	0,r4,r6
+	bge	0f
+	mr	r6,r4
+0:	mtctr	r6		/* ctr = min(len, top - str) */
+1:	lbzu	r0,1(r7)	/* get next byte */
+	cmpwi	0,r0,0
+	bdnzf	2,1b		/* loop if --ctr != 0 && byte != 0 */
+	addi	r7,r7,1
+	subf	r3,r3,r7	/* number of bytes we have looked at */
+	beqlr			/* return if we found a 0 byte */
+	cmpw	0,r3,r4		/* did we look at all len bytes? */
+	blt	99f		/* if not, must have hit top */
+	addi	r3,r4,1		/* return len + 1 to indicate no null found */
+	blr
+99:	li	r3,0		/* bad address, return 0 */
+	blr
+
+	.section __ex_table,"a"
+	EXTBL	1b,99b
diff --git a/arch/powerpc/lib/usercopy.c b/arch/powerpc/lib/usercopy.c
new file mode 100644
index 000000000000..5eea6f3c1e03
--- /dev/null
+++ b/arch/powerpc/lib/usercopy.c
@@ -0,0 +1,41 @@
+/*
+ * Functions which are too large to be inlined.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_READ, from, n)))
+		n = __copy_from_user(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+
+unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_WRITE, to, n)))
+		n = __copy_to_user(to, from, n);
+	return n;
+}
+
+unsigned long copy_in_user(void __user *to, const void __user *from,
+			   unsigned long n)
+{
+	might_sleep();
+	if (likely(access_ok(VERIFY_READ, from, n) &&
+	    access_ok(VERIFY_WRITE, to, n)))
+		n =__copy_tofrom_user(to, from, n);
+	return n;
+}
+
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(copy_in_user);
+
-- 
cgit v1.2.2


From 70d64ceaa1a84d2502405422a4dfd3f87786a347 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 10 Oct 2005 21:52:43 +1000
Subject: powerpc: Rename files to have consistent _32/_64 suffixes

This doesn't change any code, just renames things so we consistently
have foo_32.c and foo_64.c where we have separate 32- and 64-bit
versions.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/Makefile      |   6 +-
 arch/powerpc/lib/checksum.S    | 225 ----------------
 arch/powerpc/lib/checksum64.S  | 229 ----------------
 arch/powerpc/lib/checksum_32.S | 225 ++++++++++++++++
 arch/powerpc/lib/checksum_64.S | 229 ++++++++++++++++
 arch/powerpc/lib/copy32.S      | 543 --------------------------------------
 arch/powerpc/lib/copy_32.S     | 543 ++++++++++++++++++++++++++++++++++++++
 arch/powerpc/lib/copypage.S    | 121 ---------
 arch/powerpc/lib/copypage_64.S | 121 +++++++++
 arch/powerpc/lib/copyuser.S    | 576 -----------------------------------------
 arch/powerpc/lib/copyuser_64.S | 576 +++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/lib/mem_64.S      | 119 +++++++++
 arch/powerpc/lib/memcpy.S      | 172 ------------
 arch/powerpc/lib/memcpy_64.S   | 172 ++++++++++++
 arch/powerpc/lib/usercopy.c    |  41 ---
 arch/powerpc/lib/usercopy_64.c |  41 +++
 16 files changed, 2029 insertions(+), 1910 deletions(-)
 delete mode 100644 arch/powerpc/lib/checksum.S
 delete mode 100644 arch/powerpc/lib/checksum64.S
 create mode 100644 arch/powerpc/lib/checksum_32.S
 create mode 100644 arch/powerpc/lib/checksum_64.S
 delete mode 100644 arch/powerpc/lib/copy32.S
 create mode 100644 arch/powerpc/lib/copy_32.S
 delete mode 100644 arch/powerpc/lib/copypage.S
 create mode 100644 arch/powerpc/lib/copypage_64.S
 delete mode 100644 arch/powerpc/lib/copyuser.S
 create mode 100644 arch/powerpc/lib/copyuser_64.S
 create mode 100644 arch/powerpc/lib/mem_64.S
 delete mode 100644 arch/powerpc/lib/memcpy.S
 create mode 100644 arch/powerpc/lib/memcpy_64.S
 delete mode 100644 arch/powerpc/lib/usercopy.c
 create mode 100644 arch/powerpc/lib/usercopy_64.c

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 347f9798e433..a8cedb96de5f 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-y			:= strcase.o string.o
-obj-$(CONFIG_PPC32)	+= div64.o copy32.o checksum.o
-obj-$(CONFIG_PPC64)	+= copypage.o copyuser.o memcpy.o usercopy.o \
-			   sstep.o checksum64.o
+obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
+obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o memcpy_64.o \
+			   usercopy_64.o sstep.o checksum_64.o mem_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
diff --git a/arch/powerpc/lib/checksum.S b/arch/powerpc/lib/checksum.S
deleted file mode 100644
index 7874e8a80455..000000000000
--- a/arch/powerpc/lib/checksum.S
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * This file contains assembly-language implementations
- * of IP-style 1's complement checksum routines.
- *	
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
- */
-
-#include <linux/sys.h>
-#include <asm/processor.h>
-#include <asm/errno.h>
-#include <asm/ppc_asm.h>
-
-	.text
-
-/*
- * ip_fast_csum(buf, len) -- Optimized for IP header
- * len is in words and is always >= 5.
- */
-_GLOBAL(ip_fast_csum)
-	lwz	r0,0(r3)
-	lwzu	r5,4(r3)
-	addic.	r4,r4,-2
-	addc	r0,r0,r5
-	mtctr	r4
-	blelr-
-1:	lwzu	r4,4(r3)
-	adde	r0,r0,r4
-	bdnz	1b
-	addze	r0,r0		/* add in final carry */
-	rlwinm	r3,r0,16,0,31	/* fold two halves together */
-	add	r3,r0,r3
-	not	r3,r3
-	srwi	r3,r3,16
-	blr
-
-/*
- * Compute checksum of TCP or UDP pseudo-header:
- *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
- */	
-_GLOBAL(csum_tcpudp_magic)
-	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
-	addc	r0,r3,r4	/* add 4 32-bit words together */
-	adde	r0,r0,r5
-	adde	r0,r0,r7
-	addze	r0,r0		/* add in final carry */
-	rlwinm	r3,r0,16,0,31	/* fold two halves together */
-	add	r3,r0,r3
-	not	r3,r3
-	srwi	r3,r3,16
-	blr
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * csum_partial(buff, len, sum)
- */
-_GLOBAL(csum_partial)
-	addic	r0,r5,0
-	subi	r3,r3,4
-	srwi.	r6,r4,2
-	beq	3f		/* if we're doing < 4 bytes */
-	andi.	r5,r3,2		/* Align buffer to longword boundary */
-	beq+	1f
-	lhz	r5,4(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
-	subi	r4,r4,2
-	addc	r0,r0,r5
-	srwi.	r6,r4,2		/* # words to do */
-	beq	3f
-1:	mtctr	r6
-2:	lwzu	r5,4(r3)	/* the bdnz has zero overhead, so it should */
-	adde	r0,r0,r5	/* be unnecessary to unroll this loop */
-	bdnz	2b
-	andi.	r4,r4,3
-3:	cmpwi	0,r4,2
-	blt+	4f
-	lhz	r5,4(r3)
-	addi	r3,r3,2
-	subi	r4,r4,2
-	adde	r0,r0,r5
-4:	cmpwi	0,r4,1
-	bne+	5f
-	lbz	r5,4(r3)
-	slwi	r5,r5,8		/* Upper byte of word */
-	adde	r0,r0,r5
-5:	addze	r3,r0		/* add in final carry */
-	blr
-
-/*
- * Computes the checksum of a memory block at src, length len,
- * and adds in "sum" (32-bit), while copying the block to dst.
- * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
- */
-_GLOBAL(csum_partial_copy_generic)
-	addic	r0,r6,0
-	subi	r3,r3,4
-	subi	r4,r4,4
-	srwi.	r6,r5,2
-	beq	3f		/* if we're doing < 4 bytes */
-	andi.	r9,r4,2		/* Align dst to longword boundary */
-	beq+	1f
-81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
-	subi	r5,r5,2
-91:	sth	r6,4(r4)
-	addi	r4,r4,2
-	addc	r0,r0,r6
-	srwi.	r6,r5,2		/* # words to do */
-	beq	3f
-1:	srwi.	r6,r5,4		/* # groups of 4 words to do */
-	beq	10f
-	mtctr	r6
-71:	lwz	r6,4(r3)
-72:	lwz	r9,8(r3)
-73:	lwz	r10,12(r3)
-74:	lwzu	r11,16(r3)
-	adde	r0,r0,r6
-75:	stw	r6,4(r4)
-	adde	r0,r0,r9
-76:	stw	r9,8(r4)
-	adde	r0,r0,r10
-77:	stw	r10,12(r4)
-	adde	r0,r0,r11
-78:	stwu	r11,16(r4)
-	bdnz	71b
-10:	rlwinm.	r6,r5,30,30,31	/* # words left to do */
-	beq	13f
-	mtctr	r6
-82:	lwzu	r9,4(r3)
-92:	stwu	r9,4(r4)
-	adde	r0,r0,r9
-	bdnz	82b
-13:	andi.	r5,r5,3
-3:	cmpwi	0,r5,2
-	blt+	4f
-83:	lhz	r6,4(r3)
-	addi	r3,r3,2
-	subi	r5,r5,2
-93:	sth	r6,4(r4)
-	addi	r4,r4,2
-	adde	r0,r0,r6
-4:	cmpwi	0,r5,1
-	bne+	5f
-84:	lbz	r6,4(r3)
-94:	stb	r6,4(r4)
-	slwi	r6,r6,8		/* Upper byte of word */
-	adde	r0,r0,r6
-5:	addze	r3,r0		/* add in final carry */
-	blr
-
-/* These shouldn't go in the fixup section, since that would
-   cause the ex_table addresses to get out of order. */
-
-src_error_4:
-	mfctr	r6		/* update # bytes remaining from ctr */
-	rlwimi	r5,r6,4,0,27
-	b	79f
-src_error_1:
-	li	r6,0
-	subi	r5,r5,2
-95:	sth	r6,4(r4)
-	addi	r4,r4,2
-79:	srwi.	r6,r5,2
-	beq	3f
-	mtctr	r6
-src_error_2:
-	li	r6,0
-96:	stwu	r6,4(r4)
-	bdnz	96b
-3:	andi.	r5,r5,3
-	beq	src_error
-src_error_3:
-	li	r6,0
-	mtctr	r5
-	addi	r4,r4,3
-97:	stbu	r6,1(r4)
-	bdnz	97b
-src_error:
-	cmpwi	0,r7,0
-	beq	1f
-	li	r6,-EFAULT
-	stw	r6,0(r7)
-1:	addze	r3,r0
-	blr
-
-dst_error:
-	cmpwi	0,r8,0
-	beq	1f
-	li	r6,-EFAULT
-	stw	r6,0(r8)
-1:	addze	r3,r0
-	blr
-
-.section __ex_table,"a"
-	.long	81b,src_error_1
-	.long	91b,dst_error
-	.long	71b,src_error_4
-	.long	72b,src_error_4
-	.long	73b,src_error_4
-	.long	74b,src_error_4
-	.long	75b,dst_error
-	.long	76b,dst_error
-	.long	77b,dst_error
-	.long	78b,dst_error
-	.long	82b,src_error_2
-	.long	92b,dst_error
-	.long	83b,src_error_3
-	.long	93b,dst_error
-	.long	84b,src_error_3
-	.long	94b,dst_error
-	.long	95b,dst_error
-	.long	96b,dst_error
-	.long	97b,dst_error
diff --git a/arch/powerpc/lib/checksum64.S b/arch/powerpc/lib/checksum64.S
deleted file mode 100644
index ef96c6c58efc..000000000000
--- a/arch/powerpc/lib/checksum64.S
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * This file contains assembly-language implementations
- * of IP-style 1's complement checksum routines.
- *	
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  This program is free software; you can redistribute it and/or
- *  modify it under the terms of the GNU General Public License
- *  as published by the Free Software Foundation; either version
- *  2 of the License, or (at your option) any later version.
- *
- * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
- */
-
-#include <linux/sys.h>
-#include <asm/processor.h>
-#include <asm/errno.h>
-#include <asm/ppc_asm.h>
-
-/*
- * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
- * len is in words and is always >= 5.
- *
- * In practice len == 5, but this is not guaranteed.  So this code does not
- * attempt to use doubleword instructions.
- */
-_GLOBAL(ip_fast_csum)
-	lwz	r0,0(r3)
-	lwzu	r5,4(r3)
-	addic.	r4,r4,-2
-	addc	r0,r0,r5
-	mtctr	r4
-	blelr-
-1:	lwzu	r4,4(r3)
-	adde	r0,r0,r4
-	bdnz	1b
-	addze	r0,r0		/* add in final carry */
-        rldicl  r4,r0,32,0      /* fold two 32-bit halves together */
-        add     r0,r0,r4
-        srdi    r0,r0,32
-	rlwinm	r3,r0,16,0,31	/* fold two halves together */
-	add	r3,r0,r3
-	not	r3,r3
-	srwi	r3,r3,16
-	blr
-
-/*
- * Compute checksum of TCP or UDP pseudo-header:
- *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
- * No real gain trying to do this specially for 64 bit, but
- * the 32 bit addition may spill into the upper bits of
- * the doubleword so we still must fold it down from 64.
- */	
-_GLOBAL(csum_tcpudp_magic)
-	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
-	addc	r0,r3,r4	/* add 4 32-bit words together */
-	adde	r0,r0,r5
-	adde	r0,r0,r7
-        rldicl  r4,r0,32,0      /* fold 64 bit value */
-        add     r0,r4,r0
-        srdi    r0,r0,32
-	rlwinm	r3,r0,16,0,31	/* fold two halves together */
-	add	r3,r0,r3
-	not	r3,r3
-	srwi	r3,r3,16
-	blr
-
-/*
- * Computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit).
- *
- * This code assumes at least halfword alignment, though the length
- * can be any number of bytes.  The sum is accumulated in r5.
- *
- * csum_partial(r3=buff, r4=len, r5=sum)
- */
-_GLOBAL(csum_partial)
-        subi	r3,r3,8		/* we'll offset by 8 for the loads */
-        srdi.	r6,r4,3         /* divide by 8 for doubleword count */
-        addic   r5,r5,0         /* clear carry */
-        beq	3f              /* if we're doing < 8 bytes */
-        andi.	r0,r3,2         /* aligned on a word boundary already? */
-        beq+	1f
-        lhz     r6,8(r3)        /* do 2 bytes to get aligned */
-        addi    r3,r3,2
-        subi    r4,r4,2
-        addc    r5,r5,r6
-        srdi.   r6,r4,3         /* recompute number of doublewords */
-        beq     3f              /* any left? */
-1:      mtctr   r6
-2:      ldu     r6,8(r3)        /* main sum loop */
-        adde    r5,r5,r6
-        bdnz    2b
-        andi.	r4,r4,7         /* compute bytes left to sum after doublewords */
-3:	cmpwi	0,r4,4		/* is at least a full word left? */
-	blt	4f
-	lwz	r6,8(r3)	/* sum this word */
-	addi	r3,r3,4
-	subi	r4,r4,4
-	adde	r5,r5,r6
-4:	cmpwi	0,r4,2		/* is at least a halfword left? */
-        blt+	5f
-        lhz     r6,8(r3)        /* sum this halfword */
-        addi    r3,r3,2
-        subi    r4,r4,2
-        adde    r5,r5,r6
-5:	cmpwi	0,r4,1		/* is at least a byte left? */
-        bne+    6f
-        lbz     r6,8(r3)        /* sum this byte */
-        slwi    r6,r6,8         /* this byte is assumed to be the upper byte of a halfword */
-        adde    r5,r5,r6
-6:      addze	r5,r5		/* add in final carry */
-	rldicl  r4,r5,32,0      /* fold two 32-bit halves together */
-        add     r3,r4,r5
-        srdi    r3,r3,32
-        blr
-
-/*
- * Computes the checksum of a memory block at src, length len,
- * and adds in "sum" (32-bit), while copying the block to dst.
- * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
- *
- * This code needs to be reworked to take advantage of 64 bit sum+copy.
- * However, due to tokenring halfword alignment problems this will be very
- * tricky.  For now we'll leave it until we instrument it somehow.
- *
- * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
- */
-_GLOBAL(csum_partial_copy_generic)
-	addic	r0,r6,0
-	subi	r3,r3,4
-	subi	r4,r4,4
-	srwi.	r6,r5,2
-	beq	3f		/* if we're doing < 4 bytes */
-	andi.	r9,r4,2		/* Align dst to longword boundary */
-	beq+	1f
-81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
-	addi	r3,r3,2
-	subi	r5,r5,2
-91:	sth	r6,4(r4)
-	addi	r4,r4,2
-	addc	r0,r0,r6
-	srwi.	r6,r5,2		/* # words to do */
-	beq	3f
-1:	mtctr	r6
-82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
-92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
-	adde	r0,r0,r6
-	bdnz	82b
-	andi.	r5,r5,3
-3:	cmpwi	0,r5,2
-	blt+	4f
-83:	lhz	r6,4(r3)
-	addi	r3,r3,2
-	subi	r5,r5,2
-93:	sth	r6,4(r4)
-	addi	r4,r4,2
-	adde	r0,r0,r6
-4:	cmpwi	0,r5,1
-	bne+	5f
-84:	lbz	r6,4(r3)
-94:	stb	r6,4(r4)
-	slwi	r6,r6,8		/* Upper byte of word */
-	adde	r0,r0,r6
-5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
-        rldicl  r4,r3,32,0      /* fold 64 bit value */
-        add     r3,r4,r3
-        srdi    r3,r3,32
-	blr
-
-/* These shouldn't go in the fixup section, since that would
-   cause the ex_table addresses to get out of order. */
-
-	.globl src_error_1
-src_error_1:
-	li	r6,0
-	subi	r5,r5,2
-95:	sth	r6,4(r4)
-	addi	r4,r4,2
-	srwi.	r6,r5,2
-	beq	3f
-	mtctr	r6
-	.globl src_error_2
-src_error_2:
-	li	r6,0
-96:	stwu	r6,4(r4)
-	bdnz	96b
-3:	andi.	r5,r5,3
-	beq	src_error
-	.globl src_error_3
-src_error_3:
-	li	r6,0
-	mtctr	r5
-	addi	r4,r4,3
-97:	stbu	r6,1(r4)
-	bdnz	97b
-	.globl src_error
-src_error:
-	cmpdi	0,r7,0
-	beq	1f
-	li	r6,-EFAULT
-	stw	r6,0(r7)
-1:	addze	r3,r0
-	blr
-
-	.globl dst_error
-dst_error:
-	cmpdi	0,r8,0
-	beq	1f
-	li	r6,-EFAULT
-	stw	r6,0(r8)
-1:	addze	r3,r0
-	blr
-
-.section __ex_table,"a"
-	.align  3
-	.llong	81b,src_error_1
-	.llong	91b,dst_error
-	.llong	82b,src_error_2
-	.llong	92b,dst_error
-	.llong	83b,src_error_3
-	.llong	93b,dst_error
-	.llong	84b,src_error_3
-	.llong	94b,dst_error
-	.llong	95b,dst_error
-	.llong	96b,dst_error
-	.llong	97b,dst_error
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
new file mode 100644
index 000000000000..7874e8a80455
--- /dev/null
+++ b/arch/powerpc/lib/checksum_32.S
@@ -0,0 +1,225 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+	.text
+
+/*
+ * ip_fast_csum(buf, len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+	addze	r0,r0		/* add in final carry */
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * csum_partial(buff, len, sum)
+ */
+_GLOBAL(csum_partial)
+	addic	r0,r5,0
+	subi	r3,r3,4
+	srwi.	r6,r4,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r5,r3,2		/* Align buffer to longword boundary */
+	beq+	1f
+	lhz	r5,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r4,r4,2
+	addc	r0,r0,r5
+	srwi.	r6,r4,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+2:	lwzu	r5,4(r3)	/* the bdnz has zero overhead, so it should */
+	adde	r0,r0,r5	/* be unnecessary to unroll this loop */
+	bdnz	2b
+	andi.	r4,r4,3
+3:	cmpwi	0,r4,2
+	blt+	4f
+	lhz	r5,4(r3)
+	addi	r3,r3,2
+	subi	r4,r4,2
+	adde	r0,r0,r5
+4:	cmpwi	0,r4,1
+	bne+	5f
+	lbz	r5,4(r3)
+	slwi	r5,r5,8		/* Upper byte of word */
+	adde	r0,r0,r5
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	srwi.	r6,r5,4		/* # groups of 4 words to do */
+	beq	10f
+	mtctr	r6
+71:	lwz	r6,4(r3)
+72:	lwz	r9,8(r3)
+73:	lwz	r10,12(r3)
+74:	lwzu	r11,16(r3)
+	adde	r0,r0,r6
+75:	stw	r6,4(r4)
+	adde	r0,r0,r9
+76:	stw	r9,8(r4)
+	adde	r0,r0,r10
+77:	stw	r10,12(r4)
+	adde	r0,r0,r11
+78:	stwu	r11,16(r4)
+	bdnz	71b
+10:	rlwinm.	r6,r5,30,30,31	/* # words left to do */
+	beq	13f
+	mtctr	r6
+82:	lwzu	r9,4(r3)
+92:	stwu	r9,4(r4)
+	adde	r0,r0,r9
+	bdnz	82b
+13:	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry */
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+src_error_4:
+	mfctr	r6		/* update # bytes remaining from ctr */
+	rlwimi	r5,r6,4,0,27
+	b	79f
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+79:	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+src_error:
+	cmpwi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+dst_error:
+	cmpwi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.long	81b,src_error_1
+	.long	91b,dst_error
+	.long	71b,src_error_4
+	.long	72b,src_error_4
+	.long	73b,src_error_4
+	.long	74b,src_error_4
+	.long	75b,dst_error
+	.long	76b,dst_error
+	.long	77b,dst_error
+	.long	78b,dst_error
+	.long	82b,src_error_2
+	.long	92b,dst_error
+	.long	83b,src_error_3
+	.long	93b,dst_error
+	.long	84b,src_error_3
+	.long	94b,dst_error
+	.long	95b,dst_error
+	.long	96b,dst_error
+	.long	97b,dst_error
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
new file mode 100644
index 000000000000..ef96c6c58efc
--- /dev/null
+++ b/arch/powerpc/lib/checksum_64.S
@@ -0,0 +1,229 @@
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *	
+ *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
+ * len is in words and is always >= 5.
+ *
+ * In practice len == 5, but this is not guaranteed.  So this code does not
+ * attempt to use doubleword instructions.
+ */
+_GLOBAL(ip_fast_csum)
+	lwz	r0,0(r3)
+	lwzu	r5,4(r3)
+	addic.	r4,r4,-2
+	addc	r0,r0,r5
+	mtctr	r4
+	blelr-
+1:	lwzu	r4,4(r3)
+	adde	r0,r0,r4
+	bdnz	1b
+	addze	r0,r0		/* add in final carry */
+        rldicl  r4,r0,32,0      /* fold two 32-bit halves together */
+        add     r0,r0,r4
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Compute checksum of TCP or UDP pseudo-header:
+ *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
+ * No real gain trying to do this specially for 64 bit, but
+ * the 32 bit addition may spill into the upper bits of
+ * the doubleword so we still must fold it down from 64.
+ */	
+_GLOBAL(csum_tcpudp_magic)
+	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
+	addc	r0,r3,r4	/* add 4 32-bit words together */
+	adde	r0,r0,r5
+	adde	r0,r0,r7
+        rldicl  r4,r0,32,0      /* fold 64 bit value */
+        add     r0,r4,r0
+        srdi    r0,r0,32
+	rlwinm	r3,r0,16,0,31	/* fold two halves together */
+	add	r3,r0,r3
+	not	r3,r3
+	srwi	r3,r3,16
+	blr
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * This code assumes at least halfword alignment, though the length
+ * can be any number of bytes.  The sum is accumulated in r5.
+ *
+ * csum_partial(r3=buff, r4=len, r5=sum)
+ */
+_GLOBAL(csum_partial)
+        subi	r3,r3,8		/* we'll offset by 8 for the loads */
+        srdi.	r6,r4,3         /* divide by 8 for doubleword count */
+        addic   r5,r5,0         /* clear carry */
+        beq	3f              /* if we're doing < 8 bytes */
+        andi.	r0,r3,2         /* aligned on a word boundary already? */
+        beq+	1f
+        lhz     r6,8(r3)        /* do 2 bytes to get aligned */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        addc    r5,r5,r6
+        srdi.   r6,r4,3         /* recompute number of doublewords */
+        beq     3f              /* any left? */
+1:      mtctr   r6
+2:      ldu     r6,8(r3)        /* main sum loop */
+        adde    r5,r5,r6
+        bdnz    2b
+        andi.	r4,r4,7         /* compute bytes left to sum after doublewords */
+3:	cmpwi	0,r4,4		/* is at least a full word left? */
+	blt	4f
+	lwz	r6,8(r3)	/* sum this word */
+	addi	r3,r3,4
+	subi	r4,r4,4
+	adde	r5,r5,r6
+4:	cmpwi	0,r4,2		/* is at least a halfword left? */
+        blt+	5f
+        lhz     r6,8(r3)        /* sum this halfword */
+        addi    r3,r3,2
+        subi    r4,r4,2
+        adde    r5,r5,r6
+5:	cmpwi	0,r4,1		/* is at least a byte left? */
+        bne+    6f
+        lbz     r6,8(r3)        /* sum this byte */
+        slwi    r6,r6,8         /* this byte is assumed to be the upper byte of a halfword */
+        adde    r5,r5,r6
+6:      addze	r5,r5		/* add in final carry */
+	rldicl  r4,r5,32,0      /* fold two 32-bit halves together */
+        add     r3,r4,r5
+        srdi    r3,r3,32
+        blr
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit), while copying the block to dst.
+ * If an access exception occurs on src or dst, it stores -EFAULT
+ * to *src_err or *dst_err respectively, and (for an error on
+ * src) zeroes the rest of dst.
+ *
+ * This code needs to be reworked to take advantage of 64 bit sum+copy.
+ * However, due to tokenring halfword alignment problems this will be very
+ * tricky.  For now we'll leave it until we instrument it somehow.
+ *
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
+ */
+_GLOBAL(csum_partial_copy_generic)
+	addic	r0,r6,0
+	subi	r3,r3,4
+	subi	r4,r4,4
+	srwi.	r6,r5,2
+	beq	3f		/* if we're doing < 4 bytes */
+	andi.	r9,r4,2		/* Align dst to longword boundary */
+	beq+	1f
+81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
+	addi	r3,r3,2
+	subi	r5,r5,2
+91:	sth	r6,4(r4)
+	addi	r4,r4,2
+	addc	r0,r0,r6
+	srwi.	r6,r5,2		/* # words to do */
+	beq	3f
+1:	mtctr	r6
+82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
+92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
+	adde	r0,r0,r6
+	bdnz	82b
+	andi.	r5,r5,3
+3:	cmpwi	0,r5,2
+	blt+	4f
+83:	lhz	r6,4(r3)
+	addi	r3,r3,2
+	subi	r5,r5,2
+93:	sth	r6,4(r4)
+	addi	r4,r4,2
+	adde	r0,r0,r6
+4:	cmpwi	0,r5,1
+	bne+	5f
+84:	lbz	r6,4(r3)
+94:	stb	r6,4(r4)
+	slwi	r6,r6,8		/* Upper byte of word */
+	adde	r0,r0,r6
+5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
+        rldicl  r4,r3,32,0      /* fold 64 bit value */
+        add     r3,r4,r3
+        srdi    r3,r3,32
+	blr
+
+/* These shouldn't go in the fixup section, since that would
+   cause the ex_table addresses to get out of order. */
+
+	.globl src_error_1
+src_error_1:
+	li	r6,0
+	subi	r5,r5,2
+95:	sth	r6,4(r4)
+	addi	r4,r4,2
+	srwi.	r6,r5,2
+	beq	3f
+	mtctr	r6
+	.globl src_error_2
+src_error_2:
+	li	r6,0
+96:	stwu	r6,4(r4)
+	bdnz	96b
+3:	andi.	r5,r5,3
+	beq	src_error
+	.globl src_error_3
+src_error_3:
+	li	r6,0
+	mtctr	r5
+	addi	r4,r4,3
+97:	stbu	r6,1(r4)
+	bdnz	97b
+	.globl src_error
+src_error:
+	cmpdi	0,r7,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r7)
+1:	addze	r3,r0
+	blr
+
+	.globl dst_error
+dst_error:
+	cmpdi	0,r8,0
+	beq	1f
+	li	r6,-EFAULT
+	stw	r6,0(r8)
+1:	addze	r3,r0
+	blr
+
+.section __ex_table,"a"
+	.align  3
+	.llong	81b,src_error_1
+	.llong	91b,dst_error
+	.llong	82b,src_error_2
+	.llong	92b,dst_error
+	.llong	83b,src_error_3
+	.llong	93b,dst_error
+	.llong	84b,src_error_3
+	.llong	94b,dst_error
+	.llong	95b,dst_error
+	.llong	96b,dst_error
+	.llong	97b,dst_error
diff --git a/arch/powerpc/lib/copy32.S b/arch/powerpc/lib/copy32.S
deleted file mode 100644
index 420a912198a2..000000000000
--- a/arch/powerpc/lib/copy32.S
+++ /dev/null
@@ -1,543 +0,0 @@
-/*
- * Memory copy functions for 32-bit PowerPC.
- *
- * Copyright (C) 1996-2005 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/config.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
-#include <asm/ppc_asm.h>
-
-#define COPY_16_BYTES		\
-	lwz	r7,4(r4);	\
-	lwz	r8,8(r4);	\
-	lwz	r9,12(r4);	\
-	lwzu	r10,16(r4);	\
-	stw	r7,4(r6);	\
-	stw	r8,8(r6);	\
-	stw	r9,12(r6);	\
-	stwu	r10,16(r6)
-
-#define COPY_16_BYTES_WITHEX(n)	\
-8 ## n ## 0:			\
-	lwz	r7,4(r4);	\
-8 ## n ## 1:			\
-	lwz	r8,8(r4);	\
-8 ## n ## 2:			\
-	lwz	r9,12(r4);	\
-8 ## n ## 3:			\
-	lwzu	r10,16(r4);	\
-8 ## n ## 4:			\
-	stw	r7,4(r6);	\
-8 ## n ## 5:			\
-	stw	r8,8(r6);	\
-8 ## n ## 6:			\
-	stw	r9,12(r6);	\
-8 ## n ## 7:			\
-	stwu	r10,16(r6)
-
-#define COPY_16_BYTES_EXCODE(n)			\
-9 ## n ## 0:					\
-	addi	r5,r5,-(16 * n);		\
-	b	104f;				\
-9 ## n ## 1:					\
-	addi	r5,r5,-(16 * n);		\
-	b	105f;				\
-.section __ex_table,"a";			\
-	.align	2;				\
-	.long	8 ## n ## 0b,9 ## n ## 0b;	\
-	.long	8 ## n ## 1b,9 ## n ## 0b;	\
-	.long	8 ## n ## 2b,9 ## n ## 0b;	\
-	.long	8 ## n ## 3b,9 ## n ## 0b;	\
-	.long	8 ## n ## 4b,9 ## n ## 1b;	\
-	.long	8 ## n ## 5b,9 ## n ## 1b;	\
-	.long	8 ## n ## 6b,9 ## n ## 1b;	\
-	.long	8 ## n ## 7b,9 ## n ## 1b;	\
-	.text
-
-	.text
-	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
-	.stabs	"copy32.S",N_SO,0,0,0f
-0:
-
-CACHELINE_BYTES = L1_CACHE_LINE_SIZE
-LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
-CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
-
-/*
- * Use dcbz on the complete cache lines in the destination
- * to set them to zero.  This requires that the destination
- * area is cacheable.  -- paulus
- */
-_GLOBAL(cacheable_memzero)
-	mr	r5,r4
-	li	r4,0
-	addi	r6,r3,-4
-	cmplwi	0,r5,4
-	blt	7f
-	stwu	r4,4(r6)
-	beqlr
-	andi.	r0,r6,3
-	add	r5,r0,r5
-	subf	r6,r0,r6
-	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
-	add	r8,r7,r5
-	srwi	r9,r8,LG_CACHELINE_BYTES
-	addic.	r9,r9,-1	/* total number of complete cachelines */
-	ble	2f
-	xori	r0,r7,CACHELINE_MASK & ~3
-	srwi.	r0,r0,2
-	beq	3f
-	mtctr	r0
-4:	stwu	r4,4(r6)
-	bdnz	4b
-3:	mtctr	r9
-	li	r7,4
-#if !defined(CONFIG_8xx)
-10:	dcbz	r7,r6
-#else
-10:	stw	r4, 4(r6)
-	stw	r4, 8(r6)
-	stw	r4, 12(r6)
-	stw	r4, 16(r6)
-#if CACHE_LINE_SIZE >= 32
-	stw	r4, 20(r6)
-	stw	r4, 24(r6)
-	stw	r4, 28(r6)
-	stw	r4, 32(r6)
-#endif /* CACHE_LINE_SIZE */
-#endif
-	addi	r6,r6,CACHELINE_BYTES
-	bdnz	10b
-	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
-	addi	r5,r5,4
-2:	srwi	r0,r5,2
-	mtctr	r0
-	bdz	6f
-1:	stwu	r4,4(r6)
-	bdnz	1b
-6:	andi.	r5,r5,3
-7:	cmpwi	0,r5,0
-	beqlr
-	mtctr	r5
-	addi	r6,r6,3
-8:	stbu	r4,1(r6)
-	bdnz	8b
-	blr
-
-_GLOBAL(memset)
-	rlwimi	r4,r4,8,16,23
-	rlwimi	r4,r4,16,0,15
-	addi	r6,r3,-4
-	cmplwi	0,r5,4
-	blt	7f
-	stwu	r4,4(r6)
-	beqlr
-	andi.	r0,r6,3
-	add	r5,r0,r5
-	subf	r6,r0,r6
-	srwi	r0,r5,2
-	mtctr	r0
-	bdz	6f
-1:	stwu	r4,4(r6)
-	bdnz	1b
-6:	andi.	r5,r5,3
-7:	cmpwi	0,r5,0
-	beqlr
-	mtctr	r5
-	addi	r6,r6,3
-8:	stbu	r4,1(r6)
-	bdnz	8b
-	blr
-
-/*
- * This version uses dcbz on the complete cache lines in the
- * destination area to reduce memory traffic.  This requires that
- * the destination area is cacheable.
- * We only use this version if the source and dest don't overlap.
- * -- paulus.
- */
-_GLOBAL(cacheable_memcpy)
-	add	r7,r3,r5		/* test if the src & dst overlap */
-	add	r8,r4,r5
-	cmplw	0,r4,r7
-	cmplw	1,r3,r8
-	crand	0,0,4			/* cr0.lt &= cr1.lt */
-	blt	memcpy			/* if regions overlap */
-
-	addi	r4,r4,-4
-	addi	r6,r3,-4
-	neg	r0,r3
-	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
-	beq	58f
-
-	cmplw	0,r5,r0			/* is this more than total to do? */
-	blt	63f			/* if not much to do */
-	andi.	r8,r0,3			/* get it word-aligned first */
-	subf	r5,r0,r5
-	mtctr	r8
-	beq+	61f
-70:	lbz	r9,4(r4)		/* do some bytes */
-	stb	r9,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	70b
-61:	srwi.	r0,r0,2
-	mtctr	r0
-	beq	58f
-72:	lwzu	r9,4(r4)		/* do some words */
-	stwu	r9,4(r6)
-	bdnz	72b
-
-58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
-	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
-	li	r11,4
-	mtctr	r0
-	beq	63f
-53:
-#if !defined(CONFIG_8xx)
-	dcbz	r11,r6
-#endif
-	COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 32
-	COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 64
-	COPY_16_BYTES
-	COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 128
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-	COPY_16_BYTES
-#endif
-#endif
-#endif
-	bdnz	53b
-
-63:	srwi.	r0,r5,2
-	mtctr	r0
-	beq	64f
-30:	lwzu	r0,4(r4)
-	stwu	r0,4(r6)
-	bdnz	30b
-
-64:	andi.	r0,r5,3
-	mtctr	r0
-	beq+	65f
-40:	lbz	r0,4(r4)
-	stb	r0,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	40b
-65:	blr
-
-_GLOBAL(memmove)
-	cmplw	0,r3,r4
-	bgt	backwards_memcpy
-	/* fall through */
-
-_GLOBAL(memcpy)
-	srwi.	r7,r5,3
-	addi	r6,r3,-4
-	addi	r4,r4,-4
-	beq	2f			/* if less than 8 bytes to do */
-	andi.	r0,r6,3			/* get dest word aligned */
-	mtctr	r7
-	bne	5f
-1:	lwz	r7,4(r4)
-	lwzu	r8,8(r4)
-	stw	r7,4(r6)
-	stwu	r8,8(r6)
-	bdnz	1b
-	andi.	r5,r5,7
-2:	cmplwi	0,r5,4
-	blt	3f
-	lwzu	r0,4(r4)
-	addi	r5,r5,-4
-	stwu	r0,4(r6)
-3:	cmpwi	0,r5,0
-	beqlr
-	mtctr	r5
-	addi	r4,r4,3
-	addi	r6,r6,3
-4:	lbzu	r0,1(r4)
-	stbu	r0,1(r6)
-	bdnz	4b
-	blr
-5:	subfic	r0,r0,4
-	mtctr	r0
-6:	lbz	r7,4(r4)
-	addi	r4,r4,1
-	stb	r7,4(r6)
-	addi	r6,r6,1
-	bdnz	6b
-	subf	r5,r0,r5
-	rlwinm.	r7,r5,32-3,3,31
-	beq	2b
-	mtctr	r7
-	b	1b
-
-_GLOBAL(backwards_memcpy)
-	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
-	add	r6,r3,r5
-	add	r4,r4,r5
-	beq	2f
-	andi.	r0,r6,3
-	mtctr	r7
-	bne	5f
-1:	lwz	r7,-4(r4)
-	lwzu	r8,-8(r4)
-	stw	r7,-4(r6)
-	stwu	r8,-8(r6)
-	bdnz	1b
-	andi.	r5,r5,7
-2:	cmplwi	0,r5,4
-	blt	3f
-	lwzu	r0,-4(r4)
-	subi	r5,r5,4
-	stwu	r0,-4(r6)
-3:	cmpwi	0,r5,0
-	beqlr
-	mtctr	r5
-4:	lbzu	r0,-1(r4)
-	stbu	r0,-1(r6)
-	bdnz	4b
-	blr
-5:	mtctr	r0
-6:	lbzu	r7,-1(r4)
-	stbu	r7,-1(r6)
-	bdnz	6b
-	subf	r5,r0,r5
-	rlwinm.	r7,r5,32-3,3,31
-	beq	2b
-	mtctr	r7
-	b	1b
-
-_GLOBAL(__copy_tofrom_user)
-	addi	r4,r4,-4
-	addi	r6,r3,-4
-	neg	r0,r3
-	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
-	beq	58f
-
-	cmplw	0,r5,r0			/* is this more than total to do? */
-	blt	63f			/* if not much to do */
-	andi.	r8,r0,3			/* get it word-aligned first */
-	mtctr	r8
-	beq+	61f
-70:	lbz	r9,4(r4)		/* do some bytes */
-71:	stb	r9,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	70b
-61:	subf	r5,r0,r5
-	srwi.	r0,r0,2
-	mtctr	r0
-	beq	58f
-72:	lwzu	r9,4(r4)		/* do some words */
-73:	stwu	r9,4(r6)
-	bdnz	72b
-
-	.section __ex_table,"a"
-	.align	2
-	.long	70b,100f
-	.long	71b,101f
-	.long	72b,102f
-	.long	73b,103f
-	.text
-
-58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
-	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
-	li	r11,4
-	beq	63f
-
-#ifdef CONFIG_8xx
-	/* Don't use prefetch on 8xx */
-	mtctr	r0
-	li	r0,0
-53:	COPY_16_BYTES_WITHEX(0)
-	bdnz	53b
-
-#else /* not CONFIG_8xx */
-	/* Here we decide how far ahead to prefetch the source */
-	li	r3,4
-	cmpwi	r0,1
-	li	r7,0
-	ble	114f
-	li	r7,1
-#if MAX_COPY_PREFETCH > 1
-	/* Heuristically, for large transfers we prefetch
-	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
-	   we prefetch 1 cacheline ahead. */
-	cmpwi	r0,MAX_COPY_PREFETCH
-	ble	112f
-	li	r7,MAX_COPY_PREFETCH
-112:	mtctr	r7
-111:	dcbt	r3,r4
-	addi	r3,r3,CACHELINE_BYTES
-	bdnz	111b
-#else
-	dcbt	r3,r4
-	addi	r3,r3,CACHELINE_BYTES
-#endif /* MAX_COPY_PREFETCH > 1 */
-
-114:	subf	r8,r7,r0
-	mr	r0,r7
-	mtctr	r8
-
-53:	dcbt	r3,r4
-54:	dcbz	r11,r6
-	.section __ex_table,"a"
-	.align	2
-	.long	54b,105f
-	.text
-/* the main body of the cacheline loop */
-	COPY_16_BYTES_WITHEX(0)
-#if L1_CACHE_LINE_SIZE >= 32
-	COPY_16_BYTES_WITHEX(1)
-#if L1_CACHE_LINE_SIZE >= 64
-	COPY_16_BYTES_WITHEX(2)
-	COPY_16_BYTES_WITHEX(3)
-#if L1_CACHE_LINE_SIZE >= 128
-	COPY_16_BYTES_WITHEX(4)
-	COPY_16_BYTES_WITHEX(5)
-	COPY_16_BYTES_WITHEX(6)
-	COPY_16_BYTES_WITHEX(7)
-#endif
-#endif
-#endif
-	bdnz	53b
-	cmpwi	r0,0
-	li	r3,4
-	li	r7,0
-	bne	114b
-#endif /* CONFIG_8xx */
-
-63:	srwi.	r0,r5,2
-	mtctr	r0
-	beq	64f
-30:	lwzu	r0,4(r4)
-31:	stwu	r0,4(r6)
-	bdnz	30b
-
-64:	andi.	r0,r5,3
-	mtctr	r0
-	beq+	65f
-40:	lbz	r0,4(r4)
-41:	stb	r0,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	40b
-65:	li	r3,0
-	blr
-
-/* read fault, initial single-byte copy */
-100:	li	r9,0
-	b	90f
-/* write fault, initial single-byte copy */
-101:	li	r9,1
-90:	subf	r5,r8,r5
-	li	r3,0
-	b	99f
-/* read fault, initial word copy */
-102:	li	r9,0
-	b	91f
-/* write fault, initial word copy */
-103:	li	r9,1
-91:	li	r3,2
-	b	99f
-
-/*
- * this stuff handles faults in the cacheline loop and branches to either
- * 104f (if in read part) or 105f (if in write part), after updating r5
- */
-	COPY_16_BYTES_EXCODE(0)
-#if L1_CACHE_LINE_SIZE >= 32
-	COPY_16_BYTES_EXCODE(1)
-#if L1_CACHE_LINE_SIZE >= 64
-	COPY_16_BYTES_EXCODE(2)
-	COPY_16_BYTES_EXCODE(3)
-#if L1_CACHE_LINE_SIZE >= 128
-	COPY_16_BYTES_EXCODE(4)
-	COPY_16_BYTES_EXCODE(5)
-	COPY_16_BYTES_EXCODE(6)
-	COPY_16_BYTES_EXCODE(7)
-#endif
-#endif
-#endif
-
-/* read fault in cacheline loop */
-104:	li	r9,0
-	b	92f
-/* fault on dcbz (effectively a write fault) */
-/* or write fault in cacheline loop */
-105:	li	r9,1
-92:	li	r3,LG_CACHELINE_BYTES
-	mfctr	r8
-	add	r0,r0,r8
-	b	106f
-/* read fault in final word loop */
-108:	li	r9,0
-	b	93f
-/* write fault in final word loop */
-109:	li	r9,1
-93:	andi.	r5,r5,3
-	li	r3,2
-	b	99f
-/* read fault in final byte loop */
-110:	li	r9,0
-	b	94f
-/* write fault in final byte loop */
-111:	li	r9,1
-94:	li	r5,0
-	li	r3,0
-/*
- * At this stage the number of bytes not copied is
- * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
- */
-99:	mfctr	r0
-106:	slw	r3,r0,r3
-	add.	r3,r3,r5
-	beq	120f			/* shouldn't happen */
-	cmpwi	0,r9,0
-	bne	120f
-/* for a read fault, first try to continue the copy one byte at a time */
-	mtctr	r3
-130:	lbz	r0,4(r4)
-131:	stb	r0,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
-	bdnz	130b
-/* then clear out the destination: r3 bytes starting at 4(r6) */
-132:	mfctr	r3
-	srwi.	r0,r3,2
-	li	r9,0
-	mtctr	r0
-	beq	113f
-112:	stwu	r9,4(r6)
-	bdnz	112b
-113:	andi.	r0,r3,3
-	mtctr	r0
-	beq	120f
-114:	stb	r9,4(r6)
-	addi	r6,r6,1
-	bdnz	114b
-120:	blr
-
-	.section __ex_table,"a"
-	.align	2
-	.long	30b,108b
-	.long	31b,109b
-	.long	40b,110b
-	.long	41b,111b
-	.long	130b,132b
-	.long	131b,120b
-	.long	112b,120b
-	.long	114b,120b
-	.text
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
new file mode 100644
index 000000000000..420a912198a2
--- /dev/null
+++ b/arch/powerpc/lib/copy_32.S
@@ -0,0 +1,543 @@
+/*
+ * Memory copy functions for 32-bit PowerPC.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+#define COPY_16_BYTES		\
+	lwz	r7,4(r4);	\
+	lwz	r8,8(r4);	\
+	lwz	r9,12(r4);	\
+	lwzu	r10,16(r4);	\
+	stw	r7,4(r6);	\
+	stw	r8,8(r6);	\
+	stw	r9,12(r6);	\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n)	\
+8 ## n ## 0:			\
+	lwz	r7,4(r4);	\
+8 ## n ## 1:			\
+	lwz	r8,8(r4);	\
+8 ## n ## 2:			\
+	lwz	r9,12(r4);	\
+8 ## n ## 3:			\
+	lwzu	r10,16(r4);	\
+8 ## n ## 4:			\
+	stw	r7,4(r6);	\
+8 ## n ## 5:			\
+	stw	r8,8(r6);	\
+8 ## n ## 6:			\
+	stw	r9,12(r6);	\
+8 ## n ## 7:			\
+	stwu	r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n)			\
+9 ## n ## 0:					\
+	addi	r5,r5,-(16 * n);		\
+	b	104f;				\
+9 ## n ## 1:					\
+	addi	r5,r5,-(16 * n);		\
+	b	105f;				\
+.section __ex_table,"a";			\
+	.align	2;				\
+	.long	8 ## n ## 0b,9 ## n ## 0b;	\
+	.long	8 ## n ## 1b,9 ## n ## 0b;	\
+	.long	8 ## n ## 2b,9 ## n ## 0b;	\
+	.long	8 ## n ## 3b,9 ## n ## 0b;	\
+	.long	8 ## n ## 4b,9 ## n ## 1b;	\
+	.long	8 ## n ## 5b,9 ## n ## 1b;	\
+	.long	8 ## n ## 6b,9 ## n ## 1b;	\
+	.long	8 ## n ## 7b,9 ## n ## 1b;	\
+	.text
+
+	.text
+	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
+	.stabs	"copy32.S",N_SO,0,0,0f
+0:
+
+CACHELINE_BYTES = L1_CACHE_LINE_SIZE
+LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
+CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.  -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+	mr	r5,r4
+	li	r4,0
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	clrlwi	r7,r6,32-LG_CACHELINE_BYTES
+	add	r8,r7,r5
+	srwi	r9,r8,LG_CACHELINE_BYTES
+	addic.	r9,r9,-1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0,r7,CACHELINE_MASK & ~3
+	srwi.	r0,r0,2
+	beq	3f
+	mtctr	r0
+4:	stwu	r4,4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7,4
+#if !defined(CONFIG_8xx)
+10:	dcbz	r7,r6
+#else
+10:	stw	r4, 4(r6)
+	stw	r4, 8(r6)
+	stw	r4, 12(r6)
+	stw	r4, 16(r6)
+#if CACHE_LINE_SIZE >= 32
+	stw	r4, 20(r6)
+	stw	r4, 24(r6)
+	stw	r4, 28(r6)
+	stw	r4, 32(r6)
+#endif /* CACHE_LINE_SIZE */
+#endif
+	addi	r6,r6,CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
+	addi	r5,r5,4
+2:	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+_GLOBAL(memset)
+	rlwimi	r4,r4,8,16,23
+	rlwimi	r4,r4,16,0,15
+	addi	r6,r3,-4
+	cmplwi	0,r5,4
+	blt	7f
+	stwu	r4,4(r6)
+	beqlr
+	andi.	r0,r6,3
+	add	r5,r0,r5
+	subf	r6,r0,r6
+	srwi	r0,r5,2
+	mtctr	r0
+	bdz	6f
+1:	stwu	r4,4(r6)
+	bdnz	1b
+6:	andi.	r5,r5,3
+7:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r6,r6,3
+8:	stbu	r4,1(r6)
+	bdnz	8b
+	blr
+
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic.  This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBAL(cacheable_memcpy)
+	add	r7,r3,r5		/* test if the src & dst overlap */
+	add	r8,r4,r5
+	cmplw	0,r4,r7
+	cmplw	1,r3,r8
+	crand	0,0,4			/* cr0.lt &= cr1.lt */
+	blt	memcpy			/* if regions overlap */
+
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	subf	r5,r0,r5
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+	stwu	r9,4(r6)
+	bdnz	72b
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	mtctr	r0
+	beq	63f
+53:
+#if !defined(CONFIG_8xx)
+	dcbz	r11,r6
+#endif
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES
+	COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+	COPY_16_BYTES
+#endif
+#endif
+#endif
+	bdnz	53b
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	blr
+
+_GLOBAL(memmove)
+	cmplw	0,r3,r4
+	bgt	backwards_memcpy
+	/* fall through */
+
+_GLOBAL(memcpy)
+	srwi.	r7,r5,3
+	addi	r6,r3,-4
+	addi	r4,r4,-4
+	beq	2f			/* if less than 8 bytes to do */
+	andi.	r0,r6,3			/* get dest word aligned */
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,4(r4)
+	lwzu	r8,8(r4)
+	stw	r7,4(r6)
+	stwu	r8,8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,4(r4)
+	addi	r5,r5,-4
+	stwu	r0,4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+	addi	r4,r4,3
+	addi	r6,r6,3
+4:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
+	bdnz	4b
+	blr
+5:	subfic	r0,r0,4
+	mtctr	r0
+6:	lbz	r7,4(r4)
+	addi	r4,r4,1
+	stb	r7,4(r6)
+	addi	r6,r6,1
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(backwards_memcpy)
+	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
+	add	r6,r3,r5
+	add	r4,r4,r5
+	beq	2f
+	andi.	r0,r6,3
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,-4(r4)
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+4:	lbzu	r0,-1(r4)
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+5:	mtctr	r0
+6:	lbzu	r7,-1(r4)
+	stbu	r7,-1(r6)
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
+
+_GLOBAL(__copy_tofrom_user)
+	addi	r4,r4,-4
+	addi	r6,r3,-4
+	neg	r0,r3
+	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	beq	58f
+
+	cmplw	0,r5,r0			/* is this more than total to do? */
+	blt	63f			/* if not much to do */
+	andi.	r8,r0,3			/* get it word-aligned first */
+	mtctr	r8
+	beq+	61f
+70:	lbz	r9,4(r4)		/* do some bytes */
+71:	stb	r9,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	70b
+61:	subf	r5,r0,r5
+	srwi.	r0,r0,2
+	mtctr	r0
+	beq	58f
+72:	lwzu	r9,4(r4)		/* do some words */
+73:	stwu	r9,4(r6)
+	bdnz	72b
+
+	.section __ex_table,"a"
+	.align	2
+	.long	70b,100f
+	.long	71b,101f
+	.long	72b,102f
+	.long	73b,103f
+	.text
+
+58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
+	li	r11,4
+	beq	63f
+
+#ifdef CONFIG_8xx
+	/* Don't use prefetch on 8xx */
+	mtctr	r0
+	li	r0,0
+53:	COPY_16_BYTES_WITHEX(0)
+	bdnz	53b
+
+#else /* not CONFIG_8xx */
+	/* Here we decide how far ahead to prefetch the source */
+	li	r3,4
+	cmpwi	r0,1
+	li	r7,0
+	ble	114f
+	li	r7,1
+#if MAX_COPY_PREFETCH > 1
+	/* Heuristically, for large transfers we prefetch
+	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
+	   we prefetch 1 cacheline ahead. */
+	cmpwi	r0,MAX_COPY_PREFETCH
+	ble	112f
+	li	r7,MAX_COPY_PREFETCH
+112:	mtctr	r7
+111:	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+	bdnz	111b
+#else
+	dcbt	r3,r4
+	addi	r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114:	subf	r8,r7,r0
+	mr	r0,r7
+	mtctr	r8
+
+53:	dcbt	r3,r4
+54:	dcbz	r11,r6
+	.section __ex_table,"a"
+	.align	2
+	.long	54b,105f
+	.text
+/* the main body of the cacheline loop */
+	COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_WITHEX(2)
+	COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_WITHEX(4)
+	COPY_16_BYTES_WITHEX(5)
+	COPY_16_BYTES_WITHEX(6)
+	COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+	bdnz	53b
+	cmpwi	r0,0
+	li	r3,4
+	li	r7,0
+	bne	114b
+#endif /* CONFIG_8xx */
+
+63:	srwi.	r0,r5,2
+	mtctr	r0
+	beq	64f
+30:	lwzu	r0,4(r4)
+31:	stwu	r0,4(r6)
+	bdnz	30b
+
+64:	andi.	r0,r5,3
+	mtctr	r0
+	beq+	65f
+40:	lbz	r0,4(r4)
+41:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	40b
+65:	li	r3,0
+	blr
+
+/* read fault, initial single-byte copy */
+100:	li	r9,0
+	b	90f
+/* write fault, initial single-byte copy */
+101:	li	r9,1
+90:	subf	r5,r8,r5
+	li	r3,0
+	b	99f
+/* read fault, initial word copy */
+102:	li	r9,0
+	b	91f
+/* write fault, initial word copy */
+103:	li	r9,1
+91:	li	r3,2
+	b	99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+	COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_LINE_SIZE >= 32
+	COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_LINE_SIZE >= 64
+	COPY_16_BYTES_EXCODE(2)
+	COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_LINE_SIZE >= 128
+	COPY_16_BYTES_EXCODE(4)
+	COPY_16_BYTES_EXCODE(5)
+	COPY_16_BYTES_EXCODE(6)
+	COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104:	li	r9,0
+	b	92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105:	li	r9,1
+92:	li	r3,LG_CACHELINE_BYTES
+	mfctr	r8
+	add	r0,r0,r8
+	b	106f
+/* read fault in final word loop */
+108:	li	r9,0
+	b	93f
+/* write fault in final word loop */
+109:	li	r9,1
+93:	andi.	r5,r5,3
+	li	r3,2
+	b	99f
+/* read fault in final byte loop */
+110:	li	r9,0
+	b	94f
+/* write fault in final byte loop */
+111:	li	r9,1
+94:	li	r5,0
+	li	r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
+ */
+99:	mfctr	r0
+106:	slw	r3,r0,r3
+	add.	r3,r3,r5
+	beq	120f			/* shouldn't happen */
+	cmpwi	0,r9,0
+	bne	120f
+/* for a read fault, first try to continue the copy one byte at a time */
+	mtctr	r3
+130:	lbz	r0,4(r4)
+131:	stb	r0,4(r6)
+	addi	r4,r4,1
+	addi	r6,r6,1
+	bdnz	130b
+/* then clear out the destination: r3 bytes starting at 4(r6) */
+132:	mfctr	r3
+	srwi.	r0,r3,2
+	li	r9,0
+	mtctr	r0
+	beq	113f
+112:	stwu	r9,4(r6)
+	bdnz	112b
+113:	andi.	r0,r3,3
+	mtctr	r0
+	beq	120f
+114:	stb	r9,4(r6)
+	addi	r6,r6,1
+	bdnz	114b
+120:	blr
+
+	.section __ex_table,"a"
+	.align	2
+	.long	30b,108b
+	.long	31b,109b
+	.long	40b,110b
+	.long	41b,111b
+	.long	130b,132b
+	.long	131b,120b
+	.long	112b,120b
+	.long	114b,120b
+	.text
diff --git a/arch/powerpc/lib/copypage.S b/arch/powerpc/lib/copypage.S
deleted file mode 100644
index 733d61618bbf..000000000000
--- a/arch/powerpc/lib/copypage.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * arch/ppc64/lib/copypage.S
- *
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-
-_GLOBAL(copy_page)
-	std	r31,-8(1)
-	std	r30,-16(1)
-	std	r29,-24(1)
-	std	r28,-32(1)
-	std	r27,-40(1)
-	std	r26,-48(1)
-	std	r25,-56(1)
-	std	r24,-64(1)
-	std	r23,-72(1)
-	std	r22,-80(1)
-	std	r21,-88(1)
-	std	r20,-96(1)
-	li	r5,4096/32 - 1
-	addi	r3,r3,-8
-	li	r12,5
-0:	addi	r5,r5,-24
-	mtctr	r12
-	ld	r22,640(4)
-	ld	r21,512(4)
-	ld	r20,384(4)
-	ld	r11,256(4)
-	ld	r9,128(4)
-	ld	r7,0(4)
-	ld	r25,648(4)
-	ld	r24,520(4)
-	ld	r23,392(4)
-	ld	r10,264(4)
-	ld	r8,136(4)
-	ldu	r6,8(4)
-	cmpwi	r5,24
-1:	std	r22,648(3)
-	std	r21,520(3)
-	std	r20,392(3)
-	std	r11,264(3)
-	std	r9,136(3)
-	std	r7,8(3)
-	ld	r28,648(4)
-	ld	r27,520(4)
-	ld	r26,392(4)
-	ld	r31,264(4)
-	ld	r30,136(4)
-	ld	r29,8(4)
-	std	r25,656(3)
-	std	r24,528(3)
-	std	r23,400(3)
-	std	r10,272(3)
-	std	r8,144(3)
-	std	r6,16(3)
-	ld	r22,656(4)
-	ld	r21,528(4)
-	ld	r20,400(4)
-	ld	r11,272(4)
-	ld	r9,144(4)
-	ld	r7,16(4)
-	std	r28,664(3)
-	std	r27,536(3)
-	std	r26,408(3)
-	std	r31,280(3)
-	std	r30,152(3)
-	stdu	r29,24(3)
-	ld	r25,664(4)
-	ld	r24,536(4)
-	ld	r23,408(4)
-	ld	r10,280(4)
-	ld	r8,152(4)
-	ldu	r6,24(4)
-	bdnz	1b
-	std	r22,648(3)
-	std	r21,520(3)
-	std	r20,392(3)
-	std	r11,264(3)
-	std	r9,136(3)
-	std	r7,8(3)
-	addi	r4,r4,640
-	addi	r3,r3,648
-	bge	0b
-	mtctr	r5
-	ld	r7,0(4)
-	ld	r8,8(4)
-	ldu	r9,16(4)
-3:	ld	r10,8(4)
-	std	r7,8(3)
-	ld	r7,16(4)
-	std	r8,16(3)
-	ld	r8,24(4)
-	std	r9,24(3)
-	ldu	r9,32(4)
-	stdu	r10,32(3)
-	bdnz	3b
-4:	ld	r10,8(4)
-	std	r7,8(3)
-	std	r8,16(3)
-	std	r9,24(3)
-	std	r10,32(3)
-9:	ld	r20,-96(1)
-	ld	r21,-88(1)
-	ld	r22,-80(1)
-	ld	r23,-72(1)
-	ld	r24,-64(1)
-	ld	r25,-56(1)
-	ld	r26,-48(1)
-	ld	r27,-40(1)
-	ld	r28,-32(1)
-	ld	r29,-24(1)
-	ld	r30,-16(1)
-	ld	r31,-8(1)
-	blr
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
new file mode 100644
index 000000000000..733d61618bbf
--- /dev/null
+++ b/arch/powerpc/lib/copypage_64.S
@@ -0,0 +1,121 @@
+/*
+ * arch/ppc64/lib/copypage.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copy_page)
+	std	r31,-8(1)
+	std	r30,-16(1)
+	std	r29,-24(1)
+	std	r28,-32(1)
+	std	r27,-40(1)
+	std	r26,-48(1)
+	std	r25,-56(1)
+	std	r24,-64(1)
+	std	r23,-72(1)
+	std	r22,-80(1)
+	std	r21,-88(1)
+	std	r20,-96(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r12,5
+0:	addi	r5,r5,-24
+	mtctr	r12
+	ld	r22,640(4)
+	ld	r21,512(4)
+	ld	r20,384(4)
+	ld	r11,256(4)
+	ld	r9,128(4)
+	ld	r7,0(4)
+	ld	r25,648(4)
+	ld	r24,520(4)
+	ld	r23,392(4)
+	ld	r10,264(4)
+	ld	r8,136(4)
+	ldu	r6,8(4)
+	cmpwi	r5,24
+1:	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	ld	r28,648(4)
+	ld	r27,520(4)
+	ld	r26,392(4)
+	ld	r31,264(4)
+	ld	r30,136(4)
+	ld	r29,8(4)
+	std	r25,656(3)
+	std	r24,528(3)
+	std	r23,400(3)
+	std	r10,272(3)
+	std	r8,144(3)
+	std	r6,16(3)
+	ld	r22,656(4)
+	ld	r21,528(4)
+	ld	r20,400(4)
+	ld	r11,272(4)
+	ld	r9,144(4)
+	ld	r7,16(4)
+	std	r28,664(3)
+	std	r27,536(3)
+	std	r26,408(3)
+	std	r31,280(3)
+	std	r30,152(3)
+	stdu	r29,24(3)
+	ld	r25,664(4)
+	ld	r24,536(4)
+	ld	r23,408(4)
+	ld	r10,280(4)
+	ld	r8,152(4)
+	ldu	r6,24(4)
+	bdnz	1b
+	std	r22,648(3)
+	std	r21,520(3)
+	std	r20,392(3)
+	std	r11,264(3)
+	std	r9,136(3)
+	std	r7,8(3)
+	addi	r4,r4,640
+	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+	ld	r7,0(4)
+	ld	r8,8(4)
+	ldu	r9,16(4)
+3:	ld	r10,8(4)
+	std	r7,8(3)
+	ld	r7,16(4)
+	std	r8,16(3)
+	ld	r8,24(4)
+	std	r9,24(3)
+	ldu	r9,32(4)
+	stdu	r10,32(3)
+	bdnz	3b
+4:	ld	r10,8(4)
+	std	r7,8(3)
+	std	r8,16(3)
+	std	r9,24(3)
+	std	r10,32(3)
+9:	ld	r20,-96(1)
+	ld	r21,-88(1)
+	ld	r22,-80(1)
+	ld	r23,-72(1)
+	ld	r24,-64(1)
+	ld	r25,-56(1)
+	ld	r26,-48(1)
+	ld	r27,-40(1)
+	ld	r28,-32(1)
+	ld	r29,-24(1)
+	ld	r30,-16(1)
+	ld	r31,-8(1)
+	blr
diff --git a/arch/powerpc/lib/copyuser.S b/arch/powerpc/lib/copyuser.S
deleted file mode 100644
index a0b3fbbd6fb1..000000000000
--- a/arch/powerpc/lib/copyuser.S
+++ /dev/null
@@ -1,576 +0,0 @@
-/*
- * arch/ppc64/lib/copyuser.S
- *
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-
-	.align	7
-_GLOBAL(__copy_tofrom_user)
-	/* first check for a whole page copy on a page boundary */
-	cmpldi	cr1,r5,16
-	cmpdi	cr6,r5,4096
-	or	r0,r3,r4
-	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
-	andi.	r0,r0,4095
-	std	r3,-24(r1)
-	crand	cr0*4+2,cr0*4+2,cr6*4+2
-	std	r4,-16(r1)
-	std	r5,-8(r1)
-	dcbt	0,r4
-	beq	.Lcopy_page
-	andi.	r6,r6,7
-	mtcrf	0x01,r5
-	blt	cr1,.Lshort_copy
-	bne	.Ldst_unaligned
-.Ldst_aligned:
-	andi.	r0,r4,7
-	addi	r3,r3,-16
-	bne	.Lsrc_unaligned
-	srdi	r7,r5,4
-20:	ld	r9,0(r4)
-	addi	r4,r4,-8
-	mtctr	r7
-	andi.	r5,r5,7
-	bf	cr7*4+0,22f
-	addi	r3,r3,8
-	addi	r4,r4,8
-	mr	r8,r9
-	blt	cr1,72f
-21:	ld	r9,8(r4)
-70:	std	r8,8(r3)
-22:	ldu	r8,16(r4)
-71:	stdu	r9,16(r3)
-	bdnz	21b
-72:	std	r8,8(r3)
-	beq+	3f
-	addi	r3,r3,16
-23:	ld	r9,8(r4)
-.Ldo_tail:
-	bf	cr7*4+1,1f
-	rotldi	r9,r9,32
-73:	stw	r9,0(r3)
-	addi	r3,r3,4
-1:	bf	cr7*4+2,2f
-	rotldi	r9,r9,16
-74:	sth	r9,0(r3)
-	addi	r3,r3,2
-2:	bf	cr7*4+3,3f
-	rotldi	r9,r9,8
-75:	stb	r9,0(r3)
-3:	li	r3,0
-	blr
-
-.Lsrc_unaligned:
-	srdi	r6,r5,3
-	addi	r5,r5,-16
-	subf	r4,r0,r4
-	srdi	r7,r5,4
-	sldi	r10,r0,3
-	cmpldi	cr6,r6,3
-	andi.	r5,r5,7
-	mtctr	r7
-	subfic	r11,r10,64
-	add	r5,r5,r0
-	bt	cr7*4+0,28f
-
-24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
-25:	ld	r0,8(r4)
-	sld	r6,r9,r10
-26:	ldu	r9,16(r4)
-	srd	r7,r0,r11
-	sld	r8,r0,r10
-	or	r7,r7,r6
-	blt	cr6,79f
-27:	ld	r0,8(r4)
-	b	2f
-
-28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
-29:	ldu	r9,8(r4)
-	sld	r8,r0,r10
-	addi	r3,r3,-8
-	blt	cr6,5f
-30:	ld	r0,8(r4)
-	srd	r12,r9,r11
-	sld	r6,r9,r10
-31:	ldu	r9,16(r4)
-	or	r12,r8,r12
-	srd	r7,r0,r11
-	sld	r8,r0,r10
-	addi	r3,r3,16
-	beq	cr6,78f
-
-1:	or	r7,r7,r6
-32:	ld	r0,8(r4)
-76:	std	r12,8(r3)
-2:	srd	r12,r9,r11
-	sld	r6,r9,r10
-33:	ldu	r9,16(r4)
-	or	r12,r8,r12
-77:	stdu	r7,16(r3)
-	srd	r7,r0,r11
-	sld	r8,r0,r10
-	bdnz	1b
-
-78:	std	r12,8(r3)
-	or	r7,r7,r6
-79:	std	r7,16(r3)
-5:	srd	r12,r9,r11
-	or	r12,r8,r12
-80:	std	r12,24(r3)
-	bne	6f
-	li	r3,0
-	blr
-6:	cmpwi	cr1,r5,8
-	addi	r3,r3,32
-	sld	r9,r9,r10
-	ble	cr1,.Ldo_tail
-34:	ld	r0,8(r4)
-	srd	r7,r0,r11
-	or	r9,r7,r9
-	b	.Ldo_tail
-
-.Ldst_unaligned:
-	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
-	subf	r5,r6,r5
-	li	r7,0
-	cmpldi	r1,r5,16
-	bf	cr7*4+3,1f
-35:	lbz	r0,0(r4)
-81:	stb	r0,0(r3)
-	addi	r7,r7,1
-1:	bf	cr7*4+2,2f
-36:	lhzx	r0,r7,r4
-82:	sthx	r0,r7,r3
-	addi	r7,r7,2
-2:	bf	cr7*4+1,3f
-37:	lwzx	r0,r7,r4
-83:	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
-	add	r4,r6,r4
-	add	r3,r6,r3
-	b	.Ldst_aligned
-
-.Lshort_copy:
-	bf	cr7*4+0,1f
-38:	lwz	r0,0(r4)
-39:	lwz	r9,4(r4)
-	addi	r4,r4,8
-84:	stw	r0,0(r3)
-85:	stw	r9,4(r3)
-	addi	r3,r3,8
-1:	bf	cr7*4+1,2f
-40:	lwz	r0,0(r4)
-	addi	r4,r4,4
-86:	stw	r0,0(r3)
-	addi	r3,r3,4
-2:	bf	cr7*4+2,3f
-41:	lhz	r0,0(r4)
-	addi	r4,r4,2
-87:	sth	r0,0(r3)
-	addi	r3,r3,2
-3:	bf	cr7*4+3,4f
-42:	lbz	r0,0(r4)
-88:	stb	r0,0(r3)
-4:	li	r3,0
-	blr
-
-/*
- * exception handlers follow
- * we have to return the number of bytes not copied
- * for an exception on a load, we set the rest of the destination to 0
- */
-
-136:
-137:
-	add	r3,r3,r7
-	b	1f
-130:
-131:
-	addi	r3,r3,8
-120:
-122:
-124:
-125:
-126:
-127:
-128:
-129:
-133:
-	addi	r3,r3,8
-121:
-132:
-	addi	r3,r3,8
-123:
-134:
-135:
-138:
-139:
-140:
-141:
-142:
-
-/*
- * here we have had a fault on a load and r3 points to the first
- * unmodified byte of the destination
- */
-1:	ld	r6,-24(r1)
-	ld	r4,-16(r1)
-	ld	r5,-8(r1)
-	subf	r6,r6,r3
-	add	r4,r4,r6
-	subf	r5,r6,r5	/* #bytes left to go */
-
-/*
- * first see if we can copy any more bytes before hitting another exception
- */
-	mtctr	r5
-43:	lbz	r0,0(r4)
-	addi	r4,r4,1
-89:	stb	r0,0(r3)
-	addi	r3,r3,1
-	bdnz	43b
-	li	r3,0		/* huh? all copied successfully this time? */
-	blr
-
-/*
- * here we have trapped again, need to clear ctr bytes starting at r3
- */
-143:	mfctr	r5
-	li	r0,0
-	mr	r4,r3
-	mr	r3,r5		/* return the number of bytes not copied */
-1:	andi.	r9,r4,7
-	beq	3f
-90:	stb	r0,0(r4)
-	addic.	r5,r5,-1
-	addi	r4,r4,1
-	bne	1b
-	blr
-3:	cmpldi	cr1,r5,8
-	srdi	r9,r5,3
-	andi.	r5,r5,7
-	blt	cr1,93f
-	mtctr	r9
-91:	std	r0,0(r4)
-	addi	r4,r4,8
-	bdnz	91b
-93:	beqlr
-	mtctr	r5	
-92:	stb	r0,0(r4)
-	addi	r4,r4,1
-	bdnz	92b
-	blr
-
-/*
- * exception handlers for stores: we just need to work
- * out how many bytes weren't copied
- */
-182:
-183:
-	add	r3,r3,r7
-	b	1f
-180:
-	addi	r3,r3,8
-171:
-177:
-	addi	r3,r3,8
-170:
-172:
-176:
-178:
-	addi	r3,r3,4
-185:
-	addi	r3,r3,4
-173:
-174:
-175:
-179:
-181:
-184:
-186:
-187:
-188:
-189:	
-1:
-	ld	r6,-24(r1)
-	ld	r5,-8(r1)
-	add	r6,r6,r5
-	subf	r3,r3,r6	/* #bytes not copied */
-190:
-191:
-192:
-	blr			/* #bytes not copied in r3 */
-
-	.section __ex_table,"a"
-	.align	3
-	.llong	20b,120b
-	.llong	21b,121b
-	.llong	70b,170b
-	.llong	22b,122b
-	.llong	71b,171b
-	.llong	72b,172b
-	.llong	23b,123b
-	.llong	73b,173b
-	.llong	74b,174b
-	.llong	75b,175b
-	.llong	24b,124b
-	.llong	25b,125b
-	.llong	26b,126b
-	.llong	27b,127b
-	.llong	28b,128b
-	.llong	29b,129b
-	.llong	30b,130b
-	.llong	31b,131b
-	.llong	32b,132b
-	.llong	76b,176b
-	.llong	33b,133b
-	.llong	77b,177b
-	.llong	78b,178b
-	.llong	79b,179b
-	.llong	80b,180b
-	.llong	34b,134b
-	.llong	35b,135b
-	.llong	81b,181b
-	.llong	36b,136b
-	.llong	82b,182b
-	.llong	37b,137b
-	.llong	83b,183b
-	.llong	38b,138b
-	.llong	39b,139b
-	.llong	84b,184b
-	.llong	85b,185b
-	.llong	40b,140b
-	.llong	86b,186b
-	.llong	41b,141b
-	.llong	87b,187b
-	.llong	42b,142b
-	.llong	88b,188b
-	.llong	43b,143b
-	.llong	89b,189b
-	.llong	90b,190b
-	.llong	91b,191b
-	.llong	92b,192b
-	
-	.text
-
-/*
- * Routine to copy a whole page of data, optimized for POWER4.
- * On POWER4 it is more than 50% faster than the simple loop
- * above (following the .Ldst_aligned label) but it runs slightly
- * slower on POWER3.
- */
-.Lcopy_page:
-	std	r31,-32(1)
-	std	r30,-40(1)
-	std	r29,-48(1)
-	std	r28,-56(1)
-	std	r27,-64(1)
-	std	r26,-72(1)
-	std	r25,-80(1)
-	std	r24,-88(1)
-	std	r23,-96(1)
-	std	r22,-104(1)
-	std	r21,-112(1)
-	std	r20,-120(1)
-	li	r5,4096/32 - 1
-	addi	r3,r3,-8
-	li	r0,5
-0:	addi	r5,r5,-24
-	mtctr	r0
-20:	ld	r22,640(4)
-21:	ld	r21,512(4)
-22:	ld	r20,384(4)
-23:	ld	r11,256(4)
-24:	ld	r9,128(4)
-25:	ld	r7,0(4)
-26:	ld	r25,648(4)
-27:	ld	r24,520(4)
-28:	ld	r23,392(4)
-29:	ld	r10,264(4)
-30:	ld	r8,136(4)
-31:	ldu	r6,8(4)
-	cmpwi	r5,24
-1:
-32:	std	r22,648(3)
-33:	std	r21,520(3)
-34:	std	r20,392(3)
-35:	std	r11,264(3)
-36:	std	r9,136(3)
-37:	std	r7,8(3)
-38:	ld	r28,648(4)
-39:	ld	r27,520(4)
-40:	ld	r26,392(4)
-41:	ld	r31,264(4)
-42:	ld	r30,136(4)
-43:	ld	r29,8(4)
-44:	std	r25,656(3)
-45:	std	r24,528(3)
-46:	std	r23,400(3)
-47:	std	r10,272(3)
-48:	std	r8,144(3)
-49:	std	r6,16(3)
-50:	ld	r22,656(4)
-51:	ld	r21,528(4)
-52:	ld	r20,400(4)
-53:	ld	r11,272(4)
-54:	ld	r9,144(4)
-55:	ld	r7,16(4)
-56:	std	r28,664(3)
-57:	std	r27,536(3)
-58:	std	r26,408(3)
-59:	std	r31,280(3)
-60:	std	r30,152(3)
-61:	stdu	r29,24(3)
-62:	ld	r25,664(4)
-63:	ld	r24,536(4)
-64:	ld	r23,408(4)
-65:	ld	r10,280(4)
-66:	ld	r8,152(4)
-67:	ldu	r6,24(4)
-	bdnz	1b
-68:	std	r22,648(3)
-69:	std	r21,520(3)
-70:	std	r20,392(3)
-71:	std	r11,264(3)
-72:	std	r9,136(3)
-73:	std	r7,8(3)
-74:	addi	r4,r4,640
-75:	addi	r3,r3,648
-	bge	0b
-	mtctr	r5
-76:	ld	r7,0(4)
-77:	ld	r8,8(4)
-78:	ldu	r9,16(4)
-3:
-79:	ld	r10,8(4)
-80:	std	r7,8(3)
-81:	ld	r7,16(4)
-82:	std	r8,16(3)
-83:	ld	r8,24(4)
-84:	std	r9,24(3)
-85:	ldu	r9,32(4)
-86:	stdu	r10,32(3)
-	bdnz	3b
-4:
-87:	ld	r10,8(4)
-88:	std	r7,8(3)
-89:	std	r8,16(3)
-90:	std	r9,24(3)
-91:	std	r10,32(3)
-9:	ld	r20,-120(1)
-	ld	r21,-112(1)
-	ld	r22,-104(1)
-	ld	r23,-96(1)
-	ld	r24,-88(1)
-	ld	r25,-80(1)
-	ld	r26,-72(1)
-	ld	r27,-64(1)
-	ld	r28,-56(1)
-	ld	r29,-48(1)
-	ld	r30,-40(1)
-	ld	r31,-32(1)
-	li	r3,0
-	blr
-
-/*
- * on an exception, reset to the beginning and jump back into the
- * standard __copy_tofrom_user
- */
-100:	ld	r20,-120(1)
-	ld	r21,-112(1)
-	ld	r22,-104(1)
-	ld	r23,-96(1)
-	ld	r24,-88(1)
-	ld	r25,-80(1)
-	ld	r26,-72(1)
-	ld	r27,-64(1)
-	ld	r28,-56(1)
-	ld	r29,-48(1)
-	ld	r30,-40(1)
-	ld	r31,-32(1)
-	ld	r3,-24(r1)
-	ld	r4,-16(r1)
-	li	r5,4096
-	b	.Ldst_aligned
-
-	.section __ex_table,"a"
-	.align	3
-	.llong	20b,100b
-	.llong	21b,100b
-	.llong	22b,100b
-	.llong	23b,100b
-	.llong	24b,100b
-	.llong	25b,100b
-	.llong	26b,100b
-	.llong	27b,100b
-	.llong	28b,100b
-	.llong	29b,100b
-	.llong	30b,100b
-	.llong	31b,100b
-	.llong	32b,100b
-	.llong	33b,100b
-	.llong	34b,100b
-	.llong	35b,100b
-	.llong	36b,100b
-	.llong	37b,100b
-	.llong	38b,100b
-	.llong	39b,100b
-	.llong	40b,100b
-	.llong	41b,100b
-	.llong	42b,100b
-	.llong	43b,100b
-	.llong	44b,100b
-	.llong	45b,100b
-	.llong	46b,100b
-	.llong	47b,100b
-	.llong	48b,100b
-	.llong	49b,100b
-	.llong	50b,100b
-	.llong	51b,100b
-	.llong	52b,100b
-	.llong	53b,100b
-	.llong	54b,100b
-	.llong	55b,100b
-	.llong	56b,100b
-	.llong	57b,100b
-	.llong	58b,100b
-	.llong	59b,100b
-	.llong	60b,100b
-	.llong	61b,100b
-	.llong	62b,100b
-	.llong	63b,100b
-	.llong	64b,100b
-	.llong	65b,100b
-	.llong	66b,100b
-	.llong	67b,100b
-	.llong	68b,100b
-	.llong	69b,100b
-	.llong	70b,100b
-	.llong	71b,100b
-	.llong	72b,100b
-	.llong	73b,100b
-	.llong	74b,100b
-	.llong	75b,100b
-	.llong	76b,100b
-	.llong	77b,100b
-	.llong	78b,100b
-	.llong	79b,100b
-	.llong	80b,100b
-	.llong	81b,100b
-	.llong	82b,100b
-	.llong	83b,100b
-	.llong	84b,100b
-	.llong	85b,100b
-	.llong	86b,100b
-	.llong	87b,100b
-	.llong	88b,100b
-	.llong	89b,100b
-	.llong	90b,100b
-	.llong	91b,100b
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
new file mode 100644
index 000000000000..a0b3fbbd6fb1
--- /dev/null
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -0,0 +1,576 @@
+/*
+ * arch/ppc64/lib/copyuser.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+	.align	7
+_GLOBAL(__copy_tofrom_user)
+	/* first check for a whole page copy on a page boundary */
+	cmpldi	cr1,r5,16
+	cmpdi	cr6,r5,4096
+	or	r0,r3,r4
+	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
+	andi.	r0,r0,4095
+	std	r3,-24(r1)
+	crand	cr0*4+2,cr0*4+2,cr6*4+2
+	std	r4,-16(r1)
+	std	r5,-8(r1)
+	dcbt	0,r4
+	beq	.Lcopy_page
+	andi.	r6,r6,7
+	mtcrf	0x01,r5
+	blt	cr1,.Lshort_copy
+	bne	.Ldst_unaligned
+.Ldst_aligned:
+	andi.	r0,r4,7
+	addi	r3,r3,-16
+	bne	.Lsrc_unaligned
+	srdi	r7,r5,4
+20:	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7
+	bf	cr7*4+0,22f
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,72f
+21:	ld	r9,8(r4)
+70:	std	r8,8(r3)
+22:	ldu	r8,16(r4)
+71:	stdu	r9,16(r3)
+	bdnz	21b
+72:	std	r8,8(r3)
+	beq+	3f
+	addi	r3,r3,16
+23:	ld	r9,8(r4)
+.Ldo_tail:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+73:	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+74:	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+75:	stb	r9,0(r3)
+3:	li	r3,0
+	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4
+	srdi	r7,r5,4
+	sldi	r10,r0,3
+	cmpldi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64
+	add	r5,r5,r0
+	bt	cr7*4+0,28f
+
+24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
+25:	ld	r0,8(r4)
+	sld	r6,r9,r10
+26:	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,79f
+27:	ld	r0,8(r4)
+	b	2f
+
+28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
+29:	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+30:	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+31:	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,78f
+
+1:	or	r7,r7,r6
+32:	ld	r0,8(r4)
+76:	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+33:	ldu	r9,16(r4)
+	or	r12,r8,r12
+77:	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+78:	std	r12,8(r3)
+	or	r7,r7,r6
+79:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+80:	std	r12,24(r3)
+	bne	6f
+	li	r3,0
+	blr
+6:	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,.Ldo_tail
+34:	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+	b	.Ldo_tail
+
+.Ldst_unaligned:
+	mtcrf	0x01,r6		/* put #bytes to 8B bdry into cr7 */
+	subf	r5,r6,r5
+	li	r7,0
+	cmpldi	r1,r5,16
+	bf	cr7*4+3,1f
+35:	lbz	r0,0(r4)
+81:	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+36:	lhzx	r0,r7,r4
+82:	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+37:	lwzx	r0,r7,r4
+83:	stwx	r0,r7,r3
+3:	mtcrf	0x01,r5
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f
+38:	lwz	r0,0(r4)
+39:	lwz	r9,4(r4)
+	addi	r4,r4,8
+84:	stw	r0,0(r3)
+85:	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+40:	lwz	r0,0(r4)
+	addi	r4,r4,4
+86:	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+41:	lhz	r0,0(r4)
+	addi	r4,r4,2
+87:	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+42:	lbz	r0,0(r4)
+88:	stb	r0,0(r3)
+4:	li	r3,0
+	blr
+
+/*
+ * exception handlers follow
+ * we have to return the number of bytes not copied
+ * for an exception on a load, we set the rest of the destination to 0
+ */
+
+136:
+137:
+	add	r3,r3,r7
+	b	1f
+130:
+131:
+	addi	r3,r3,8
+120:
+122:
+124:
+125:
+126:
+127:
+128:
+129:
+133:
+	addi	r3,r3,8
+121:
+132:
+	addi	r3,r3,8
+123:
+134:
+135:
+138:
+139:
+140:
+141:
+142:
+
+/*
+ * here we have had a fault on a load and r3 points to the first
+ * unmodified byte of the destination
+ */
+1:	ld	r6,-24(r1)
+	ld	r4,-16(r1)
+	ld	r5,-8(r1)
+	subf	r6,r6,r3
+	add	r4,r4,r6
+	subf	r5,r6,r5	/* #bytes left to go */
+
+/*
+ * first see if we can copy any more bytes before hitting another exception
+ */
+	mtctr	r5
+43:	lbz	r0,0(r4)
+	addi	r4,r4,1
+89:	stb	r0,0(r3)
+	addi	r3,r3,1
+	bdnz	43b
+	li	r3,0		/* huh? all copied successfully this time? */
+	blr
+
+/*
+ * here we have trapped again, need to clear ctr bytes starting at r3
+ */
+143:	mfctr	r5
+	li	r0,0
+	mr	r4,r3
+	mr	r3,r5		/* return the number of bytes not copied */
+1:	andi.	r9,r4,7
+	beq	3f
+90:	stb	r0,0(r4)
+	addic.	r5,r5,-1
+	addi	r4,r4,1
+	bne	1b
+	blr
+3:	cmpldi	cr1,r5,8
+	srdi	r9,r5,3
+	andi.	r5,r5,7
+	blt	cr1,93f
+	mtctr	r9
+91:	std	r0,0(r4)
+	addi	r4,r4,8
+	bdnz	91b
+93:	beqlr
+	mtctr	r5	
+92:	stb	r0,0(r4)
+	addi	r4,r4,1
+	bdnz	92b
+	blr
+
+/*
+ * exception handlers for stores: we just need to work
+ * out how many bytes weren't copied
+ */
+182:
+183:
+	add	r3,r3,r7
+	b	1f
+180:
+	addi	r3,r3,8
+171:
+177:
+	addi	r3,r3,8
+170:
+172:
+176:
+178:
+	addi	r3,r3,4
+185:
+	addi	r3,r3,4
+173:
+174:
+175:
+179:
+181:
+184:
+186:
+187:
+188:
+189:	
+1:
+	ld	r6,-24(r1)
+	ld	r5,-8(r1)
+	add	r6,r6,r5
+	subf	r3,r3,r6	/* #bytes not copied */
+190:
+191:
+192:
+	blr			/* #bytes not copied in r3 */
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,120b
+	.llong	21b,121b
+	.llong	70b,170b
+	.llong	22b,122b
+	.llong	71b,171b
+	.llong	72b,172b
+	.llong	23b,123b
+	.llong	73b,173b
+	.llong	74b,174b
+	.llong	75b,175b
+	.llong	24b,124b
+	.llong	25b,125b
+	.llong	26b,126b
+	.llong	27b,127b
+	.llong	28b,128b
+	.llong	29b,129b
+	.llong	30b,130b
+	.llong	31b,131b
+	.llong	32b,132b
+	.llong	76b,176b
+	.llong	33b,133b
+	.llong	77b,177b
+	.llong	78b,178b
+	.llong	79b,179b
+	.llong	80b,180b
+	.llong	34b,134b
+	.llong	35b,135b
+	.llong	81b,181b
+	.llong	36b,136b
+	.llong	82b,182b
+	.llong	37b,137b
+	.llong	83b,183b
+	.llong	38b,138b
+	.llong	39b,139b
+	.llong	84b,184b
+	.llong	85b,185b
+	.llong	40b,140b
+	.llong	86b,186b
+	.llong	41b,141b
+	.llong	87b,187b
+	.llong	42b,142b
+	.llong	88b,188b
+	.llong	43b,143b
+	.llong	89b,189b
+	.llong	90b,190b
+	.llong	91b,191b
+	.llong	92b,192b
+	
+	.text
+
+/*
+ * Routine to copy a whole page of data, optimized for POWER4.
+ * On POWER4 it is more than 50% faster than the simple loop
+ * above (following the .Ldst_aligned label) but it runs slightly
+ * slower on POWER3.
+ */
+.Lcopy_page:
+	std	r31,-32(1)
+	std	r30,-40(1)
+	std	r29,-48(1)
+	std	r28,-56(1)
+	std	r27,-64(1)
+	std	r26,-72(1)
+	std	r25,-80(1)
+	std	r24,-88(1)
+	std	r23,-96(1)
+	std	r22,-104(1)
+	std	r21,-112(1)
+	std	r20,-120(1)
+	li	r5,4096/32 - 1
+	addi	r3,r3,-8
+	li	r0,5
+0:	addi	r5,r5,-24
+	mtctr	r0
+20:	ld	r22,640(4)
+21:	ld	r21,512(4)
+22:	ld	r20,384(4)
+23:	ld	r11,256(4)
+24:	ld	r9,128(4)
+25:	ld	r7,0(4)
+26:	ld	r25,648(4)
+27:	ld	r24,520(4)
+28:	ld	r23,392(4)
+29:	ld	r10,264(4)
+30:	ld	r8,136(4)
+31:	ldu	r6,8(4)
+	cmpwi	r5,24
+1:
+32:	std	r22,648(3)
+33:	std	r21,520(3)
+34:	std	r20,392(3)
+35:	std	r11,264(3)
+36:	std	r9,136(3)
+37:	std	r7,8(3)
+38:	ld	r28,648(4)
+39:	ld	r27,520(4)
+40:	ld	r26,392(4)
+41:	ld	r31,264(4)
+42:	ld	r30,136(4)
+43:	ld	r29,8(4)
+44:	std	r25,656(3)
+45:	std	r24,528(3)
+46:	std	r23,400(3)
+47:	std	r10,272(3)
+48:	std	r8,144(3)
+49:	std	r6,16(3)
+50:	ld	r22,656(4)
+51:	ld	r21,528(4)
+52:	ld	r20,400(4)
+53:	ld	r11,272(4)
+54:	ld	r9,144(4)
+55:	ld	r7,16(4)
+56:	std	r28,664(3)
+57:	std	r27,536(3)
+58:	std	r26,408(3)
+59:	std	r31,280(3)
+60:	std	r30,152(3)
+61:	stdu	r29,24(3)
+62:	ld	r25,664(4)
+63:	ld	r24,536(4)
+64:	ld	r23,408(4)
+65:	ld	r10,280(4)
+66:	ld	r8,152(4)
+67:	ldu	r6,24(4)
+	bdnz	1b
+68:	std	r22,648(3)
+69:	std	r21,520(3)
+70:	std	r20,392(3)
+71:	std	r11,264(3)
+72:	std	r9,136(3)
+73:	std	r7,8(3)
+74:	addi	r4,r4,640
+75:	addi	r3,r3,648
+	bge	0b
+	mtctr	r5
+76:	ld	r7,0(4)
+77:	ld	r8,8(4)
+78:	ldu	r9,16(4)
+3:
+79:	ld	r10,8(4)
+80:	std	r7,8(3)
+81:	ld	r7,16(4)
+82:	std	r8,16(3)
+83:	ld	r8,24(4)
+84:	std	r9,24(3)
+85:	ldu	r9,32(4)
+86:	stdu	r10,32(3)
+	bdnz	3b
+4:
+87:	ld	r10,8(4)
+88:	std	r7,8(3)
+89:	std	r8,16(3)
+90:	std	r9,24(3)
+91:	std	r10,32(3)
+9:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	li	r3,0
+	blr
+
+/*
+ * on an exception, reset to the beginning and jump back into the
+ * standard __copy_tofrom_user
+ */
+100:	ld	r20,-120(1)
+	ld	r21,-112(1)
+	ld	r22,-104(1)
+	ld	r23,-96(1)
+	ld	r24,-88(1)
+	ld	r25,-80(1)
+	ld	r26,-72(1)
+	ld	r27,-64(1)
+	ld	r28,-56(1)
+	ld	r29,-48(1)
+	ld	r30,-40(1)
+	ld	r31,-32(1)
+	ld	r3,-24(r1)
+	ld	r4,-16(r1)
+	li	r5,4096
+	b	.Ldst_aligned
+
+	.section __ex_table,"a"
+	.align	3
+	.llong	20b,100b
+	.llong	21b,100b
+	.llong	22b,100b
+	.llong	23b,100b
+	.llong	24b,100b
+	.llong	25b,100b
+	.llong	26b,100b
+	.llong	27b,100b
+	.llong	28b,100b
+	.llong	29b,100b
+	.llong	30b,100b
+	.llong	31b,100b
+	.llong	32b,100b
+	.llong	33b,100b
+	.llong	34b,100b
+	.llong	35b,100b
+	.llong	36b,100b
+	.llong	37b,100b
+	.llong	38b,100b
+	.llong	39b,100b
+	.llong	40b,100b
+	.llong	41b,100b
+	.llong	42b,100b
+	.llong	43b,100b
+	.llong	44b,100b
+	.llong	45b,100b
+	.llong	46b,100b
+	.llong	47b,100b
+	.llong	48b,100b
+	.llong	49b,100b
+	.llong	50b,100b
+	.llong	51b,100b
+	.llong	52b,100b
+	.llong	53b,100b
+	.llong	54b,100b
+	.llong	55b,100b
+	.llong	56b,100b
+	.llong	57b,100b
+	.llong	58b,100b
+	.llong	59b,100b
+	.llong	60b,100b
+	.llong	61b,100b
+	.llong	62b,100b
+	.llong	63b,100b
+	.llong	64b,100b
+	.llong	65b,100b
+	.llong	66b,100b
+	.llong	67b,100b
+	.llong	68b,100b
+	.llong	69b,100b
+	.llong	70b,100b
+	.llong	71b,100b
+	.llong	72b,100b
+	.llong	73b,100b
+	.llong	74b,100b
+	.llong	75b,100b
+	.llong	76b,100b
+	.llong	77b,100b
+	.llong	78b,100b
+	.llong	79b,100b
+	.llong	80b,100b
+	.llong	81b,100b
+	.llong	82b,100b
+	.llong	83b,100b
+	.llong	84b,100b
+	.llong	85b,100b
+	.llong	86b,100b
+	.llong	87b,100b
+	.llong	88b,100b
+	.llong	89b,100b
+	.llong	90b,100b
+	.llong	91b,100b
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
new file mode 100644
index 000000000000..68df20283ff5
--- /dev/null
+++ b/arch/powerpc/lib/mem_64.S
@@ -0,0 +1,119 @@
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(memset)
+	neg	r0,r3
+	rlwimi	r4,r4,8,16,23
+	andi.	r0,r0,7			/* # bytes to be 8-byte aligned */
+	rlwimi	r4,r4,16,0,15
+	cmplw	cr1,r5,r0		/* do we get that far? */
+	rldimi	r4,r4,32,0
+	mtcrf	1,r0
+	mr	r6,r3
+	blt	cr1,8f
+	beq+	3f			/* if already 8-byte aligned */
+	subf	r5,r0,r5
+	bf	31,1f
+	stb	r4,0(r6)
+	addi	r6,r6,1
+1:	bf	30,2f
+	sth	r4,0(r6)
+	addi	r6,r6,2
+2:	bf	29,3f
+	stw	r4,0(r6)
+	addi	r6,r6,4
+3:	srdi.	r0,r5,6
+	clrldi	r5,r5,58
+	mtctr	r0
+	beq	5f
+4:	std	r4,0(r6)
+	std	r4,8(r6)
+	std	r4,16(r6)
+	std	r4,24(r6)
+	std	r4,32(r6)
+	std	r4,40(r6)
+	std	r4,48(r6)
+	std	r4,56(r6)
+	addi	r6,r6,64
+	bdnz	4b
+5:	srwi.	r0,r5,3
+	clrlwi	r5,r5,29
+	mtcrf	1,r0
+	beq	8f
+	bf	29,6f
+	std	r4,0(r6)
+	std	r4,8(r6)
+	std	r4,16(r6)
+	std	r4,24(r6)
+	addi	r6,r6,32
+6:	bf	30,7f
+	std	r4,0(r6)
+	std	r4,8(r6)
+	addi	r6,r6,16
+7:	bf	31,8f
+	std	r4,0(r6)
+	addi	r6,r6,8
+8:	cmpwi	r5,0
+	mtcrf	1,r5
+	beqlr+
+	bf	29,9f
+	stw	r4,0(r6)
+	addi	r6,r6,4
+9:	bf	30,10f
+	sth	r4,0(r6)
+	addi	r6,r6,2
+10:	bflr	31
+	stb	r4,0(r6)
+	blr
+
+_GLOBAL(memmove)
+	cmplw	0,r3,r4
+	bgt	.backwards_memcpy
+	b	.memcpy
+
+_GLOBAL(backwards_memcpy)
+	rlwinm.	r7,r5,32-3,3,31		/* r0 = r5 >> 3 */
+	add	r6,r3,r5
+	add	r4,r4,r5
+	beq	2f
+	andi.	r0,r6,3
+	mtctr	r7
+	bne	5f
+1:	lwz	r7,-4(r4)
+	lwzu	r8,-8(r4)
+	stw	r7,-4(r6)
+	stwu	r8,-8(r6)
+	bdnz	1b
+	andi.	r5,r5,7
+2:	cmplwi	0,r5,4
+	blt	3f
+	lwzu	r0,-4(r4)
+	subi	r5,r5,4
+	stwu	r0,-4(r6)
+3:	cmpwi	0,r5,0
+	beqlr
+	mtctr	r5
+4:	lbzu	r0,-1(r4)
+	stbu	r0,-1(r6)
+	bdnz	4b
+	blr
+5:	mtctr	r0
+6:	lbzu	r7,-1(r4)
+	stbu	r7,-1(r6)
+	bdnz	6b
+	subf	r5,r0,r5
+	rlwinm.	r7,r5,32-3,3,31
+	beq	2b
+	mtctr	r7
+	b	1b
diff --git a/arch/powerpc/lib/memcpy.S b/arch/powerpc/lib/memcpy.S
deleted file mode 100644
index 9ccacdf5bcb9..000000000000
--- a/arch/powerpc/lib/memcpy.S
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * arch/ppc64/lib/memcpy.S
- *
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-
-	.align	7
-_GLOBAL(memcpy)
-	mtcrf	0x01,r5
-	cmpldi	cr1,r5,16
-	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
-	andi.	r6,r6,7
-	dcbt	0,r4
-	blt	cr1,.Lshort_copy
-	bne	.Ldst_unaligned
-.Ldst_aligned:
-	andi.	r0,r4,7
-	addi	r3,r3,-16
-	bne	.Lsrc_unaligned
-	srdi	r7,r5,4
-	ld	r9,0(r4)
-	addi	r4,r4,-8
-	mtctr	r7
-	andi.	r5,r5,7
-	bf	cr7*4+0,2f
-	addi	r3,r3,8
-	addi	r4,r4,8
-	mr	r8,r9
-	blt	cr1,3f
-1:	ld	r9,8(r4)
-	std	r8,8(r3)
-2:	ldu	r8,16(r4)
-	stdu	r9,16(r3)
-	bdnz	1b
-3:	std	r8,8(r3)
-	beqlr
-	addi	r3,r3,16
-	ld	r9,8(r4)
-.Ldo_tail:
-	bf	cr7*4+1,1f
-	rotldi	r9,r9,32
-	stw	r9,0(r3)
-	addi	r3,r3,4
-1:	bf	cr7*4+2,2f
-	rotldi	r9,r9,16
-	sth	r9,0(r3)
-	addi	r3,r3,2
-2:	bf	cr7*4+3,3f
-	rotldi	r9,r9,8
-	stb	r9,0(r3)
-3:	blr
-
-.Lsrc_unaligned:
-	srdi	r6,r5,3
-	addi	r5,r5,-16
-	subf	r4,r0,r4
-	srdi	r7,r5,4
-	sldi	r10,r0,3
-	cmpdi	cr6,r6,3
-	andi.	r5,r5,7
-	mtctr	r7
-	subfic	r11,r10,64
-	add	r5,r5,r0
-
-	bt	cr7*4+0,0f
-
-	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
-	ld	r0,8(r4)
-	sld	r6,r9,r10
-	ldu	r9,16(r4)
-	srd	r7,r0,r11
-	sld	r8,r0,r10
-	or	r7,r7,r6
-	blt	cr6,4f
-	ld	r0,8(r4)
-	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
-	b	2f
-
-0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
-	ldu	r9,8(r4)
-	sld	r8,r0,r10
-	addi	r3,r3,-8
-	blt	cr6,5f
-	ld	r0,8(r4)
-	srd	r12,r9,r11
-	sld	r6,r9,r10
-	ldu	r9,16(r4)
-	or	r12,r8,r12
-	srd	r7,r0,r11
-	sld	r8,r0,r10
-	addi	r3,r3,16
-	beq	cr6,3f
-
-	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
-1:	or	r7,r7,r6
-	ld	r0,8(r4)
-	std	r12,8(r3)
-2:	srd	r12,r9,r11
-	sld	r6,r9,r10
-	ldu	r9,16(r4)
-	or	r12,r8,r12
-	stdu	r7,16(r3)
-	srd	r7,r0,r11
-	sld	r8,r0,r10
-	bdnz	1b
-
-3:	std	r12,8(r3)
-	or	r7,r7,r6
-4:	std	r7,16(r3)
-5:	srd	r12,r9,r11
-	or	r12,r8,r12
-	std	r12,24(r3)
-	beqlr
-	cmpwi	cr1,r5,8
-	addi	r3,r3,32
-	sld	r9,r9,r10
-	ble	cr1,.Ldo_tail
-	ld	r0,8(r4)
-	srd	r7,r0,r11
-	or	r9,r7,r9
-	b	.Ldo_tail
-
-.Ldst_unaligned:
-	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
-	subf	r5,r6,r5
-	li	r7,0
-	cmpldi	r1,r5,16
-	bf	cr7*4+3,1f
-	lbz	r0,0(r4)
-	stb	r0,0(r3)
-	addi	r7,r7,1
-1:	bf	cr7*4+2,2f
-	lhzx	r0,r7,r4
-	sthx	r0,r7,r3
-	addi	r7,r7,2
-2:	bf	cr7*4+1,3f
-	lwzx	r0,r7,r4
-	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
-	add	r4,r6,r4
-	add	r3,r6,r3
-	b	.Ldst_aligned
-
-.Lshort_copy:
-	bf	cr7*4+0,1f
-	lwz	r0,0(r4)
-	lwz	r9,4(r4)
-	addi	r4,r4,8
-	stw	r0,0(r3)
-	stw	r9,4(r3)
-	addi	r3,r3,8
-1:	bf	cr7*4+1,2f
-	lwz	r0,0(r4)
-	addi	r4,r4,4
-	stw	r0,0(r3)
-	addi	r3,r3,4
-2:	bf	cr7*4+2,3f
-	lhz	r0,0(r4)
-	addi	r4,r4,2
-	sth	r0,0(r3)
-	addi	r3,r3,2
-3:	bf	cr7*4+3,4f
-	lbz	r0,0(r4)
-	stb	r0,0(r3)
-4:	blr
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
new file mode 100644
index 000000000000..9ccacdf5bcb9
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -0,0 +1,172 @@
+/*
+ * arch/ppc64/lib/memcpy.S
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+
+	.align	7
+_GLOBAL(memcpy)
+	mtcrf	0x01,r5
+	cmpldi	cr1,r5,16
+	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
+	andi.	r6,r6,7
+	dcbt	0,r4
+	blt	cr1,.Lshort_copy
+	bne	.Ldst_unaligned
+.Ldst_aligned:
+	andi.	r0,r4,7
+	addi	r3,r3,-16
+	bne	.Lsrc_unaligned
+	srdi	r7,r5,4
+	ld	r9,0(r4)
+	addi	r4,r4,-8
+	mtctr	r7
+	andi.	r5,r5,7
+	bf	cr7*4+0,2f
+	addi	r3,r3,8
+	addi	r4,r4,8
+	mr	r8,r9
+	blt	cr1,3f
+1:	ld	r9,8(r4)
+	std	r8,8(r3)
+2:	ldu	r8,16(r4)
+	stdu	r9,16(r3)
+	bdnz	1b
+3:	std	r8,8(r3)
+	beqlr
+	addi	r3,r3,16
+	ld	r9,8(r4)
+.Ldo_tail:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+	stb	r9,0(r3)
+3:	blr
+
+.Lsrc_unaligned:
+	srdi	r6,r5,3
+	addi	r5,r5,-16
+	subf	r4,r0,r4
+	srdi	r7,r5,4
+	sldi	r10,r0,3
+	cmpdi	cr6,r6,3
+	andi.	r5,r5,7
+	mtctr	r7
+	subfic	r11,r10,64
+	add	r5,r5,r0
+
+	bt	cr7*4+0,0f
+
+	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
+	ld	r0,8(r4)
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	or	r7,r7,r6
+	blt	cr6,4f
+	ld	r0,8(r4)
+	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
+	b	2f
+
+0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
+	ldu	r9,8(r4)
+	sld	r8,r0,r10
+	addi	r3,r3,-8
+	blt	cr6,5f
+	ld	r0,8(r4)
+	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	addi	r3,r3,16
+	beq	cr6,3f
+
+	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
+1:	or	r7,r7,r6
+	ld	r0,8(r4)
+	std	r12,8(r3)
+2:	srd	r12,r9,r11
+	sld	r6,r9,r10
+	ldu	r9,16(r4)
+	or	r12,r8,r12
+	stdu	r7,16(r3)
+	srd	r7,r0,r11
+	sld	r8,r0,r10
+	bdnz	1b
+
+3:	std	r12,8(r3)
+	or	r7,r7,r6
+4:	std	r7,16(r3)
+5:	srd	r12,r9,r11
+	or	r12,r8,r12
+	std	r12,24(r3)
+	beqlr
+	cmpwi	cr1,r5,8
+	addi	r3,r3,32
+	sld	r9,r9,r10
+	ble	cr1,.Ldo_tail
+	ld	r0,8(r4)
+	srd	r7,r0,r11
+	or	r9,r7,r9
+	b	.Ldo_tail
+
+.Ldst_unaligned:
+	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
+	subf	r5,r6,r5
+	li	r7,0
+	cmpldi	r1,r5,16
+	bf	cr7*4+3,1f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+	addi	r7,r7,1
+1:	bf	cr7*4+2,2f
+	lhzx	r0,r7,r4
+	sthx	r0,r7,r3
+	addi	r7,r7,2
+2:	bf	cr7*4+1,3f
+	lwzx	r0,r7,r4
+	stwx	r0,r7,r3
+3:	mtcrf	0x01,r5
+	add	r4,r6,r4
+	add	r3,r6,r3
+	b	.Ldst_aligned
+
+.Lshort_copy:
+	bf	cr7*4+0,1f
+	lwz	r0,0(r4)
+	lwz	r9,4(r4)
+	addi	r4,r4,8
+	stw	r0,0(r3)
+	stw	r9,4(r3)
+	addi	r3,r3,8
+1:	bf	cr7*4+1,2f
+	lwz	r0,0(r4)
+	addi	r4,r4,4
+	stw	r0,0(r3)
+	addi	r3,r3,4
+2:	bf	cr7*4+2,3f
+	lhz	r0,0(r4)
+	addi	r4,r4,2
+	sth	r0,0(r3)
+	addi	r3,r3,2
+3:	bf	cr7*4+3,4f
+	lbz	r0,0(r4)
+	stb	r0,0(r3)
+4:	blr
diff --git a/arch/powerpc/lib/usercopy.c b/arch/powerpc/lib/usercopy.c
deleted file mode 100644
index 5eea6f3c1e03..000000000000
--- a/arch/powerpc/lib/usercopy.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Functions which are too large to be inlined.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/module.h>
-#include <asm/uaccess.h>
-
-unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	if (likely(access_ok(VERIFY_READ, from, n)))
-		n = __copy_from_user(to, from, n);
-	else
-		memset(to, 0, n);
-	return n;
-}
-
-unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	if (likely(access_ok(VERIFY_WRITE, to, n)))
-		n = __copy_to_user(to, from, n);
-	return n;
-}
-
-unsigned long copy_in_user(void __user *to, const void __user *from,
-			   unsigned long n)
-{
-	might_sleep();
-	if (likely(access_ok(VERIFY_READ, from, n) &&
-	    access_ok(VERIFY_WRITE, to, n)))
-		n =__copy_tofrom_user(to, from, n);
-	return n;
-}
-
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(copy_in_user);
-
diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c
new file mode 100644
index 000000000000..5eea6f3c1e03
--- /dev/null
+++ b/arch/powerpc/lib/usercopy_64.c
@@ -0,0 +1,41 @@
+/*
+ * Functions which are too large to be inlined.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
+unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_READ, from, n)))
+		n = __copy_from_user(to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
+
+unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (likely(access_ok(VERIFY_WRITE, to, n)))
+		n = __copy_to_user(to, from, n);
+	return n;
+}
+
+unsigned long copy_in_user(void __user *to, const void __user *from,
+			   unsigned long n)
+{
+	might_sleep();
+	if (likely(access_ok(VERIFY_READ, from, n) &&
+	    access_ok(VERIFY_WRITE, to, n)))
+		n =__copy_tofrom_user(to, from, n);
+	return n;
+}
+
+EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(copy_to_user);
+EXPORT_SYMBOL(copy_in_user);
+
-- 
cgit v1.2.2


From b3b8dc6c07cecc1f8d52d03f677206bdf9f794c9 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 10 Oct 2005 22:20:10 +1000
Subject: powerpc: Use reg.h instead of processor.h when we just want reg names

Now that the register names and bit definitions are all in reg.h,
use that instead of processor.h in assembly code in a few places.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/string.S | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 15d40e9ef8b1..b9ca84ed8927 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -13,11 +13,6 @@
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
 
-	.text
-	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
-	.stabs	"string.S",N_SO,0,0,0f
-0:
-
 	.section __ex_table,"a"
 #ifdef CONFIG_PPC64
 	.align	3
-- 
cgit v1.2.2


From 40ef8cbc6d360e564573eb19582249c35d8ba330 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 10 Oct 2005 22:50:37 +1000
Subject: powerpc: Get 64-bit configs to compile with ARCH=powerpc

This is a bunch of mostly small fixes that are needed to get
ARCH=powerpc to compile for 64-bit.  This adds setup_64.c from
arch/ppc64/kernel/setup.c and locks.c from arch/ppc64/lib/locks.c.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/Makefile |  4 ++
 arch/powerpc/lib/locks.c  | 95 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 arch/powerpc/lib/locks.c

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index a8cedb96de5f..30367a0237dd 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -7,3 +7,7 @@ obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o memcpy_64.o \
 			   usercopy_64.o sstep.o checksum_64.o mem_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
+ifeq ($(CONFIG_PPC64),y)
+obj-$(CONFIG_SMP)	+= locks.o
+endif
+
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
new file mode 100644
index 000000000000..4b8c5ad5e7dc
--- /dev/null
+++ b/arch/powerpc/lib/locks.c
@@ -0,0 +1,95 @@
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *   Rework to support virtual processors
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/stringify.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
+#include <asm/hvcall.h>
+#include <asm/iSeries/HvCall.h>
+
+void __spin_yield(raw_spinlock_t *lock)
+{
+	unsigned int lock_value, holder_cpu, yield_count;
+	struct paca_struct *holder_paca;
+
+	lock_value = lock->slock;
+	if (lock_value == 0)
+		return;
+	holder_cpu = lock_value & 0xffff;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	holder_paca = &paca[holder_cpu];
+	yield_count = holder_paca->lppaca.yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (lock->slock != lock_value)
+		return;		/* something has changed */
+#ifdef CONFIG_PPC_ISERIES
+	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+		((u64)holder_cpu << 32) | yield_count);
+#else
+	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
+			   yield_count);
+#endif
+}
+
+/*
+ * Waiting for a read lock or a write lock on a rwlock...
+ * This turns out to be the same for read and write locks, since
+ * we only know the holder if it is write-locked.
+ */
+void __rw_yield(raw_rwlock_t *rw)
+{
+	int lock_value;
+	unsigned int holder_cpu, yield_count;
+	struct paca_struct *holder_paca;
+
+	lock_value = rw->lock;
+	if (lock_value >= 0)
+		return;		/* no write lock at present */
+	holder_cpu = lock_value & 0xffff;
+	BUG_ON(holder_cpu >= NR_CPUS);
+	holder_paca = &paca[holder_cpu];
+	yield_count = holder_paca->lppaca.yield_count;
+	if ((yield_count & 1) == 0)
+		return;		/* virtual cpu is currently running */
+	rmb();
+	if (rw->lock != lock_value)
+		return;		/* something has changed */
+#ifdef CONFIG_PPC_ISERIES
+	HvCall2(HvCallBaseYieldProcessor, HvCall_YieldToProc,
+		((u64)holder_cpu << 32) | yield_count);
+#else
+	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(holder_cpu),
+			   yield_count);
+#endif
+}
+#endif
+
+void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+{
+	while (lock->slock) {
+		HMT_low();
+		if (SHARED_PROCESSOR)
+			__spin_yield(lock);
+	}
+	HMT_medium();
+}
+
+EXPORT_SYMBOL(__raw_spin_unlock_wait);
-- 
cgit v1.2.2


From 7dffb72028bfd909ac51a1546d182de2df4d2426 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 17 Oct 2005 11:50:32 +1000
Subject: ppc32: use L1_CACHE_SHIFT/L1_CACHE_BYTES

instead of L1_CACHE_LINE_SIZE and LG_L1_CACHE_LINE_SIZE

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/copy_32.S | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 420a912198a2..bee51414812e 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -66,9 +66,9 @@
 	.stabs	"copy32.S",N_SO,0,0,0f
 0:
 
-CACHELINE_BYTES = L1_CACHE_LINE_SIZE
-LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE
-CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1)
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
 /*
  * Use dcbz on the complete cache lines in the destination
@@ -205,12 +205,12 @@ _GLOBAL(cacheable_memcpy)
 	dcbz	r11,r6
 #endif
 	COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 32
+#if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 64
+#if L1_CACHE_BYTES >= 64
 	COPY_16_BYTES
 	COPY_16_BYTES
-#if L1_CACHE_LINE_SIZE >= 128
+#if L1_CACHE_BYTES >= 128
 	COPY_16_BYTES
 	COPY_16_BYTES
 	COPY_16_BYTES
@@ -399,12 +399,12 @@ _GLOBAL(__copy_tofrom_user)
 	.text
 /* the main body of the cacheline loop */
 	COPY_16_BYTES_WITHEX(0)
-#if L1_CACHE_LINE_SIZE >= 32
+#if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES_WITHEX(1)
-#if L1_CACHE_LINE_SIZE >= 64
+#if L1_CACHE_BYTES >= 64
 	COPY_16_BYTES_WITHEX(2)
 	COPY_16_BYTES_WITHEX(3)
-#if L1_CACHE_LINE_SIZE >= 128
+#if L1_CACHE_BYTES >= 128
 	COPY_16_BYTES_WITHEX(4)
 	COPY_16_BYTES_WITHEX(5)
 	COPY_16_BYTES_WITHEX(6)
@@ -458,12 +458,12 @@ _GLOBAL(__copy_tofrom_user)
  * 104f (if in read part) or 105f (if in write part), after updating r5
  */
 	COPY_16_BYTES_EXCODE(0)
-#if L1_CACHE_LINE_SIZE >= 32
+#if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES_EXCODE(1)
-#if L1_CACHE_LINE_SIZE >= 64
+#if L1_CACHE_BYTES >= 64
 	COPY_16_BYTES_EXCODE(2)
 	COPY_16_BYTES_EXCODE(3)
-#if L1_CACHE_LINE_SIZE >= 128
+#if L1_CACHE_BYTES >= 128
 	COPY_16_BYTES_EXCODE(4)
 	COPY_16_BYTES_EXCODE(5)
 	COPY_16_BYTES_EXCODE(6)
-- 
cgit v1.2.2


From 344480b99730bfd205e306d3fd168cdcebe83425 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 20 Oct 2005 09:37:02 +1000
Subject: powerpc: Fix a corner case in __div64_32

The code was incorrectly doing a division by 0 in the case where
the denominator was 0x100000000 and the divisor was 0xffffffff.
Thanks to Fred Liu of Motorola for pointing this out.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/div64.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S
index 3527569e9926..83d9832fd919 100644
--- a/arch/powerpc/lib/div64.S
+++ b/arch/powerpc/lib/div64.S
@@ -33,9 +33,10 @@ _GLOBAL(__div64_32)
 	cntlzw	r0,r5		# we are shifting the dividend right
 	li	r10,-1		# to make it < 2^32, and shifting
 	srw	r10,r10,r0	# the divisor right the same amount,
-	add	r9,r4,r10	# rounding up (so the estimate cannot
+	addc	r9,r4,r10	# rounding up (so the estimate cannot
 	andc	r11,r6,r10	# ever be too large, only too small)
 	andc	r9,r9,r10
+	addze	r9,r9
 	or	r11,r5,r11
 	rotlw	r9,r9,r0
 	rotlw	r11,r11,r0
-- 
cgit v1.2.2


From 1a3c061efd32b620cf6498b71687ff0768cfe2d8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 16:25:05 +1000
Subject: ppc64: use checksum_64.S from powerpc

as it is identical to checksum.S from ppc64.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 30367a0237dd..1f1f01b757d4 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -2,12 +2,15 @@
 # Makefile for ppc-specific library files..
 #
 
+ifeq ($(CONFIG_PPC_MERGE),y)
 obj-y			:= strcase.o string.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o memcpy_64.o \
-			   usercopy_64.o sstep.o checksum_64.o mem_64.o
+			   usercopy_64.o sstep.o mem_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
 endif
+endif
 
+obj-$(CONFIG_PPC64)	+= checksum_64.o
-- 
cgit v1.2.2


From 454fb016b3407e7cacbe17e590521ddf86a54f7f Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 16:28:43 +1000
Subject: ppc64: use e2a.c from powerpc/lib

since it is identical to e2a.c from ppc64/lib

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 1f1f01b757d4..87153891d7a7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -7,10 +7,10 @@ obj-y			:= strcase.o string.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o memcpy_64.o \
 			   usercopy_64.o sstep.o mem_64.o
-obj-$(CONFIG_PPC_ISERIES) += e2a.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
 endif
 endif
 
 obj-$(CONFIG_PPC64)	+= checksum_64.o
+obj-$(CONFIG_PPC_ISERIES) += e2a.o
-- 
cgit v1.2.2


From 34faa82841b87e6f375b01dad3262ca345de25c0 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 16:35:43 +1000
Subject: ppc64: use copypage_64.S from powerpc/lib

since it is identical to copypage.S from ppc64/lib.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 87153891d7a7..5e481944250e 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -5,12 +5,12 @@
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-y			:= strcase.o string.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
-obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o memcpy_64.o \
+obj-$(CONFIG_PPC64)	+= copyuser_64.o memcpy_64.o \
 			   usercopy_64.o sstep.o mem_64.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
 endif
 endif
 
-obj-$(CONFIG_PPC64)	+= checksum_64.o
+obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
-- 
cgit v1.2.2


From 43f35ec35c2b9a1c984d885893084eed2101e1bf Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 16:40:05 +1000
Subject: ppc64: use copyuser_64.S from powerpc/lib

since it is identical to copyuser.S from ppc64/lib.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5e481944250e..3474c9309e2f 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -5,12 +5,12 @@
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-y			:= strcase.o string.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
-obj-$(CONFIG_PPC64)	+= copyuser_64.o memcpy_64.o \
+obj-$(CONFIG_PPC64)	+= memcpy_64.o \
 			   usercopy_64.o sstep.o mem_64.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
 endif
 endif
 
-obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o
+obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
-- 
cgit v1.2.2


From ecc81e0f719f566b75b222b8aef64c8b809b2e29 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 16:45:38 +1000
Subject: ppc64: use lockc.c from powerpc/lib

since it is effectively the same as locks.c from ppc64/lib.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 3474c9309e2f..955c06421590 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -7,10 +7,10 @@ obj-y			:= strcase.o string.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= memcpy_64.o \
 			   usercopy_64.o sstep.o mem_64.o
-ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_SMP)	+= locks.o
-endif
 endif
 
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
+ifeq ($(CONFIG_PPC64),y)
+obj-$(CONFIG_SMP)	+= locks.o
+endif
-- 
cgit v1.2.2


From 12a39407f021fd17d5f9d33d78bddb005bd106fb Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 16:49:44 +1000
Subject: ppc64: use memcpy_64.S from powerpc/lib

since it is identical to mempcy.S from ppc64/lib.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 955c06421590..9cf35ed3f33a 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -5,11 +5,11 @@
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-y			:= strcase.o string.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
-obj-$(CONFIG_PPC64)	+= memcpy_64.o \
-			   usercopy_64.o sstep.o mem_64.o
+obj-$(CONFIG_PPC64)	+= usercopy_64.o sstep.o mem_64.o
 endif
 
-obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o
+obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
+			   memcpy_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
-- 
cgit v1.2.2


From a0d8bf9cf1f9722204936d8ef53370d7ae1181d9 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 17:05:27 +1000
Subject: ppc64: use sstep.c from powerpc/lib

since it is identical to sstep.c from ppc64/lib.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 9cf35ed3f33a..a7af96ce9df2 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -5,7 +5,7 @@
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-y			:= strcase.o string.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
-obj-$(CONFIG_PPC64)	+= usercopy_64.o sstep.o mem_64.o
+obj-$(CONFIG_PPC64)	+= usercopy_64.o mem_64.o
 endif
 
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
@@ -13,4 +13,5 @@ obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
+obj-$(CONFIG_DEBUG_KERNEL) += sstep.o
 endif
-- 
cgit v1.2.2


From a2b0ca84a1198505f0b3c7b55b7769dd5dbd0791 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 17:12:52 +1000
Subject: ppc64: user strcase.c from powerpc/lib

since it is identical to strcase.c from ppc64/lib.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index a7af96ce9df2..0726235ef25d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -3,11 +3,12 @@
 #
 
 ifeq ($(CONFIG_PPC_MERGE),y)
-obj-y			:= strcase.o string.o
-obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
+obj-y			:= string.o
 obj-$(CONFIG_PPC64)	+= usercopy_64.o mem_64.o
 endif
 
+obj-y			+= strcase.o
+obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
 			   memcpy_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
-- 
cgit v1.2.2


From a4a264f144d5389a52d467a7e41bec2834cfde75 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 17:28:21 +1000
Subject: ppc64: use usercopy_64.c from powerpc/lib

since it is identical to usercopy.c from ppc64/lib.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 0726235ef25d..15b6b2696500 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -4,13 +4,13 @@
 
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-y			:= string.o
-obj-$(CONFIG_PPC64)	+= usercopy_64.o mem_64.o
+obj-$(CONFIG_PPC64)	+= mem_64.o
 endif
 
 obj-y			+= strcase.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
-			   memcpy_64.o
+			   memcpy_64.o usercopy_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
-- 
cgit v1.2.2


From 299f6ce491aa28515d86f29af2779cbfdc7a4790 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 27 Oct 2005 18:03:58 +1000
Subject: ppc64: use mem_64.S from powerpc/lib

and remove the same bits from ppc64/lib/string.S.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 arch/powerpc/lib/Makefile | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 15b6b2696500..0115bf96751c 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -4,13 +4,12 @@
 
 ifeq ($(CONFIG_PPC_MERGE),y)
 obj-y			:= string.o
-obj-$(CONFIG_PPC64)	+= mem_64.o
 endif
 
 obj-y			+= strcase.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
-			   memcpy_64.o usercopy_64.o
+			   memcpy_64.o usercopy_64.o mem_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
-- 
cgit v1.2.2


From c032524f0ddea5fcc3a2cece0d4a61f37e5ca9cd Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 28 Oct 2005 22:48:08 +1000
Subject: powerpc: Make single-stepping emulation (mostly) usable on 32-bit

The sc instruction emulation can't be done the same way on 32-bit
as 64-bit yet, but this should work OK.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/sstep.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index e79123d1485c..666c2aa55016 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -10,13 +10,18 @@
  */
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
+#include <linux/config.h>
 #include <asm/sstep.h>
 #include <asm/processor.h>
 
 extern char system_call_common[];
 
+#ifdef CONFIG_PPC64
 /* Bits in SRR1 that are copied from MSR */
 #define MSR_MASK	0xffffffff87c0ffff
+#else
+#define MSR_MASK	0x87c0ffff
+#endif
 
 /*
  * Determine whether a conditional branch instruction would branch.
@@ -66,6 +71,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 		if (branch_taken(instr, regs))
 			regs->nip = imm;
 		return 1;
+#ifdef CONFIG_PPC64
 	case 17:	/* sc */
 		/*
 		 * N.B. this uses knowledge about how the syscall
@@ -79,6 +85,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 		regs->nip = (unsigned long) &system_call_common;
 		regs->msr = MSR_KERNEL;
 		return 1;
+#endif
 	case 18:	/* b */
 		imm = instr & 0x03fffffc;
 		if (imm & 0x02000000)
@@ -121,6 +128,15 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 			if ((regs->msr & MSR_SF) == 0)
 				regs->nip &= 0xffffffffUL;
 			return 1;
+		case 0x124:	/* mtmsr */
+			imm = regs->gpr[rd];
+			if ((imm & MSR_RI) == 0)
+				/* can't step mtmsr that would clear MSR_RI */
+				return -1;
+			regs->msr = imm;
+			regs->nip += 4;
+			return 1;
+#ifdef CONFIG_PPC64
 		case 0x164:	/* mtmsrd */
 			/* only MSR_EE and MSR_RI get changed if bit 15 set */
 			/* mtmsrd doesn't change MSR_HV and MSR_ME */
@@ -135,6 +151,7 @@ int emulate_step(struct pt_regs *regs, unsigned int instr)
 			if ((imm & MSR_SF) == 0)
 				regs->nip &= 0xffffffffUL;
 			return 1;
+#endif
 		}
 	}
 	return 0;
-- 
cgit v1.2.2


From f78541dcec327b0c46b150ee7d727f3db80275c4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 28 Oct 2005 22:53:37 +1000
Subject: powerpc: Merge xmon

The merged version follows the ppc64 version pretty closely mostly,
and in fact ARCH=ppc64 now uses the arch/powerpc/xmon version.
The main difference for ppc64 is that the 'p' command to call
show_state (which was always pretty dodgy) has been replaced by
the ppc32 'p' command, which calls a given procedure (so in fact
the old 'p' command behaviour can be achieved with 'p $show_state').

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/Makefile | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 0115bf96751c..e6b2be3bcec1 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -11,6 +11,8 @@ obj-$(CONFIG_PPC32)	+= div64.o copy_32.o checksum_32.o
 obj-$(CONFIG_PPC64)	+= checksum_64.o copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o
 obj-$(CONFIG_PPC_ISERIES) += e2a.o
+obj-$(CONFIG_XMON)	+= sstep.o
+
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o
 obj-$(CONFIG_DEBUG_KERNEL) += sstep.o
-- 
cgit v1.2.2


From 734d6524800b6a8362666e893a5f3f29b9ef0be9 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 31 Oct 2005 13:57:01 +1100
Subject: powerpc: apply recent changes to merged code

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/lib/locks.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc/lib')

diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 4b8c5ad5e7dc..3794715b2972 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -17,6 +17,7 @@
 #include <linux/spinlock.h>
 #include <linux/module.h>
 #include <linux/stringify.h>
+#include <linux/smp.h>
 
 /* waiting for a spinlock... */
 #if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
-- 
cgit v1.2.2