aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilippe Bergheaud <felix@linux.vnet.ibm.com>2014-04-29 19:12:01 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-04-30 01:26:18 -0400
commit00f554fadebb96877ad449758dc90303a9826afe (patch)
tree77a9ff9bfbb32f75a18a7f3032ad7d3d55944c48
parent48ce3b7cc66b31cd8ae1b8155a82426308f71491 (diff)
powerpc: memcpy optimization for 64bit LE
Unaligned stores take alignment exceptions on POWER7 running in little-endian. This is a dumb little-endian base memcpy that prevents unaligned stores. Once booted the feature fixup code switches over to the VMX copy loops (which are already endian safe). The question is what we do before that switch over. The base 64bit memcpy takes alignment exceptions on POWER7 so we can't use it as is. Fixing the causes of alignment exception would slow it down, because we'd need to ensure all loads and stores are aligned either through rotate tricks or bytewise loads and stores. Either would be bad for all other 64bit platforms. [ I simplified the loop a bit - Anton ] Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com> Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/string.h4
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c2
-rw-r--r--arch/powerpc/lib/Makefile2
-rw-r--r--arch/powerpc/lib/memcpy_64.S16
4 files changed, 16 insertions, 8 deletions
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 0dffad6bcc84..e40010abcaf1 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -10,9 +10,7 @@
10#define __HAVE_ARCH_STRNCMP 10#define __HAVE_ARCH_STRNCMP
11#define __HAVE_ARCH_STRCAT 11#define __HAVE_ARCH_STRCAT
12#define __HAVE_ARCH_MEMSET 12#define __HAVE_ARCH_MEMSET
13#ifdef __BIG_ENDIAN__
14#define __HAVE_ARCH_MEMCPY 13#define __HAVE_ARCH_MEMCPY
15#endif
16#define __HAVE_ARCH_MEMMOVE 14#define __HAVE_ARCH_MEMMOVE
17#define __HAVE_ARCH_MEMCMP 15#define __HAVE_ARCH_MEMCMP
18#define __HAVE_ARCH_MEMCHR 16#define __HAVE_ARCH_MEMCHR
@@ -24,9 +22,7 @@ extern int strcmp(const char *,const char *);
24extern int strncmp(const char *, const char *, __kernel_size_t); 22extern int strncmp(const char *, const char *, __kernel_size_t);
25extern char * strcat(char *, const char *); 23extern char * strcat(char *, const char *);
26extern void * memset(void *,int,__kernel_size_t); 24extern void * memset(void *,int,__kernel_size_t);
27#ifdef __BIG_ENDIAN__
28extern void * memcpy(void *,const void *,__kernel_size_t); 25extern void * memcpy(void *,const void *,__kernel_size_t);
29#endif
30extern void * memmove(void *,const void *,__kernel_size_t); 26extern void * memmove(void *,const void *,__kernel_size_t);
31extern int memcmp(const void *,const void *,__kernel_size_t); 27extern int memcmp(const void *,const void *,__kernel_size_t);
32extern void * memchr(const void *,int,__kernel_size_t); 28extern void * memchr(const void *,int,__kernel_size_t);
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 450850a49dce..48d17d6fca5b 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -155,9 +155,7 @@ EXPORT_SYMBOL(__cmpdi2);
155#endif 155#endif
156long long __bswapdi2(long long); 156long long __bswapdi2(long long);
157EXPORT_SYMBOL(__bswapdi2); 157EXPORT_SYMBOL(__bswapdi2);
158#ifdef __BIG_ENDIAN__
159EXPORT_SYMBOL(memcpy); 158EXPORT_SYMBOL(memcpy);
160#endif
161EXPORT_SYMBOL(memset); 159EXPORT_SYMBOL(memset);
162EXPORT_SYMBOL(memmove); 160EXPORT_SYMBOL(memmove);
163EXPORT_SYMBOL(memcmp); 161EXPORT_SYMBOL(memcmp);
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 95a20e17dbff..59fa2de9546d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o
23obj-$(CONFIG_PPC64) += checksum_wrappers_64.o 23obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
24endif 24endif
25 25
26ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
27obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o 26obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
28endif
29 27
30obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o 28obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
31 29
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 72ad055168a3..dc4ba7953b92 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,12 +12,27 @@
12 .align 7 12 .align 7
13_GLOBAL(memcpy) 13_GLOBAL(memcpy)
14BEGIN_FTR_SECTION 14BEGIN_FTR_SECTION
15#ifdef __LITTLE_ENDIAN__
16 cmpdi cr7,r5,0
17#else
15 std r3,48(r1) /* save destination pointer for return value */ 18 std r3,48(r1) /* save destination pointer for return value */
19#endif
16FTR_SECTION_ELSE 20FTR_SECTION_ELSE
17#ifndef SELFTEST 21#ifndef SELFTEST
18 b memcpy_power7 22 b memcpy_power7
19#endif 23#endif
20ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) 24ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
25#ifdef __LITTLE_ENDIAN__
26 /* dumb little-endian memcpy that will get replaced at runtime */
27 addi r9,r3,-1
28 addi r4,r4,-1
29 beqlr cr7
30 mtctr r5
311: lbzu r10,1(r4)
32 stbu r10,1(r9)
33 bdnz 1b
34 blr
35#else
21 PPC_MTOCRF(0x01,r5) 36 PPC_MTOCRF(0x01,r5)
22 cmpldi cr1,r5,16 37 cmpldi cr1,r5,16
23 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry 38 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
@@ -203,3 +218,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
203 stb r0,0(r3) 218 stb r0,0(r3)
2044: ld r3,48(r1) /* return dest pointer */ 2194: ld r3,48(r1) /* return dest pointer */
205 blr 220 blr
221#endif