aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/lib
diff options
context:
space:
mode:
author Philippe Bergheaud <felix@linux.vnet.ibm.com> 2014-04-29 19:12:01 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-04-30 01:26:18 -0400
commit 00f554fadebb96877ad449758dc90303a9826afe (patch)
tree 77a9ff9bfbb32f75a18a7f3032ad7d3d55944c48 /arch/powerpc/lib
parent 48ce3b7cc66b31cd8ae1b8155a82426308f71491 (diff)
powerpc: memcpy optimization for 64bit LE
Unaligned stores take alignment exceptions on POWER7 running in little-endian. This is a dumb little-endian base memcpy that prevents unaligned stores. Once booted the feature fixup code switches over to the VMX copy loops (which are already endian safe).

The question is what we do before that switch over. The base 64bit memcpy takes alignment exceptions on POWER7 so we can't use it as is. Fixing the causes of alignment exception would slow it down, because we'd need to ensure all loads and stores are aligned either through rotate tricks or bytewise loads and stores. Either would be bad for all other 64bit platforms.

[ I simplified the loop a bit - Anton ]

Signed-off-by: Philippe Bergheaud <felix@linux.vnet.ibm.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r-- arch/powerpc/lib/Makefile 2
-rw-r--r-- arch/powerpc/lib/memcpy_64.S 16
2 files changed, 16 insertions, 2 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 95a20e17dbff..59fa2de9546d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -23,9 +23,7 @@ obj-y += checksum_$(CONFIG_WORD_SIZE).o
23obj-$(CONFIG_PPC64) += checksum_wrappers_64.o 23obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
24endif 24endif
25 25
26ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),)
27obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o 26obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
28endif
29 27
30obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o 28obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
31 29
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 72ad055168a3..dc4ba7953b92 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,12 +12,27 @@
12 .align 7 12 .align 7
13_GLOBAL(memcpy) 13_GLOBAL(memcpy)
14BEGIN_FTR_SECTION 14BEGIN_FTR_SECTION
15#ifdef __LITTLE_ENDIAN__
16 cmpdi cr7,r5,0
17#else
15 std r3,48(r1) /* save destination pointer for return value */ 18 std r3,48(r1) /* save destination pointer for return value */
19#endif
16FTR_SECTION_ELSE 20FTR_SECTION_ELSE
17#ifndef SELFTEST 21#ifndef SELFTEST
18 b memcpy_power7 22 b memcpy_power7
19#endif 23#endif
20ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) 24ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
25#ifdef __LITTLE_ENDIAN__
26 /* dumb little-endian memcpy that will get replaced at runtime */
27 addi r9,r3,-1
28 addi r4,r4,-1
29 beqlr cr7
30 mtctr r5
311: lbzu r10,1(r4)
32 stbu r10,1(r9)
33 bdnz 1b
34 blr
35#else
21 PPC_MTOCRF(0x01,r5) 36 PPC_MTOCRF(0x01,r5)
22 cmpldi cr1,r5,16 37 cmpldi cr1,r5,16
23 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry 38 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
@@ -203,3 +218,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
203 stb r0,0(r3) 218 stb r0,0(r3)
2044: ld r3,48(r1) /* return dest pointer */ 2194: ld r3,48(r1) /* return dest pointer */
205 blr 220 blr
221#endif