aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/lib/memcpy_64.S
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2012-05-30 16:22:09 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-07-03 00:14:46 -0400
commitb3f271e86e5a440713716bb222e1aa1227994c50 (patch)
treea215f44357cf20c9f2b342a7bdb840bb85ca4cb1 /arch/powerpc/lib/memcpy_64.S
parentbce4b4bd91efab9dca693ac37c8ddf88103280d8 (diff)
powerpc: POWER7 optimised memcpy using VMX and enhanced prefetch
Implement a POWER7 optimised memcpy using VMX and enhanced prefetch instructions. This is a copy of the POWER7 optimised copy_to_user/copy_from_user loop. Detailed implementation and performance details can be found in commit a66086b8197d (powerpc: POWER7 optimised copy_to_user/copy_from_user using VMX). I noticed memcpy issues when profiling a RAID6 workload: .memcpy .async_memcpy .async_copy_data .__raid_run_ops .handle_stripe .raid5d .md_thread I created a simplified testcase by building a RAID6 array with 4 1GB ramdisks (booting with brd.rd_size=1048576): # mdadm -CR -e 1.2 /dev/md0 --level=6 -n4 /dev/ram[0-3] I then timed how long it took to write to the entire array: # dd if=/dev/zero of=/dev/md0 bs=1M Before: 892 MB/s After: 999 MB/s A 12% improvement. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/lib/memcpy_64.S')
-rw-r--r--arch/powerpc/lib/memcpy_64.S4
1 files changed, 4 insertions, 0 deletions
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 82fea3963e1..d2bbbc8d7dc 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -11,7 +11,11 @@
11 11
12 .align 7 12 .align 7
13_GLOBAL(memcpy) 13_GLOBAL(memcpy)
14BEGIN_FTR_SECTION
14 std r3,48(r1) /* save destination pointer for return value */ 15 std r3,48(r1) /* save destination pointer for return value */
16FTR_SECTION_ELSE
17 b memcpy_power7
18ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
15 PPC_MTOCRF(0x01,r5) 19 PPC_MTOCRF(0x01,r5)
16 cmpldi cr1,r5,16 20 cmpldi cr1,r5,16
17 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry 21 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry