diff options
| author | Anton Blanchard <anton@samba.org> | 2010-02-10 13:07:54 -0500 |
|---|---|---|
| committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2010-02-16 22:03:16 -0500 |
| commit | 63e6c5b8102af7df7a5e1cebbd865d711645886a (patch) | |
| tree | 8dd79a95cf5265b6e1d88bed1493c0b952d23992 | |
| parent | 5a0e9b5718d921f5d8e17176d6b483f6b8f1844a (diff) | |
powerpc: Pair loads and stores in copy_4k_page
A number of our chips prefer loads and stores to be paired. The small kernel
module test case below shows the improvement from pairing loads and stores in
copy_4K_page:
POWER6: +9%
POWER7: +1.5%
#include <linux/module.h>
#include <linux/mm.h>
#define ITERATIONS 10000000
/*
 * Module init: call copy_4K_page() ITERATIONS times between two freshly
 * allocated pages and log the elapsed time, as measured by
 * getnstimeofday(), at KERN_DEBUG level.
 *
 * Both pages are released before returning, so the module holds no
 * resources once init completes.
 *
 * Returns 0 on success, or -ENOMEM if either page cannot be allocated.
 */
static int __init copypage_init(void)
{
	struct timespec before, after;
	unsigned long i;
	struct page *destpage, *srcpage;
	char *dest, *src;
	int ret = 0;

	destpage = alloc_page(GFP_KERNEL);
	if (!destpage)
		return -ENOMEM;

	srcpage = alloc_page(GFP_KERNEL);
	if (!srcpage) {
		ret = -ENOMEM;
		goto out_free_dest;
	}

	dest = page_address(destpage);
	src = page_address(srcpage);

	getnstimeofday(&before);
	for (i = 0; i < ITERATIONS; i++)
		copy_4K_page(dest, src);
	getnstimeofday(&after);

	/* The difference is computed in signed long; cast to match %lu. */
	printk(KERN_DEBUG "copy_4K_page loop took %lu ns\n",
	       (unsigned long)((after.tv_sec - before.tv_sec) * NSEC_PER_SEC +
			       (after.tv_nsec - before.tv_nsec)));

	__free_page(srcpage);
out_free_dest:
	__free_page(destpage);
	return ret;
}
/* Module exit hook; intentionally a no-op. */
static void __exit copypage_exit(void)
{
	/* The init routine frees both pages itself, so nothing is left to undo. */
}
module_init(copypage_init)
module_exit(copypage_exit)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Anton Blanchard");
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
| -rw-r--r-- | arch/powerpc/lib/copypage_64.S | 28 |
1 files changed, 14 insertions, 14 deletions
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index e68beac0a171..4d4eeb900486 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S | |||
| @@ -43,62 +43,62 @@ END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) | |||
| 43 | ld r7,16(r4) | 43 | ld r7,16(r4) |
| 44 | ldu r8,24(r4) | 44 | ldu r8,24(r4) |
| 45 | 1: std r5,8(r3) | 45 | 1: std r5,8(r3) |
| 46 | ld r9,8(r4) | ||
| 47 | std r6,16(r3) | 46 | std r6,16(r3) |
| 47 | ld r9,8(r4) | ||
| 48 | ld r10,16(r4) | 48 | ld r10,16(r4) |
| 49 | std r7,24(r3) | 49 | std r7,24(r3) |
| 50 | ld r11,24(r4) | ||
| 51 | std r8,32(r3) | 50 | std r8,32(r3) |
| 51 | ld r11,24(r4) | ||
| 52 | ld r12,32(r4) | 52 | ld r12,32(r4) |
| 53 | std r9,40(r3) | 53 | std r9,40(r3) |
| 54 | ld r5,40(r4) | ||
| 55 | std r10,48(r3) | 54 | std r10,48(r3) |
| 55 | ld r5,40(r4) | ||
| 56 | ld r6,48(r4) | 56 | ld r6,48(r4) |
| 57 | std r11,56(r3) | 57 | std r11,56(r3) |
| 58 | ld r7,56(r4) | ||
| 59 | std r12,64(r3) | 58 | std r12,64(r3) |
| 59 | ld r7,56(r4) | ||
| 60 | ld r8,64(r4) | 60 | ld r8,64(r4) |
| 61 | std r5,72(r3) | 61 | std r5,72(r3) |
| 62 | ld r9,72(r4) | ||
| 63 | std r6,80(r3) | 62 | std r6,80(r3) |
| 63 | ld r9,72(r4) | ||
| 64 | ld r10,80(r4) | 64 | ld r10,80(r4) |
| 65 | std r7,88(r3) | 65 | std r7,88(r3) |
| 66 | ld r11,88(r4) | ||
| 67 | std r8,96(r3) | 66 | std r8,96(r3) |
| 67 | ld r11,88(r4) | ||
| 68 | ld r12,96(r4) | 68 | ld r12,96(r4) |
| 69 | std r9,104(r3) | 69 | std r9,104(r3) |
| 70 | ld r5,104(r4) | ||
| 71 | std r10,112(r3) | 70 | std r10,112(r3) |
| 71 | ld r5,104(r4) | ||
| 72 | ld r6,112(r4) | 72 | ld r6,112(r4) |
| 73 | std r11,120(r3) | 73 | std r11,120(r3) |
| 74 | ld r7,120(r4) | ||
| 75 | stdu r12,128(r3) | 74 | stdu r12,128(r3) |
| 75 | ld r7,120(r4) | ||
| 76 | ldu r8,128(r4) | 76 | ldu r8,128(r4) |
| 77 | bdnz 1b | 77 | bdnz 1b |
| 78 | 78 | ||
| 79 | std r5,8(r3) | 79 | std r5,8(r3) |
| 80 | ld r9,8(r4) | ||
| 81 | std r6,16(r3) | 80 | std r6,16(r3) |
| 81 | ld r9,8(r4) | ||
| 82 | ld r10,16(r4) | 82 | ld r10,16(r4) |
| 83 | std r7,24(r3) | 83 | std r7,24(r3) |
| 84 | ld r11,24(r4) | ||
| 85 | std r8,32(r3) | 84 | std r8,32(r3) |
| 85 | ld r11,24(r4) | ||
| 86 | ld r12,32(r4) | 86 | ld r12,32(r4) |
| 87 | std r9,40(r3) | 87 | std r9,40(r3) |
| 88 | ld r5,40(r4) | ||
| 89 | std r10,48(r3) | 88 | std r10,48(r3) |
| 89 | ld r5,40(r4) | ||
| 90 | ld r6,48(r4) | 90 | ld r6,48(r4) |
| 91 | std r11,56(r3) | 91 | std r11,56(r3) |
| 92 | ld r7,56(r4) | ||
| 93 | std r12,64(r3) | 92 | std r12,64(r3) |
| 93 | ld r7,56(r4) | ||
| 94 | ld r8,64(r4) | 94 | ld r8,64(r4) |
| 95 | std r5,72(r3) | 95 | std r5,72(r3) |
| 96 | ld r9,72(r4) | ||
| 97 | std r6,80(r3) | 96 | std r6,80(r3) |
| 97 | ld r9,72(r4) | ||
| 98 | ld r10,80(r4) | 98 | ld r10,80(r4) |
| 99 | std r7,88(r3) | 99 | std r7,88(r3) |
| 100 | ld r11,88(r4) | ||
| 101 | std r8,96(r3) | 100 | std r8,96(r3) |
| 101 | ld r11,88(r4) | ||
| 102 | ld r12,96(r4) | 102 | ld r12,96(r4) |
| 103 | std r9,104(r3) | 103 | std r9,104(r3) |
| 104 | std r10,112(r3) | 104 | std r10,112(r3) |
