author		Nicolas Pitre <nico@cam.org>		2008-03-31 12:38:31 -0400
committer	Lennert Buytenhek <buytenh@marvell.com>	2008-06-22 16:44:38 -0400
commit		2239aff6ab2b95af1f628eee7a809f21c41605b3 (patch)
tree		fd940074a312d252976da05f7e4457c446e14027
parent		4c4925c1f4ccd72002957c3e73b4f117f2bcf712 (diff)
[ARM] cache align destination pointer when copying memory for some processors
The implementation for memory copy functions on ARM had a (disabled)
provision for aligning the source pointer before loading registers with
data.  Turns out that aligning the _destination_ pointer is much more
useful, as the read side is already sufficiently helped with the use of
preload.

So this changes the definition of the CALGN() macro to target the
destination pointer instead, and turns it on for Feroceon processors
where the gain is very noticeable.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
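For readers less familiar with the assembly, the destination-alignment idea
can be sketched in C roughly as follows.  This is illustrative only and not
part of the patch; CACHE_LINE and copy_dst_aligned() are invented names, and
the real code does the bulk loop with ldm/stm in assembly.

/*
 * Illustrative sketch only, not kernel code: copy just enough bytes for
 * the destination to reach a 32-byte cache line boundary, then do the
 * bulk of the copy with an aligned destination.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define CACHE_LINE 32	/* Feroceon cache line size in bytes */

static void copy_dst_aligned(void *dst, const void *src, size_t n)
{
	uint8_t *d = dst;
	const uint8_t *s = src;

	/* Bytes needed for d to reach the next cache line boundary. */
	size_t head = (CACHE_LINE - ((uintptr_t)d & (CACHE_LINE - 1))) & (CACHE_LINE - 1);

	if (head > n)
		head = n;

	/* Byte-copy the head so the destination becomes 32-byte aligned. */
	for (size_t i = 0; i < head; i++)
		d[i] = s[i];

	/* Bulk copy with an aligned destination; memcpy() stands in for the
	 * optimised load-multiple/store-multiple loop of the assembly. */
	memcpy(d + head, s + head, n - head);
}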
-rw-r--r--	arch/arm/lib/copy_template.S	| 12
-rw-r--r--	arch/arm/lib/memmove.S		| 12
-rw-r--r--	include/asm-arm/assembler.h	| 15
3 files changed, 19 insertions(+), 20 deletions(-)
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index cab355c0c1f7..139cce646055 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -13,14 +13,6 @@
  */
 
 /*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do.  That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
-/*
  * Theory of operation
  * -------------------
  *
@@ -82,7 +74,7 @@
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	r3, ip, #32		)
 	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
@@ -168,7 +160,7 @@
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
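The CALGN() blocks in the hunks above decide whether the alignment pass is
worth doing at all.  A rough C interpretation of that decision is sketched
below, assuming the usual memcpy register convention (r0 = dst, r1 = src,
r2 = remaining count); should_align_dst() is an invented name, not kernel code.

#include <stddef.h>
#include <stdint.h>

static int should_align_dst(uintptr_t dst, size_t remaining)
{
	size_t misalign = dst & 31;		/* ands	ip, r0, #31 */
	size_t to_boundary = 32 - misalign;	/* rsb	r3, ip, #32 */

	if (misalign == 0)			/* Z set: already aligned */
		return 0;
	if (to_boundary >= remaining)		/* sbcnes/bcs: too little data left */
		return 0;

	/*
	 * Worth it: consume `to_boundary` bytes first so that every
	 * following stm writes to a cache line aligned destination.
	 */
	return 1;
}

The sbcnes/bcs pair relies on C still being set from the earlier subs (hence
the "@ C is always set here" comment), so the subtract-with-carry behaves as a
plain compare of the alignment distance against the remaining count.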
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 018522c3ff26..2e301b7bd8f1 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do.  That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
 		.text
 
 /*
@@ -55,7 +47,7 @@ ENTRY(memmove)
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
@@ -139,7 +131,7 @@ ENTRY(memmove)
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h
index fce832820825..911393b2c6f0 100644
--- a/include/asm-arm/assembler.h
+++ b/include/asm-arm/assembler.h
@@ -56,6 +56,21 @@
 #endif
 
 /*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory.  Experiments on StrongARM and
+ * XScale didn't show this a worthwhile thing to do when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
+/*
  * Enable and disable interrupts
  */
 #if __LINUX_ARM_ARCH__ >= 6
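As a usage note, the effect of the new macro can be seen by expanding one of
the wrapped lines from the copy routines; the snippet below simply mirrors the
hunk above for illustration and adds no new functionality.

/*
 * Wrapped instructions are emitted only on Feroceon builds and compile
 * away entirely everywhere else.
 */
#ifdef CONFIG_CPU_FEROCEON
#define CALGN(code...) code	/* CALGN( ands ip, r0, #31 ) -> ands ip, r0, #31 */
#else
#define CALGN(code...)		/* CALGN( ands ip, r0, #31 ) -> nothing */
#endif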