Diffstat (limited to 'arch/arm/lib')
-rw-r--r--	arch/arm/lib/copy_template.S	12
-rw-r--r--	arch/arm/lib/memmove.S		14
-rw-r--r--	arch/arm/lib/memset.S		46
-rw-r--r--	arch/arm/lib/memzero.S		44
4 files changed, 95 insertions(+), 21 deletions(-)
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index cab355c0c1f7..139cce646055 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -13,14 +13,6 @@
  */
 
 /*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
-/*
  * Theory of operation
  * -------------------
 
@@ -82,7 +74,7 @@
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	r3, ip, #32		)
 	CALGN(	sbcnes	r4, r3, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
@@ -168,7 +160,7 @@
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
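Taken together, the three hunks make a two-part change: the per-file CALGN() definition disappears (memset.S and memzero.S below test CALGN(1) without defining it, so the macro is presumably provided by a shared header now, though that file is not in this diff), and the cache-alignment probe moves from the source pointer (r1) to the destination (r0). Aligning the destination is the more profitable choice on write-allocate caches, since whole-line stores can avoid fetching the line first. A minimal C sketch of the head-count arithmetic behind `ands ip, r0, #31` / `rsb ip, ip, #32` in the forward path (helper name hypothetical, 32-byte lines assumed):

	#include <stdint.h>

	#define CACHE_LINE 32u

	/* Bytes to store before dst rises to the next cache-line boundary;
	 * mirrors "ands ip, r0, #31" then "rsb ip, ip, #32", with the
	 * already-aligned case handled in the asm by conditional execution. */
	static inline uint32_t head_to_align(uintptr_t dst)
	{
		uint32_t off = dst & (CACHE_LINE - 1);
		return off ? CACHE_LINE - off : 0;
	}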
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index ef7fddc14ac9..2e301b7bd8f1 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...)	code
-#define CALGN(code...)
-
 	.text
 
 /*
@@ -55,11 +47,12 @@ ENTRY(memmove)
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)  @ C is set here
+	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	add	pc, r4, ip		)
 
 	PLD(	pld	[r1, #-4]		)
@@ -138,8 +131,7 @@ ENTRY(memmove)
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
-	CALGN(	rsb	ip, ip, #32		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)  @ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
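memmove's overlapping path copies downward from the end of the buffer (r0 holds dst + count at this point), so the distance to the previous cache-line boundary is simply `dst & 31`; the `32 - x` correction of the forward path does not apply. That seems to be why the second hunk drops the `rsb` outright, while the first hunk keeps one only to scale the offset fed to the computed branch `add pc, r4, ip`. A companion to head_to_align() above (again a hypothetical name, same 32-byte assumption):

	#include <stdint.h>

	/* Bytes to store before the end pointer falls to the previous
	 * 32-byte boundary in a backward copy: the low bits directly. */
	static inline uint32_t head_to_align_down(uintptr_t dst_end)
	{
		return dst_end & 31u;
	}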
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 95b110b07a89..b477d4ac88ef 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -39,6 +39,9 @@ ENTRY(memset)
 	mov	r3, r1
 	cmp	r2, #16
 	blt	4f
+
+#if ! CALGN(1)+0
+
 /*
  * We need an extra register for this loop - save the return address and
  * use the LR
@@ -64,6 +67,49 @@ ENTRY(memset)
 	stmneia	r0!, {r1, r3, ip, lr}
 	ldr	lr, [sp], #4
 
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+	stmfd	sp!, {r4-r7, lr}
+	mov	r4, r1
+	mov	r5, r1
+	mov	r6, r1
+	mov	r7, r1
+	mov	ip, r1
+	mov	lr, r1
+
+	cmp	r2, #96
+	tstgt	r0, #31
+	ble	3f
+
+	and	ip, r0, #31
+	rsb	ip, ip, #32
+	sub	r2, r2, ip
+	movs	ip, ip, lsl #(32 - 4)
+	stmcsia	r0!, {r4, r5, r6, r7}
+	stmmiia	r0!, {r4, r5}
+	tst	ip, #(1 << 30)
+	mov	ip, r1
+	strne	r1, [r0], #4
+
+3:	subs	r2, r2, #64
+	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	bgt	3b
+	ldmeqfd	sp!, {r4-r7, pc}
+
+	tst	r2, #32
+	stmneia	r0!, {r1, r3-r7, ip, lr}
+	tst	r2, #16
+	stmneia	r0!, {r4-r7}
+	ldmfd	sp!, {r4-r7, lr}
+
+#endif
+
 4:	tst	r2, #8
 	stmneia	r0!, {r1, r3}
 	tst	r2, #4
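The new memset path preloads eight registers with the fill pattern so each stmia writes a full 32-byte line, and takes the alignment detour only when it can pay off (more than 96 bytes and a misaligned destination). The `movs ip, ip, lsl #(32 - 4)` is a flag trick: shifting the 0-31 head count left by 28 drops its bit 4 into C and bit 3 into N, so the following stmcsia/stmmiia store the 16- and 8-byte head chunks without branches, and the later `tst ip, #(1 << 30)` recovers bit 2 for the 4-byte word. Since memset's entry code (not shown in this hunk) has already word-aligned r0, those three chunks cover every possible head count. A rough C model of the resulting control flow, not the kernel's code (byte-wise memset() stands in for the wide stores):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	static void *memset_calgn_model(void *p, int c, size_t n)
	{
		uint8_t *d = p;

		if (n > 96 && ((uintptr_t)d & 31)) {
			size_t head = 32 - ((uintptr_t)d & 31);
			memset(d, c, head);	/* asm: stmcsia/stmmiia/strne */
			d += head;
			n -= head;
		}
		while (n >= 64) {		/* asm: two stmgeia per pass */
			memset(d, c, 64);
			d += 64;
			n -= 64;
		}
		if (n & 32) { memset(d, c, 32); d += 32; }
		if (n & 16) { memset(d, c, 16); d += 16; }
		memset(d, c, n & 15);		/* shared tail at label 4 */
		return p;
	}

memzero below plays the same game, except that it recovers the 4-byte bit with a second `movs ip, ip, lsl #2` into the carry flag rather than a tst.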
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index abf2508e8221..b8f79d80ee9b 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -39,6 +39,9 @@ ENTRY(__memzero)
  */
 	cmp	r1, #16			@ 1 we can skip this chunk if we
 	blt	4f			@ 1 have < 16 bytes
+
+#if ! CALGN(1)+0
+
 /*
  * We need an extra register for this loop - save the return address and
  * use the LR
@@ -64,6 +67,47 @@ ENTRY(__memzero)
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	ldr	lr, [sp], #4		@ 1
 
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+	stmfd	sp!, {r4-r7, lr}
+	mov	r4, r2
+	mov	r5, r2
+	mov	r6, r2
+	mov	r7, r2
+	mov	ip, r2
+	mov	lr, r2
+
+	cmp	r1, #96
+	andgts	ip, r0, #31
+	ble	3f
+
+	rsb	ip, ip, #32
+	sub	r1, r1, ip
+	movs	ip, ip, lsl #(32 - 4)
+	stmcsia	r0!, {r4, r5, r6, r7}
+	stmmiia	r0!, {r4, r5}
+	movs	ip, ip, lsl #2
+	strcs	r2, [r0], #4
+
+3:	subs	r1, r1, #64
+	stmgeia	r0!, {r2-r7, ip, lr}
+	stmgeia	r0!, {r2-r7, ip, lr}
+	bgt	3b
+	ldmeqfd	sp!, {r4-r7, pc}
+
+	tst	r1, #32
+	stmneia	r0!, {r2-r7, ip, lr}
+	tst	r1, #16
+	stmneia	r0!, {r4-r7}
+	ldmfd	sp!, {r4-r7, lr}
+
+#endif
+
 4:	tst	r1, #8			@ 1 8 bytes or more?
 	stmneia	r0!, {r2, r3}		@ 2
 	tst	r1, #4			@ 1 4 bytes or more?
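A final note on the `#if ! CALGN(1)+0` guards: they let the same macro act as both a code filter and a feature test. When CALGN(code...) expands to its argument, the expression reads `! 1+0`, i.e. 0, and the cache-aligning branch after #else is assembled; when it expands to nothing, it reads `! +0`, i.e. 1, and the original code is kept. A standalone demonstration (GNU C, since `code...` is a named variadic macro):

	#include <stdio.h>

	//#define CALGN(code...) code	/* enable cache alignment */
	#define CALGN(code...)		/* disable it */

	int main(void)
	{
	#if ! CALGN(1)+0
		puts("plain path: CALGN() is empty, \"! +0\" evaluates to 1");
	#else
		puts("aligning path: CALGN(1) is 1, \"! 1+0\" evaluates to 0");
	#endif
		return 0;
	}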