Diffstat (limited to 'arch/arm/lib')

 arch/arm/lib/copy_template.S | 12 ++----------
 arch/arm/lib/memmove.S       | 14 +++-----------
 arch/arm/lib/memset.S        | 46 ++++++++++++++++++++++++++++++++++++++++++++++++++
 arch/arm/lib/memzero.S       | 44 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 21 deletions(-)
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index cab355c0c1f7..139cce646055 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -13,14 +13,6 @@
  */
 
 /*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
-/*
  * Theory of operation
  * -------------------
  *
@@ -82,7 +74,7 @@
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	r3, ip, #32		)	
 	CALGN(	sbcnes	r4, r3, r2		)	@ C is always set here
 	CALGN(	bcs	2f			)
@@ -168,7 +160,7 @@
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	sbcnes	r4, ip, r2		)	@ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
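
Both copy_template.S hunks change the CALGN probe from r1 (the source pointer) to r0 (the destination). That is consistent with the removed comment: aligning the source bought nothing on StrongARM and XScale, while the new memset/__memzero code below aligns the destination so that whole 32-byte cache lines are written in one burst. The per-file CALGN definitions are dropped here, presumably in favour of a shared one (memmove.S already includes <asm/assembler.h>). A minimal C sketch of what the "ands ip, r0, #31" / "rsb r3, ip, #32" pair computes; bytes_to_cacheline() is a hypothetical name, and the real code uses the flags set by ands to skip the already-aligned case instead of returning 32:

#include <stdint.h>

/* Hypothetical helper mirroring "ands ip, r0, #31; rsb r3, ip, #32":
 * distance from dst up to the next 32-byte cache line boundary. */
static inline uint32_t bytes_to_cacheline(const void *dst)
{
	uint32_t off = (uintptr_t)dst & 31;	/* ands ip, r0, #31 */
	return 32 - off;			/* rsb r3, ip, #32 (yields 32 when already aligned) */
}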
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index ef7fddc14ac9..2e301b7bd8f1 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
 .text
 
 /*
@@ -55,11 +47,12 @@ ENTRY(memmove)
 		stmfd	sp!, {r5 - r8}
 		blt	5f
 
-	CALGN(	ands	ip, r1, #31		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)	@ C is always set here
 	CALGN(	bcs	2f			)
 	CALGN(	adr	r4, 6f			)
 	CALGN(	subs	r2, r2, ip		)	@ C is set here
+	CALGN(	rsb	ip, ip, #32		)
 	CALGN(	add	pc, r4, ip		)
 
 	PLD(	pld	[r1, #-4]		)
@@ -138,8 +131,7 @@ ENTRY(memmove)
 		subs	r2, r2, #28
 		blt	14f
 
-	CALGN(	ands	ip, r1, #31		)
-	CALGN(	rsb	ip, ip, #32		)
+	CALGN(	ands	ip, r0, #31		)
 	CALGN(	sbcnes	r4, ip, r2		)	@ C is always set here
 	CALGN(	subcc	r2, r2, ip		)
 	CALGN(	bcc	15f			)
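
One extra subtlety in memmove: this routine copies downwards, so the number of bytes to store before the descending destination hits a 32-byte boundary is addr & 31 itself, not 32 - (addr & 31). The first hunk therefore moves the rsb to after "subs r2, r2, ip", where it only serves the computed-goto offset of "add pc, r4, ip", and the second hunk drops the rsb entirely. A one-line sketch of the descending-case arithmetic, with a hypothetical helper name:

#include <stdint.h>

/* Bytes a descending store pointer must cover before it reaches a
 * 32-byte boundary; mirrors "ands ip, r0, #31" in the memmove hunks. */
static inline uint32_t bytes_to_cacheline_down(const void *dst_end)
{
	return (uintptr_t)dst_end & 31;
}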
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 95b110b07a89..b477d4ac88ef 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -39,6 +39,9 @@ ENTRY(memset)
 	mov	r3, r1
 	cmp	r2, #16
 	blt	4f
+
+#if ! CALGN(1)+0
+
 /*
  * We need an extra register for this loop - save the return address and
  * use the LR
@@ -64,6 +67,49 @@ ENTRY(memset)
 	stmneia	r0!, {r1, r3, ip, lr}
 	ldr	lr, [sp], #4
 
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+	stmfd	sp!, {r4-r7, lr}
+	mov	r4, r1
+	mov	r5, r1
+	mov	r6, r1
+	mov	r7, r1
+	mov	ip, r1
+	mov	lr, r1
+
+	cmp	r2, #96
+	tstgt	r0, #31
+	ble	3f
+
+	and	ip, r0, #31
+	rsb	ip, ip, #32
+	sub	r2, r2, ip
+	movs	ip, ip, lsl #(32 - 4)
+	stmcsia	r0!, {r4, r5, r6, r7}
+	stmmiia	r0!, {r4, r5}
+	tst	ip, #(1 << 30)
+	mov	ip, r1
+	strne	r1, [r0], #4
+
+3:	subs	r2, r2, #64
+	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	stmgeia	r0!, {r1, r3-r7, ip, lr}
+	bgt	3b
+	ldmeqfd	sp!, {r4-r7, pc}
+
+	tst	r2, #32
+	stmneia	r0!, {r1, r3-r7, ip, lr}
+	tst	r2, #16
+	stmneia	r0!, {r4-r7}
+	ldmfd	sp!, {r4-r7, lr}
+
+#endif
+
 4:	tst	r2, #8
 	stmneia	r0!, {r1, r3}
 	tst	r2, #4
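
Two tricks in the memset hunks deserve spelling out. First, the preprocessor guard: with "#define CALGN(code...) code" the expression "! CALGN(1)+0" evaluates to !1+0 == 0, so the original loop is compiled out and the #else branch is used; with the empty "#define CALGN(code...)" it becomes !0 == 1 and the original code stays. Second, the alignment head avoids compares entirely: after "movs ip, ip, lsl #(32 - 4)" the carry flag holds bit 4 of the gap to the next cache line and the sign flag holds bit 3, so stmcsia stores 16 bytes and stmmiia stores 8 without a single branch, and "tst ip, #(1 << 30)" picks up what was bit 2 for the final 4-byte store. A C model of that head, under the assumption (true at this point in the routine) that r0 is already word-aligned; memset_align_head() and its interface are illustrative only:

#include <stdint.h>
#include <string.h>

/* Hypothetical C model of the CALGN head; the real code keeps
 * everything in registers and dispatches on the shifted flags. */
static uint8_t *memset_align_head(uint8_t *dst, uint8_t c, uint32_t *n)
{
	/* cmp r2, #96; tstgt r0, #31; ble 3f */
	if (*n > 96 && ((uintptr_t)dst & 31)) {
		uint32_t gap = 32 - ((uintptr_t)dst & 31);	/* 4..28, multiple of 4 */
		*n -= gap;					/* sub r2, r2, ip */
		if (gap & 16) { memset(dst, c, 16); dst += 16; }	/* C flag -> stmcsia */
		if (gap & 8)  { memset(dst, c, 8);  dst += 8;  }	/* N flag -> stmmiia */
		if (gap & 4)  { memset(dst, c, 4);  dst += 4;  }	/* bit 30 -> strne   */
	}
	return dst;	/* now on a 32-byte boundary for the block-store loop */
}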
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index abf2508e8221..b8f79d80ee9b 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -39,6 +39,9 @@ ENTRY(__memzero)
  */
 	cmp	r1, #16			@ 1 we can skip this chunk if we
 	blt	4f			@ 1 have < 16 bytes
+
+#if ! CALGN(1)+0
+
 /*
  * We need an extra register for this loop - save the return address and
  * use the LR
@@ -64,6 +67,47 @@ ENTRY(__memzero)
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	ldr	lr, [sp], #4		@ 1
 
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+	stmfd	sp!, {r4-r7, lr}
+	mov	r4, r2
+	mov	r5, r2
+	mov	r6, r2
+	mov	r7, r2
+	mov	ip, r2
+	mov	lr, r2
+
+	cmp	r1, #96
+	andgts	ip, r0, #31
+	ble	3f
+
+	rsb	ip, ip, #32
+	sub	r1, r1, ip
+	movs	ip, ip, lsl #(32 - 4)
+	stmcsia	r0!, {r4, r5, r6, r7}
+	stmmiia	r0!, {r4, r5}
+	movs	ip, ip, lsl #2
+	strcs	r2, [r0], #4
+
+3:	subs	r1, r1, #64
+	stmgeia	r0!, {r2-r7, ip, lr}
+	stmgeia	r0!, {r2-r7, ip, lr}
+	bgt	3b
+	ldmeqfd	sp!, {r4-r7, pc}
+
+	tst	r1, #32
+	stmneia	r0!, {r2-r7, ip, lr}
+	tst	r1, #16
+	stmneia	r0!, {r4-r7}
+	ldmfd	sp!, {r4-r7, lr}
+
+#endif
+
 4:	tst	r1, #8			@ 1 8 bytes or more?
 	stmneia	r0!, {r2, r3}		@ 2
 	tst	r1, #4			@ 1 4 bytes or more?
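
__memzero's copy of the head is slightly tighter than memset's. "andgts ip, r0, #31" folds the alignment test into the size check: the and only executes when r1 > 96 (gt after "cmp r1, #96"), and its flags drive the "ble 3f". And instead of memset's "tst ip, #(1 << 30)" plus the "mov ip, r1" reload, the second "movs ip, ip, lsl #2" does double duty: it shifts what was bit 2 of the gap into the carry for strcs, and since the gap is a multiple of 4 here (r0 is word-aligned) the only bits left in ip are zero, so ip already holds the zero value the following block stores expect. The aligned body is then the same shape in both routines: 64 bytes per iteration from two 8-register stores, followed by 32- and 16-byte tail peeling. A C model of that body, with zero_aligned_body() as a hypothetical name and dst assumed 32-byte aligned:

#include <stdint.h>
#include <string.h>

/* Hypothetical C model of the aligned CALGN store loop. */
static void zero_aligned_body(uint8_t *dst, uint32_t n)
{
	while (n >= 64) {	/* subs r1, r1, #64; 2 x stmgeia; bgt 3b */
		memset(dst, 0, 64);	/* two 8-register stores = 64 bytes */
		dst += 64;
		n -= 64;
	}
	if (n & 32) { memset(dst, 0, 32); dst += 32; }	/* tst r1, #32 */
	if (n & 16) { memset(dst, 0, 16); dst += 16; }	/* tst r1, #16 */
	/* the remaining 0..15 bytes fall through to the existing "4:" tail */
}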