diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/lib/Makefile | 5 | ||||
-rw-r--r-- | arch/arm/lib/copy_template.S | 255 | ||||
-rw-r--r-- | arch/arm/lib/memcpy.S | 410 | ||||
-rw-r--r-- | arch/arm/lib/memmove.S | 206 |
4 files changed, 502 insertions, 374 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index d3d9b21eb7e4..8f9770f1af19 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile | |||
@@ -7,8 +7,9 @@ | |||
7 | lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ | 7 | lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ |
8 | csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ | 8 | csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ |
9 | copy_page.o delay.o findbit.o memchr.o memcpy.o \ | 9 | copy_page.o delay.o findbit.o memchr.o memcpy.o \ |
10 | memset.o memzero.o setbit.o strncpy_from_user.o \ | 10 | memmove.o memset.o memzero.o setbit.o \ |
11 | strnlen_user.o strchr.o strrchr.o testchangebit.o \ | 11 | strncpy_from_user.o strnlen_user.o \ |
12 | strchr.o strrchr.o testchangebit.o \ | ||
12 | testclearbit.o testsetbit.o uaccess.o \ | 13 | testclearbit.o testsetbit.o uaccess.o \ |
13 | getuser.o putuser.o clear_user.o \ | 14 | getuser.o putuser.o clear_user.o \ |
14 | ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ | 15 | ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ |
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S new file mode 100644 index 000000000000..838e435e4922 --- /dev/null +++ b/arch/arm/lib/copy_template.S | |||
@@ -0,0 +1,255 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/lib/copy_template.S | ||
3 | * | ||
4 | * Code template for optimized memory copy functions | ||
5 | * | ||
6 | * Author: Nicolas Pitre | ||
7 | * Created: Sep 28, 2005 | ||
8 | * Copyright: MontaVista Software, Inc. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * This can be used to enable code to cacheline align the source pointer. | ||
17 | * Experiments on tested architectures (StrongARM and XScale) didn't show | ||
18 | * this to be a worthwhile thing to do. That might be different in the future. | ||
19 | */ | ||
20 | //#define CALGN(code...) code | ||
21 | #define CALGN(code...) | ||
22 | |||
23 | /* | ||
24 | * Theory of operation | ||
25 | * ------------------- | ||
26 | * | ||
27 | * This file provides the core code for a forward memory copy used in | ||
28 | * the implementation of memcpy(), copy_to_user() and copy_from_user(). | ||
29 | * | ||
30 | * The including file must define the following accessor macros | ||
31 | * according to the need of the given function: | ||
32 | * | ||
33 | * ldr1w ptr reg abort | ||
34 | * | ||
35 | * This loads one word from 'ptr', stores it in 'reg' and increments | ||
36 | * 'ptr' to the next word. The 'abort' argument is used for fixup tables. | ||
37 | * | ||
38 | * ldr4w ptr reg1 reg2 reg3 reg4 abort | ||
39 | * ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort | ||
40 | * | ||
41 | * This loads four or eight words starting from 'ptr', stores them | ||
42 | * in provided registers and increments 'ptr' past those words. | ||
43 | * The 'abort' argument is used for fixup tables. | ||
44 | * | ||
45 | * ldr1b ptr reg cond abort | ||
46 | * | ||
47 | * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. | ||
48 | * It also must apply the condition code if provided, otherwise the | ||
49 | * "al" condition is assumed by default. | ||
50 | * | ||
51 | * str1w ptr reg abort | ||
52 | * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort | ||
53 | * str1b ptr reg cond abort | ||
54 | * | ||
55 | * Same as their ldr* counterparts, but data is stored to 'ptr' location | ||
56 | * rather than being loaded. | ||
57 | * | ||
58 | * enter reg1 reg2 | ||
59 | * | ||
60 | * Preserve the provided registers on the stack plus any additional | ||
61 | * data as needed by the implementation including this code. Called | ||
62 | * upon code entry. | ||
63 | * | ||
64 | * exit reg1 reg2 | ||
65 | * | ||
66 | * Restore registers with the values previously saved with the | ||
67 | * 'enter' macro. Called upon code termination. | ||
68 | */ | ||
69 | |||
70 | |||
71 | enter r4, lr | ||
72 | |||
73 | subs r2, r2, #4 | ||
74 | blt 8f | ||
75 | ands ip, r0, #3 | ||
76 | PLD( pld [r1, #0] ) | ||
77 | bne 9f | ||
78 | ands ip, r1, #3 | ||
79 | bne 10f | ||
80 | |||
81 | 1: subs r2, r2, #(28) | ||
82 | stmfd sp!, {r5 - r8} | ||
83 | blt 5f | ||
84 | |||
85 | CALGN( ands ip, r1, #31 ) | ||
86 | CALGN( rsb r3, ip, #32 ) | ||
87 | CALGN( sbcnes r4, r3, r2 ) @ C is always set here | ||
88 | CALGN( bcs 2f ) | ||
89 | CALGN( adr r4, 6f ) | ||
90 | CALGN( subs r2, r2, r3 ) @ C gets set | ||
91 | CALGN( add pc, r4, ip ) | ||
92 | |||
93 | PLD( pld [r1, #0] ) | ||
94 | 2: PLD( subs r2, r2, #96 ) | ||
95 | PLD( pld [r1, #28] ) | ||
96 | PLD( blt 4f ) | ||
97 | PLD( pld [r1, #60] ) | ||
98 | PLD( pld [r1, #92] ) | ||
99 | |||
100 | 3: PLD( pld [r1, #124] ) | ||
101 | 4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f | ||
102 | subs r2, r2, #32 | ||
103 | str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f | ||
104 | bge 3b | ||
105 | PLD( cmn r2, #96 ) | ||
106 | PLD( bge 4b ) | ||
107 | |||
108 | 5: ands ip, r2, #28 | ||
109 | rsb ip, ip, #32 | ||
110 | addne pc, pc, ip @ C is always clear here | ||
111 | b 7f | ||
112 | 6: nop | ||
113 | ldr1w r1, r3, abort=20f | ||
114 | ldr1w r1, r4, abort=20f | ||
115 | ldr1w r1, r5, abort=20f | ||
116 | ldr1w r1, r6, abort=20f | ||
117 | ldr1w r1, r7, abort=20f | ||
118 | ldr1w r1, r8, abort=20f | ||
119 | ldr1w r1, lr, abort=20f | ||
120 | |||
121 | add pc, pc, ip | ||
122 | nop | ||
123 | nop | ||
124 | str1w r0, r3, abort=20f | ||
125 | str1w r0, r4, abort=20f | ||
126 | str1w r0, r5, abort=20f | ||
127 | str1w r0, r6, abort=20f | ||
128 | str1w r0, r7, abort=20f | ||
129 | str1w r0, r8, abort=20f | ||
130 | str1w r0, lr, abort=20f | ||
131 | |||
132 | CALGN( bcs 2b ) | ||
133 | |||
134 | 7: ldmfd sp!, {r5 - r8} | ||
135 | |||
136 | 8: movs r2, r2, lsl #31 /* NE <- bit 0 of r2 was set, CS <- bit 1 of r2 was set */ | ||
137 | ldr1b r1, r3, ne, abort=21f /* NE: one trailing byte */ | ||
138 | ldr1b r1, r4, cs, abort=21f /* CS: two more trailing bytes */ | ||
139 | ldr1b r1, ip, cs, abort=21f | ||
140 | str1b r0, r3, ne, abort=21f | ||
141 | str1b r0, r4, cs, abort=21f | ||
142 | str1b r0, ip, cs, abort=21f | ||
143 | |||
144 | exit r4, pc /* undo 'enter r4, lr'; the includer's exit macro defines how this returns */ | ||
145 | |||
146 | 9: rsb ip, ip, #4 | ||
147 | cmp ip, #2 | ||
148 | ldr1b r1, r3, gt, abort=21f | ||
149 | ldr1b r1, r4, ge, abort=21f | ||
150 | ldr1b r1, lr, abort=21f | ||
151 | str1b r0, r3, gt, abort=21f | ||
152 | str1b r0, r4, ge, abort=21f | ||
153 | subs r2, r2, ip | ||
154 | str1b r0, lr, abort=21f | ||
155 | blt 8b | ||
156 | ands ip, r1, #3 | ||
157 | beq 1b | ||
158 | |||
159 | 10: bic r1, r1, #3 | ||
160 | cmp ip, #2 | ||
161 | ldr1w r1, lr, abort=21f | ||
162 | beq 17f | ||
163 | bgt 18f | ||
164 | |||
165 | |||
166 | .macro forward_copy_shift pull push | ||
167 | |||
168 | subs r2, r2, #28 | ||
169 | blt 14f | ||
170 | |||
171 | CALGN( ands ip, r1, #31 ) | ||
172 | CALGN( rsb ip, ip, #32 ) | ||
173 | CALGN( sbcnes r4, ip, r2 ) @ C is always set here | ||
174 | CALGN( subcc r2, r2, ip ) | ||
175 | CALGN( bcc 15f ) | ||
176 | |||
177 | 11: stmfd sp!, {r5 - r9} | ||
178 | |||
179 | PLD( pld [r1, #0] ) | ||
180 | PLD( subs r2, r2, #96 ) | ||
181 | PLD( pld [r1, #28] ) | ||
182 | PLD( blt 13f ) | ||
183 | PLD( pld [r1, #60] ) | ||
184 | PLD( pld [r1, #92] ) | ||
185 | |||
186 | 12: PLD( pld [r1, #124] ) | ||
187 | 13: ldr4w r1, r4, r5, r6, r7, abort=19f | ||
188 | mov r3, lr, pull #\pull | ||
189 | subs r2, r2, #32 | ||
190 | ldr4w r1, r8, r9, ip, lr, abort=19f | ||
191 | orr r3, r3, r4, push #\push | ||
192 | mov r4, r4, pull #\pull | ||
193 | orr r4, r4, r5, push #\push | ||
194 | mov r5, r5, pull #\pull | ||
195 | orr r5, r5, r6, push #\push | ||
196 | mov r6, r6, pull #\pull | ||
197 | orr r6, r6, r7, push #\push | ||
198 | mov r7, r7, pull #\pull | ||
199 | orr r7, r7, r8, push #\push | ||
200 | mov r8, r8, pull #\pull | ||
201 | orr r8, r8, r9, push #\push | ||
202 | mov r9, r9, pull #\pull | ||
203 | orr r9, r9, ip, push #\push | ||
204 | mov ip, ip, pull #\pull | ||
205 | orr ip, ip, lr, push #\push | ||
206 | str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f | ||
207 | bge 12b | ||
208 | PLD( cmn r2, #96 ) | ||
209 | PLD( bge 13b ) | ||
210 | |||
211 | ldmfd sp!, {r5 - r9} | ||
212 | |||
213 | 14: ands ip, r2, #28 | ||
214 | beq 16f | ||
215 | |||
216 | 15: mov r3, lr, pull #\pull | ||
217 | ldr1w r1, lr, abort=21f | ||
218 | subs ip, ip, #4 | ||
219 | orr r3, r3, lr, push #\push | ||
220 | str1w r0, r3, abort=21f | ||
221 | bgt 15b | ||
222 | CALGN( cmp r2, #0 ) | ||
223 | CALGN( bge 11b ) | ||
224 | |||
225 | 16: sub r1, r1, #(\push / 8) | ||
226 | b 8b | ||
227 | |||
228 | .endm | ||
229 | |||
230 | |||
231 | forward_copy_shift pull=8 push=24 | ||
232 | |||
233 | 17: forward_copy_shift pull=16 push=16 | ||
234 | |||
235 | 18: forward_copy_shift pull=24 push=8 | ||
236 | |||
237 | |||
238 | /* | ||
239 | * Abort preamble and completion macros. | ||
240 | * If a fixup handler is required then those macros must surround it. | ||
241 | * It is assumed that the fixup code will handle the private part of | ||
242 | * the exit macro. | ||
243 | */ | ||
244 | |||
245 | .macro copy_abort_preamble | ||
246 | 19: ldmfd sp!, {r5 - r9} /* abort target while r5-r9 are stacked (shifted 8-word loop) */ | ||
247 | b 21f | ||
248 | 20: ldmfd sp!, {r5 - r8} /* abort target while r5-r8 are stacked (aligned copy path) */ | ||
249 | 21: /* abort target when nothing beyond the 'enter' frame is stacked */ | ||
250 | .endm | ||
251 | |||
252 | .macro copy_abort_end | ||
253 | ldmfd sp!, {r4, pc} /* pop r4 and return; assumes the fixup code already handled anything else 'enter' stacked */ | ||
254 | .endm | ||
255 | |||
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index f5a593ceb8cc..7e71d6708a8d 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S | |||
@@ -1,393 +1,59 @@ | |||
1 | /* | 1 | /* |
2 | * linux/arch/arm/lib/memcpy.S | 2 | * linux/arch/arm/lib/memcpy.S |
3 | * | 3 | * |
4 | * Copyright (C) 1995-1999 Russell King | 4 | * Author: Nicolas Pitre |
5 | * Created: Sep 28, 2005 | ||
6 | * Copyright: MontaVista Software, Inc. | ||
5 | * | 7 | * |
6 | * This program is free software; you can redistribute it and/or modify | 8 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 9 | * it under the terms of the GNU General Public License version 2 as |
8 | * published by the Free Software Foundation. | 10 | * published by the Free Software Foundation. |
9 | * | ||
10 | * ASM optimised string functions | ||
11 | */ | 11 | */ |
12 | |||
12 | #include <linux/linkage.h> | 13 | #include <linux/linkage.h> |
13 | #include <asm/assembler.h> | 14 | #include <asm/assembler.h> |
14 | 15 | ||
15 | .text | 16 | .macro ldr1w ptr reg abort |
16 | 17 | ldr \reg, [\ptr], #4 | |
17 | #define ENTER \ | 18 | .endm |
18 | mov ip,sp ;\ | ||
19 | stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\ | ||
20 | sub fp,ip,#4 | ||
21 | |||
22 | #define EXIT \ | ||
23 | LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc}) | ||
24 | |||
25 | #define EXITEQ \ | ||
26 | LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc}) | ||
27 | |||
28 | /* | ||
29 | * Prototype: void memcpy(void *to,const void *from,unsigned long n); | ||
30 | */ | ||
31 | ENTRY(memcpy) | ||
32 | ENTRY(memmove) | ||
33 | ENTER | ||
34 | cmp r1, r0 | ||
35 | bcc 23f | ||
36 | subs r2, r2, #4 | ||
37 | blt 6f | ||
38 | PLD( pld [r1, #0] ) | ||
39 | ands ip, r0, #3 | ||
40 | bne 7f | ||
41 | ands ip, r1, #3 | ||
42 | bne 8f | ||
43 | 19 | ||
44 | 1: subs r2, r2, #8 | 20 | .macro ldr4w ptr reg1 reg2 reg3 reg4 abort |
45 | blt 5f | 21 | ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4} |
46 | subs r2, r2, #20 | 22 | .endm |
47 | blt 4f | ||
48 | PLD( pld [r1, #28] ) | ||
49 | PLD( subs r2, r2, #64 ) | ||
50 | PLD( blt 3f ) | ||
51 | 2: PLD( pld [r1, #60] ) | ||
52 | PLD( pld [r1, #92] ) | ||
53 | ldmia r1!, {r3 - r9, ip} | ||
54 | subs r2, r2, #32 | ||
55 | stmgeia r0!, {r3 - r9, ip} | ||
56 | ldmgeia r1!, {r3 - r9, ip} | ||
57 | subges r2, r2, #32 | ||
58 | stmia r0!, {r3 - r9, ip} | ||
59 | bge 2b | ||
60 | 3: PLD( ldmia r1!, {r3 - r9, ip} ) | ||
61 | PLD( adds r2, r2, #32 ) | ||
62 | PLD( stmgeia r0!, {r3 - r9, ip} ) | ||
63 | PLD( ldmgeia r1!, {r3 - r9, ip} ) | ||
64 | PLD( subges r2, r2, #32 ) | ||
65 | PLD( stmia r0!, {r3 - r9, ip} ) | ||
66 | 4: cmn r2, #16 | ||
67 | ldmgeia r1!, {r3 - r6} | ||
68 | subge r2, r2, #16 | ||
69 | stmgeia r0!, {r3 - r6} | ||
70 | adds r2, r2, #20 | ||
71 | ldmgeia r1!, {r3 - r5} | ||
72 | subge r2, r2, #12 | ||
73 | stmgeia r0!, {r3 - r5} | ||
74 | 5: adds r2, r2, #8 | ||
75 | blt 6f | ||
76 | subs r2, r2, #4 | ||
77 | ldrlt r3, [r1], #4 | ||
78 | ldmgeia r1!, {r4, r5} | ||
79 | subge r2, r2, #4 | ||
80 | strlt r3, [r0], #4 | ||
81 | stmgeia r0!, {r4, r5} | ||
82 | 23 | ||
83 | 6: adds r2, r2, #4 | 24 | .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort |
84 | EXITEQ | 25 | ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} |
85 | cmp r2, #2 | 26 | .endm |
86 | ldrb r3, [r1], #1 | ||
87 | ldrgeb r4, [r1], #1 | ||
88 | ldrgtb r5, [r1], #1 | ||
89 | strb r3, [r0], #1 | ||
90 | strgeb r4, [r0], #1 | ||
91 | strgtb r5, [r0], #1 | ||
92 | EXIT | ||
93 | 27 | ||
94 | 7: rsb ip, ip, #4 | 28 | .macro ldr1b ptr reg cond=al abort |
95 | cmp ip, #2 | 29 | ldr\cond\()b \reg, [\ptr], #1 |
96 | ldrb r3, [r1], #1 | 30 | .endm |
97 | ldrgeb r4, [r1], #1 | ||
98 | ldrgtb r5, [r1], #1 | ||
99 | strb r3, [r0], #1 | ||
100 | strgeb r4, [r0], #1 | ||
101 | strgtb r5, [r0], #1 | ||
102 | subs r2, r2, ip | ||
103 | blt 6b | ||
104 | ands ip, r1, #3 | ||
105 | beq 1b | ||
106 | 31 | ||
107 | 8: bic r1, r1, #3 | 32 | .macro str1w ptr reg abort |
108 | ldr r7, [r1], #4 | 33 | str \reg, [\ptr], #4 |
109 | cmp ip, #2 | 34 | .endm |
110 | bgt 18f | ||
111 | beq 13f | ||
112 | cmp r2, #12 | ||
113 | blt 11f | ||
114 | PLD( pld [r1, #12] ) | ||
115 | sub r2, r2, #12 | ||
116 | PLD( subs r2, r2, #32 ) | ||
117 | PLD( blt 10f ) | ||
118 | PLD( pld [r1, #28] ) | ||
119 | 9: PLD( pld [r1, #44] ) | ||
120 | 10: mov r3, r7, pull #8 | ||
121 | ldmia r1!, {r4 - r7} | ||
122 | subs r2, r2, #16 | ||
123 | orr r3, r3, r4, push #24 | ||
124 | mov r4, r4, pull #8 | ||
125 | orr r4, r4, r5, push #24 | ||
126 | mov r5, r5, pull #8 | ||
127 | orr r5, r5, r6, push #24 | ||
128 | mov r6, r6, pull #8 | ||
129 | orr r6, r6, r7, push #24 | ||
130 | stmia r0!, {r3 - r6} | ||
131 | bge 9b | ||
132 | PLD( cmn r2, #32 ) | ||
133 | PLD( bge 10b ) | ||
134 | PLD( add r2, r2, #32 ) | ||
135 | adds r2, r2, #12 | ||
136 | blt 12f | ||
137 | 11: mov r3, r7, pull #8 | ||
138 | ldr r7, [r1], #4 | ||
139 | subs r2, r2, #4 | ||
140 | orr r3, r3, r7, push #24 | ||
141 | str r3, [r0], #4 | ||
142 | bge 11b | ||
143 | 12: sub r1, r1, #3 | ||
144 | b 6b | ||
145 | 35 | ||
146 | 13: cmp r2, #12 | 36 | .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort |
147 | blt 16f | 37 | stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} |
148 | PLD( pld [r1, #12] ) | 38 | .endm |
149 | sub r2, r2, #12 | ||
150 | PLD( subs r2, r2, #32 ) | ||
151 | PLD( blt 15f ) | ||
152 | PLD( pld [r1, #28] ) | ||
153 | 14: PLD( pld [r1, #44] ) | ||
154 | 15: mov r3, r7, pull #16 | ||
155 | ldmia r1!, {r4 - r7} | ||
156 | subs r2, r2, #16 | ||
157 | orr r3, r3, r4, push #16 | ||
158 | mov r4, r4, pull #16 | ||
159 | orr r4, r4, r5, push #16 | ||
160 | mov r5, r5, pull #16 | ||
161 | orr r5, r5, r6, push #16 | ||
162 | mov r6, r6, pull #16 | ||
163 | orr r6, r6, r7, push #16 | ||
164 | stmia r0!, {r3 - r6} | ||
165 | bge 14b | ||
166 | PLD( cmn r2, #32 ) | ||
167 | PLD( bge 15b ) | ||
168 | PLD( add r2, r2, #32 ) | ||
169 | adds r2, r2, #12 | ||
170 | blt 17f | ||
171 | 16: mov r3, r7, pull #16 | ||
172 | ldr r7, [r1], #4 | ||
173 | subs r2, r2, #4 | ||
174 | orr r3, r3, r7, push #16 | ||
175 | str r3, [r0], #4 | ||
176 | bge 16b | ||
177 | 17: sub r1, r1, #2 | ||
178 | b 6b | ||
179 | 39 | ||
180 | 18: cmp r2, #12 | 40 | .macro str1b ptr reg cond=al abort |
181 | blt 21f | 41 | str\cond\()b \reg, [\ptr], #1 |
182 | PLD( pld [r1, #12] ) | 42 | .endm |
183 | sub r2, r2, #12 | ||
184 | PLD( subs r2, r2, #32 ) | ||
185 | PLD( blt 20f ) | ||
186 | PLD( pld [r1, #28] ) | ||
187 | 19: PLD( pld [r1, #44] ) | ||
188 | 20: mov r3, r7, pull #24 | ||
189 | ldmia r1!, {r4 - r7} | ||
190 | subs r2, r2, #16 | ||
191 | orr r3, r3, r4, push #8 | ||
192 | mov r4, r4, pull #24 | ||
193 | orr r4, r4, r5, push #8 | ||
194 | mov r5, r5, pull #24 | ||
195 | orr r5, r5, r6, push #8 | ||
196 | mov r6, r6, pull #24 | ||
197 | orr r6, r6, r7, push #8 | ||
198 | stmia r0!, {r3 - r6} | ||
199 | bge 19b | ||
200 | PLD( cmn r2, #32 ) | ||
201 | PLD( bge 20b ) | ||
202 | PLD( add r2, r2, #32 ) | ||
203 | adds r2, r2, #12 | ||
204 | blt 22f | ||
205 | 21: mov r3, r7, pull #24 | ||
206 | ldr r7, [r1], #4 | ||
207 | subs r2, r2, #4 | ||
208 | orr r3, r3, r7, push #8 | ||
209 | str r3, [r0], #4 | ||
210 | bge 21b | ||
211 | 22: sub r1, r1, #1 | ||
212 | b 6b | ||
213 | 43 | ||
44 | .macro enter reg1 reg2 | ||
45 | stmdb sp!, {r0, \reg1, \reg2} | ||
46 | .endm | ||
214 | 47 | ||
215 | 23: add r1, r1, r2 | 48 | .macro exit reg1 reg2 |
216 | add r0, r0, r2 | 49 | ldmfd sp!, {r0, \reg1, \reg2} |
217 | subs r2, r2, #4 | 50 | .endm |
218 | blt 29f | ||
219 | PLD( pld [r1, #-4] ) | ||
220 | ands ip, r0, #3 | ||
221 | bne 30f | ||
222 | ands ip, r1, #3 | ||
223 | bne 31f | ||
224 | 51 | ||
225 | 24: subs r2, r2, #8 | 52 | .text |
226 | blt 28f | ||
227 | subs r2, r2, #20 | ||
228 | blt 27f | ||
229 | PLD( pld [r1, #-32] ) | ||
230 | PLD( subs r2, r2, #64 ) | ||
231 | PLD( blt 26f ) | ||
232 | 25: PLD( pld [r1, #-64] ) | ||
233 | PLD( pld [r1, #-96] ) | ||
234 | ldmdb r1!, {r3 - r9, ip} | ||
235 | subs r2, r2, #32 | ||
236 | stmgedb r0!, {r3 - r9, ip} | ||
237 | ldmgedb r1!, {r3 - r9, ip} | ||
238 | subges r2, r2, #32 | ||
239 | stmdb r0!, {r3 - r9, ip} | ||
240 | bge 25b | ||
241 | 26: PLD( ldmdb r1!, {r3 - r9, ip} ) | ||
242 | PLD( adds r2, r2, #32 ) | ||
243 | PLD( stmgedb r0!, {r3 - r9, ip} ) | ||
244 | PLD( ldmgedb r1!, {r3 - r9, ip} ) | ||
245 | PLD( subges r2, r2, #32 ) | ||
246 | PLD( stmdb r0!, {r3 - r9, ip} ) | ||
247 | 27: cmn r2, #16 | ||
248 | ldmgedb r1!, {r3 - r6} | ||
249 | subge r2, r2, #16 | ||
250 | stmgedb r0!, {r3 - r6} | ||
251 | adds r2, r2, #20 | ||
252 | ldmgedb r1!, {r3 - r5} | ||
253 | subge r2, r2, #12 | ||
254 | stmgedb r0!, {r3 - r5} | ||
255 | 28: adds r2, r2, #8 | ||
256 | blt 29f | ||
257 | subs r2, r2, #4 | ||
258 | ldrlt r3, [r1, #-4]! | ||
259 | ldmgedb r1!, {r4, r5} | ||
260 | subge r2, r2, #4 | ||
261 | strlt r3, [r0, #-4]! | ||
262 | stmgedb r0!, {r4, r5} | ||
263 | 53 | ||
264 | 29: adds r2, r2, #4 | 54 | /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ |
265 | EXITEQ | ||
266 | cmp r2, #2 | ||
267 | ldrb r3, [r1, #-1]! | ||
268 | ldrgeb r4, [r1, #-1]! | ||
269 | ldrgtb r5, [r1, #-1]! | ||
270 | strb r3, [r0, #-1]! | ||
271 | strgeb r4, [r0, #-1]! | ||
272 | strgtb r5, [r0, #-1]! | ||
273 | EXIT | ||
274 | 55 | ||
275 | 30: cmp ip, #2 | 56 | ENTRY(memcpy) |
276 | ldrb r3, [r1, #-1]! | ||
277 | ldrgeb r4, [r1, #-1]! | ||
278 | ldrgtb r5, [r1, #-1]! | ||
279 | strb r3, [r0, #-1]! | ||
280 | strgeb r4, [r0, #-1]! | ||
281 | strgtb r5, [r0, #-1]! | ||
282 | subs r2, r2, ip | ||
283 | blt 29b | ||
284 | ands ip, r1, #3 | ||
285 | beq 24b | ||
286 | |||
287 | 31: bic r1, r1, #3 | ||
288 | ldr r3, [r1], #0 | ||
289 | cmp ip, #2 | ||
290 | blt 41f | ||
291 | beq 36f | ||
292 | cmp r2, #12 | ||
293 | blt 34f | ||
294 | PLD( pld [r1, #-16] ) | ||
295 | sub r2, r2, #12 | ||
296 | PLD( subs r2, r2, #32 ) | ||
297 | PLD( blt 33f ) | ||
298 | PLD( pld [r1, #-32] ) | ||
299 | 32: PLD( pld [r1, #-48] ) | ||
300 | 33: mov r7, r3, push #8 | ||
301 | ldmdb r1!, {r3, r4, r5, r6} | ||
302 | subs r2, r2, #16 | ||
303 | orr r7, r7, r6, pull #24 | ||
304 | mov r6, r6, push #8 | ||
305 | orr r6, r6, r5, pull #24 | ||
306 | mov r5, r5, push #8 | ||
307 | orr r5, r5, r4, pull #24 | ||
308 | mov r4, r4, push #8 | ||
309 | orr r4, r4, r3, pull #24 | ||
310 | stmdb r0!, {r4, r5, r6, r7} | ||
311 | bge 32b | ||
312 | PLD( cmn r2, #32 ) | ||
313 | PLD( bge 33b ) | ||
314 | PLD( add r2, r2, #32 ) | ||
315 | adds r2, r2, #12 | ||
316 | blt 35f | ||
317 | 34: mov ip, r3, push #8 | ||
318 | ldr r3, [r1, #-4]! | ||
319 | subs r2, r2, #4 | ||
320 | orr ip, ip, r3, pull #24 | ||
321 | str ip, [r0, #-4]! | ||
322 | bge 34b | ||
323 | 35: add r1, r1, #3 | ||
324 | b 29b | ||
325 | |||
326 | 36: cmp r2, #12 | ||
327 | blt 39f | ||
328 | PLD( pld [r1, #-16] ) | ||
329 | sub r2, r2, #12 | ||
330 | PLD( subs r2, r2, #32 ) | ||
331 | PLD( blt 38f ) | ||
332 | PLD( pld [r1, #-32] ) | ||
333 | 37: PLD( pld [r1, #-48] ) | ||
334 | 38: mov r7, r3, push #16 | ||
335 | ldmdb r1!, {r3, r4, r5, r6} | ||
336 | subs r2, r2, #16 | ||
337 | orr r7, r7, r6, pull #16 | ||
338 | mov r6, r6, push #16 | ||
339 | orr r6, r6, r5, pull #16 | ||
340 | mov r5, r5, push #16 | ||
341 | orr r5, r5, r4, pull #16 | ||
342 | mov r4, r4, push #16 | ||
343 | orr r4, r4, r3, pull #16 | ||
344 | stmdb r0!, {r4, r5, r6, r7} | ||
345 | bge 37b | ||
346 | PLD( cmn r2, #32 ) | ||
347 | PLD( bge 38b ) | ||
348 | PLD( add r2, r2, #32 ) | ||
349 | adds r2, r2, #12 | ||
350 | blt 40f | ||
351 | 39: mov ip, r3, push #16 | ||
352 | ldr r3, [r1, #-4]! | ||
353 | subs r2, r2, #4 | ||
354 | orr ip, ip, r3, pull #16 | ||
355 | str ip, [r0, #-4]! | ||
356 | bge 39b | ||
357 | 40: add r1, r1, #2 | ||
358 | b 29b | ||
359 | 57 | ||
360 | 41: cmp r2, #12 | 58 | #include "copy_template.S" |
361 | blt 44f | ||
362 | PLD( pld [r1, #-16] ) | ||
363 | sub r2, r2, #12 | ||
364 | PLD( subs r2, r2, #32 ) | ||
365 | PLD( blt 43f ) | ||
366 | PLD( pld [r1, #-32] ) | ||
367 | 42: PLD( pld [r1, #-48] ) | ||
368 | 43: mov r7, r3, push #24 | ||
369 | ldmdb r1!, {r3, r4, r5, r6} | ||
370 | subs r2, r2, #16 | ||
371 | orr r7, r7, r6, pull #8 | ||
372 | mov r6, r6, push #24 | ||
373 | orr r6, r6, r5, pull #8 | ||
374 | mov r5, r5, push #24 | ||
375 | orr r5, r5, r4, pull #8 | ||
376 | mov r4, r4, push #24 | ||
377 | orr r4, r4, r3, pull #8 | ||
378 | stmdb r0!, {r4, r5, r6, r7} | ||
379 | bge 42b | ||
380 | PLD( cmn r2, #32 ) | ||
381 | PLD( bge 43b ) | ||
382 | PLD( add r2, r2, #32 ) | ||
383 | adds r2, r2, #12 | ||
384 | blt 45f | ||
385 | 44: mov ip, r3, push #24 | ||
386 | ldr r3, [r1, #-4]! | ||
387 | subs r2, r2, #4 | ||
388 | orr ip, ip, r3, pull #8 | ||
389 | str ip, [r0, #-4]! | ||
390 | bge 44b | ||
391 | 45: add r1, r1, #1 | ||
392 | b 29b | ||
393 | 59 | ||
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S new file mode 100644 index 000000000000..ef7fddc14ac9 --- /dev/null +++ b/arch/arm/lib/memmove.S | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/lib/memmove.S | ||
3 | * | ||
4 | * Author: Nicolas Pitre | ||
5 | * Created: Sep 28, 2005 | ||
6 | * Copyright: (C) MontaVista Software Inc. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License version 2 as | ||
10 | * published by the Free Software Foundation. | ||
11 | */ | ||
12 | |||
13 | #include <linux/linkage.h> | ||
14 | #include <asm/assembler.h> | ||
15 | |||
16 | /* | ||
17 | * This can be used to enable code to cacheline align the source pointer. | ||
18 | * Experiments on tested architectures (StrongARM and XScale) didn't show | ||
19 | * this to be a worthwhile thing to do. That might be different in the future. | ||
20 | */ | ||
21 | //#define CALGN(code...) code | ||
22 | #define CALGN(code...) | ||
23 | |||
24 | .text | ||
25 | |||
26 | /* | ||
27 | * Prototype: void *memmove(void *dest, const void *src, size_t n); | ||
28 | * | ||
29 | * Note: | ||
30 | * | ||
31 | * If the destination is not above the source, or the regions don't overlap, | ||
32 | * we simply branch to memcpy which is normally a bit faster. Otherwise the | ||
33 | * copy is done going downwards. This is a transposition of the code from | ||
34 | * copy_template.S but with the copy occurring in the opposite direction. | ||
35 | */ | ||
36 | |||
37 | ENTRY(memmove) | ||
38 | |||
39 | subs ip, r0, r1 /* ip = dest - src; HI only when dest > src */ | ||
40 | cmphi r2, ip /* dest > src: compare n against that distance */ | ||
41 | bls memcpy /* dest <= src, or n <= dest - src: hand over to memcpy */ | ||
42 | |||
43 | stmfd sp!, {r0, r4, lr} /* r0 is reloaded on exit, so the original dest is what gets returned */ | ||
44 | add r1, r1, r2 /* move both pointers to the end of their buffers: */ | ||
45 | add r0, r0, r2 /* the copy below walks downwards */ | ||
46 | subs r2, r2, #4 | ||
47 | blt 8f | ||
48 | ands ip, r0, #3 | ||
49 | PLD( pld [r1, #-4] ) | ||
50 | bne 9f | ||
51 | ands ip, r1, #3 | ||
52 | bne 10f | ||
53 | |||
54 | 1: subs r2, r2, #(28) | ||
55 | stmfd sp!, {r5 - r8} | ||
56 | blt 5f | ||
57 | |||
58 | CALGN( ands ip, r1, #31 ) | ||
59 | CALGN( sbcnes r4, ip, r2 ) @ C is always set here | ||
60 | CALGN( bcs 2f ) | ||
61 | CALGN( adr r4, 6f ) | ||
62 | CALGN( subs r2, r2, ip ) @ C is set here | ||
63 | CALGN( add pc, r4, ip ) | ||
64 | |||
65 | PLD( pld [r1, #-4] ) | ||
66 | 2: PLD( subs r2, r2, #96 ) | ||
67 | PLD( pld [r1, #-32] ) | ||
68 | PLD( blt 4f ) | ||
69 | PLD( pld [r1, #-64] ) | ||
70 | PLD( pld [r1, #-96] ) | ||
71 | |||
72 | 3: PLD( pld [r1, #-128] ) | ||
73 | 4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr} | ||
74 | subs r2, r2, #32 | ||
75 | stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr} | ||
76 | bge 3b | ||
77 | PLD( cmn r2, #96 ) | ||
78 | PLD( bge 4b ) | ||
79 | |||
80 | 5: ands ip, r2, #28 | ||
81 | rsb ip, ip, #32 | ||
82 | addne pc, pc, ip @ C is always clear here | ||
83 | b 7f | ||
84 | 6: nop | ||
85 | ldr r3, [r1, #-4]! | ||
86 | ldr r4, [r1, #-4]! | ||
87 | ldr r5, [r1, #-4]! | ||
88 | ldr r6, [r1, #-4]! | ||
89 | ldr r7, [r1, #-4]! | ||
90 | ldr r8, [r1, #-4]! | ||
91 | ldr lr, [r1, #-4]! | ||
92 | |||
93 | add pc, pc, ip | ||
94 | nop | ||
95 | nop | ||
96 | str r3, [r0, #-4]! | ||
97 | str r4, [r0, #-4]! | ||
98 | str r5, [r0, #-4]! | ||
99 | str r6, [r0, #-4]! | ||
100 | str r7, [r0, #-4]! | ||
101 | str r8, [r0, #-4]! | ||
102 | str lr, [r0, #-4]! | ||
103 | |||
104 | CALGN( bcs 2b ) | ||
105 | |||
106 | 7: ldmfd sp!, {r5 - r8} | ||
107 | |||
108 | 8: movs r2, r2, lsl #31 | ||
109 | ldrneb r3, [r1, #-1]! | ||
110 | ldrcsb r4, [r1, #-1]! | ||
111 | ldrcsb ip, [r1, #-1] | ||
112 | strneb r3, [r0, #-1]! | ||
113 | strcsb r4, [r0, #-1]! | ||
114 | strcsb ip, [r0, #-1] | ||
115 | ldmfd sp!, {r0, r4, pc} | ||
116 | |||
117 | 9: cmp ip, #2 | ||
118 | ldrgtb r3, [r1, #-1]! | ||
119 | ldrgeb r4, [r1, #-1]! | ||
120 | ldrb lr, [r1, #-1]! | ||
121 | strgtb r3, [r0, #-1]! | ||
122 | strgeb r4, [r0, #-1]! | ||
123 | subs r2, r2, ip | ||
124 | strb lr, [r0, #-1]! | ||
125 | blt 8b | ||
126 | ands ip, r1, #3 | ||
127 | beq 1b | ||
128 | |||
129 | 10: bic r1, r1, #3 | ||
130 | cmp ip, #2 | ||
131 | ldr r3, [r1, #0] | ||
132 | beq 17f | ||
133 | blt 18f | ||
134 | |||
135 | |||
136 | .macro backward_copy_shift push pull | ||
137 | |||
138 | subs r2, r2, #28 | ||
139 | blt 14f | ||
140 | |||
141 | CALGN( ands ip, r1, #31 ) | ||
142 | CALGN( rsb ip, ip, #32 ) | ||
143 | CALGN( sbcnes r4, ip, r2 ) @ C is always set here | ||
144 | CALGN( subcc r2, r2, ip ) | ||
145 | CALGN( bcc 15f ) | ||
146 | |||
147 | 11: stmfd sp!, {r5 - r9} | ||
148 | |||
149 | PLD( pld [r1, #-4] ) | ||
150 | PLD( subs r2, r2, #96 ) | ||
151 | PLD( pld [r1, #-32] ) | ||
152 | PLD( blt 13f ) | ||
153 | PLD( pld [r1, #-64] ) | ||
154 | PLD( pld [r1, #-96] ) | ||
155 | |||
156 | 12: PLD( pld [r1, #-128] ) | ||
157 | 13: ldmdb r1!, {r7, r8, r9, ip} | ||
158 | mov lr, r3, push #\push | ||
159 | subs r2, r2, #32 | ||
160 | ldmdb r1!, {r3, r4, r5, r6} | ||
161 | orr lr, lr, ip, pull #\pull | ||
162 | mov ip, ip, push #\push | ||
163 | orr ip, ip, r9, pull #\pull | ||
164 | mov r9, r9, push #\push | ||
165 | orr r9, r9, r8, pull #\pull | ||
166 | mov r8, r8, push #\push | ||
167 | orr r8, r8, r7, pull #\pull | ||
168 | mov r7, r7, push #\push | ||
169 | orr r7, r7, r6, pull #\pull | ||
170 | mov r6, r6, push #\push | ||
171 | orr r6, r6, r5, pull #\pull | ||
172 | mov r5, r5, push #\push | ||
173 | orr r5, r5, r4, pull #\pull | ||
174 | mov r4, r4, push #\push | ||
175 | orr r4, r4, r3, pull #\pull | ||
176 | stmdb r0!, {r4 - r9, ip, lr} | ||
177 | bge 12b | ||
178 | PLD( cmn r2, #96 ) | ||
179 | PLD( bge 13b ) | ||
180 | |||
181 | ldmfd sp!, {r5 - r9} | ||
182 | |||
183 | 14: ands ip, r2, #28 | ||
184 | beq 16f | ||
185 | |||
186 | 15: mov lr, r3, push #\push | ||
187 | ldr r3, [r1, #-4]! | ||
188 | subs ip, ip, #4 | ||
189 | orr lr, lr, r3, pull #\pull | ||
190 | str lr, [r0, #-4]! | ||
191 | bgt 15b | ||
192 | CALGN( cmp r2, #0 ) | ||
193 | CALGN( bge 11b ) | ||
194 | |||
195 | 16: add r1, r1, #(\pull / 8) | ||
196 | b 8b | ||
197 | |||
198 | .endm | ||
199 | |||
200 | |||
201 | backward_copy_shift push=8 pull=24 | ||
202 | |||
203 | 17: backward_copy_shift push=16 pull=16 | ||
204 | |||
205 | 18: backward_copy_shift push=24 pull=8 | ||
206 | |||