aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/lib/Makefile5
-rw-r--r--arch/arm/lib/copy_template.S255
-rw-r--r--arch/arm/lib/memcpy.S410
-rw-r--r--arch/arm/lib/memmove.S206
4 files changed, 502 insertions, 374 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index d3d9b21eb7e4..8f9770f1af19 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -7,8 +7,9 @@
7lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ 7lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \
8 csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ 8 csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
9 copy_page.o delay.o findbit.o memchr.o memcpy.o \ 9 copy_page.o delay.o findbit.o memchr.o memcpy.o \
10 memset.o memzero.o setbit.o strncpy_from_user.o \ 10 memmove.o memset.o memzero.o setbit.o \
11 strnlen_user.o strchr.o strrchr.o testchangebit.o \ 11 strncpy_from_user.o strnlen_user.o \
12 strchr.o strrchr.o testchangebit.o \
12 testclearbit.o testsetbit.o uaccess.o \ 13 testclearbit.o testsetbit.o uaccess.o \
13 getuser.o putuser.o clear_user.o \ 14 getuser.o putuser.o clear_user.o \
14 ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \ 15 ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
new file mode 100644
index 000000000000..838e435e4922
--- /dev/null
+++ b/arch/arm/lib/copy_template.S
@@ -0,0 +1,255 @@
1/*
2 * linux/arch/arm/lib/copy_template.s
3 *
4 * Code template for optimized memory copy functions
5 *
6 * Author: Nicolas Pitre
7 * Created: Sep 28, 2005
8 * Copyright: MontaVista Software, Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15/*
16 * This can be used to enable code to cacheline align the source pointer.
17 * Experiments on tested architectures (StrongARM and XScale) didn't show
18 * this a worthwhile thing to do. That might be different in the future.
19 */
20//#define CALGN(code...) code
21#define CALGN(code...)
22
23/*
24 * Theory of operation
25 * -------------------
26 *
27 * This file provides the core code for a forward memory copy used in
28 * the implementation of memcopy(), copy_to_user() and copy_from_user().
29 *
30 * The including file must define the following accessor macros
31 * according to the need of the given function:
32 *
33 * ldr1w ptr reg abort
34 *
35 * This loads one word from 'ptr', stores it in 'reg' and increments
36 * 'ptr' to the next word. The 'abort' argument is used for fixup tables.
37 *
38 * ldr4w ptr reg1 reg2 reg3 reg4 abort
39 * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
40 *
41 * This loads four or eight words starting from 'ptr', stores them
42 * in provided registers and increments 'ptr' past those words.
43 * The'abort' argument is used for fixup tables.
44 *
45 * ldr1b ptr reg cond abort
46 *
47 * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
48 * It also must apply the condition code if provided, otherwise the
49 * "al" condition is assumed by default.
50 *
51 * str1w ptr reg abort
52 * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
53 * str1b ptr reg cond abort
54 *
55 * Same as their ldr* counterparts, but data is stored to 'ptr' location
56 * rather than being loaded.
57 *
58 * enter reg1 reg2
59 *
60 * Preserve the provided registers on the stack plus any additional
61 * data as needed by the implementation including this code. Called
62 * upon code entry.
63 *
64 * exit reg1 reg2
65 *
66 * Restore registers with the values previously saved with the
67 * 'preserv' macro. Called upon code termination.
68 */
69
70
71 enter r4, lr
72
73 subs r2, r2, #4
74 blt 8f
75 ands ip, r0, #3
76 PLD( pld [r1, #0] )
77 bne 9f
78 ands ip, r1, #3
79 bne 10f
80
811: subs r2, r2, #(28)
82 stmfd sp!, {r5 - r8}
83 blt 5f
84
85 CALGN( ands ip, r1, #31 )
86 CALGN( rsb r3, ip, #32 )
87 CALGN( sbcnes r4, r3, r2 ) @ C is always set here
88 CALGN( bcs 2f )
89 CALGN( adr r4, 6f )
90 CALGN( subs r2, r2, r3 ) @ C gets set
91 CALGN( add pc, r4, ip )
92
93 PLD( pld [r1, #0] )
942: PLD( subs r2, r2, #96 )
95 PLD( pld [r1, #28] )
96 PLD( blt 4f )
97 PLD( pld [r1, #60] )
98 PLD( pld [r1, #92] )
99
1003: PLD( pld [r1, #124] )
1014: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
102 subs r2, r2, #32
103 str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
104 bge 3b
105 PLD( cmn r2, #96 )
106 PLD( bge 4b )
107
1085: ands ip, r2, #28
109 rsb ip, ip, #32
110 addne pc, pc, ip @ C is always clear here
111 b 7f
1126: nop
113 ldr1w r1, r3, abort=20f
114 ldr1w r1, r4, abort=20f
115 ldr1w r1, r5, abort=20f
116 ldr1w r1, r6, abort=20f
117 ldr1w r1, r7, abort=20f
118 ldr1w r1, r8, abort=20f
119 ldr1w r1, lr, abort=20f
120
121 add pc, pc, ip
122 nop
123 nop
124 str1w r0, r3, abort=20f
125 str1w r0, r4, abort=20f
126 str1w r0, r5, abort=20f
127 str1w r0, r6, abort=20f
128 str1w r0, r7, abort=20f
129 str1w r0, r8, abort=20f
130 str1w r0, lr, abort=20f
131
132 CALGN( bcs 2b )
133
1347: ldmfd sp!, {r5 - r8}
135
1368: movs r2, r2, lsl #31
137 ldr1b r1, r3, ne, abort=21f
138 ldr1b r1, r4, cs, abort=21f
139 ldr1b r1, ip, cs, abort=21f
140 str1b r0, r3, ne, abort=21f
141 str1b r0, r4, cs, abort=21f
142 str1b r0, ip, cs, abort=21f
143
144 exit r4, pc
145
1469: rsb ip, ip, #4
147 cmp ip, #2
148 ldr1b r1, r3, gt, abort=21f
149 ldr1b r1, r4, ge, abort=21f
150 ldr1b r1, lr, abort=21f
151 str1b r0, r3, gt, abort=21f
152 str1b r0, r4, ge, abort=21f
153 subs r2, r2, ip
154 str1b r0, lr, abort=21f
155 blt 8b
156 ands ip, r1, #3
157 beq 1b
158
15910: bic r1, r1, #3
160 cmp ip, #2
161 ldr1w r1, lr, abort=21f
162 beq 17f
163 bgt 18f
164
165
166 .macro forward_copy_shift pull push
167
168 subs r2, r2, #28
169 blt 14f
170
171 CALGN( ands ip, r1, #31 )
172 CALGN( rsb ip, ip, #32 )
173 CALGN( sbcnes r4, ip, r2 ) @ C is always set here
174 CALGN( subcc r2, r2, ip )
175 CALGN( bcc 15f )
176
17711: stmfd sp!, {r5 - r9}
178
179 PLD( pld [r1, #0] )
180 PLD( subs r2, r2, #96 )
181 PLD( pld [r1, #28] )
182 PLD( blt 13f )
183 PLD( pld [r1, #60] )
184 PLD( pld [r1, #92] )
185
18612: PLD( pld [r1, #124] )
18713: ldr4w r1, r4, r5, r6, r7, abort=19f
188 mov r3, lr, pull #\pull
189 subs r2, r2, #32
190 ldr4w r1, r8, r9, ip, lr, abort=19f
191 orr r3, r3, r4, push #\push
192 mov r4, r4, pull #\pull
193 orr r4, r4, r5, push #\push
194 mov r5, r5, pull #\pull
195 orr r5, r5, r6, push #\push
196 mov r6, r6, pull #\pull
197 orr r6, r6, r7, push #\push
198 mov r7, r7, pull #\pull
199 orr r7, r7, r8, push #\push
200 mov r8, r8, pull #\pull
201 orr r8, r8, r9, push #\push
202 mov r9, r9, pull #\pull
203 orr r9, r9, ip, push #\push
204 mov ip, ip, pull #\pull
205 orr ip, ip, lr, push #\push
206 str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
207 bge 12b
208 PLD( cmn r2, #96 )
209 PLD( bge 13b )
210
211 ldmfd sp!, {r5 - r9}
212
21314: ands ip, r2, #28
214 beq 16f
215
21615: mov r3, lr, pull #\pull
217 ldr1w r1, lr, abort=21f
218 subs ip, ip, #4
219 orr r3, r3, lr, push #\push
220 str1w r0, r3, abort=21f
221 bgt 15b
222 CALGN( cmp r2, #0 )
223 CALGN( bge 11b )
224
22516: sub r1, r1, #(\push / 8)
226 b 8b
227
228 .endm
229
230
231 forward_copy_shift pull=8 push=24
232
23317: forward_copy_shift pull=16 push=16
234
23518: forward_copy_shift pull=24 push=8
236
237
238/*
239 * Abort preanble and completion macros.
240 * If a fixup handler is required then those macros must surround it.
241 * It is assumed that the fixup code will handle the private part of
242 * the exit macro.
243 */
244
245 .macro copy_abort_preamble
24619: ldmfd sp!, {r5 - r9}
247 b 21f
24820: ldmfd sp!, {r5 - r8}
24921:
250 .endm
251
252 .macro copy_abort_end
253 ldmfd sp!, {r4, pc}
254 .endm
255
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index f5a593ceb8cc..7e71d6708a8d 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -1,393 +1,59 @@
1/* 1/*
2 * linux/arch/arm/lib/memcpy.S 2 * linux/arch/arm/lib/memcpy.S
3 * 3 *
4 * Copyright (C) 1995-1999 Russell King 4 * Author: Nicolas Pitre
5 * Created: Sep 28, 2005
6 * Copyright: MontaVista Software, Inc.
5 * 7 *
6 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as 9 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 10 * published by the Free Software Foundation.
9 *
10 * ASM optimised string functions
11 */ 11 */
12
12#include <linux/linkage.h> 13#include <linux/linkage.h>
13#include <asm/assembler.h> 14#include <asm/assembler.h>
14 15
15 .text 16 .macro ldr1w ptr reg abort
16 17 ldr \reg, [\ptr], #4
17#define ENTER \ 18 .endm
18 mov ip,sp ;\
19 stmfd sp!,{r0,r4-r9,fp,ip,lr,pc} ;\
20 sub fp,ip,#4
21
22#define EXIT \
23 LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
24
25#define EXITEQ \
26 LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
27
28/*
29 * Prototype: void memcpy(void *to,const void *from,unsigned long n);
30 */
31ENTRY(memcpy)
32ENTRY(memmove)
33 ENTER
34 cmp r1, r0
35 bcc 23f
36 subs r2, r2, #4
37 blt 6f
38 PLD( pld [r1, #0] )
39 ands ip, r0, #3
40 bne 7f
41 ands ip, r1, #3
42 bne 8f
43 19
441: subs r2, r2, #8 20 .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
45 blt 5f 21 ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
46 subs r2, r2, #20 22 .endm
47 blt 4f
48 PLD( pld [r1, #28] )
49 PLD( subs r2, r2, #64 )
50 PLD( blt 3f )
512: PLD( pld [r1, #60] )
52 PLD( pld [r1, #92] )
53 ldmia r1!, {r3 - r9, ip}
54 subs r2, r2, #32
55 stmgeia r0!, {r3 - r9, ip}
56 ldmgeia r1!, {r3 - r9, ip}
57 subges r2, r2, #32
58 stmia r0!, {r3 - r9, ip}
59 bge 2b
603: PLD( ldmia r1!, {r3 - r9, ip} )
61 PLD( adds r2, r2, #32 )
62 PLD( stmgeia r0!, {r3 - r9, ip} )
63 PLD( ldmgeia r1!, {r3 - r9, ip} )
64 PLD( subges r2, r2, #32 )
65 PLD( stmia r0!, {r3 - r9, ip} )
664: cmn r2, #16
67 ldmgeia r1!, {r3 - r6}
68 subge r2, r2, #16
69 stmgeia r0!, {r3 - r6}
70 adds r2, r2, #20
71 ldmgeia r1!, {r3 - r5}
72 subge r2, r2, #12
73 stmgeia r0!, {r3 - r5}
745: adds r2, r2, #8
75 blt 6f
76 subs r2, r2, #4
77 ldrlt r3, [r1], #4
78 ldmgeia r1!, {r4, r5}
79 subge r2, r2, #4
80 strlt r3, [r0], #4
81 stmgeia r0!, {r4, r5}
82 23
836: adds r2, r2, #4 24 .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
84 EXITEQ 25 ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
85 cmp r2, #2 26 .endm
86 ldrb r3, [r1], #1
87 ldrgeb r4, [r1], #1
88 ldrgtb r5, [r1], #1
89 strb r3, [r0], #1
90 strgeb r4, [r0], #1
91 strgtb r5, [r0], #1
92 EXIT
93 27
947: rsb ip, ip, #4 28 .macro ldr1b ptr reg cond=al abort
95 cmp ip, #2 29 ldr\cond\()b \reg, [\ptr], #1
96 ldrb r3, [r1], #1 30 .endm
97 ldrgeb r4, [r1], #1
98 ldrgtb r5, [r1], #1
99 strb r3, [r0], #1
100 strgeb r4, [r0], #1
101 strgtb r5, [r0], #1
102 subs r2, r2, ip
103 blt 6b
104 ands ip, r1, #3
105 beq 1b
106 31
1078: bic r1, r1, #3 32 .macro str1w ptr reg abort
108 ldr r7, [r1], #4 33 str \reg, [\ptr], #4
109 cmp ip, #2 34 .endm
110 bgt 18f
111 beq 13f
112 cmp r2, #12
113 blt 11f
114 PLD( pld [r1, #12] )
115 sub r2, r2, #12
116 PLD( subs r2, r2, #32 )
117 PLD( blt 10f )
118 PLD( pld [r1, #28] )
1199: PLD( pld [r1, #44] )
12010: mov r3, r7, pull #8
121 ldmia r1!, {r4 - r7}
122 subs r2, r2, #16
123 orr r3, r3, r4, push #24
124 mov r4, r4, pull #8
125 orr r4, r4, r5, push #24
126 mov r5, r5, pull #8
127 orr r5, r5, r6, push #24
128 mov r6, r6, pull #8
129 orr r6, r6, r7, push #24
130 stmia r0!, {r3 - r6}
131 bge 9b
132 PLD( cmn r2, #32 )
133 PLD( bge 10b )
134 PLD( add r2, r2, #32 )
135 adds r2, r2, #12
136 blt 12f
13711: mov r3, r7, pull #8
138 ldr r7, [r1], #4
139 subs r2, r2, #4
140 orr r3, r3, r7, push #24
141 str r3, [r0], #4
142 bge 11b
14312: sub r1, r1, #3
144 b 6b
145 35
14613: cmp r2, #12 36 .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
147 blt 16f 37 stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
148 PLD( pld [r1, #12] ) 38 .endm
149 sub r2, r2, #12
150 PLD( subs r2, r2, #32 )
151 PLD( blt 15f )
152 PLD( pld [r1, #28] )
15314: PLD( pld [r1, #44] )
15415: mov r3, r7, pull #16
155 ldmia r1!, {r4 - r7}
156 subs r2, r2, #16
157 orr r3, r3, r4, push #16
158 mov r4, r4, pull #16
159 orr r4, r4, r5, push #16
160 mov r5, r5, pull #16
161 orr r5, r5, r6, push #16
162 mov r6, r6, pull #16
163 orr r6, r6, r7, push #16
164 stmia r0!, {r3 - r6}
165 bge 14b
166 PLD( cmn r2, #32 )
167 PLD( bge 15b )
168 PLD( add r2, r2, #32 )
169 adds r2, r2, #12
170 blt 17f
17116: mov r3, r7, pull #16
172 ldr r7, [r1], #4
173 subs r2, r2, #4
174 orr r3, r3, r7, push #16
175 str r3, [r0], #4
176 bge 16b
17717: sub r1, r1, #2
178 b 6b
179 39
18018: cmp r2, #12 40 .macro str1b ptr reg cond=al abort
181 blt 21f 41 str\cond\()b \reg, [\ptr], #1
182 PLD( pld [r1, #12] ) 42 .endm
183 sub r2, r2, #12
184 PLD( subs r2, r2, #32 )
185 PLD( blt 20f )
186 PLD( pld [r1, #28] )
18719: PLD( pld [r1, #44] )
18820: mov r3, r7, pull #24
189 ldmia r1!, {r4 - r7}
190 subs r2, r2, #16
191 orr r3, r3, r4, push #8
192 mov r4, r4, pull #24
193 orr r4, r4, r5, push #8
194 mov r5, r5, pull #24
195 orr r5, r5, r6, push #8
196 mov r6, r6, pull #24
197 orr r6, r6, r7, push #8
198 stmia r0!, {r3 - r6}
199 bge 19b
200 PLD( cmn r2, #32 )
201 PLD( bge 20b )
202 PLD( add r2, r2, #32 )
203 adds r2, r2, #12
204 blt 22f
20521: mov r3, r7, pull #24
206 ldr r7, [r1], #4
207 subs r2, r2, #4
208 orr r3, r3, r7, push #8
209 str r3, [r0], #4
210 bge 21b
21122: sub r1, r1, #1
212 b 6b
213 43
44 .macro enter reg1 reg2
45 stmdb sp!, {r0, \reg1, \reg2}
46 .endm
214 47
21523: add r1, r1, r2 48 .macro exit reg1 reg2
216 add r0, r0, r2 49 ldmfd sp!, {r0, \reg1, \reg2}
217 subs r2, r2, #4 50 .endm
218 blt 29f
219 PLD( pld [r1, #-4] )
220 ands ip, r0, #3
221 bne 30f
222 ands ip, r1, #3
223 bne 31f
224 51
22524: subs r2, r2, #8 52 .text
226 blt 28f
227 subs r2, r2, #20
228 blt 27f
229 PLD( pld [r1, #-32] )
230 PLD( subs r2, r2, #64 )
231 PLD( blt 26f )
23225: PLD( pld [r1, #-64] )
233 PLD( pld [r1, #-96] )
234 ldmdb r1!, {r3 - r9, ip}
235 subs r2, r2, #32
236 stmgedb r0!, {r3 - r9, ip}
237 ldmgedb r1!, {r3 - r9, ip}
238 subges r2, r2, #32
239 stmdb r0!, {r3 - r9, ip}
240 bge 25b
24126: PLD( ldmdb r1!, {r3 - r9, ip} )
242 PLD( adds r2, r2, #32 )
243 PLD( stmgedb r0!, {r3 - r9, ip} )
244 PLD( ldmgedb r1!, {r3 - r9, ip} )
245 PLD( subges r2, r2, #32 )
246 PLD( stmdb r0!, {r3 - r9, ip} )
24727: cmn r2, #16
248 ldmgedb r1!, {r3 - r6}
249 subge r2, r2, #16
250 stmgedb r0!, {r3 - r6}
251 adds r2, r2, #20
252 ldmgedb r1!, {r3 - r5}
253 subge r2, r2, #12
254 stmgedb r0!, {r3 - r5}
25528: adds r2, r2, #8
256 blt 29f
257 subs r2, r2, #4
258 ldrlt r3, [r1, #-4]!
259 ldmgedb r1!, {r4, r5}
260 subge r2, r2, #4
261 strlt r3, [r0, #-4]!
262 stmgedb r0!, {r4, r5}
263 53
26429: adds r2, r2, #4 54/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
265 EXITEQ
266 cmp r2, #2
267 ldrb r3, [r1, #-1]!
268 ldrgeb r4, [r1, #-1]!
269 ldrgtb r5, [r1, #-1]!
270 strb r3, [r0, #-1]!
271 strgeb r4, [r0, #-1]!
272 strgtb r5, [r0, #-1]!
273 EXIT
274 55
27530: cmp ip, #2 56ENTRY(memcpy)
276 ldrb r3, [r1, #-1]!
277 ldrgeb r4, [r1, #-1]!
278 ldrgtb r5, [r1, #-1]!
279 strb r3, [r0, #-1]!
280 strgeb r4, [r0, #-1]!
281 strgtb r5, [r0, #-1]!
282 subs r2, r2, ip
283 blt 29b
284 ands ip, r1, #3
285 beq 24b
286
28731: bic r1, r1, #3
288 ldr r3, [r1], #0
289 cmp ip, #2
290 blt 41f
291 beq 36f
292 cmp r2, #12
293 blt 34f
294 PLD( pld [r1, #-16] )
295 sub r2, r2, #12
296 PLD( subs r2, r2, #32 )
297 PLD( blt 33f )
298 PLD( pld [r1, #-32] )
29932: PLD( pld [r1, #-48] )
30033: mov r7, r3, push #8
301 ldmdb r1!, {r3, r4, r5, r6}
302 subs r2, r2, #16
303 orr r7, r7, r6, pull #24
304 mov r6, r6, push #8
305 orr r6, r6, r5, pull #24
306 mov r5, r5, push #8
307 orr r5, r5, r4, pull #24
308 mov r4, r4, push #8
309 orr r4, r4, r3, pull #24
310 stmdb r0!, {r4, r5, r6, r7}
311 bge 32b
312 PLD( cmn r2, #32 )
313 PLD( bge 33b )
314 PLD( add r2, r2, #32 )
315 adds r2, r2, #12
316 blt 35f
31734: mov ip, r3, push #8
318 ldr r3, [r1, #-4]!
319 subs r2, r2, #4
320 orr ip, ip, r3, pull #24
321 str ip, [r0, #-4]!
322 bge 34b
32335: add r1, r1, #3
324 b 29b
325
32636: cmp r2, #12
327 blt 39f
328 PLD( pld [r1, #-16] )
329 sub r2, r2, #12
330 PLD( subs r2, r2, #32 )
331 PLD( blt 38f )
332 PLD( pld [r1, #-32] )
33337: PLD( pld [r1, #-48] )
33438: mov r7, r3, push #16
335 ldmdb r1!, {r3, r4, r5, r6}
336 subs r2, r2, #16
337 orr r7, r7, r6, pull #16
338 mov r6, r6, push #16
339 orr r6, r6, r5, pull #16
340 mov r5, r5, push #16
341 orr r5, r5, r4, pull #16
342 mov r4, r4, push #16
343 orr r4, r4, r3, pull #16
344 stmdb r0!, {r4, r5, r6, r7}
345 bge 37b
346 PLD( cmn r2, #32 )
347 PLD( bge 38b )
348 PLD( add r2, r2, #32 )
349 adds r2, r2, #12
350 blt 40f
35139: mov ip, r3, push #16
352 ldr r3, [r1, #-4]!
353 subs r2, r2, #4
354 orr ip, ip, r3, pull #16
355 str ip, [r0, #-4]!
356 bge 39b
35740: add r1, r1, #2
358 b 29b
359 57
36041: cmp r2, #12 58#include "copy_template.S"
361 blt 44f
362 PLD( pld [r1, #-16] )
363 sub r2, r2, #12
364 PLD( subs r2, r2, #32 )
365 PLD( blt 43f )
366 PLD( pld [r1, #-32] )
36742: PLD( pld [r1, #-48] )
36843: mov r7, r3, push #24
369 ldmdb r1!, {r3, r4, r5, r6}
370 subs r2, r2, #16
371 orr r7, r7, r6, pull #8
372 mov r6, r6, push #24
373 orr r6, r6, r5, pull #8
374 mov r5, r5, push #24
375 orr r5, r5, r4, pull #8
376 mov r4, r4, push #24
377 orr r4, r4, r3, pull #8
378 stmdb r0!, {r4, r5, r6, r7}
379 bge 42b
380 PLD( cmn r2, #32 )
381 PLD( bge 43b )
382 PLD( add r2, r2, #32 )
383 adds r2, r2, #12
384 blt 45f
38544: mov ip, r3, push #24
386 ldr r3, [r1, #-4]!
387 subs r2, r2, #4
388 orr ip, ip, r3, pull #8
389 str ip, [r0, #-4]!
390 bge 44b
39145: add r1, r1, #1
392 b 29b
393 59
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
new file mode 100644
index 000000000000..ef7fddc14ac9
--- /dev/null
+++ b/arch/arm/lib/memmove.S
@@ -0,0 +1,206 @@
1/*
2 * linux/arch/arm/lib/memmove.S
3 *
4 * Author: Nicolas Pitre
5 * Created: Sep 28, 2005
6 * Copyright: (C) MontaVista Software Inc.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/linkage.h>
14#include <asm/assembler.h>
15
16/*
17 * This can be used to enable code to cacheline align the source pointer.
18 * Experiments on tested architectures (StrongARM and XScale) didn't show
19 * this a worthwhile thing to do. That might be different in the future.
20 */
21//#define CALGN(code...) code
22#define CALGN(code...)
23
24 .text
25
26/*
27 * Prototype: void *memmove(void *dest, const void *src, size_t n);
28 *
29 * Note:
30 *
31 * If the memory regions don't overlap, we simply branch to memcpy which is
32 * normally a bit faster. Otherwise the copy is done going downwards. This
33 * is a transposition of the code from copy_template.S but with the copy
34 * occurring in the opposite direction.
35 */
36
37ENTRY(memmove)
38
39 subs ip, r0, r1
40 cmphi r2, ip
41 bls memcpy
42
43 stmfd sp!, {r0, r4, lr}
44 add r1, r1, r2
45 add r0, r0, r2
46 subs r2, r2, #4
47 blt 8f
48 ands ip, r0, #3
49 PLD( pld [r1, #-4] )
50 bne 9f
51 ands ip, r1, #3
52 bne 10f
53
541: subs r2, r2, #(28)
55 stmfd sp!, {r5 - r8}
56 blt 5f
57
58 CALGN( ands ip, r1, #31 )
59 CALGN( sbcnes r4, ip, r2 ) @ C is always set here
60 CALGN( bcs 2f )
61 CALGN( adr r4, 6f )
62 CALGN( subs r2, r2, ip ) @ C is set here
63 CALGN( add pc, r4, ip )
64
65 PLD( pld [r1, #-4] )
662: PLD( subs r2, r2, #96 )
67 PLD( pld [r1, #-32] )
68 PLD( blt 4f )
69 PLD( pld [r1, #-64] )
70 PLD( pld [r1, #-96] )
71
723: PLD( pld [r1, #-128] )
734: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
74 subs r2, r2, #32
75 stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
76 bge 3b
77 PLD( cmn r2, #96 )
78 PLD( bge 4b )
79
805: ands ip, r2, #28
81 rsb ip, ip, #32
82 addne pc, pc, ip @ C is always clear here
83 b 7f
846: nop
85 ldr r3, [r1, #-4]!
86 ldr r4, [r1, #-4]!
87 ldr r5, [r1, #-4]!
88 ldr r6, [r1, #-4]!
89 ldr r7, [r1, #-4]!
90 ldr r8, [r1, #-4]!
91 ldr lr, [r1, #-4]!
92
93 add pc, pc, ip
94 nop
95 nop
96 str r3, [r0, #-4]!
97 str r4, [r0, #-4]!
98 str r5, [r0, #-4]!
99 str r6, [r0, #-4]!
100 str r7, [r0, #-4]!
101 str r8, [r0, #-4]!
102 str lr, [r0, #-4]!
103
104 CALGN( bcs 2b )
105
1067: ldmfd sp!, {r5 - r8}
107
1088: movs r2, r2, lsl #31
109 ldrneb r3, [r1, #-1]!
110 ldrcsb r4, [r1, #-1]!
111 ldrcsb ip, [r1, #-1]
112 strneb r3, [r0, #-1]!
113 strcsb r4, [r0, #-1]!
114 strcsb ip, [r0, #-1]
115 ldmfd sp!, {r0, r4, pc}
116
1179: cmp ip, #2
118 ldrgtb r3, [r1, #-1]!
119 ldrgeb r4, [r1, #-1]!
120 ldrb lr, [r1, #-1]!
121 strgtb r3, [r0, #-1]!
122 strgeb r4, [r0, #-1]!
123 subs r2, r2, ip
124 strb lr, [r0, #-1]!
125 blt 8b
126 ands ip, r1, #3
127 beq 1b
128
12910: bic r1, r1, #3
130 cmp ip, #2
131 ldr r3, [r1, #0]
132 beq 17f
133 blt 18f
134
135
136 .macro backward_copy_shift push pull
137
138 subs r2, r2, #28
139 blt 14f
140
141 CALGN( ands ip, r1, #31 )
142 CALGN( rsb ip, ip, #32 )
143 CALGN( sbcnes r4, ip, r2 ) @ C is always set here
144 CALGN( subcc r2, r2, ip )
145 CALGN( bcc 15f )
146
14711: stmfd sp!, {r5 - r9}
148
149 PLD( pld [r1, #-4] )
150 PLD( subs r2, r2, #96 )
151 PLD( pld [r1, #-32] )
152 PLD( blt 13f )
153 PLD( pld [r1, #-64] )
154 PLD( pld [r1, #-96] )
155
15612: PLD( pld [r1, #-128] )
15713: ldmdb r1!, {r7, r8, r9, ip}
158 mov lr, r3, push #\push
159 subs r2, r2, #32
160 ldmdb r1!, {r3, r4, r5, r6}
161 orr lr, lr, ip, pull #\pull
162 mov ip, ip, push #\push
163 orr ip, ip, r9, pull #\pull
164 mov r9, r9, push #\push
165 orr r9, r9, r8, pull #\pull
166 mov r8, r8, push #\push
167 orr r8, r8, r7, pull #\pull
168 mov r7, r7, push #\push
169 orr r7, r7, r6, pull #\pull
170 mov r6, r6, push #\push
171 orr r6, r6, r5, pull #\pull
172 mov r5, r5, push #\push
173 orr r5, r5, r4, pull #\pull
174 mov r4, r4, push #\push
175 orr r4, r4, r3, pull #\pull
176 stmdb r0!, {r4 - r9, ip, lr}
177 bge 12b
178 PLD( cmn r2, #96 )
179 PLD( bge 13b )
180
181 ldmfd sp!, {r5 - r9}
182
18314: ands ip, r2, #28
184 beq 16f
185
18615: mov lr, r3, push #\push
187 ldr r3, [r1, #-4]!
188 subs ip, ip, #4
189 orr lr, lr, r3, pull #\pull
190 str lr, [r0, #-4]!
191 bgt 15b
192 CALGN( cmp r2, #0 )
193 CALGN( bge 11b )
194
19516: add r1, r1, #(\pull / 8)
196 b 8b
197
198 .endm
199
200
201 backward_copy_shift push=8 pull=24
202
20317: backward_copy_shift push=16 pull=16
204
20518: backward_copy_shift push=24 pull=8
206