aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author Stuart Menefy <stuart.menefy@st.com> 2007-09-27 23:36:35 -0400
committer Paul Mundt <lethal@linux-sh.org> 2007-09-27 23:36:35 -0400
commit 023ef184fff6ac2e7cba345708f35536a2a419cb (patch)
tree c80cc81eeb473a214fee0d2a0e952448071cf154 /arch
parent 24eb17e0813490497f4d5b2fad218bdba402cece (diff)
sh: __copy_user() optimizations for small copies.
This implements a fast-path for small (less than 12 bytes) copies, with the existing path treated as the slow-path and left as the default behaviour for all other copy sizes.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/sh/mm/copy_page.S 169
1 files changed, 108 insertions, 61 deletions
diff --git a/arch/sh/mm/copy_page.S b/arch/sh/mm/copy_page.S
index ae039f2da16..a81dbdb0559 100644
--- a/arch/sh/mm/copy_page.S
+++ b/arch/sh/mm/copy_page.S
@@ -141,47 +141,38 @@ ENTRY(__copy_user_page)
141 .long 9999b, 6000f ; \ 141 .long 9999b, 6000f ; \
142 .previous 142 .previous
143ENTRY(__copy_user) 143ENTRY(__copy_user)
144 tst r6,r6 ! Check explicitly for zero 144 ! Check if small number of bytes
145 bf 1f 145 mov #11,r0
146 rts
147 mov #0,r0 ! normal return
1481:
149 mov.l r10,@-r15
150 mov.l r9,@-r15
151 mov.l r8,@-r15
152 mov r4,r3 146 mov r4,r3
153 add r6,r3 ! last destination address 147 cmp/gt r0,r6 ! r6 (len) > r0 (11)
154 mov #12,r0 ! Check if small number of bytes 148 bf/s .L_cleanup_loop_no_pop
155 cmp/gt r0,r6 149 add r6,r3 ! last destination address
156 bt 2f 150
157 bra .L_cleanup_loop 151 ! Calculate bytes needed to align to src
158 nop 152 mov.l r11,@-r15
1592: 153 neg r5,r0
160 neg r5,r0 ! Calculate bytes needed to align source 154 mov.l r10,@-r15
161 add #4,r0 155 add #4,r0
156 mov.l r9,@-r15
162 and #3,r0 157 and #3,r0
158 mov.l r8,@-r15
163 tst r0,r0 159 tst r0,r0
164 bt .L_jump 160 bt 2f
165 mov r0,r1
166 161
167.L_loop1: 1621:
168 ! Copy bytes to align source 163 ! Copy bytes to long word align src
169EX( mov.b @r5+,r0 ) 164EX( mov.b @r5+,r1 )
170 dt r1 165 dt r0
171EX( mov.b r0,@r4 )
172 add #-1,r6 166 add #-1,r6
173 bf/s .L_loop1 167EX( mov.b r1,@r4 )
168 bf/s 1b
174 add #1,r4 169 add #1,r4
175 170
176.L_jump: 171 ! Jump to appropriate routine depending on dest
177 mov r6,r2 ! Calculate number of longwords to copy 1722: mov #3,r1
173 mov r6, r2
174 and r4,r1
178 shlr2 r2 175 shlr2 r2
179 tst r2,r2
180 bt .L_cleanup
181
182 mov r4,r0 ! Jump to appropriate routine
183 and #3,r0
184 mov r0,r1
185 shll2 r1 176 shll2 r1
186 mova .L_jump_tbl,r0 177 mova .L_jump_tbl,r0
187 mov.l @(r0,r1),r1 178 mov.l @(r0,r1),r1
@@ -195,43 +186,97 @@ EX( mov.b r0,@r4 )
195 .long .L_dest10 186 .long .L_dest10
196 .long .L_dest11 187 .long .L_dest11
197 188
189/*
190 * Come here if there are less than 12 bytes to copy
191 *
192 * Keep the branch target close, so the bf/s callee doesn't overflow
193 * and result in a more expensive branch being inserted. This is the
194 * fast-path for small copies, the jump via the jump table will hit the
195 * default slow-path cleanup. -PFM.
196 */
197.L_cleanup_loop_no_pop:
198 tst r6,r6 ! Check explicitly for zero
199 bt 1f
200
2012:
202EX( mov.b @r5+,r0 )
203 dt r6
204EX( mov.b r0,@r4 )
205 bf/s 2b
206 add #1,r4
207
2081: mov #0,r0 ! normal return
2095000:
210
211# Exception handler:
212.section .fixup, "ax"
2136000:
214 mov.l 8000f,r1
215 mov r3,r0
216 jmp @r1
217 sub r4,r0
218 .align 2
2198000: .long 5000b
220
221.previous
222 rts
223 nop
224
198! Destination = 00 225! Destination = 00
199 226
200.L_dest00: 227.L_dest00:
201 mov r2,r7 228 ! Skip the large copy for small transfers
202 shlr2 r7 229 mov #(32+32-4), r0
203 shlr r7 230 cmp/gt r6, r0 ! r0 (60) > r6 (len)
204 tst r7,r7 231 bt 1f
205 mov #7,r0 232
206 bt/s 1f 233 ! Align dest to a 32 byte boundary
207 and r0,r2 234 neg r4,r0
208 .align 2 235 add #0x20, r0
236 and #0x1f, r0
237 tst r0, r0
238 bt 2f
239
240 sub r0, r6
241 shlr2 r0
2423:
243EX( mov.l @r5+,r1 )
244 dt r0
245EX( mov.l r1,@r4 )
246 bf/s 3b
247 add #4,r4
248
2092: 2492:
210EX( mov.l @r5+,r0 ) 250EX( mov.l @r5+,r0 )
251EX( mov.l @r5+,r1 )
252EX( mov.l @r5+,r2 )
253EX( mov.l @r5+,r7 )
211EX( mov.l @r5+,r8 ) 254EX( mov.l @r5+,r8 )
212EX( mov.l @r5+,r9 ) 255EX( mov.l @r5+,r9 )
213EX( mov.l @r5+,r10 ) 256EX( mov.l @r5+,r10 )
214EX( mov.l r0,@r4 ) 257EX( mov.l @r5+,r11 )
215EX( mov.l r8,@(4,r4) ) 258EX( movca.l r0,@r4 )
216EX( mov.l r9,@(8,r4) ) 259 add #-32, r6
217EX( mov.l r10,@(12,r4) ) 260EX( mov.l r1,@(4,r4) )
218EX( mov.l @r5+,r0 ) 261 mov #32, r0
219EX( mov.l @r5+,r8 ) 262EX( mov.l r2,@(8,r4) )
220EX( mov.l @r5+,r9 ) 263 cmp/gt r6, r0 ! r0 (32) > r6 (len)
221EX( mov.l @r5+,r10 ) 264EX( mov.l r7,@(12,r4) )
222 dt r7 265EX( mov.l r8,@(16,r4) )
223EX( mov.l r0,@(16,r4) ) 266EX( mov.l r9,@(20,r4) )
224EX( mov.l r8,@(20,r4) ) 267EX( mov.l r10,@(24,r4) )
225EX( mov.l r9,@(24,r4) ) 268EX( mov.l r11,@(28,r4) )
226EX( mov.l r10,@(28,r4) )
227 bf/s 2b 269 bf/s 2b
228 add #32,r4 270 add #32,r4
229 tst r2,r2 271
2721: mov r6, r0
273 shlr2 r0
274 tst r0, r0
230 bt .L_cleanup 275 bt .L_cleanup
2311: 2761:
232EX( mov.l @r5+,r0 ) 277EX( mov.l @r5+,r1 )
233 dt r2 278 dt r0
234EX( mov.l r0,@r4 ) 279EX( mov.l r1,@r4 )
235 bf/s 1b 280 bf/s 1b
236 add #4,r4 281 add #4,r4
237 282
@@ -250,7 +295,7 @@ EX( mov.l r0,@r4 )
250 and r0,r2 295 and r0,r2
2512: 2962:
252 dt r7 297 dt r7
253#ifdef __LITTLE_ENDIAN__ 298#ifdef CONFIG_CPU_LITTLE_ENDIAN
254EX( mov.l @r5+,r0 ) 299EX( mov.l @r5+,r0 )
255EX( mov.l @r5+,r1 ) 300EX( mov.l @r5+,r1 )
256EX( mov.l @r5+,r8 ) 301EX( mov.l @r5+,r8 )
@@ -320,7 +365,7 @@ EX( mov.w r0,@(2,r4) )
3201: ! Read longword, write two words per iteration 3651: ! Read longword, write two words per iteration
321EX( mov.l @r5+,r0 ) 366EX( mov.l @r5+,r0 )
322 dt r2 367 dt r2
323#ifdef __LITTLE_ENDIAN__ 368#ifdef CONFIG_CPU_LITTLE_ENDIAN
324EX( mov.w r0,@r4 ) 369EX( mov.w r0,@r4 )
325 shlr16 r0 370 shlr16 r0
326EX( mov.w r0,@(2,r4) ) 371EX( mov.w r0,@(2,r4) )
@@ -342,7 +387,7 @@ EX( mov.w r0,@r4 )
342 ! Read longword, write byte, word, byte per iteration 387 ! Read longword, write byte, word, byte per iteration
343EX( mov.l @r5+,r0 ) 388EX( mov.l @r5+,r0 )
344 dt r2 389 dt r2
345#ifdef __LITTLE_ENDIAN__ 390#ifdef CONFIG_CPU_LITTLE_ENDIAN
346EX( mov.b r0,@r4 ) 391EX( mov.b r0,@r4 )
347 shlr8 r0 392 shlr8 r0
348 add #1,r4 393 add #1,r4
@@ -379,6 +424,7 @@ EX( mov.b r0,@r4 )
379 424
380.L_exit: 425.L_exit:
381 mov #0,r0 ! normal return 426 mov #0,r0 ! normal return
427
3825000: 4285000:
383 429
384# Exception handler: 430# Exception handler:
@@ -394,5 +440,6 @@ EX( mov.b r0,@r4 )
394.previous 440.previous
395 mov.l @r15+,r8 441 mov.l @r15+,r8
396 mov.l @r15+,r9 442 mov.l @r15+,r9
443 mov.l @r15+,r10
397 rts 444 rts
398 mov.l @r15+,r10 445 mov.l @r15+,r11