diff options
Diffstat (limited to 'arch/mips/lib/memcpy-inatomic.S')
-rw-r--r-- | arch/mips/lib/memcpy-inatomic.S | 116 |
1 file changed, 58 insertions, 58 deletions
diff --git a/arch/mips/lib/memcpy-inatomic.S b/arch/mips/lib/memcpy-inatomic.S
index d1b08f5d6860..736d0fb56a94 100644
--- a/arch/mips/lib/memcpy-inatomic.S
+++ b/arch/mips/lib/memcpy-inatomic.S | |||
@@ -209,36 +209,36 @@ LEAF(__copy_user_inatomic) | |||
209 | and t1, dst, ADDRMASK | 209 | and t1, dst, ADDRMASK |
210 | PREF( 0, 1*32(src) ) | 210 | PREF( 0, 1*32(src) ) |
211 | PREF( 1, 1*32(dst) ) | 211 | PREF( 1, 1*32(dst) ) |
212 | bnez t2, copy_bytes_checklen | 212 | bnez t2, .Lcopy_bytes_checklen |
213 | and t0, src, ADDRMASK | 213 | and t0, src, ADDRMASK |
214 | PREF( 0, 2*32(src) ) | 214 | PREF( 0, 2*32(src) ) |
215 | PREF( 1, 2*32(dst) ) | 215 | PREF( 1, 2*32(dst) ) |
216 | bnez t1, dst_unaligned | 216 | bnez t1, .Ldst_unaligned |
217 | nop | 217 | nop |
218 | bnez t0, src_unaligned_dst_aligned | 218 | bnez t0, .Lsrc_unaligned_dst_aligned |
219 | /* | 219 | /* |
220 | * use delay slot for fall-through | 220 | * use delay slot for fall-through |
221 | * src and dst are aligned; need to compute rem | 221 | * src and dst are aligned; need to compute rem |
222 | */ | 222 | */ |
223 | both_aligned: | 223 | .Lboth_aligned: |
224 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 224 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
225 | beqz t0, cleanup_both_aligned # len < 8*NBYTES | 225 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
226 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 226 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
227 | PREF( 0, 3*32(src) ) | 227 | PREF( 0, 3*32(src) ) |
228 | PREF( 1, 3*32(dst) ) | 228 | PREF( 1, 3*32(dst) ) |
229 | .align 4 | 229 | .align 4 |
230 | 1: | 230 | 1: |
231 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 231 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
232 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 232 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
233 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 233 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
234 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 234 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
235 | SUB len, len, 8*NBYTES | 235 | SUB len, len, 8*NBYTES |
236 | EXC( LOAD t4, UNIT(4)(src), l_exc_copy) | 236 | EXC( LOAD t4, UNIT(4)(src), .Ll_exc_copy) |
237 | EXC( LOAD t7, UNIT(5)(src), l_exc_copy) | 237 | EXC( LOAD t7, UNIT(5)(src), .Ll_exc_copy) |
238 | STORE t0, UNIT(0)(dst) | 238 | STORE t0, UNIT(0)(dst) |
239 | STORE t1, UNIT(1)(dst) | 239 | STORE t1, UNIT(1)(dst) |
240 | EXC( LOAD t0, UNIT(6)(src), l_exc_copy) | 240 | EXC( LOAD t0, UNIT(6)(src), .Ll_exc_copy) |
241 | EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | 241 | EXC( LOAD t1, UNIT(7)(src), .Ll_exc_copy) |
242 | ADD src, src, 8*NBYTES | 242 | ADD src, src, 8*NBYTES |
243 | ADD dst, dst, 8*NBYTES | 243 | ADD dst, dst, 8*NBYTES |
244 | STORE t2, UNIT(-6)(dst) | 244 | STORE t2, UNIT(-6)(dst) |
@@ -255,18 +255,18 @@ EXC( LOAD t1, UNIT(7)(src), l_exc_copy) | |||
255 | /* | 255 | /* |
256 | * len == rem == the number of bytes left to copy < 8*NBYTES | 256 | * len == rem == the number of bytes left to copy < 8*NBYTES |
257 | */ | 257 | */ |
258 | cleanup_both_aligned: | 258 | .Lcleanup_both_aligned: |
259 | beqz len, done | 259 | beqz len, .Ldone |
260 | sltu t0, len, 4*NBYTES | 260 | sltu t0, len, 4*NBYTES |
261 | bnez t0, less_than_4units | 261 | bnez t0, .Lless_than_4units |
262 | and rem, len, (NBYTES-1) # rem = len % NBYTES | 262 | and rem, len, (NBYTES-1) # rem = len % NBYTES |
263 | /* | 263 | /* |
264 | * len >= 4*NBYTES | 264 | * len >= 4*NBYTES |
265 | */ | 265 | */ |
266 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 266 | EXC( LOAD t0, UNIT(0)(src), .Ll_exc) |
267 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 267 | EXC( LOAD t1, UNIT(1)(src), .Ll_exc_copy) |
268 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 268 | EXC( LOAD t2, UNIT(2)(src), .Ll_exc_copy) |
269 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 269 | EXC( LOAD t3, UNIT(3)(src), .Ll_exc_copy) |
270 | SUB len, len, 4*NBYTES | 270 | SUB len, len, 4*NBYTES |
271 | ADD src, src, 4*NBYTES | 271 | ADD src, src, 4*NBYTES |
272 | STORE t0, UNIT(0)(dst) | 272 | STORE t0, UNIT(0)(dst) |
@@ -275,16 +275,16 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | |||
275 | STORE t3, UNIT(3)(dst) | 275 | STORE t3, UNIT(3)(dst) |
276 | .set reorder /* DADDI_WAR */ | 276 | .set reorder /* DADDI_WAR */ |
277 | ADD dst, dst, 4*NBYTES | 277 | ADD dst, dst, 4*NBYTES |
278 | beqz len, done | 278 | beqz len, .Ldone |
279 | .set noreorder | 279 | .set noreorder |
280 | less_than_4units: | 280 | .Lless_than_4units: |
281 | /* | 281 | /* |
282 | * rem = len % NBYTES | 282 | * rem = len % NBYTES |
283 | */ | 283 | */ |
284 | beq rem, len, copy_bytes | 284 | beq rem, len, .Lcopy_bytes |
285 | nop | 285 | nop |
286 | 1: | 286 | 1: |
287 | EXC( LOAD t0, 0(src), l_exc) | 287 | EXC( LOAD t0, 0(src), .Ll_exc) |
288 | ADD src, src, NBYTES | 288 | ADD src, src, NBYTES |
289 | SUB len, len, NBYTES | 289 | SUB len, len, NBYTES |
290 | STORE t0, 0(dst) | 290 | STORE t0, 0(dst) |
@@ -305,17 +305,17 @@ EXC( LOAD t0, 0(src), l_exc) | |||
305 | * more instruction-level parallelism. | 305 | * more instruction-level parallelism. |
306 | */ | 306 | */ |
307 | #define bits t2 | 307 | #define bits t2 |
308 | beqz len, done | 308 | beqz len, .Ldone |
309 | ADD t1, dst, len # t1 is just past last byte of dst | 309 | ADD t1, dst, len # t1 is just past last byte of dst |
310 | li bits, 8*NBYTES | 310 | li bits, 8*NBYTES |
311 | SLL rem, len, 3 # rem = number of bits to keep | 311 | SLL rem, len, 3 # rem = number of bits to keep |
312 | EXC( LOAD t0, 0(src), l_exc) | 312 | EXC( LOAD t0, 0(src), .Ll_exc) |
313 | SUB bits, bits, rem # bits = number of bits to discard | 313 | SUB bits, bits, rem # bits = number of bits to discard |
314 | SHIFT_DISCARD t0, t0, bits | 314 | SHIFT_DISCARD t0, t0, bits |
315 | STREST t0, -1(t1) | 315 | STREST t0, -1(t1) |
316 | jr ra | 316 | jr ra |
317 | move len, zero | 317 | move len, zero |
318 | dst_unaligned: | 318 | .Ldst_unaligned: |
319 | /* | 319 | /* |
320 | * dst is unaligned | 320 | * dst is unaligned |
321 | * t0 = src & ADDRMASK | 321 | * t0 = src & ADDRMASK |
@@ -326,22 +326,22 @@ dst_unaligned: | |||
326 | * Set match = (src and dst have same alignment) | 326 | * Set match = (src and dst have same alignment) |
327 | */ | 327 | */ |
328 | #define match rem | 328 | #define match rem |
329 | EXC( LDFIRST t3, FIRST(0)(src), l_exc) | 329 | EXC( LDFIRST t3, FIRST(0)(src), .Ll_exc) |
330 | ADD t2, zero, NBYTES | 330 | ADD t2, zero, NBYTES |
331 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 331 | EXC( LDREST t3, REST(0)(src), .Ll_exc_copy) |
332 | SUB t2, t2, t1 # t2 = number of bytes copied | 332 | SUB t2, t2, t1 # t2 = number of bytes copied |
333 | xor match, t0, t1 | 333 | xor match, t0, t1 |
334 | STFIRST t3, FIRST(0)(dst) | 334 | STFIRST t3, FIRST(0)(dst) |
335 | beq len, t2, done | 335 | beq len, t2, .Ldone |
336 | SUB len, len, t2 | 336 | SUB len, len, t2 |
337 | ADD dst, dst, t2 | 337 | ADD dst, dst, t2 |
338 | beqz match, both_aligned | 338 | beqz match, .Lboth_aligned |
339 | ADD src, src, t2 | 339 | ADD src, src, t2 |
340 | 340 | ||
341 | src_unaligned_dst_aligned: | 341 | .Lsrc_unaligned_dst_aligned: |
342 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 342 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
343 | PREF( 0, 3*32(src) ) | 343 | PREF( 0, 3*32(src) ) |
344 | beqz t0, cleanup_src_unaligned | 344 | beqz t0, .Lcleanup_src_unaligned |
345 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 345 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
346 | PREF( 1, 3*32(dst) ) | 346 | PREF( 1, 3*32(dst) ) |
347 | 1: | 347 | 1: |
@@ -351,15 +351,15 @@ src_unaligned_dst_aligned: | |||
351 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 351 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
352 | * are to the same unit (unless src is aligned, but it's not). | 352 | * are to the same unit (unless src is aligned, but it's not). |
353 | */ | 353 | */ |
354 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 354 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
355 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 355 | EXC( LDFIRST t1, FIRST(1)(src), .Ll_exc_copy) |
356 | SUB len, len, 4*NBYTES | 356 | SUB len, len, 4*NBYTES |
357 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 357 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
358 | EXC( LDREST t1, REST(1)(src), l_exc_copy) | 358 | EXC( LDREST t1, REST(1)(src), .Ll_exc_copy) |
359 | EXC( LDFIRST t2, FIRST(2)(src), l_exc_copy) | 359 | EXC( LDFIRST t2, FIRST(2)(src), .Ll_exc_copy) |
360 | EXC( LDFIRST t3, FIRST(3)(src), l_exc_copy) | 360 | EXC( LDFIRST t3, FIRST(3)(src), .Ll_exc_copy) |
361 | EXC( LDREST t2, REST(2)(src), l_exc_copy) | 361 | EXC( LDREST t2, REST(2)(src), .Ll_exc_copy) |
362 | EXC( LDREST t3, REST(3)(src), l_exc_copy) | 362 | EXC( LDREST t3, REST(3)(src), .Ll_exc_copy) |
363 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 363 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
364 | ADD src, src, 4*NBYTES | 364 | ADD src, src, 4*NBYTES |
365 | #ifdef CONFIG_CPU_SB1 | 365 | #ifdef CONFIG_CPU_SB1 |
@@ -375,14 +375,14 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy) | |||
375 | bne len, rem, 1b | 375 | bne len, rem, 1b |
376 | .set noreorder | 376 | .set noreorder |
377 | 377 | ||
378 | cleanup_src_unaligned: | 378 | .Lcleanup_src_unaligned: |
379 | beqz len, done | 379 | beqz len, .Ldone |
380 | and rem, len, NBYTES-1 # rem = len % NBYTES | 380 | and rem, len, NBYTES-1 # rem = len % NBYTES |
381 | beq rem, len, copy_bytes | 381 | beq rem, len, .Lcopy_bytes |
382 | nop | 382 | nop |
383 | 1: | 383 | 1: |
384 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 384 | EXC( LDFIRST t0, FIRST(0)(src), .Ll_exc) |
385 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 385 | EXC( LDREST t0, REST(0)(src), .Ll_exc_copy) |
386 | ADD src, src, NBYTES | 386 | ADD src, src, NBYTES |
387 | SUB len, len, NBYTES | 387 | SUB len, len, NBYTES |
388 | STORE t0, 0(dst) | 388 | STORE t0, 0(dst) |
@@ -391,15 +391,15 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) | |||
391 | bne len, rem, 1b | 391 | bne len, rem, 1b |
392 | .set noreorder | 392 | .set noreorder |
393 | 393 | ||
394 | copy_bytes_checklen: | 394 | .Lcopy_bytes_checklen: |
395 | beqz len, done | 395 | beqz len, .Ldone |
396 | nop | 396 | nop |
397 | copy_bytes: | 397 | .Lcopy_bytes: |
398 | /* 0 < len < NBYTES */ | 398 | /* 0 < len < NBYTES */ |
399 | #define COPY_BYTE(N) \ | 399 | #define COPY_BYTE(N) \ |
400 | EXC( lb t0, N(src), l_exc); \ | 400 | EXC( lb t0, N(src), .Ll_exc); \ |
401 | SUB len, len, 1; \ | 401 | SUB len, len, 1; \ |
402 | beqz len, done; \ | 402 | beqz len, .Ldone; \ |
403 | sb t0, N(dst) | 403 | sb t0, N(dst) |
404 | 404 | ||
405 | COPY_BYTE(0) | 405 | COPY_BYTE(0) |
@@ -410,16 +410,16 @@ EXC( lb t0, N(src), l_exc); \ | |||
410 | COPY_BYTE(4) | 410 | COPY_BYTE(4) |
411 | COPY_BYTE(5) | 411 | COPY_BYTE(5) |
412 | #endif | 412 | #endif |
413 | EXC( lb t0, NBYTES-2(src), l_exc) | 413 | EXC( lb t0, NBYTES-2(src), .Ll_exc) |
414 | SUB len, len, 1 | 414 | SUB len, len, 1 |
415 | jr ra | 415 | jr ra |
416 | sb t0, NBYTES-2(dst) | 416 | sb t0, NBYTES-2(dst) |
417 | done: | 417 | .Ldone: |
418 | jr ra | 418 | jr ra |
419 | nop | 419 | nop |
420 | END(__copy_user_inatomic) | 420 | END(__copy_user_inatomic) |
421 | 421 | ||
422 | l_exc_copy: | 422 | .Ll_exc_copy: |
423 | /* | 423 | /* |
424 | * Copy bytes from src until faulting load address (or until a | 424 | * Copy bytes from src until faulting load address (or until a |
425 | * lb faults) | 425 | * lb faults) |
@@ -434,14 +434,14 @@ l_exc_copy: | |||
434 | nop | 434 | nop |
435 | LOAD t0, THREAD_BUADDR(t0) | 435 | LOAD t0, THREAD_BUADDR(t0) |
436 | 1: | 436 | 1: |
437 | EXC( lb t1, 0(src), l_exc) | 437 | EXC( lb t1, 0(src), .Ll_exc) |
438 | ADD src, src, 1 | 438 | ADD src, src, 1 |
439 | sb t1, 0(dst) # can't fault -- we're copy_from_user | 439 | sb t1, 0(dst) # can't fault -- we're copy_from_user |
440 | .set reorder /* DADDI_WAR */ | 440 | .set reorder /* DADDI_WAR */ |
441 | ADD dst, dst, 1 | 441 | ADD dst, dst, 1 |
442 | bne src, t0, 1b | 442 | bne src, t0, 1b |
443 | .set noreorder | 443 | .set noreorder |
444 | l_exc: | 444 | .Ll_exc: |
445 | LOAD t0, TI_TASK($28) | 445 | LOAD t0, TI_TASK($28) |
446 | nop | 446 | nop |
447 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address | 447 | LOAD t0, THREAD_BUADDR(t0) # t0 is just past last good address |