Diffstat (limited to 'arch/mips/lib/memcpy.S')
-rw-r--r--	arch/mips/lib/memcpy.S	| 182
1 file changed, 91 insertions(+), 91 deletions(-)
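This commit renames every internal label in memcpy.S to carry the `.L` prefix. The GNU assembler treats labels beginning with `.L` as assembler-local symbols and does not emit them into the object file's symbol table, so branch targets such as `both_aligned` or `l_exc` stop appearing alongside `memcpy` in objdump and backtrace output, while the exported entry points (`memcpy`, `__copy_user`, `memmove`, `__rmemcpy`) remain visible. A minimal standalone sketch of the difference (hypothetical GAS input, not taken from this file):

		.text
		.globl	entry
	entry:				# .globl symbol: visible in the symbol table
	loop:				# ordinary label: also lands in the symbol table
		b	loop
		 nop			# branch delay slot
	.Lloop:				# .L prefix: assembler-local, never emitted
		b	.Lloop
		 nop

Assembling this and running nm on the resulting object shows `entry` and `loop`, but no `.Lloop`.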
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index 01e450b1ebc9..c06cccf60bec 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -191,7 +191,7 @@
 	.align	5
 LEAF(memcpy)					/* a0=dst a1=src a2=len */
 	move	v0, dst				/* return value */
-__memcpy:
+.L__memcpy:
 FEXPORT(__copy_user)
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
@@ -213,45 +213,45 @@ FEXPORT(__copy_user)
 	and	t1, dst, ADDRMASK
 	PREF(	0, 1*32(src) )
 	PREF(	1, 1*32(dst) )
-	bnez	t2, copy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen
 	 and	t0, src, ADDRMASK
 	PREF(	0, 2*32(src) )
 	PREF(	1, 2*32(dst) )
-	bnez	t1, dst_unaligned
+	bnez	t1, .Ldst_unaligned
 	 nop
-	bnez	t0, src_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-both_aligned:
+.Lboth_aligned:
 	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
-	beqz	t0, cleanup_both_aligned	# len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
 	 and	rem, len, (8*NBYTES-1)		# rem = len % (8*NBYTES)
 	PREF(	0, 3*32(src) )
 	PREF(	1, 3*32(dst) )
 	.align	4
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
 	SUB	len, len, 8*NBYTES
-EXC(	LOAD	t4, UNIT(4)(src),	l_exc_copy)
-EXC(	LOAD	t7, UNIT(5)(src),	l_exc_copy)
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
-EXC(	LOAD	t0, UNIT(6)(src),	l_exc_copy)
-EXC(	LOAD	t1, UNIT(7)(src),	l_exc_copy)
+EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
+EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p8u)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p7u)
+EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy)
+EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy)
 	ADD	src, src, 8*NBYTES
 	ADD	dst, dst, 8*NBYTES
-EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
-EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
-EXC(	STORE	t4, UNIT(-4)(dst),	s_exc_p4u)
-EXC(	STORE	t7, UNIT(-3)(dst),	s_exc_p3u)
-EXC(	STORE	t0, UNIT(-2)(dst),	s_exc_p2u)
-EXC(	STORE	t1, UNIT(-1)(dst),	s_exc_p1u)
+EXC(	STORE	t2, UNIT(-6)(dst),	.Ls_exc_p6u)
+EXC(	STORE	t3, UNIT(-5)(dst),	.Ls_exc_p5u)
+EXC(	STORE	t4, UNIT(-4)(dst),	.Ls_exc_p4u)
+EXC(	STORE	t7, UNIT(-3)(dst),	.Ls_exc_p3u)
+EXC(	STORE	t0, UNIT(-2)(dst),	.Ls_exc_p2u)
+EXC(	STORE	t1, UNIT(-1)(dst),	.Ls_exc_p1u)
 	PREF(	0, 8*32(src) )
 	PREF(	1, 8*32(dst) )
 	bne	len, rem, 1b
@@ -260,41 +260,41 @@ EXC(	STORE	t1, UNIT(-1)(dst),	s_exc_p1u)
 	/*
 	 * len == rem == the number of bytes left to copy < 8*NBYTES
 	 */
-cleanup_both_aligned:
-	beqz	len, done
+.Lcleanup_both_aligned:
+	beqz	len, .Ldone
 	 sltu	t0, len, 4*NBYTES
-	bnez	t0, less_than_4units
+	bnez	t0, .Lless_than_4units
 	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
 	R10KCBARRIER(0(ra))
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
-EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
-EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
-less_than_4units:
+.Lless_than_4units:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
@@ -312,17 +312,17 @@ EXC(	STORE	t0, 0(dst),		s_exc_p1u)
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, done
+	beqz	len, .Ldone
 	 ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src),		l_exc)
+EXC(	LOAD	t0, 0(src),		.Ll_exc)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-EXC(	STREST	t0, -1(t1),		s_exc)
+EXC(	STREST	t0, -1(t1),		.Ls_exc)
 	jr	ra
 	 move	len, zero
-dst_unaligned:
+.Ldst_unaligned:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
@@ -333,23 +333,23 @@ dst_unaligned:
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
+EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
+EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
 	R10KCBARRIER(0(ra))
-EXC(	STFIRST t3, FIRST(0)(dst),	s_exc)
-	beq	len, t2, done
+EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
+	beq	len, t2, .Ldone
 	 SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, both_aligned
+	beqz	match, .Lboth_aligned
 	 ADD	src, src, t2
 
-src_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned:
 	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
 	PREF(	0, 3*32(src) )
-	beqz	t0, cleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned
 	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
 	PREF(	1, 3*32(dst) )
 1:
@@ -360,58 +360,58 @@ src_unaligned_dst_aligned:
 	 * are to the same unit (unless src is aligned, but it's not).
 	 */
 	R10KCBARRIER(0(ra))
-EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
+EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
 	SUB	len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
-EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
-EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
-EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
+EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
+EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
+EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
+EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
 	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
-EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
-EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
-EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
 	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-cleanup_src_unaligned:
-	beqz	len, done
+.Lcleanup_src_unaligned:
+	beqz	len, .Ldone
 	 and	rem, len, NBYTES-1	# rem = len % NBYTES
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes
 	 nop
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
-EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+EXC(	LDFIRST t0, FIRST(0)(src),	.Ll_exc)
+EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-copy_bytes_checklen:
-	beqz	len, done
+.Lcopy_bytes_checklen:
+	beqz	len, .Ldone
 	 nop
-copy_bytes:
+.Lcopy_bytes:
 	/* 0 < len < NBYTES */
 	R10KCBARRIER(0(ra))
 #define COPY_BYTE(N)			\
-EXC(	lb	t0, N(src), l_exc);	\
+EXC(	lb	t0, N(src), .Ll_exc);	\
 	SUB	len, len, 1;		\
-	beqz	len, done;		\
-EXC(	 sb	t0, N(dst), s_exc_p1)
+	beqz	len, .Ldone;		\
+EXC(	 sb	t0, N(dst), .Ls_exc_p1)
 
 COPY_BYTE(0)
 COPY_BYTE(1)
@@ -421,16 +421,16 @@ EXC(	 sb	t0, N(dst), s_exc_p1)
 COPY_BYTE(4)
 COPY_BYTE(5)
 #endif
-EXC(	lb	t0, NBYTES-2(src), l_exc)
+EXC(	lb	t0, NBYTES-2(src), .Ll_exc)
 	SUB	len, len, 1
 	jr	ra
-EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
-done:
+EXC(	 sb	t0, NBYTES-2(dst), .Ls_exc_p1)
+.Ldone:
 	jr	ra
 	 nop
 	END(memcpy)
 
-l_exc_copy:
+.Ll_exc_copy:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -445,14 +445,14 @@ l_exc_copy:
 	 nop
 	LOAD	t0, THREAD_BUADDR(t0)
 1:
-EXC(	lb	t1, 0(src),	l_exc)
+EXC(	lb	t1, 0(src),	.Ll_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-l_exc:
+.Ll_exc:
 	LOAD	t0, TI_TASK($28)
 	 nop
 	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
@@ -471,7 +471,7 @@ l_exc:
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, done
+	beqz	len, .Ldone
 	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -492,7 +492,7 @@ l_exc:
 
 #define SEXC(n)						\
 	.set	reorder;		/* DADDI_WAR */	\
-s_exc_p ## n ## u:					\
+.Ls_exc_p ## n ## u:					\
 	ADD	len, len, n*NBYTES;			\
 	jr	ra;					\
 	.set	noreorder
@@ -506,12 +506,12 @@ SEXC(3)
 SEXC(2)
 SEXC(1)
 
-s_exc_p1:
+.Ls_exc_p1:
 	.set	reorder				/* DADDI_WAR */
 	ADD	len, len, 1
 	jr	ra
 	.set	noreorder
-s_exc:
+.Ls_exc:
 	jr	ra
 	 nop
 
@@ -522,20 +522,20 @@ LEAF(memmove)
 	sltu	t0, a1, t0			# dst + len <= src -> memcpy
 	sltu	t1, a0, t1			# dst >= src + len -> memcpy
 	and	t0, t1
-	beqz	t0, __memcpy
+	beqz	t0, .L__memcpy
 	 move	v0, a0				/* return value */
-	beqz	a2, r_out
+	beqz	a2, .Lr_out
 	END(memmove)
 
 	/* fall through to __rmemcpy */
 LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
 	sltu	t0, a1, a0
-	beqz	t0, r_end_bytes_up		# src >= dst
+	beqz	t0, .Lr_end_bytes_up		# src >= dst
 	 nop
 	ADD	a0, a2				# dst = dst + len
 	ADD	a1, a2				# src = src + len
 
-r_end_bytes:
+.Lr_end_bytes:
 	R10KCBARRIER(0(ra))
 	lb	t0, -1(a1)
 	SUB	a2, a2, 0x1
@@ -543,14 +543,14 @@ r_end_bytes:
 	SUB	a1, a1, 0x1
 	.set	reorder				/* DADDI_WAR */
 	SUB	a0, a0, 0x1
-	bnez	a2, r_end_bytes
+	bnez	a2, .Lr_end_bytes
 	.set	noreorder
 
-r_out:
+.Lr_out:
 	jr	ra
 	 move	a2, zero
 
-r_end_bytes_up:
+.Lr_end_bytes_up:
 	R10KCBARRIER(0(ra))
 	lb	t0, (a1)
 	SUB	a2, a2, 0x1
@@ -558,7 +558,7 @@ r_end_bytes_up:
 	ADD	a1, a1, 0x1
 	.set	reorder				/* DADDI_WAR */
 	ADD	a0, a0, 0x1
-	bnez	a2, r_end_bytes_up
+	bnez	a2, .Lr_end_bytes_up
 	.set	noreorder
 
 	jr	ra
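For context on the EXC() lines above: in this file the macro emits the instruction together with an entry in the kernel's __ex_table exception table, mapping the instruction's address to the fixup handler named by the third argument (now a `.L` label). Roughly, the pattern looks like this (a paraphrased sketch, not the exact definition from memcpy.S):

	#define EXC(inst_reg, addr, handler)		\
	9:	inst_reg, addr;				\
		.section __ex_table, "a";		\
		PTR	9b, handler;			\
		.previous

Because the exception table records the handler's address rather than its name, switching the fixup targets (.Ll_exc, .Ls_exc_p1u, ...) to assembler-local labels changes nothing at run time; the labels simply no longer appear in the kernel's symbol table.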