diff options
Diffstat (limited to 'arch/mips/lib/memcpy.S')
-rw-r--r-- | arch/mips/lib/memcpy.S | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index aded7b159052..01e450b1ebc9 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S | |||
@@ -199,6 +199,7 @@ FEXPORT(__copy_user) | |||
199 | */ | 199 | */ |
200 | #define rem t8 | 200 | #define rem t8 |
201 | 201 | ||
202 | R10KCBARRIER(0(ra)) | ||
202 | /* | 203 | /* |
203 | * The "issue break"s below are very approximate. | 204 | * The "issue break"s below are very approximate. |
204 | * Issue delays for dcache fills will perturb the schedule, as will | 205 | * Issue delays for dcache fills will perturb the schedule, as will |
@@ -231,6 +232,7 @@ both_aligned: | |||
231 | PREF( 1, 3*32(dst) ) | 232 | PREF( 1, 3*32(dst) ) |
232 | .align 4 | 233 | .align 4 |
233 | 1: | 234 | 1: |
235 | R10KCBARRIER(0(ra)) | ||
234 | EXC( LOAD t0, UNIT(0)(src), l_exc) | 236 | EXC( LOAD t0, UNIT(0)(src), l_exc) |
235 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) | 237 | EXC( LOAD t1, UNIT(1)(src), l_exc_copy) |
236 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | 238 | EXC( LOAD t2, UNIT(2)(src), l_exc_copy) |
@@ -272,6 +274,7 @@ EXC( LOAD t2, UNIT(2)(src), l_exc_copy) | |||
272 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) | 274 | EXC( LOAD t3, UNIT(3)(src), l_exc_copy) |
273 | SUB len, len, 4*NBYTES | 275 | SUB len, len, 4*NBYTES |
274 | ADD src, src, 4*NBYTES | 276 | ADD src, src, 4*NBYTES |
277 | R10KCBARRIER(0(ra)) | ||
275 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) | 278 | EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) |
276 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) | 279 | EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) |
277 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) | 280 | EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) |
@@ -287,6 +290,7 @@ less_than_4units: | |||
287 | beq rem, len, copy_bytes | 290 | beq rem, len, copy_bytes |
288 | nop | 291 | nop |
289 | 1: | 292 | 1: |
293 | R10KCBARRIER(0(ra)) | ||
290 | EXC( LOAD t0, 0(src), l_exc) | 294 | EXC( LOAD t0, 0(src), l_exc) |
291 | ADD src, src, NBYTES | 295 | ADD src, src, NBYTES |
292 | SUB len, len, NBYTES | 296 | SUB len, len, NBYTES |
@@ -334,6 +338,7 @@ EXC( LDFIRST t3, FIRST(0)(src), l_exc) | |||
334 | EXC( LDREST t3, REST(0)(src), l_exc_copy) | 338 | EXC( LDREST t3, REST(0)(src), l_exc_copy) |
335 | SUB t2, t2, t1 # t2 = number of bytes copied | 339 | SUB t2, t2, t1 # t2 = number of bytes copied |
336 | xor match, t0, t1 | 340 | xor match, t0, t1 |
341 | R10KCBARRIER(0(ra)) | ||
337 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) | 342 | EXC( STFIRST t3, FIRST(0)(dst), s_exc) |
338 | beq len, t2, done | 343 | beq len, t2, done |
339 | SUB len, len, t2 | 344 | SUB len, len, t2 |
@@ -354,6 +359,7 @@ src_unaligned_dst_aligned: | |||
354 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses | 359 | * It's OK to load FIRST(N+1) before REST(N) because the two addresses |
355 | * are to the same unit (unless src is aligned, but it's not). | 360 | * are to the same unit (unless src is aligned, but it's not). |
356 | */ | 361 | */ |
362 | R10KCBARRIER(0(ra)) | ||
357 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 363 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) |
358 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) | 364 | EXC( LDFIRST t1, FIRST(1)(src), l_exc_copy) |
359 | SUB len, len, 4*NBYTES | 365 | SUB len, len, 4*NBYTES |
@@ -384,6 +390,7 @@ cleanup_src_unaligned: | |||
384 | beq rem, len, copy_bytes | 390 | beq rem, len, copy_bytes |
385 | nop | 391 | nop |
386 | 1: | 392 | 1: |
393 | R10KCBARRIER(0(ra)) | ||
387 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) | 394 | EXC( LDFIRST t0, FIRST(0)(src), l_exc) |
388 | EXC( LDREST t0, REST(0)(src), l_exc_copy) | 395 | EXC( LDREST t0, REST(0)(src), l_exc_copy) |
389 | ADD src, src, NBYTES | 396 | ADD src, src, NBYTES |
@@ -399,6 +406,7 @@ copy_bytes_checklen: | |||
399 | nop | 406 | nop |
400 | copy_bytes: | 407 | copy_bytes: |
401 | /* 0 < len < NBYTES */ | 408 | /* 0 < len < NBYTES */ |
409 | R10KCBARRIER(0(ra)) | ||
402 | #define COPY_BYTE(N) \ | 410 | #define COPY_BYTE(N) \ |
403 | EXC( lb t0, N(src), l_exc); \ | 411 | EXC( lb t0, N(src), l_exc); \ |
404 | SUB len, len, 1; \ | 412 | SUB len, len, 1; \ |
@@ -528,6 +536,7 @@ LEAF(__rmemcpy) /* a0=dst a1=src a2=len */ | |||
528 | ADD a1, a2 # src = src + len | 536 | ADD a1, a2 # src = src + len |
529 | 537 | ||
530 | r_end_bytes: | 538 | r_end_bytes: |
539 | R10KCBARRIER(0(ra)) | ||
531 | lb t0, -1(a1) | 540 | lb t0, -1(a1) |
532 | SUB a2, a2, 0x1 | 541 | SUB a2, a2, 0x1 |
533 | sb t0, -1(a0) | 542 | sb t0, -1(a0) |
@@ -542,6 +551,7 @@ r_out: | |||
542 | move a2, zero | 551 | move a2, zero |
543 | 552 | ||
544 | r_end_bytes_up: | 553 | r_end_bytes_up: |
554 | R10KCBARRIER(0(ra)) | ||
545 | lb t0, (a1) | 555 | lb t0, (a1) |
546 | SUB a2, a2, 0x1 | 556 | SUB a2, a2, 0x1 |
547 | sb t0, (a0) | 557 | sb t0, (a0) |