author		Thomas Bogendoerfer <tsbogend@alpha.franken.de>	2007-11-25 05:47:56 -0500
committer	Ralf Baechle <ralf@linux-mips.org>	2008-01-29 05:14:58 -0500
commit		930bff882296c02ca81db108672ef4ca06c37db5 (patch)
tree		53288137d4f7cc02d8ca417edb2b25221c3007cd /arch/mips/lib/memcpy.S
parent		2064ba23e58daa929eec6f5e7a2abc24574a95b9 (diff)
[MIPS] IP28: added cache barrier to assembly routines
IP28 needs special treatment to avoid speculative accesses. gcc
takes care of this for .c code, but for assembly code we need to
do it manually.

This is taken from Peter Fuerst's IP28 patches.
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
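
For context: the R10KCBARRIER() macro used throughout this patch is introduced by the same patch series in include/asm-mips/asm.h. Quoted from memory, so treat it as a sketch rather than the authoritative definition; it expands to an R10000 cache-barrier op only when CONFIG_SGI_IP28 is set, and to nothing everywhere else:

#ifdef CONFIG_SGI_IP28
/* Inhibit speculative stores to volatile (e.g. DMA) memory. */
#define R10KCBARRIER(addr)  cache  Cache_Barrier, addr;
#else
#define R10KCBARRIER(addr)
#endif

The barrier keeps the R10000 from speculatively issuing the loads and stores that follow it, which on the cache-incoherent IP28 could otherwise let cancelled speculative accesses reach caches or DMA memory. On all other platforms the macro is empty, so the barriers below cost nothing outside IP28.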
Diffstat (limited to 'arch/mips/lib/memcpy.S')
-rw-r--r--	arch/mips/lib/memcpy.S	10
1 file changed, 10 insertions(+), 0 deletions(-)
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index aded7b159052..01e450b1ebc9 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -199,6 +199,7 @@ FEXPORT(__copy_user)
  */
 #define rem t8
 
+	R10KCBARRIER(0(ra))
 	/*
 	 * The "issue break"s below are very approximate.
 	 * Issue delays for dcache fills will perturb the schedule, as will
@@ -231,6 +232,7 @@ both_aligned:
 	PREF(	1, 3*32(dst) )
 	.align	4
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
 EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
 EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
@@ -272,6 +274,7 @@ EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
 EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
+	R10KCBARRIER(0(ra))
 EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
 EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
@@ -287,6 +290,7 @@ less_than_4units:
 	beq	rem, len, copy_bytes
 	 nop
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LOAD	t0, 0(src),	l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
@@ -334,6 +338,7 @@ EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
 EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
+	R10KCBARRIER(0(ra))
 EXC(	STFIRST	t3, FIRST(0)(dst),	s_exc)
 	beq	len, t2, done
 	 SUB	len, len, t2
@@ -354,6 +359,7 @@ src_unaligned_dst_aligned:
 	 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 	 * are to the same unit (unless src is aligned, but it's not).
 	 */
+	R10KCBARRIER(0(ra))
 EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
 EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
 	SUB	len, len, 4*NBYTES
@@ -384,6 +390,7 @@ cleanup_src_unaligned:
 	beq	rem, len, copy_bytes
 	 nop
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
 EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
 	ADD	src, src, NBYTES
@@ -399,6 +406,7 @@ copy_bytes_checklen:
 	 nop
 copy_bytes:
 	/* 0 < len < NBYTES */
+	R10KCBARRIER(0(ra))
 #define COPY_BYTE(N)	\
 EXC(	lb	t0, N(src), l_exc);	\
 	SUB	len, len, 1;	\
@@ -528,6 +536,7 @@ LEAF(__rmemcpy)	/* a0=dst a1=src a2=len */
 	ADD	a1, a2	# src = src + len
 
 r_end_bytes:
+	R10KCBARRIER(0(ra))
 	lb	t0, -1(a1)
 	SUB	a2, a2, 0x1
 	sb	t0, -1(a0)
@@ -542,6 +551,7 @@ r_out:
 	move	a2, zero
 
 r_end_bytes_up:
+	R10KCBARRIER(0(ra))
 	lb	t0, (a1)
 	SUB	a2, a2, 0x1
 	sb	t0, (a0)
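
A note on the EXC() wrapper seen in every hunk above: it emits the memory access together with an __ex_table entry pointing at a fixup label, so a fault during the copy branches to the matching partial-copy handler (l_exc*, s_exc*) instead of killing the kernel. A from-memory sketch of the definition earlier in arch/mips/lib/memcpy.S, shown here only to make the hunks readable:

#define EXC(inst_reg,addr,handler)	\
9:	inst_reg, addr;			\
	.section __ex_table,"a";	\
	PTR	9b, handler;		\
	.previous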