author		Thomas Bogendoerfer <tsbogend@alpha.franken.de>	2007-11-25 05:47:56 -0500
committer	Ralf Baechle <ralf@linux-mips.org>	2008-01-29 05:14:58 -0500
commit		930bff882296c02ca81db108672ef4ca06c37db5 (patch)
tree		53288137d4f7cc02d8ca417edb2b25221c3007cd /arch/mips/lib/memcpy.S
parent		2064ba23e58daa929eec6f5e7a2abc24574a95b9 (diff)
[MIPS] IP28: added cache barrier to assembly routines
IP28 needs special treatment to avoid speculative accesses. gcc
takes care of this for .c code, but for assembly code we need to
do it manually.

This is taken from Peter Fuerst's IP28 patches.
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
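
For context: the R10KCBARRIER() macro used throughout this patch is introduced by the same patch series in include/asm-mips/asm.h. Quoted from memory, so treat it as a sketch rather than the authoritative definition; it expands to an R10000 cache-barrier op only when CONFIG_SGI_IP28 is set, and to nothing everywhere else:

#ifdef CONFIG_SGI_IP28
/* Inhibit speculative stores to volatile (e.g. DMA) memory. */
#define R10KCBARRIER(addr)  cache  Cache_Barrier, addr;
#else
#define R10KCBARRIER(addr)
#endif

The barrier keeps the R10000 from speculatively issuing the loads and stores that follow it, which on the cache-incoherent IP28 could otherwise let cancelled speculative accesses reach caches or DMA memory. On all other platforms the macro is empty, so the barriers below cost nothing outside IP28.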
Diffstat (limited to 'arch/mips/lib/memcpy.S')
-rw-r--r--	arch/mips/lib/memcpy.S	10
1 file changed, 10 insertions(+), 0 deletions(-)
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index aded7b159052..01e450b1ebc9 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -199,6 +199,7 @@ FEXPORT(__copy_user)
  */
 #define rem t8
 
+	R10KCBARRIER(0(ra))
 	/*
 	 * The "issue break"s below are very approximate.
 	 * Issue delays for dcache fills will perturb the schedule, as will
@@ -231,6 +232,7 @@ both_aligned:
 	PREF(	1, 3*32(dst) )
 	.align	4
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
 EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
 EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
@@ -272,6 +274,7 @@ EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
 EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
+	R10KCBARRIER(0(ra))
 EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
 EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
@@ -287,6 +290,7 @@ less_than_4units:
 	beq	rem, len, copy_bytes
 	 nop
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LOAD	t0, 0(src),	l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
@@ -334,6 +338,7 @@ EXC(	LDFIRST	t3, FIRST(0)(src),	l_exc)
 EXC(	LDREST	t3, REST(0)(src),	l_exc_copy)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
+	R10KCBARRIER(0(ra))
 EXC(	STFIRST	t3, FIRST(0)(dst),	s_exc)
 	beq	len, t2, done
 	 SUB	len, len, t2
@@ -354,6 +359,7 @@ src_unaligned_dst_aligned:
 	 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 	 * are to the same unit (unless src is aligned, but it's not).
 	 */
+	R10KCBARRIER(0(ra))
 EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
 EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
 	SUB	len, len, 4*NBYTES
@@ -384,6 +390,7 @@ cleanup_src_unaligned:
 	beq	rem, len, copy_bytes
 	 nop
 1:
+	R10KCBARRIER(0(ra))
 EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
 EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
 	ADD	src, src, NBYTES
@@ -399,6 +406,7 @@ copy_bytes_checklen:
 	 nop
 copy_bytes:
 	/* 0 < len < NBYTES */
+	R10KCBARRIER(0(ra))
 #define COPY_BYTE(N)	\
 EXC(	lb	t0, N(src), l_exc);	\
 	SUB	len, len, 1;	\
@@ -528,6 +536,7 @@ LEAF(__rmemcpy)	/* a0=dst a1=src a2=len */
 	ADD	a1, a2	# src = src + len
 
 r_end_bytes:
+	R10KCBARRIER(0(ra))
 	lb	t0, -1(a1)
 	SUB	a2, a2, 0x1
 	sb	t0, -1(a0)
@@ -542,6 +551,7 @@ r_out:
 	move	a2, zero
 
 r_end_bytes_up:
+	R10KCBARRIER(0(ra))
 	lb	t0, (a1)
 	SUB	a2, a2, 0x1
 	sb	t0, (a0)
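
A note on the EXC() wrapper seen in every hunk above: it emits the memory access together with an __ex_table entry pointing at a fixup label, so a fault during the copy branches to the matching partial-copy handler (l_exc*, s_exc*) instead of killing the kernel. A from-memory sketch of the definition earlier in arch/mips/lib/memcpy.S, shown here only to make the hunks readable:

#define EXC(inst_reg,addr,handler)	\
9:	inst_reg, addr;			\
	.section __ex_table,"a";	\
	PTR	9b, handler;		\
	.previous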