diff options
Diffstat (limited to 'arch/mips/lib')
-rw-r--r-- | arch/mips/lib/memcpy.S | 36 |
1 file changed, 22 insertions, 14 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index ab9f04641640..eed6e07bf222 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S | |||
@@ -89,6 +89,9 @@ | |||
89 | /* Instruction type */ | 89 | /* Instruction type */ |
90 | #define LD_INSN 1 | 90 | #define LD_INSN 1 |
91 | #define ST_INSN 2 | 91 | #define ST_INSN 2 |
92 | /* Prefetch type */ | ||
93 | #define SRC_PREFETCH 1 | ||
94 | #define DST_PREFETCH 2 | ||
92 | 95 | ||
93 | /* | 96 | /* |
94 | * Wrapper to add an entry in the exception table | 97 | * Wrapper to add an entry in the exception table |
@@ -174,6 +177,11 @@ | |||
174 | #define LOADB(reg, addr, handler) EXC(lb, LD_INSN, reg, addr, handler) | 177 | #define LOADB(reg, addr, handler) EXC(lb, LD_INSN, reg, addr, handler) |
175 | #define STOREB(reg, addr, handler) EXC(sb, ST_INSN, reg, addr, handler) | 178 | #define STOREB(reg, addr, handler) EXC(sb, ST_INSN, reg, addr, handler) |
176 | 179 | ||
180 | #define _PREF(hint, addr, type) PREF(hint, addr) | ||
181 | |||
182 | #define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH) | ||
183 | #define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH) | ||
184 | |||
177 | #ifdef CONFIG_CPU_LITTLE_ENDIAN | 185 | #ifdef CONFIG_CPU_LITTLE_ENDIAN |
178 | #define LDFIRST LOADR | 186 | #define LDFIRST LOADR |
179 | #define LDREST LOADL | 187 | #define LDREST LOADL |
@@ -237,16 +245,16 @@ __copy_user_common: | |||
237 | * | 245 | * |
238 | * If len < NBYTES use byte operations. | 246 | * If len < NBYTES use byte operations. |
239 | */ | 247 | */ |
240 | PREF( 0, 0(src) ) | 248 | PREFS( 0, 0(src) ) |
241 | PREF( 1, 0(dst) ) | 249 | PREFD( 1, 0(dst) ) |
242 | sltu t2, len, NBYTES | 250 | sltu t2, len, NBYTES |
243 | and t1, dst, ADDRMASK | 251 | and t1, dst, ADDRMASK |
244 | PREF( 0, 1*32(src) ) | 252 | PREFS( 0, 1*32(src) ) |
245 | PREF( 1, 1*32(dst) ) | 253 | PREFD( 1, 1*32(dst) ) |
246 | bnez t2, .Lcopy_bytes_checklen | 254 | bnez t2, .Lcopy_bytes_checklen |
247 | and t0, src, ADDRMASK | 255 | and t0, src, ADDRMASK |
248 | PREF( 0, 2*32(src) ) | 256 | PREFS( 0, 2*32(src) ) |
249 | PREF( 1, 2*32(dst) ) | 257 | PREFD( 1, 2*32(dst) ) |
250 | bnez t1, .Ldst_unaligned | 258 | bnez t1, .Ldst_unaligned |
251 | nop | 259 | nop |
252 | bnez t0, .Lsrc_unaligned_dst_aligned | 260 | bnez t0, .Lsrc_unaligned_dst_aligned |
@@ -258,8 +266,8 @@ __copy_user_common: | |||
258 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter | 266 | SRL t0, len, LOG_NBYTES+3 # +3 for 8 units/iter |
259 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES | 267 | beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES |
260 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) | 268 | and rem, len, (8*NBYTES-1) # rem = len % (8*NBYTES) |
261 | PREF( 0, 3*32(src) ) | 269 | PREFS( 0, 3*32(src) ) |
262 | PREF( 1, 3*32(dst) ) | 270 | PREFD( 1, 3*32(dst) ) |
263 | .align 4 | 271 | .align 4 |
264 | 1: | 272 | 1: |
265 | R10KCBARRIER(0(ra)) | 273 | R10KCBARRIER(0(ra)) |
@@ -282,8 +290,8 @@ __copy_user_common: | |||
282 | STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u) | 290 | STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u) |
283 | STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u) | 291 | STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u) |
284 | STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u) | 292 | STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u) |
285 | PREF( 0, 8*32(src) ) | 293 | PREFS( 0, 8*32(src) ) |
286 | PREF( 1, 8*32(dst) ) | 294 | PREFD( 1, 8*32(dst) ) |
287 | bne len, rem, 1b | 295 | bne len, rem, 1b |
288 | nop | 296 | nop |
289 | 297 | ||
@@ -378,10 +386,10 @@ __copy_user_common: | |||
378 | 386 | ||
379 | .Lsrc_unaligned_dst_aligned: | 387 | .Lsrc_unaligned_dst_aligned: |
380 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter | 388 | SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter |
381 | PREF( 0, 3*32(src) ) | 389 | PREFS( 0, 3*32(src) ) |
382 | beqz t0, .Lcleanup_src_unaligned | 390 | beqz t0, .Lcleanup_src_unaligned |
383 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES | 391 | and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES |
384 | PREF( 1, 3*32(dst) ) | 392 | PREFD( 1, 3*32(dst) ) |
385 | 1: | 393 | 1: |
386 | /* | 394 | /* |
387 | * Avoid consecutive LD*'s to the same register since some mips | 395 | * Avoid consecutive LD*'s to the same register since some mips |
@@ -399,7 +407,7 @@ __copy_user_common: | |||
399 | LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy) | 407 | LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy) |
400 | LDREST(t2, REST(2)(src), .Ll_exc_copy) | 408 | LDREST(t2, REST(2)(src), .Ll_exc_copy) |
401 | LDREST(t3, REST(3)(src), .Ll_exc_copy) | 409 | LDREST(t3, REST(3)(src), .Ll_exc_copy) |
402 | PREF( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) | 410 | PREFS( 0, 9*32(src) ) # 0 is PREF_LOAD (not streamed) |
403 | ADD src, src, 4*NBYTES | 411 | ADD src, src, 4*NBYTES |
404 | #ifdef CONFIG_CPU_SB1 | 412 | #ifdef CONFIG_CPU_SB1 |
405 | nop # improves slotting | 413 | nop # improves slotting |
@@ -408,7 +416,7 @@ __copy_user_common: | |||
408 | STORE(t1, UNIT(1)(dst), .Ls_exc_p3u) | 416 | STORE(t1, UNIT(1)(dst), .Ls_exc_p3u) |
409 | STORE(t2, UNIT(2)(dst), .Ls_exc_p2u) | 417 | STORE(t2, UNIT(2)(dst), .Ls_exc_p2u) |
410 | STORE(t3, UNIT(3)(dst), .Ls_exc_p1u) | 418 | STORE(t3, UNIT(3)(dst), .Ls_exc_p1u) |
411 | PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) | 419 | PREFD( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) |
412 | .set reorder /* DADDI_WAR */ | 420 | .set reorder /* DADDI_WAR */ |
413 | ADD dst, dst, 4*NBYTES | 421 | ADD dst, dst, 4*NBYTES |
414 | bne len, rem, 1b | 422 | bne len, rem, 1b |