author | Markos Chandras <markos.chandras@imgtec.com> | 2014-01-07 10:59:03 -0500
---|---|---
committer | Ralf Baechle <ralf@linux-mips.org> | 2014-03-26 18:09:14 -0400
commit | bda4d986a6a42710b971d5e3780fe22dce7e6afc (patch) |
tree | a1f7f251c3aab2a5eab3a1d61a0908fa0864d965 /arch/mips/lib |
parent | 5bc05971d353cd10bce5044a1ef60b9d5b504810 (diff) |
MIPS: lib: memcpy: Split source and destination prefetch macros
In preparation for EVA support, the PREF macro is split into two
separate macros, PREFS and PREFD, for source and destination data
prefetching respectively.
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
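As a rough illustration of the indirection this patch introduces, the sketch below mimics the macro layering in plain C. It is a hedged, hypothetical user-space example, not the kernel's code: `__builtin_prefetch` stands in for the MIPS `PREF` instruction, and `main()` exists only to make it self-contained. The point it demonstrates is that `PREFS`/`PREFD` carry a type tag into `_PREF`, which for now simply forwards to `PREF`.

```c
/*
 * Hedged sketch, not kernel code: __builtin_prefetch is a stand-in for
 * the MIPS PREF instruction.  PREFS/PREFD pass a type tag to _PREF,
 * which today ignores it and forwards to PREF unchanged.
 */
#include <stdio.h>
#include <string.h>

#define SRC_PREFETCH 1
#define DST_PREFETCH 2

/* Stand-in for the PREF instruction wrapper: hint 0 = load, 1 = store. */
#define PREF(hint, addr)	__builtin_prefetch((addr), (hint), 3)

/* Non-EVA behaviour from the patch: the type argument is accepted but
 * dropped, so PREFS/PREFD expand to exactly what PREF produced before. */
#define _PREF(hint, addr, type)	PREF(hint, addr)

#define PREFS(hint, addr)	_PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr)	_PREF(hint, addr, DST_PREFETCH)

int main(void)
{
	char src[64] = "prefetch me", dst[64];

	PREFS(0, src);	/* source prefetch: expands to PREF(0, src) */
	PREFD(1, dst);	/* destination prefetch: expands to PREF(1, dst) */

	memcpy(dst, src, sizeof(src));
	puts(dst);
	return 0;
}
```

Because the non-EVA `_PREF` discards the type argument at preprocessing time, the rewritten call sites in the hunks below assemble to the same instructions as before; the tag only becomes meaningful once an EVA-aware `_PREF` is introduced.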
Diffstat (limited to 'arch/mips/lib')
-rw-r--r-- | arch/mips/lib/memcpy.S | 36
1 file changed, 22 insertions, 14 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index ab9f04641640..eed6e07bf222 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -89,6 +89,9 @@
 /* Instruction type */
 #define LD_INSN 1
 #define ST_INSN 2
+/* Pretech type */
+#define SRC_PREFETCH 1
+#define DST_PREFETCH 2
 
 /*
  * Wrapper to add an entry in the exception table
@@ -174,6 +177,11 @@
 #define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
 #define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
 
+#define _PREF(hint, addr, type)	PREF(hint, addr)
+
+#define PREFS(hint, addr)	_PREF(hint, addr, SRC_PREFETCH)
+#define PREFD(hint, addr)	_PREF(hint, addr, DST_PREFETCH)
+
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define LDFIRST	LOADR
 #define LDREST	LOADL
@@ -237,16 +245,16 @@ __copy_user_common:
 	 *
 	 * If len < NBYTES use byte operations.
 	 */
-	PREF(	0, 0(src) )
-	PREF(	1, 0(dst) )
+	PREFS(	0, 0(src) )
+	PREFD(	1, 0(dst) )
 	sltu	t2, len, NBYTES
 	and	t1, dst, ADDRMASK
-	PREF(	0, 1*32(src) )
-	PREF(	1, 1*32(dst) )
+	PREFS(	0, 1*32(src) )
+	PREFD(	1, 1*32(dst) )
 	bnez	t2, .Lcopy_bytes_checklen
 	 and	t0, src, ADDRMASK
-	PREF(	0, 2*32(src) )
-	PREF(	1, 2*32(dst) )
+	PREFS(	0, 2*32(src) )
+	PREFD(	1, 2*32(dst) )
 	bnez	t1, .Ldst_unaligned
 	 nop
 	bnez	t0, .Lsrc_unaligned_dst_aligned
@@ -258,8 +266,8 @@ __copy_user_common:
 	SRL	t0, len, LOG_NBYTES+3	# +3 for 8 units/iter
 	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
 	 and	rem, len, (8*NBYTES-1)	# rem = len % (8*NBYTES)
-	PREF(	0, 3*32(src) )
-	PREF(	1, 3*32(dst) )
+	PREFS(	0, 3*32(src) )
+	PREFD(	1, 3*32(dst) )
 	.align	4
 1:
 	R10KCBARRIER(0(ra))
@@ -282,8 +290,8 @@ __copy_user_common:
 	STORE(t7, UNIT(-3)(dst),	.Ls_exc_p3u)
 	STORE(t0, UNIT(-2)(dst),	.Ls_exc_p2u)
 	STORE(t1, UNIT(-1)(dst),	.Ls_exc_p1u)
-	PREF(	0, 8*32(src) )
-	PREF(	1, 8*32(dst) )
+	PREFS(	0, 8*32(src) )
+	PREFD(	1, 8*32(dst) )
 	bne	len, rem, 1b
 	 nop
 
@@ -378,10 +386,10 @@ __copy_user_common:
 
 .Lsrc_unaligned_dst_aligned:
 	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
-	PREF(	0, 3*32(src) )
+	PREFS(	0, 3*32(src) )
 	beqz	t0, .Lcleanup_src_unaligned
 	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
-	PREF(	1, 3*32(dst) )
+	PREFD(	1, 3*32(dst) )
 1:
 /*
  * Avoid consecutive LD*'s to the same register since some mips
@@ -399,7 +407,7 @@ __copy_user_common:
 	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
 	LDREST(t2, REST(2)(src), .Ll_exc_copy)
 	LDREST(t3, REST(3)(src), .Ll_exc_copy)
-	PREF(	0, 9*32(src) )	# 0 is PREF_LOAD  (not streamed)
+	PREFS(	0, 9*32(src) )	# 0 is PREF_LOAD  (not streamed)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop	# improves slotting
@@ -408,7 +416,7 @@ __copy_user_common:
 	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u)
 	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u)
 	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u)
-	PREF(	1, 9*32(dst) )	# 1 is PREF_STORE (not streamed)
+	PREFD(	1, 9*32(dst) )	# 1 is PREF_STORE (not streamed)
 	.set	reorder	/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b