author		Markos Chandras <markos.chandras@imgtec.com>	2014-01-07 09:34:05 -0500
committer	Ralf Baechle <ralf@linux-mips.org>	2014-03-26 18:09:14 -0400
commit		cf62a8b8134dd37ab365bfe8b972a7ebb1100110 (patch)
tree		58f9eace6288c6ed15d6bf48e6e077bf10a3341b
parent		bda4d986a6a42710b971d5e3780fe22dce7e6afc (diff)
MIPS: lib: memcpy: Use macro to build the copy_user code
The code can be shared between EVA and non-EVA configurations,
so use a macro to build it and avoid code duplication.
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
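The whole patch rests on one GAS feature: inside a .macro body, the pseudo-variable \@ expands to a counter that is unique to each macro invocation, so local labels written as .Ldone\@ can live in a body that is instantiated once per configuration (legacy and, later, EVA) without colliding. A minimal sketch of the mechanism, with illustrative names that are not part of the patch:

	.macro	__build_copy mode	# one body, expanded once per configuration
	.if	\mode == 1		# emit mode-specific code
	li	t0, 0
	.endif
.Ldone\@:				# \@ is unique per expansion, so the
	jr	ra			# two expansions get distinct labels
	.endm

	__build_copy 1			# first expansion
	__build_copy 2			# second expansion, no label clash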
-rw-r--r--	arch/mips/lib/memcpy.S	253
1 file changed, 143 insertions, 110 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index eed6e07bf222..d630a28a118b 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -92,6 +92,10 @@
 /* Prefetch type */
 #define SRC_PREFETCH 1
 #define DST_PREFETCH 2
+#define LEGACY_MODE 1
+#define EVA_MODE    2
+#define USEROP   1
+#define KERNELOP 2
 
 /*
  * Wrapper to add an entry in the exception table
@@ -103,12 +107,14 @@
  * addr    : Address
  * handler : Exception handler
  */
-#define EXC(insn, type, reg, addr, handler)	\
-9:	insn reg, addr;				\
-	.section __ex_table,"a";		\
-	PTR	9b, handler;			\
-	.previous
 
+#define EXC(insn, type, reg, addr, handler)	\
+	.if \mode == LEGACY_MODE;		\
+9:	insn reg, addr;				\
+	.section __ex_table,"a";		\
+	PTR	9b, handler;			\
+	.previous;				\
+	.endif
 /*
  * Only on the 64-bit kernel can we make use of 64-bit registers.
  */
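The rewritten EXC() wrapper is what ties a potentially faulting access to its fixup code: the access gets a numbered local label, and a PTR entry in the __ex_table section maps that address to the handler the fault path should jump to. As a rough illustration (reusing the .Ll_exc handler name from later in this file), EXC(lw, LD_INSN, t0, 0(src), .Ll_exc) expands in LEGACY_MODE to:

9:	lw	t0, 0(src)		# the access that may fault
	.section __ex_table, "a"	# exception (fixup) table
	PTR	9b, .Ll_exc		# faulting PC 9b -> handler .Ll_exc
	.previous			# back to the previous section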
@@ -177,7 +183,10 @@
 #define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
 #define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
 
-#define _PREF(hint, addr, type)	PREF(hint, addr)
+#define _PREF(hint, addr, type)	\
+	.if \mode == LEGACY_MODE;	\
+	PREF(hint, addr);		\
+	.endif
 
 #define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
 #define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
@@ -210,27 +219,23 @@
 	.set	at=v1
 #endif
 
-/*
- * t6 is used as a flag to note inatomic mode.
- */
-LEAF(__copy_user_inatomic)
-	b	__copy_user_common
-	li	t6, 1
-END(__copy_user_inatomic)
-
-/*
- * A combined memcpy/__copy_user
- * __copy_user sets len to 0 for success; else to an upper bound of
- * the number of uncopied bytes.
- * memcpy sets v0 to dst.
- */
 	.align	5
-LEAF(memcpy)				/* a0=dst a1=src a2=len */
-	move	v0, dst			/* return value */
-.L__memcpy:
-FEXPORT(__copy_user)
-	li	t6, 0	/* not inatomic */
-__copy_user_common:
+
+/*
+ * Macro to build the __copy_user common code
+ * Arguments:
+ * mode : LEGACY_MODE or EVA_MODE
+ * from : Source operand. USEROP or KERNELOP
+ * to   : Destination operand. USEROP or KERNELOP
+ */
+	.macro __BUILD_COPY_USER mode, from, to
+
+	/* initialize __memcpy if this is the first time we execute this macro */
+	.ifnotdef __memcpy
+	.set __memcpy, 1
+	.hidden __memcpy /* make sure it does not leak */
+	.endif
+
 /*
  * Note: dst & src may be unaligned, len may be 0
  * Temps
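The .ifnotdef block above is a one-shot guard: the assembler symbol __memcpy is undefined the first time the macro is expanded, so only the first expansion defines it (and hides it, keeping the helper symbol out of the object's symbol table); later expansions see it already defined and skip the block. A stripped-down sketch of the idiom, using a hypothetical flag name:

	.macro	emit_once
	.ifnotdef __once_flag		# true only on the first expansion
	.set	__once_flag, 1		# define the flag...
	.hidden	__once_flag		# ...and keep it out of the symbol table
	nop				# code emitted a single time
	.endif
	.endm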
@@ -251,45 +256,45 @@ __copy_user_common:
 	and	t1, dst, ADDRMASK
 	PREFS(	0, 1*32(src) )
 	PREFD(	1, 1*32(dst) )
-	bnez	t2, .Lcopy_bytes_checklen
+	bnez	t2, .Lcopy_bytes_checklen\@
 	and	t0, src, ADDRMASK
 	PREFS(	0, 2*32(src) )
 	PREFD(	1, 2*32(dst) )
-	bnez	t1, .Ldst_unaligned
+	bnez	t1, .Ldst_unaligned\@
 	nop
-	bnez	t0, .Lsrc_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned\@
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-.Lboth_aligned:
+.Lboth_aligned\@:
 	SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
-	beqz	t0, .Lcleanup_both_aligned	# len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
 	and	rem, len, (8*NBYTES-1)		# rem = len % (8*NBYTES)
 	PREFS(	0, 3*32(src) )
 	PREFD(	1, 3*32(dst) )
 	.align	4
 1:
 	R10KCBARRIER(0(ra))
-	LOAD(t0, UNIT(0)(src), .Ll_exc)
-	LOAD(t1, UNIT(1)(src), .Ll_exc_copy)
-	LOAD(t2, UNIT(2)(src), .Ll_exc_copy)
-	LOAD(t3, UNIT(3)(src), .Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
 	SUB	len, len, 8*NBYTES
-	LOAD(t4, UNIT(4)(src), .Ll_exc_copy)
-	LOAD(t7, UNIT(5)(src), .Ll_exc_copy)
-	STORE(t0, UNIT(0)(dst), .Ls_exc_p8u)
-	STORE(t1, UNIT(1)(dst), .Ls_exc_p7u)
-	LOAD(t0, UNIT(6)(src), .Ll_exc_copy)
-	LOAD(t1, UNIT(7)(src), .Ll_exc_copy)
+	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
+	LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
+	STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
+	STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
+	LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
+	LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
 	ADD	src, src, 8*NBYTES
 	ADD	dst, dst, 8*NBYTES
-	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u)
-	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u)
-	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u)
-	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u)
-	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u)
-	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u)
+	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
+	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
+	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
+	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
+	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
+	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
 	PREFS(	0, 8*32(src) )
 	PREFD(	1, 8*32(dst) )
 	bne	len, rem, 1b
@@ -298,41 +303,41 @@ __copy_user_common:
 	/*
 	 * len == rem == the number of bytes left to copy < 8*NBYTES
 	 */
-.Lcleanup_both_aligned:
-	beqz	len, .Ldone
+.Lcleanup_both_aligned\@:
+	beqz	len, .Ldone\@
 	sltu	t0, len, 4*NBYTES
-	bnez	t0, .Lless_than_4units
+	bnez	t0, .Lless_than_4units\@
 	and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-	LOAD( t0, UNIT(0)(src), .Ll_exc)
-	LOAD( t1, UNIT(1)(src), .Ll_exc_copy)
-	LOAD( t2, UNIT(2)(src), .Ll_exc_copy)
-	LOAD( t3, UNIT(3)(src), .Ll_exc_copy)
+	LOAD( t0, UNIT(0)(src), .Ll_exc\@)
+	LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
+	LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
+	LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
 	R10KCBARRIER(0(ra))
-	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u)
-	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u)
-	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u)
-	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u)
+	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
+	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
+	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
+	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
-.Lless_than_4units:
+.Lless_than_4units\@:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	nop
 1:
 	R10KCBARRIER(0(ra))
-	LOAD(t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-	STORE(t0, 0(dst), .Ls_exc_p1u)
+	STORE(t0, 0(dst), .Ls_exc_p1u\@)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
@@ -350,17 +355,17 @@ __copy_user_common:
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-	LOAD(t0, 0(src), .Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-	STREST(t0, -1(t1), .Ls_exc)
+	STREST(t0, -1(t1), .Ls_exc\@)
 	jr	ra
 	move	len, zero
-.Ldst_unaligned:
+.Ldst_unaligned\@:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
@@ -371,23 +376,23 @@ __copy_user_common:
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-	LDFIRST(t3, FIRST(0)(src), .Ll_exc)
+	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
 	ADD	t2, zero, NBYTES
-	LDREST(t3, REST(0)(src), .Ll_exc_copy)
+	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
 	R10KCBARRIER(0(ra))
-	STFIRST(t3, FIRST(0)(dst), .Ls_exc)
-	beq	len, t2, .Ldone
+	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
+	beq	len, t2, .Ldone\@
 	SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, .Lboth_aligned
+	beqz	match, .Lboth_aligned\@
 	ADD	src, src, t2
 
-.Lsrc_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned\@:
 	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
 	PREFS(	0, 3*32(src) )
-	beqz	t0, .Lcleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned\@
 	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
 	PREFD(	1, 3*32(dst) )
 1:
@@ -398,58 +403,58 @@ __copy_user_common:
 	 * are to the same unit (unless src is aligned, but it's not).
 	 */
 	R10KCBARRIER(0(ra))
-	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
-	LDREST(t0, REST(0)(src), .Ll_exc_copy)
-	LDREST(t1, REST(1)(src), .Ll_exc_copy)
-	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy)
-	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy)
-	LDREST(t2, REST(2)(src), .Ll_exc_copy)
-	LDREST(t3, REST(3)(src), .Ll_exc_copy)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
+	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
+	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
+	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
+	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
+	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
 	PREFS(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u)
-	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u)
-	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u)
-	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u)
+	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
+	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
+	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
+	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
 	PREFD(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-.Lcleanup_src_unaligned:
-	beqz	len, .Ldone
+.Lcleanup_src_unaligned\@:
+	beqz	len, .Ldone\@
 	and	rem, len, NBYTES-1	# rem = len % NBYTES
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	nop
 1:
 	R10KCBARRIER(0(ra))
-	LDFIRST(t0, FIRST(0)(src), .Ll_exc)
-	LDREST(t0, REST(0)(src), .Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-	STORE(t0, 0(dst), .Ls_exc_p1u)
+	STORE(t0, 0(dst), .Ls_exc_p1u\@)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder
 
-.Lcopy_bytes_checklen:
-	beqz	len, .Ldone
+.Lcopy_bytes_checklen\@:
+	beqz	len, .Ldone\@
 	nop
-.Lcopy_bytes:
+.Lcopy_bytes\@:
 	/* 0 < len < NBYTES */
 	R10KCBARRIER(0(ra))
 #define COPY_BYTE(N)			\
-	LOADB(t0, N(src), .Ll_exc);	\
+	LOADB(t0, N(src), .Ll_exc\@);	\
 	SUB	len, len, 1;		\
-	beqz	len, .Ldone;		\
-	STOREB(t0, N(dst), .Ls_exc_p1)
+	beqz	len, .Ldone\@;		\
+	STOREB(t0, N(dst), .Ls_exc_p1\@)
 
 	COPY_BYTE(0)
 	COPY_BYTE(1)
@@ -459,16 +464,19 @@ __copy_user_common:
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-	LOADB(t0, NBYTES-2(src), .Ll_exc)
+	LOADB(t0, NBYTES-2(src), .Ll_exc\@)
 	SUB	len, len, 1
 	jr	ra
-	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1)
-.Ldone:
+	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
+.Ldone\@:
 	jr	ra
-	nop
+	.if __memcpy == 1
 	END(memcpy)
+	.set __memcpy, 0
+	.hidden __memcpy
+	.endif
 
-.Ll_exc_copy:
+.Ll_exc_copy\@:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
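This is the counterpart of the one-shot guard: LEAF(memcpy) is opened exactly once, outside the macro, so END(memcpy) must also be emitted exactly once, by whichever expansion runs first. The .if below the .Ldone\@ label does that and then clears the flag so every later expansion falls through:

	.if	__memcpy == 1		# true only in the first expansion
	END(memcpy)			# close the symbol opened by LEAF(memcpy)
	.set	__memcpy, 0		# later expansions skip this block
	.endif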
@@ -483,20 +491,20 @@ __copy_user_common:
 	nop
 	LOADK	t0, THREAD_BUADDR(t0)
 1:
-	LOADB(t1, 0(src), .Ll_exc)
+	LOADB(t1, 0(src), .Ll_exc\@)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-.Ll_exc:
+.Ll_exc\@:
 	LOADK	t0, TI_TASK($28)
 	nop
 	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
 	nop
 	SUB	len, AT, t0		# len number of uncopied bytes
-	bnez	t6, .Ldone	/* Skip the zeroing part if inatomic */
+	bnez	t6, .Ldone\@	/* Skip the zeroing part if inatomic */
 	/*
 	 * Here's where we rely on src and dst being incremented in tandem,
 	 * See (3) above.
@@ -510,7 +518,7 @@ __copy_user_common:
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -531,7 +539,7 @@ __copy_user_common:
 
 #define SEXC(n)						\
 	.set	reorder;		/* DADDI_WAR */	\
-.Ls_exc_p ## n ## u:					\
+.Ls_exc_p ## n ## u\@:					\
 	ADD	len, len, n*NBYTES;			\
 	jr	ra;					\
 	.set	noreorder
@@ -545,14 +553,15 @@ SEXC(3)
 SEXC(2)
 SEXC(1)
 
-.Ls_exc_p1:
+.Ls_exc_p1\@:
 	.set	reorder				/* DADDI_WAR */
 	ADD	len, len, 1
 	jr	ra
 	.set	noreorder
-.Ls_exc:
+.Ls_exc\@:
 	jr	ra
 	nop
+	.endm
 
 	.align	5
 	LEAF(memmove)
@@ -603,3 +612,27 @@ LEAF(__rmemcpy)			/* a0=dst a1=src a2=len */
 	jr	ra
 	move	a2, zero
 END(__rmemcpy)
+
+/*
+ * t6 is used as a flag to note inatomic mode.
+ */
+LEAF(__copy_user_inatomic)
+	b	__copy_user_common
+	li	t6, 1
+END(__copy_user_inatomic)
+
+/*
+ * A combined memcpy/__copy_user
+ * __copy_user sets len to 0 for success; else to an upper bound of
+ * the number of uncopied bytes.
+ * memcpy sets v0 to dst.
+ */
+	.align	5
+LEAF(memcpy)				/* a0=dst a1=src a2=len */
+	move	v0, dst			/* return value */
+.L__memcpy:
+FEXPORT(__copy_user)
+	li	t6, 0	/* not inatomic */
+__copy_user_common:
+	/* Legacy Mode, user <-> user */
+	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP
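The file now ends with a single instantiation of the macro for the legacy (non-EVA) case. An EVA configuration would instantiate the same body with different mode and operand arguments; the invocation below is hypothetical, shown only to illustrate the intended reuse (the actual EVA instantiations belong to later patches in this series):

	/* hypothetical EVA instantiation, not part of this patch */
	__BUILD_COPY_USER EVA_MODE USEROP KERNELOP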