Diffstat (limited to 'arch/mips/lib/memcpy.S')
-rw-r--r--  arch/mips/lib/memcpy.S  416
1 file changed, 277 insertions, 139 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index c5c40dad0bbf..c17ef80cf65a 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -10,6 +10,7 @@
  * Copyright (C) 2002 Broadcom, Inc.
  *   memcpy/copy_user author: Mark Vandevoorde
  * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2014 Imagination Technologies Ltd.
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -85,11 +86,51 @@
  * they're not protected.
  */
 
-#define EXC(inst_reg,addr,handler)              \
-9:      inst_reg, addr;                         \
-        .section __ex_table,"a";                \
-        PTR     9b, handler;                    \
-        .previous
+/* Instruction type */
+#define LD_INSN 1
+#define ST_INSN 2
+/* Prefetch type */
+#define SRC_PREFETCH 1
+#define DST_PREFETCH 2
+#define LEGACY_MODE 1
+#define EVA_MODE    2
+#define USEROP   1
+#define KERNELOP 2
+
+/*
+ * Wrapper to add an entry in the exception table
+ * in case the insn causes a memory exception.
+ * Arguments:
+ * insn    : Load/store instruction
+ * type    : Instruction type
+ * reg     : Register
+ * addr    : Address
+ * handler : Exception handler
+ */
+
+#define EXC(insn, type, reg, addr, handler)                     \
+        .if \mode == LEGACY_MODE;                               \
+9:              insn reg, addr;                                 \
+                .section __ex_table,"a";                        \
+                PTR     9b, handler;                            \
+                .previous;                                      \
+        /* This is assembled in EVA mode */                     \
+        .else;                                                  \
+                /* If loading from user or storing to user */   \
+                .if ((\from == USEROP) && (type == LD_INSN)) || \
+                    ((\to == USEROP) && (type == ST_INSN));     \
+9:                      __BUILD_EVA_INSN(insn##e, reg, addr);   \
+                        .section __ex_table,"a";                \
+                        PTR     9b, handler;                    \
+                        .previous;                              \
+                .else;                                          \
+                        /*                                      \
+                         * Still in EVA, but no need for        \
+                         * exception handler or EVA insn        \
+                         */                                     \
+                        insn reg, addr;                         \
+                .endif;                                         \
+        .endif
 
 /*
  * Only on the 64-bit kernel we can made use of 64-bit registers.
@@ -100,12 +141,13 @@
 
 #ifdef USE_DOUBLE
 
-#define LOAD   ld
-#define LOADL  ldl
-#define LOADR  ldr
-#define STOREL sdl
-#define STORER sdr
-#define STORE  sd
+#define LOADK ld /* No exception */
+#define LOAD(reg, addr, handler)        EXC(ld, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)       EXC(ldl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)       EXC(ldr, LD_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)      EXC(sdl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)      EXC(sdr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)       EXC(sd, ST_INSN, reg, addr, handler)
 #define ADD    daddu
 #define SUB    dsubu
 #define SRL    dsrl
@@ -136,12 +178,13 @@
 
 #else
 
-#define LOAD   lw
-#define LOADL  lwl
-#define LOADR  lwr
-#define STOREL swl
-#define STORER swr
-#define STORE  sw
+#define LOADK lw /* No exception */
+#define LOAD(reg, addr, handler)        EXC(lw, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)       EXC(lwl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)       EXC(lwr, LD_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)      EXC(swl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)      EXC(swr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)       EXC(sw, ST_INSN, reg, addr, handler)
 #define ADD    addu
 #define SUB    subu
 #define SRL    srl
@@ -154,6 +197,33 @@
 
 #endif /* USE_DOUBLE */
 
+#define LOADB(reg, addr, handler)       EXC(lb, LD_INSN, reg, addr, handler)
+#define STOREB(reg, addr, handler)      EXC(sb, ST_INSN, reg, addr, handler)
+
+#define _PREF(hint, addr, type)                                 \
+        .if \mode == LEGACY_MODE;                               \
+                PREF(hint, addr);                               \
+        .else;                                                  \
+                .if ((\from == USEROP) && (type == SRC_PREFETCH)) || \
+                    ((\to == USEROP) && (type == DST_PREFETCH)); \
+                        /*                                      \
+                         * PREFE has only 9 bits for the offset \
+                         * compared to PREF which has 16, so it may \
+                         * need to use the $at register but this \
+                         * register should remain intact because it's \
+                         * used later on. Therefore use $v1.    \
+                         */                                     \
+                        .set at=v1;                             \
+                        PREFE(hint, addr);                      \
+                        .set noat;                              \
+                .else;                                          \
+                        PREF(hint, addr);                       \
+                .endif;                                         \
+        .endif
+
+#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
+#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
+
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define LDFIRST LOADR
 #define LDREST  LOADL
@@ -182,27 +252,23 @@
         .set    at=v1
 #endif
 
-/*
- * t6 is used as a flag to note inatomic mode.
- */
-LEAF(__copy_user_inatomic)
-        b       __copy_user_common
-        li      t6, 1
-        END(__copy_user_inatomic)
-
-/*
- * A combined memcpy/__copy_user
- * __copy_user sets len to 0 for success; else to an upper bound of
- * the number of uncopied bytes.
- * memcpy sets v0 to dst.
- */
         .align  5
-LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
-        move    v0, dst                         /* return value */
-.L__memcpy:
-FEXPORT(__copy_user)
-        li      t6, 0   /* not inatomic */
-__copy_user_common:
+
+        /*
+         * Macro to build the __copy_user common code
+         * Arguments:
+         * mode : LEGACY_MODE or EVA_MODE
+         * from : Source operand. USEROP or KERNELOP
+         * to   : Destination operand. USEROP or KERNELOP
+         */
+        .macro __BUILD_COPY_USER mode, from, to
+
+        /* initialize __memcpy if this is the first time we execute this macro */
+        .ifnotdef __memcpy
+        .set __memcpy, 1
+        .hidden __memcpy /* make sure it does not leak */
+        .endif
+
         /*
          * Note: dst & src may be unaligned, len may be 0
          * Temps
@@ -217,94 +283,94 @@ __copy_user_common:
          *
          * If len < NBYTES use byte operations.
          */
-        PREF(   0, 0(src) )
-        PREF(   1, 0(dst) )
+        PREFS(  0, 0(src) )
+        PREFD(  1, 0(dst) )
         sltu    t2, len, NBYTES
         and     t1, dst, ADDRMASK
-        PREF(   0, 1*32(src) )
-        PREF(   1, 1*32(dst) )
-        bnez    t2, .Lcopy_bytes_checklen
+        PREFS(  0, 1*32(src) )
+        PREFD(  1, 1*32(dst) )
+        bnez    t2, .Lcopy_bytes_checklen\@
         and     t0, src, ADDRMASK
-        PREF(   0, 2*32(src) )
-        PREF(   1, 2*32(dst) )
-        bnez    t1, .Ldst_unaligned
+        PREFS(  0, 2*32(src) )
+        PREFD(  1, 2*32(dst) )
+        bnez    t1, .Ldst_unaligned\@
         nop
-        bnez    t0, .Lsrc_unaligned_dst_aligned
+        bnez    t0, .Lsrc_unaligned_dst_aligned\@
         /*
          * use delay slot for fall-through
          * src and dst are aligned; need to compute rem
         */
-.Lboth_aligned:
+.Lboth_aligned\@:
         SRL     t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
-        beqz    t0, .Lcleanup_both_aligned # len < 8*NBYTES
+        beqz    t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
         and     rem, len, (8*NBYTES-1)   # rem = len % (8*NBYTES)
-        PREF(   0, 3*32(src) )
-        PREF(   1, 3*32(dst) )
+        PREFS(  0, 3*32(src) )
+        PREFD(  1, 3*32(dst) )
         .align  4
 1:
         R10KCBARRIER(0(ra))
-EXC(    LOAD    t0, UNIT(0)(src),       .Ll_exc)
-EXC(    LOAD    t1, UNIT(1)(src),       .Ll_exc_copy)
-EXC(    LOAD    t2, UNIT(2)(src),       .Ll_exc_copy)
-EXC(    LOAD    t3, UNIT(3)(src),       .Ll_exc_copy)
+        LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+        LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+        LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+        LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
         SUB     len, len, 8*NBYTES
-EXC(    LOAD    t4, UNIT(4)(src),       .Ll_exc_copy)
-EXC(    LOAD    t7, UNIT(5)(src),       .Ll_exc_copy)
-EXC(    STORE   t0, UNIT(0)(dst),       .Ls_exc_p8u)
-EXC(    STORE   t1, UNIT(1)(dst),       .Ls_exc_p7u)
-EXC(    LOAD    t0, UNIT(6)(src),       .Ll_exc_copy)
-EXC(    LOAD    t1, UNIT(7)(src),       .Ll_exc_copy)
+        LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
+        LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
+        STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
+        STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
+        LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
+        LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
         ADD     src, src, 8*NBYTES
         ADD     dst, dst, 8*NBYTES
-EXC(    STORE   t2, UNIT(-6)(dst),      .Ls_exc_p6u)
-EXC(    STORE   t3, UNIT(-5)(dst),      .Ls_exc_p5u)
-EXC(    STORE   t4, UNIT(-4)(dst),      .Ls_exc_p4u)
-EXC(    STORE   t7, UNIT(-3)(dst),      .Ls_exc_p3u)
-EXC(    STORE   t0, UNIT(-2)(dst),      .Ls_exc_p2u)
-EXC(    STORE   t1, UNIT(-1)(dst),      .Ls_exc_p1u)
-        PREF(   0, 8*32(src) )
-        PREF(   1, 8*32(dst) )
+        STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
+        STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
+        STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
+        STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
+        STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
+        STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
+        PREFS(  0, 8*32(src) )
+        PREFD(  1, 8*32(dst) )
         bne     len, rem, 1b
         nop
 
         /*
          * len == rem == the number of bytes left to copy < 8*NBYTES
         */
-.Lcleanup_both_aligned:
-        beqz    len, .Ldone
+.Lcleanup_both_aligned\@:
+        beqz    len, .Ldone\@
         sltu    t0, len, 4*NBYTES
-        bnez    t0, .Lless_than_4units
+        bnez    t0, .Lless_than_4units\@
         and     rem, len, (NBYTES-1)    # rem = len % NBYTES
         /*
          * len >= 4*NBYTES
         */
-EXC(    LOAD    t0, UNIT(0)(src),       .Ll_exc)
-EXC(    LOAD    t1, UNIT(1)(src),       .Ll_exc_copy)
-EXC(    LOAD    t2, UNIT(2)(src),       .Ll_exc_copy)
-EXC(    LOAD    t3, UNIT(3)(src),       .Ll_exc_copy)
+        LOAD( t0, UNIT(0)(src), .Ll_exc\@)
+        LOAD( t1, UNIT(1)(src), .Ll_exc_copy\@)
+        LOAD( t2, UNIT(2)(src), .Ll_exc_copy\@)
+        LOAD( t3, UNIT(3)(src), .Ll_exc_copy\@)
         SUB     len, len, 4*NBYTES
         ADD     src, src, 4*NBYTES
         R10KCBARRIER(0(ra))
-EXC(    STORE   t0, UNIT(0)(dst),       .Ls_exc_p4u)
-EXC(    STORE   t1, UNIT(1)(dst),       .Ls_exc_p3u)
-EXC(    STORE   t2, UNIT(2)(dst),       .Ls_exc_p2u)
-EXC(    STORE   t3, UNIT(3)(dst),       .Ls_exc_p1u)
+        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
+        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
+        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
+        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
         .set    reorder                         /* DADDI_WAR */
         ADD     dst, dst, 4*NBYTES
-        beqz    len, .Ldone
+        beqz    len, .Ldone\@
         .set    noreorder
-.Lless_than_4units:
+.Lless_than_4units\@:
         /*
          * rem = len % NBYTES
         */
-        beq     rem, len, .Lcopy_bytes
+        beq     rem, len, .Lcopy_bytes\@
         nop
 1:
         R10KCBARRIER(0(ra))
-EXC(    LOAD    t0, 0(src),             .Ll_exc)
+        LOAD(t0, 0(src), .Ll_exc\@)
         ADD     src, src, NBYTES
         SUB     len, len, NBYTES
-EXC(    STORE   t0, 0(dst),             .Ls_exc_p1u)
+        STORE(t0, 0(dst), .Ls_exc_p1u\@)
         .set    reorder                         /* DADDI_WAR */
         ADD     dst, dst, NBYTES
         bne     rem, len, 1b
@@ -322,17 +388,17 @@ EXC(    STORE   t0, 0(dst),             .Ls_exc_p1u)
          * more instruction-level parallelism.
         */
#define bits t2
-        beqz    len, .Ldone
+        beqz    len, .Ldone\@
         ADD     t1, dst, len    # t1 is just past last byte of dst
         li      bits, 8*NBYTES
         SLL     rem, len, 3     # rem = number of bits to keep
-EXC(    LOAD    t0, 0(src),             .Ll_exc)
+        LOAD(t0, 0(src), .Ll_exc\@)
         SUB     bits, bits, rem # bits = number of bits to discard
         SHIFT_DISCARD t0, t0, bits
-EXC(    STREST  t0, -1(t1),             .Ls_exc)
+        STREST(t0, -1(t1), .Ls_exc\@)
         jr      ra
         move    len, zero
-.Ldst_unaligned:
+.Ldst_unaligned\@:
         /*
          * dst is unaligned
          * t0 = src & ADDRMASK
@@ -343,25 +409,25 @@ EXC(    STREST  t0, -1(t1),             .Ls_exc)
          * Set match = (src and dst have same alignment)
         */
#define match rem
-EXC(    LDFIRST t3, FIRST(0)(src),      .Ll_exc)
+        LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
         ADD     t2, zero, NBYTES
-EXC(    LDREST  t3, REST(0)(src),       .Ll_exc_copy)
+        LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
         SUB     t2, t2, t1      # t2 = number of bytes copied
         xor     match, t0, t1
         R10KCBARRIER(0(ra))
-EXC(    STFIRST t3, FIRST(0)(dst),      .Ls_exc)
-        beq     len, t2, .Ldone
+        STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
+        beq     len, t2, .Ldone\@
         SUB     len, len, t2
         ADD     dst, dst, t2
-        beqz    match, .Lboth_aligned
+        beqz    match, .Lboth_aligned\@
         ADD     src, src, t2
 
-.Lsrc_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned\@:
         SRL     t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
-        PREF(   0, 3*32(src) )
-        beqz    t0, .Lcleanup_src_unaligned
+        PREFS(  0, 3*32(src) )
+        beqz    t0, .Lcleanup_src_unaligned\@
         and     rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
-        PREF(   1, 3*32(dst) )
+        PREFD(  1, 3*32(dst) )
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
@@ -370,58 +436,58 @@ EXC(    STFIRST t3, FIRST(0)(dst),      .Ls_exc)
 * are to the same unit (unless src is aligned, but it's not).
 */
         R10KCBARRIER(0(ra))
-EXC(    LDFIRST t0, FIRST(0)(src),      .Ll_exc)
-EXC(    LDFIRST t1, FIRST(1)(src),      .Ll_exc_copy)
+        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+        LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
         SUB     len, len, 4*NBYTES
-EXC(    LDREST  t0, REST(0)(src),       .Ll_exc_copy)
-EXC(    LDREST  t1, REST(1)(src),       .Ll_exc_copy)
-EXC(    LDFIRST t2, FIRST(2)(src),      .Ll_exc_copy)
-EXC(    LDFIRST t3, FIRST(3)(src),      .Ll_exc_copy)
-EXC(    LDREST  t2, REST(2)(src),       .Ll_exc_copy)
-EXC(    LDREST  t3, REST(3)(src),       .Ll_exc_copy)
-        PREF(   0, 9*32(src) )          # 0 is PREF_LOAD  (not streamed)
+        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
+        LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
+        LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
+        LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
+        LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
+        LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
+        PREFS(  0, 9*32(src) )          # 0 is PREF_LOAD  (not streamed)
         ADD     src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
         nop                             # improves slotting
#endif
-EXC(    STORE   t0, UNIT(0)(dst),       .Ls_exc_p4u)
-EXC(    STORE   t1, UNIT(1)(dst),       .Ls_exc_p3u)
-EXC(    STORE   t2, UNIT(2)(dst),       .Ls_exc_p2u)
-EXC(    STORE   t3, UNIT(3)(dst),       .Ls_exc_p1u)
-        PREF(   1, 9*32(dst) )          # 1 is PREF_STORE (not streamed)
+        STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
+        STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
+        STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
+        STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
+        PREFD(  1, 9*32(dst) )          # 1 is PREF_STORE (not streamed)
         .set    reorder                         /* DADDI_WAR */
         ADD     dst, dst, 4*NBYTES
         bne     len, rem, 1b
         .set    noreorder
 
-.Lcleanup_src_unaligned:
-        beqz    len, .Ldone
+.Lcleanup_src_unaligned\@:
+        beqz    len, .Ldone\@
         and     rem, len, NBYTES-1  # rem = len % NBYTES
-        beq     rem, len, .Lcopy_bytes
+        beq     rem, len, .Lcopy_bytes\@
         nop
1:
         R10KCBARRIER(0(ra))
-EXC(    LDFIRST t0, FIRST(0)(src),      .Ll_exc)
-EXC(    LDREST  t0, REST(0)(src),       .Ll_exc_copy)
+        LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+        LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
         ADD     src, src, NBYTES
         SUB     len, len, NBYTES
-EXC(    STORE   t0, 0(dst),             .Ls_exc_p1u)
+        STORE(t0, 0(dst), .Ls_exc_p1u\@)
         .set    reorder                         /* DADDI_WAR */
         ADD     dst, dst, NBYTES
         bne     len, rem, 1b
         .set    noreorder
 
-.Lcopy_bytes_checklen:
-        beqz    len, .Ldone
+.Lcopy_bytes_checklen\@:
+        beqz    len, .Ldone\@
         nop
-.Lcopy_bytes:
+.Lcopy_bytes\@:
         /* 0 < len < NBYTES */
         R10KCBARRIER(0(ra))
#define COPY_BYTE(N)                    \
-EXC(    lb      t0, N(src), .Ll_exc);   \
+        LOADB(t0, N(src), .Ll_exc\@);   \
         SUB     len, len, 1;            \
-        beqz    len, .Ldone;            \
-EXC(    sb      t0, N(dst), .Ls_exc_p1)
+        beqz    len, .Ldone\@;          \
+        STOREB(t0, N(dst), .Ls_exc_p1\@)
 
         COPY_BYTE(0)
         COPY_BYTE(1)
@@ -431,16 +497,19 @@ EXC(    sb      t0, N(dst), .Ls_exc_p1)
         COPY_BYTE(4)
         COPY_BYTE(5)
#endif
-EXC(    lb      t0, NBYTES-2(src), .Ll_exc)
+        LOADB(t0, NBYTES-2(src), .Ll_exc\@)
         SUB     len, len, 1
         jr      ra
-EXC(    sb      t0, NBYTES-2(dst), .Ls_exc_p1)
-.Ldone:
+        STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
+.Ldone\@:
         jr      ra
-        nop
+        .if __memcpy == 1
         END(memcpy)
+        .set __memcpy, 0
+        .hidden __memcpy
+        .endif
 
-.Ll_exc_copy:
+.Ll_exc_copy\@:
         /*
          * Copy bytes from src until faulting load address (or until a
          * lb faults)
@@ -451,24 +520,24 @@ EXC(    sb      t0, NBYTES-2(dst), .Ls_exc_p1)
          *
          * Assumes src < THREAD_BUADDR($28)
         */
-        LOAD    t0, TI_TASK($28)
+        LOADK   t0, TI_TASK($28)
         nop
-        LOAD    t0, THREAD_BUADDR(t0)
+        LOADK   t0, THREAD_BUADDR(t0)
1:
-EXC(    lb      t1, 0(src),     .Ll_exc)
+        LOADB(t1, 0(src), .Ll_exc\@)
         ADD     src, src, 1
         sb      t1, 0(dst)      # can't fault -- we're copy_from_user
         .set    reorder                         /* DADDI_WAR */
         ADD     dst, dst, 1
         bne     src, t0, 1b
         .set    noreorder
-.Ll_exc:
-        LOAD    t0, TI_TASK($28)
+.Ll_exc\@:
+        LOADK   t0, TI_TASK($28)
         nop
-        LOAD    t0, THREAD_BUADDR(t0)   # t0 is just past last good address
+        LOADK   t0, THREAD_BUADDR(t0)   # t0 is just past last good address
         nop
         SUB     len, AT, t0             # len number of uncopied bytes
-        bnez    t6, .Ldone      /* Skip the zeroing part if inatomic */
+        bnez    t6, .Ldone\@    /* Skip the zeroing part if inatomic */
         /*
          * Here's where we rely on src and dst being incremented in tandem,
          *    See (3) above.
@@ -482,7 +551,7 @@ EXC(    lb      t1, 0(src),     .Ll_exc)
         */
         .set    reorder                         /* DADDI_WAR */
         SUB     src, len, 1
-        beqz    len, .Ldone
+        beqz    len, .Ldone\@
         .set    noreorder
1:      sb      zero, 0(dst)
         ADD     dst, dst, 1
@@ -503,7 +572,7 @@ EXC(    lb      t1, 0(src),     .Ll_exc)
 
#define SEXC(n)                                 \
         .set    reorder;                        /* DADDI_WAR */ \
-.Ls_exc_p ## n ## u:                            \
+.Ls_exc_p ## n ## u\@:                          \
         ADD     len, len, n*NBYTES;             \
         jr      ra;                             \
         .set    noreorder
@@ -517,14 +586,15 @@ SEXC(3)
SEXC(2)
SEXC(1)
 
-.Ls_exc_p1:
+.Ls_exc_p1\@:
         .set    reorder                         /* DADDI_WAR */
         ADD     len, len, 1
         jr      ra
         .set    noreorder
-.Ls_exc:
+.Ls_exc\@:
         jr      ra
         nop
+        .endm
 
         .align  5
LEAF(memmove)
@@ -575,3 +645,71 @@ LEAF(__rmemcpy)                                 /* a0=dst a1=src a2=len */
         jr      ra
         move    a2, zero
         END(__rmemcpy)
+
+/*
+ * t6 is used as a flag to note inatomic mode.
+ */
+LEAF(__copy_user_inatomic)
+        b       __copy_user_common
+        li      t6, 1
+        END(__copy_user_inatomic)
+
+/*
+ * A combined memcpy/__copy_user
+ * __copy_user sets len to 0 for success; else to an upper bound of
+ * the number of uncopied bytes.
+ * memcpy sets v0 to dst.
+ */
+        .align  5
+LEAF(memcpy)                                    /* a0=dst a1=src a2=len */
+        move    v0, dst                         /* return value */
+.L__memcpy:
+FEXPORT(__copy_user)
+        li      t6, 0   /* not inatomic */
+__copy_user_common:
+        /* Legacy Mode, user <-> user */
+        __BUILD_COPY_USER LEGACY_MODE USEROP USEROP
+
+#ifdef CONFIG_EVA
+
+/*
+ * For EVA we need distinct symbols for reading and writing to user space.
+ * This is because we need to use specific EVA instructions to perform the
+ * virtual <-> physical translation when a virtual address is actually in user
+ * space
+ */
+
+LEAF(__copy_user_inatomic_eva)
+        b       __copy_from_user_common
+        li      t6, 1
+        END(__copy_user_inatomic_eva)
+
+/*
+ * __copy_from_user (EVA)
+ */
+
+LEAF(__copy_from_user_eva)
+        li      t6, 0   /* not inatomic */
+__copy_from_user_common:
+        __BUILD_COPY_USER EVA_MODE USEROP KERNELOP
+END(__copy_from_user_eva)
+
+
+
+/*
+ * __copy_to_user (EVA)
+ */
+
+LEAF(__copy_to_user_eva)
+__BUILD_COPY_USER EVA_MODE KERNELOP USEROP
+END(__copy_to_user_eva)
+
+/*
+ * __copy_in_user (EVA)
+ */
+
+LEAF(__copy_in_user_eva)
+__BUILD_COPY_USER EVA_MODE USEROP USEROP
+END(__copy_in_user_eva)
+
+#endif
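
For orientation, the following is a minimal, illustrative sketch of what one LOAD() use expands to under the EXC() wrapper above, for the 32-bit (lw) case with src in a1. It is not output from a real build: the \@ unique suffix that the assembler appends to per-invocation labels is omitted, and PTR and __BUILD_EVA_INSN come from the MIPS asm headers rather than this file.

        # LEGACY_MODE (and EVA kernel<->kernel): LOAD(t0, 0(src), .Ll_exc)
        # becomes a plain load plus an __ex_table fixup entry:
9:      lw      t0, 0(a1)               # label 9 marks the instruction that may fault
        .section __ex_table, "a"
        PTR     9b, .Ll_exc             # on a fault, the handler jumps to .Ll_exc
        .previous

        # EVA_MODE with a user-space source (e.g. __copy_from_user_eva):
        # the same LOAD() emits the EVA user-load instruction instead:
9:      lwe     t0, 0(a1)               # lwe reads through the user address space
        .section __ex_table, "a"
        PTR     9b, .Ll_exc
        .previous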