Diffstat (limited to 'arch/mips/lib/memcpy.S')
 -rw-r--r--	arch/mips/lib/memcpy.S	416
 1 file changed, 277 insertions, 139 deletions
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index c5c40dad0bbf..c17ef80cf65a 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -10,6 +10,7 @@
  * Copyright (C) 2002 Broadcom, Inc.
  *   memcpy/copy_user author: Mark Vandevoorde
  * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2014 Imagination Technologies Ltd.
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -85,11 +86,51 @@
  * they're not protected.
  */

-#define EXC(inst_reg,addr,handler)			\
-9:	inst_reg, addr;					\
-	.section __ex_table,"a";			\
-	PTR	9b, handler;				\
-	.previous
+/* Instruction type */
+#define LD_INSN 1
+#define ST_INSN 2
+/* Pretech type */
+#define SRC_PREFETCH 1
+#define DST_PREFETCH 2
+#define LEGACY_MODE 1
+#define EVA_MODE    2
+#define USEROP   1
+#define KERNELOP 2
+
+/*
+ * Wrapper to add an entry in the exception table
+ * in case the insn causes a memory exception.
+ * Arguments:
+ * insn    : Load/store instruction
+ * type    : Instruction type
+ * reg     : Register
+ * addr    : Address
+ * handler : Exception handler
+ */
+
+#define EXC(insn, type, reg, addr, handler)			\
+	.if \mode == LEGACY_MODE;				\
+9:		insn reg, addr;					\
+		.section __ex_table,"a";			\
+		PTR	9b, handler;				\
+		.previous;					\
+	/* This is assembled in EVA mode */			\
+	.else;							\
+		/* If loading from user or storing to user */	\
+		.if ((\from == USEROP) && (type == LD_INSN)) ||	\
+		    ((\to == USEROP) && (type == ST_INSN));	\
+9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
+			.section __ex_table,"a";		\
+			PTR	9b, handler;			\
+			.previous;				\
+		.else;						\
+			/*					\
+			 * Still in EVA, but no need for	\
+			 * exception handler or EVA insn	\
+			 */					\
+			insn reg, addr;				\
+		.endif;						\
+	.endif

 /*
  * Only on the 64-bit kernel we can made use of 64-bit registers.
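The rewritten EXC() is the heart of the patch: the same call site can now produce three different instruction sequences depending on the assembly mode. A hand-expanded sketch (not part of the patch; assuming a 32-bit build, so the load is lw) of EXC(lw, LD_INSN, t0, 0(src), .Ll_exc):

	# LEGACY_MODE: identical to what the old EXC() emitted
9:	lw	t0, 0(src)
	.section __ex_table,"a"
	PTR	9b, .Ll_exc
	.previous

	# EVA_MODE with \from == USEROP: insn##e pastes an 'e' onto the
	# mnemonic, so __BUILD_EVA_INSN emits the EVA user-load form
9:	lwe	t0, 0(src)
	.section __ex_table,"a"
	PTR	9b, .Ll_exc
	.previous

	# EVA_MODE, kernel-side operand: plain instruction, no fixup entry
	lw	t0, 0(src)

In the third case the access targets a kernel buffer that the fixup machinery never needs to recover from, so the __ex_table entry is dropped entirely, as the in-macro comment notes.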
@@ -100,12 +141,13 @@

 #ifdef USE_DOUBLE

-#define LOAD   ld
-#define LOADL  ldl
-#define LOADR  ldr
-#define STOREL sdl
-#define STORER sdr
-#define STORE  sd
+#define LOADK ld /* No exception */
+#define LOAD(reg, addr, handler)	EXC(ld, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)	EXC(ldl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)	EXC(ldr, LD_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)	EXC(sdl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)	EXC(sdr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)	EXC(sd, ST_INSN, reg, addr, handler)
 #define ADD    daddu
 #define SUB    dsubu
 #define SRL    dsrl
@@ -136,12 +178,13 @@

 #else

-#define LOAD   lw
-#define LOADL  lwl
-#define LOADR  lwr
-#define STOREL swl
-#define STORER swr
-#define STORE  sw
+#define LOADK lw /* No exception */
+#define LOAD(reg, addr, handler)	EXC(lw, LD_INSN, reg, addr, handler)
+#define LOADL(reg, addr, handler)	EXC(lwl, LD_INSN, reg, addr, handler)
+#define LOADR(reg, addr, handler)	EXC(lwr, LD_INSN, reg, addr, handler)
+#define STOREL(reg, addr, handler)	EXC(swl, ST_INSN, reg, addr, handler)
+#define STORER(reg, addr, handler)	EXC(swr, ST_INSN, reg, addr, handler)
+#define STORE(reg, addr, handler)	EXC(sw, ST_INSN, reg, addr, handler)
 #define ADD    addu
 #define SUB    subu
 #define SRL    srl
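With both register widths the conversion is mechanical: every faulting LOAD/STORE becomes a function-like macro that forwards to EXC(), carrying its fixup handler as an explicit argument, while LOADK stays a bare ld/lw for loads that can never fault (such as the thread-info accesses in the exception paths below). A sketch of one expansion step on a 64-bit build:

	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
	# => EXC(ld, LD_INSN, t0, UNIT(0)(src), .Ll_exc\@)

so the legacy-versus-EVA decision is made in exactly one place.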
@@ -154,6 +197,33 @@

 #endif /* USE_DOUBLE */

+#define LOADB(reg, addr, handler)	EXC(lb, LD_INSN, reg, addr, handler)
+#define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
+
+#define _PREF(hint, addr, type)						\
+	.if \mode == LEGACY_MODE;					\
+		PREF(hint, addr);					\
+	.else;								\
+		.if ((\from == USEROP) && (type == SRC_PREFETCH)) ||	\
+		    ((\to == USEROP) && (type == DST_PREFETCH));	\
+			/*						\
+			 * PREFE has only 9 bits for the offset		\
+			 * compared to PREF which has 16, so it may	\
+			 * need to use the $at register but this	\
+			 * register should remain intact because it's	\
+			 * used later on. Therefore use $v1.		\
+			 */						\
+			.set at=v1;					\
+			PREFE(hint, addr);				\
+			.set noat;					\
+		.else;							\
+			PREF(hint, addr);				\
+		.endif;							\
+	.endif
+
+#define PREFS(hint, addr)	_PREF(hint, addr, SRC_PREFETCH)
+#define PREFD(hint, addr)	_PREF(hint, addr, DST_PREFETCH)
+
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 #define LDFIRST LOADR
 #define LDREST  LOADL
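The prefetch wrappers follow the same pattern, with the twist spelled out in the comment: PREFE has a 9-bit offset field where PREF has 16, so larger offsets force the assembler to build the address in a scratch register, and the assembler temporary is redirected to $v1 because $at itself is live in this code. A sketch of the two ways PREFS(0, 8*32(src)) can assemble (assuming the 9-bit field is signed, so 8*32 = 256 is out of range):

	# LEGACY_MODE, or EVA with a kernel-side source:
	PREF(	0, 8*32(src) )

	# EVA_MODE with \from == USEROP:
	.set	at=v1
	PREFE(	0, 8*32(src) )	# assembler may materialize the address in $v1
	.set	noat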
@@ -182,27 +252,23 @@
 .set	at=v1
 #endif

-/*
- * t6 is used as a flag to note inatomic mode.
- */
-LEAF(__copy_user_inatomic)
-	b	__copy_user_common
-	li	t6, 1
-	END(__copy_user_inatomic)
-
-/*
- * A combined memcpy/__copy_user
- * __copy_user sets len to 0 for success; else to an upper bound of
- * the number of uncopied bytes.
- * memcpy sets v0 to dst.
- */
 	.align	5
-LEAF(memcpy)					/* a0=dst a1=src a2=len */
-	move	v0, dst				/* return value */
-.L__memcpy:
-FEXPORT(__copy_user)
-	li	t6, 0	/* not inatomic */
-__copy_user_common:
+
+/*
+ * Macro to build the __copy_user common code
+ * Arguements:
+ * mode : LEGACY_MODE or EVA_MODE
+ * from : Source operand. USEROP or KERNELOP
+ * to   : Destination operand. USEROP or KERNELOP
+ */
+	.macro __BUILD_COPY_USER mode, from, to
+
+	/* initialize __memcpy if this the first time we execute this macro */
+	.ifnotdef __memcpy
+	.set __memcpy, 1
+	.hidden __memcpy /* make sure it does not leak */
+	.endif
+
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
 	 * Temps
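Turning the body into a .macro is what makes the \@ suffixes throughout the rest of the patch necessary: \@ is the GAS pseudo-variable that expands to a count of macro invocations so far, so each expansion of __BUILD_COPY_USER gets its own private copies of .Ldone, .Ll_exc and friends. An illustrative sketch (label spellings approximate):

	.macro	body
.Ldone\@:			# unique per expansion
	.endm

	body			# emits something like .Ldone0:
	body			# emits something like .Ldone1:

The .ifnotdef/.set dance around the __memcpy symbol serves a related purpose: only the first expansion, the legacy one that carries LEAF(memcpy), may emit END(memcpy), as the .if __memcpy == 1 test near .Ldone\@ further down shows.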
@@ -217,94 +283,94 @@ __copy_user_common:
 	 *
 	 * If len < NBYTES use byte operations.
 	 */
-	PREF(	0, 0(src) )
-	PREF(	1, 0(dst) )
+	PREFS(	0, 0(src) )
+	PREFD(	1, 0(dst) )
 	sltu	t2, len, NBYTES
 	and	t1, dst, ADDRMASK
-	PREF(	0, 1*32(src) )
-	PREF(	1, 1*32(dst) )
-	bnez	t2, .Lcopy_bytes_checklen
+	PREFS(	0, 1*32(src) )
+	PREFD(	1, 1*32(dst) )
+	bnez	t2, .Lcopy_bytes_checklen\@
 	and	t0, src, ADDRMASK
-	PREF(	0, 2*32(src) )
-	PREF(	1, 2*32(dst) )
-	bnez	t1, .Ldst_unaligned
+	PREFS(	0, 2*32(src) )
+	PREFD(	1, 2*32(dst) )
+	bnez	t1, .Ldst_unaligned\@
 	nop
-	bnez	t0, .Lsrc_unaligned_dst_aligned
+	bnez	t0, .Lsrc_unaligned_dst_aligned\@
 	/*
 	 * use delay slot for fall-through
 	 * src and dst are aligned; need to compute rem
 	 */
-.Lboth_aligned:
+.Lboth_aligned\@:
 	SRL	t0, len, LOG_NBYTES+3	# +3 for 8 units/iter
-	beqz	t0, .Lcleanup_both_aligned # len < 8*NBYTES
+	beqz	t0, .Lcleanup_both_aligned\@ # len < 8*NBYTES
 	and	rem, len, (8*NBYTES-1)	# rem = len % (8*NBYTES)
-	PREF(	0, 3*32(src) )
-	PREF(	1, 3*32(dst) )
+	PREFS(	0, 3*32(src) )
+	PREFD(	1, 3*32(dst) )
 	.align	4
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
+	LOAD(t0, UNIT(0)(src), .Ll_exc\@)
+	LOAD(t1, UNIT(1)(src), .Ll_exc_copy\@)
+	LOAD(t2, UNIT(2)(src), .Ll_exc_copy\@)
+	LOAD(t3, UNIT(3)(src), .Ll_exc_copy\@)
 	SUB	len, len, 8*NBYTES
-EXC(	LOAD	t4, UNIT(4)(src),	.Ll_exc_copy)
-EXC(	LOAD	t7, UNIT(5)(src),	.Ll_exc_copy)
-EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p8u)
-EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p7u)
-EXC(	LOAD	t0, UNIT(6)(src),	.Ll_exc_copy)
-EXC(	LOAD	t1, UNIT(7)(src),	.Ll_exc_copy)
+	LOAD(t4, UNIT(4)(src), .Ll_exc_copy\@)
+	LOAD(t7, UNIT(5)(src), .Ll_exc_copy\@)
+	STORE(t0, UNIT(0)(dst), .Ls_exc_p8u\@)
+	STORE(t1, UNIT(1)(dst), .Ls_exc_p7u\@)
+	LOAD(t0, UNIT(6)(src), .Ll_exc_copy\@)
+	LOAD(t1, UNIT(7)(src), .Ll_exc_copy\@)
 	ADD	src, src, 8*NBYTES
 	ADD	dst, dst, 8*NBYTES
-EXC(	STORE	t2, UNIT(-6)(dst),	.Ls_exc_p6u)
-EXC(	STORE	t3, UNIT(-5)(dst),	.Ls_exc_p5u)
-EXC(	STORE	t4, UNIT(-4)(dst),	.Ls_exc_p4u)
-EXC(	STORE	t7, UNIT(-3)(dst),	.Ls_exc_p3u)
-EXC(	STORE	t0, UNIT(-2)(dst),	.Ls_exc_p2u)
-EXC(	STORE	t1, UNIT(-1)(dst),	.Ls_exc_p1u)
-	PREF(	0, 8*32(src) )
-	PREF(	1, 8*32(dst) )
+	STORE(t2, UNIT(-6)(dst), .Ls_exc_p6u\@)
+	STORE(t3, UNIT(-5)(dst), .Ls_exc_p5u\@)
+	STORE(t4, UNIT(-4)(dst), .Ls_exc_p4u\@)
+	STORE(t7, UNIT(-3)(dst), .Ls_exc_p3u\@)
+	STORE(t0, UNIT(-2)(dst), .Ls_exc_p2u\@)
+	STORE(t1, UNIT(-1)(dst), .Ls_exc_p1u\@)
+	PREFS(	0, 8*32(src) )
+	PREFD(	1, 8*32(dst) )
 	bne	len, rem, 1b
 	nop

 	/*
 	 * len == rem == the number of bytes left to copy < 8*NBYTES
 	 */
-.Lcleanup_both_aligned:
-	beqz	len, .Ldone
+.Lcleanup_both_aligned\@:
+	beqz	len, .Ldone\@
 	sltu	t0, len, 4*NBYTES
-	bnez	t0, .Lless_than_4units
+	bnez	t0, .Lless_than_4units\@
 	and	rem, len, (NBYTES-1)	# rem = len % NBYTES
 	/*
 	 * len >= 4*NBYTES
 	 */
-EXC(	LOAD	t0, UNIT(0)(src),	.Ll_exc)
-EXC(	LOAD	t1, UNIT(1)(src),	.Ll_exc_copy)
-EXC(	LOAD	t2, UNIT(2)(src),	.Ll_exc_copy)
-EXC(	LOAD	t3, UNIT(3)(src),	.Ll_exc_copy)
+	LOAD( t0, UNIT(0)(src),	.Ll_exc\@)
+	LOAD( t1, UNIT(1)(src),	.Ll_exc_copy\@)
+	LOAD( t2, UNIT(2)(src),	.Ll_exc_copy\@)
+	LOAD( t3, UNIT(3)(src),	.Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
 	ADD	src, src, 4*NBYTES
 	R10KCBARRIER(0(ra))
-EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
-EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
-EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
-EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
+	STORE(t0, UNIT(0)(dst),	.Ls_exc_p4u\@)
+	STORE(t1, UNIT(1)(dst),	.Ls_exc_p3u\@)
+	STORE(t2, UNIT(2)(dst),	.Ls_exc_p2u\@)
+	STORE(t3, UNIT(3)(dst),	.Ls_exc_p1u\@)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
-.Lless_than_4units:
+.Lless_than_4units\@:
 	/*
 	 * rem = len % NBYTES
 	 */
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	nop
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LOAD	t0, 0(src),		.Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
+	STORE(t0, 0(dst), .Ls_exc_p1u\@)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	rem, len, 1b
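A detail worth noting in the unrolled loop above: len is decremented by 8*NBYTES before any of the eight stores execute, so the store fixup handlers are named for how many units they must give back. A fault at the first store means all eight units are still uncopied (.Ls_exc_p8u), at the second store seven, and so on. The SEXC(n) macro near the end of the body generates exactly that fixup; hand-expanded for n = 8 (a sketch):

	.set	reorder			/* DADDI_WAR */
.Ls_exc_p8u\@:
	ADD	len, len, 8*NBYTES	# restore the eight units not yet stored
	jr	ra
	.set	noreorder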
@@ -322,17 +388,17 @@ EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
 	 * more instruction-level parallelism.
 	 */
 #define bits t2
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	ADD	t1, dst, len	# t1 is just past last byte of dst
 	li	bits, 8*NBYTES
 	SLL	rem, len, 3	# rem = number of bits to keep
-EXC(	LOAD	t0, 0(src),		.Ll_exc)
+	LOAD(t0, 0(src), .Ll_exc\@)
 	SUB	bits, bits, rem	# bits = number of bits to discard
 	SHIFT_DISCARD t0, t0, bits
-EXC(	STREST	t0, -1(t1),		.Ls_exc)
+	STREST(t0, -1(t1), .Ls_exc\@)
 	jr	ra
 	move	len, zero
-.Ldst_unaligned:
+.Ldst_unaligned\@:
 	/*
 	 * dst is unaligned
 	 * t0 = src & ADDRMASK
@@ -343,25 +409,25 @@ EXC(	STREST	t0, -1(t1),		.Ls_exc)
 	 * Set match = (src and dst have same alignment)
 	 */
 #define match rem
-EXC(	LDFIRST	t3, FIRST(0)(src),	.Ll_exc)
+	LDFIRST(t3, FIRST(0)(src), .Ll_exc\@)
 	ADD	t2, zero, NBYTES
-EXC(	LDREST	t3, REST(0)(src),	.Ll_exc_copy)
+	LDREST(t3, REST(0)(src), .Ll_exc_copy\@)
 	SUB	t2, t2, t1	# t2 = number of bytes copied
 	xor	match, t0, t1
 	R10KCBARRIER(0(ra))
-EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
-	beq	len, t2, .Ldone
+	STFIRST(t3, FIRST(0)(dst), .Ls_exc\@)
+	beq	len, t2, .Ldone\@
 	SUB	len, len, t2
 	ADD	dst, dst, t2
-	beqz	match, .Lboth_aligned
+	beqz	match, .Lboth_aligned\@
 	ADD	src, src, t2

-.Lsrc_unaligned_dst_aligned:
+.Lsrc_unaligned_dst_aligned\@:
 	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
-	PREF(	0, 3*32(src) )
-	beqz	t0, .Lcleanup_src_unaligned
+	PREFS(	0, 3*32(src) )
+	beqz	t0, .Lcleanup_src_unaligned\@
 	and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
-	PREF(	1, 3*32(dst) )
+	PREFD(	1, 3*32(dst) )
 1:
 	/*
 	 * Avoid consecutive LD*'s to the same register since some mips
@@ -370,58 +436,58 @@ EXC(	STFIRST t3, FIRST(0)(dst),	.Ls_exc)
 	 * are to the same unit (unless src is aligned, but it's not).
 	 */
 	R10KCBARRIER(0(ra))
-EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
-EXC(	LDFIRST	t1, FIRST(1)(src),	.Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDFIRST(t1, FIRST(1)(src), .Ll_exc_copy\@)
 	SUB	len, len, 4*NBYTES
-EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
-EXC(	LDREST	t1, REST(1)(src),	.Ll_exc_copy)
-EXC(	LDFIRST	t2, FIRST(2)(src),	.Ll_exc_copy)
-EXC(	LDFIRST	t3, FIRST(3)(src),	.Ll_exc_copy)
-EXC(	LDREST	t2, REST(2)(src),	.Ll_exc_copy)
-EXC(	LDREST	t3, REST(3)(src),	.Ll_exc_copy)
-	PREF(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
+	LDREST(t1, REST(1)(src), .Ll_exc_copy\@)
+	LDFIRST(t2, FIRST(2)(src), .Ll_exc_copy\@)
+	LDFIRST(t3, FIRST(3)(src), .Ll_exc_copy\@)
+	LDREST(t2, REST(2)(src), .Ll_exc_copy\@)
+	LDREST(t3, REST(3)(src), .Ll_exc_copy\@)
+	PREFS(	0, 9*32(src) )		# 0 is PREF_LOAD  (not streamed)
 	ADD	src, src, 4*NBYTES
 #ifdef CONFIG_CPU_SB1
 	nop				# improves slotting
 #endif
-EXC(	STORE	t0, UNIT(0)(dst),	.Ls_exc_p4u)
-EXC(	STORE	t1, UNIT(1)(dst),	.Ls_exc_p3u)
-EXC(	STORE	t2, UNIT(2)(dst),	.Ls_exc_p2u)
-EXC(	STORE	t3, UNIT(3)(dst),	.Ls_exc_p1u)
-	PREF(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
+	STORE(t0, UNIT(0)(dst), .Ls_exc_p4u\@)
+	STORE(t1, UNIT(1)(dst), .Ls_exc_p3u\@)
+	STORE(t2, UNIT(2)(dst), .Ls_exc_p2u\@)
+	STORE(t3, UNIT(3)(dst), .Ls_exc_p1u\@)
+	PREFD(	1, 9*32(dst) )		# 1 is PREF_STORE (not streamed)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
 	bne	len, rem, 1b
 	.set	noreorder

-.Lcleanup_src_unaligned:
-	beqz	len, .Ldone
+.Lcleanup_src_unaligned\@:
+	beqz	len, .Ldone\@
 	and	rem, len, NBYTES-1  # rem = len % NBYTES
-	beq	rem, len, .Lcopy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	nop
 1:
 	R10KCBARRIER(0(ra))
-EXC(	LDFIRST	t0, FIRST(0)(src),	.Ll_exc)
-EXC(	LDREST	t0, REST(0)(src),	.Ll_exc_copy)
+	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)
+	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
-EXC(	STORE	t0, 0(dst),		.Ls_exc_p1u)
+	STORE(t0, 0(dst), .Ls_exc_p1u\@)
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
 	bne	len, rem, 1b
 	.set	noreorder

-.Lcopy_bytes_checklen:
-	beqz	len, .Ldone
+.Lcopy_bytes_checklen\@:
+	beqz	len, .Ldone\@
 	nop
-.Lcopy_bytes:
+.Lcopy_bytes\@:
 	/* 0 < len < NBYTES */
 	R10KCBARRIER(0(ra))
 #define COPY_BYTE(N)			\
-EXC(	lb	t0, N(src), .Ll_exc);	\
+	LOADB(t0, N(src), .Ll_exc\@);	\
 	SUB	len, len, 1;		\
-	beqz	len, .Ldone;		\
-EXC(	sb	t0, N(dst), .Ls_exc_p1)
+	beqz	len, .Ldone\@;		\
+	STOREB(t0, N(dst), .Ls_exc_p1\@)

 	COPY_BYTE(0)
 	COPY_BYTE(1)
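In this unaligned-source path each word is assembled by an LDFIRST/LDREST pair; per the endianness block earlier, on little-endian these map to LOADR/LOADL (lwr/lwl, or ldr/ldl with USE_DOUBLE), with FIRST() and REST() supplying the matching offsets. The conversion is the same as in the aligned loop, only applied through the pair, e.g. (a 32-bit little-endian sketch):

	LDFIRST(t0, FIRST(0)(src), .Ll_exc\@)	  # => EXC(lwr, LD_INSN, ...)
	LDREST(t0, REST(0)(src), .Ll_exc_copy\@)  # => EXC(lwl, ...), merges the rest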
@@ -431,16 +497,19 @@ EXC(	sb	t0, N(dst), .Ls_exc_p1)
 	COPY_BYTE(4)
 	COPY_BYTE(5)
 #endif
-EXC(	lb	t0, NBYTES-2(src), .Ll_exc)
+	LOADB(t0, NBYTES-2(src), .Ll_exc\@)
 	SUB	len, len, 1
 	jr	ra
-EXC(	sb	t0, NBYTES-2(dst), .Ls_exc_p1)
-.Ldone:
+	STOREB(t0, NBYTES-2(dst), .Ls_exc_p1\@)
+.Ldone\@:
 	jr	ra
-	nop
+	.if __memcpy == 1
 	END(memcpy)
+	.set __memcpy, 0
+	.hidden __memcpy
+	.endif

-.Ll_exc_copy:
+.Ll_exc_copy\@:
 	/*
 	 * Copy bytes from src until faulting load address (or until a
 	 * lb faults)
@@ -451,24 +520,24 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc_p1)
 	 *
 	 * Assumes src < THREAD_BUADDR($28)
 	 */
-	LOAD	t0, TI_TASK($28)
+	LOADK	t0, TI_TASK($28)
 	nop
-	LOAD	t0, THREAD_BUADDR(t0)
+	LOADK	t0, THREAD_BUADDR(t0)
 1:
-EXC(	lb	t1, 0(src),	.Ll_exc)
+	LOADB(t1, 0(src), .Ll_exc\@)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
 	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
 	bne	src, t0, 1b
 	.set	noreorder
-.Ll_exc:
-	LOAD	t0, TI_TASK($28)
+.Ll_exc\@:
+	LOADK	t0, TI_TASK($28)
 	nop
-	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
+	LOADK	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
 	nop
 	SUB	len, AT, t0		# len number of uncopied bytes
-	bnez	t6, .Ldone	/* Skip the zeroing part if inatomic */
+	bnez	t6, .Ldone\@	/* Skip the zeroing part if inatomic */
 	/*
 	 * Here's where we rely on src and dst being incremented in tandem,
 	 *   See (3) above.
@@ -482,7 +551,7 @@ EXC(	lb	t1, 0(src),	.Ll_exc)
 	 */
 	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
-	beqz	len, .Ldone
+	beqz	len, .Ldone\@
 	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
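The load-fault path keeps its old two-stage shape, just with LOADK/LOADB doing the lifting: .Ll_exc_copy\@ salvages bytes up to the bad address recorded in THREAD_BUADDR, then .Ll_exc\@ computes the uncopied count and, for copy_from_user, zero-fills the rest of the destination so no stale kernel data is exposed. The t6 flag is what lets the inatomic entry points opt out of that zero-fill; as the entry points later in this patch show, they set it in the branch delay slot on the way into the common body:

	b	__copy_user_common
	li	t6, 1		# t6 != 0 => skip zero-filling dst on fault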
@@ -503,7 +572,7 @@ EXC(	lb	t1, 0(src),	.Ll_exc)

 #define SEXC(n)							\
 	.set	reorder;			/* DADDI_WAR */	\
-.Ls_exc_p ## n ## u:						\
+.Ls_exc_p ## n ## u\@:						\
 	ADD	len, len, n*NBYTES;				\
 	jr	ra;						\
 	.set	noreorder
@@ -517,14 +586,15 @@ SEXC(3)
 SEXC(2)
 SEXC(1)

-.Ls_exc_p1:
+.Ls_exc_p1\@:
 	.set	reorder				/* DADDI_WAR */
 	ADD	len, len, 1
 	jr	ra
 	.set	noreorder
-.Ls_exc:
+.Ls_exc\@:
 	jr	ra
 	nop
+	.endm

 	.align	5
 LEAF(memmove)
@@ -575,3 +645,71 @@ LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
 	jr	ra
 	move	a2, zero
 	END(__rmemcpy)
+
+/*
+ * t6 is used as a flag to note inatomic mode.
+ */
+LEAF(__copy_user_inatomic)
+	b	__copy_user_common
+	li	t6, 1
+	END(__copy_user_inatomic)
+
+/*
+ * A combined memcpy/__copy_user
+ * __copy_user sets len to 0 for success; else to an upper bound of
+ * the number of uncopied bytes.
+ * memcpy sets v0 to dst.
+ */
+	.align	5
+LEAF(memcpy)					/* a0=dst a1=src a2=len */
+	move	v0, dst				/* return value */
+.L__memcpy:
+FEXPORT(__copy_user)
+	li	t6, 0	/* not inatomic */
+__copy_user_common:
+	/* Legacy Mode, user <-> user */
+	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP
+
+#ifdef CONFIG_EVA
+
+/*
+ * For EVA we need distinct symbols for reading and writing to user space.
+ * This is because we need to use specific EVA instructions to perform the
+ * virtual <-> physical translation when a virtual address is actually in user
+ * space
+ */
+
+LEAF(__copy_user_inatomic_eva)
+	b	__copy_from_user_common
+	li	t6, 1
+	END(__copy_user_inatomic_eva)
+
+/*
+ * __copy_from_user (EVA)
+ */
+
+LEAF(__copy_from_user_eva)
+	li	t6, 0	/* not inatomic */
+__copy_from_user_common:
+	__BUILD_COPY_USER EVA_MODE USEROP KERNELOP
+	END(__copy_from_user_eva)
+
+
+
+/*
+ * __copy_to_user (EVA)
+ */
+
+LEAF(__copy_to_user_eva)
+	__BUILD_COPY_USER EVA_MODE KERNELOP USEROP
+	END(__copy_to_user_eva)
+
+/*
+ * __copy_in_user (EVA)
+ */
+
+LEAF(__copy_in_user_eva)
+	__BUILD_COPY_USER EVA_MODE USEROP USEROP
+	END(__copy_in_user_eva)
+
+#endif
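The net effect of the EVA expansions is easiest to see by tracing one load and one store through EXC() in each (a hand-traced sketch, 32-bit build, h standing for the fixup label):

	# __BUILD_COPY_USER EVA_MODE USEROP KERNELOP   (__copy_from_user_eva)
	#   LOAD(t0, 0(src), h)   =>  9: lwe t0, 0(src)  + __ex_table entry
	#   STORE(t0, 0(dst), h)  =>     sw  t0, 0(dst)    (kernel dst, no fixup)

	# __BUILD_COPY_USER EVA_MODE KERNELOP USEROP   (__copy_to_user_eva)
	#   LOAD(t0, 0(src), h)   =>     lw  t0, 0(src)    (kernel src, no fixup)
	#   STORE(t0, 0(dst), h)  =>  9: swe t0, 0(dst)  + __ex_table entry

__copy_in_user_eva, with USEROP on both sides, gets the EVA form and a fixup entry on every access, while the legacy expansion above keeps the pre-EVA behaviour for all four entry points.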