author		Ingo Molnar <mingo@kernel.org>	2016-02-18 03:28:03 -0500
committer	Ingo Molnar <mingo@kernel.org>	2016-02-18 03:28:03 -0500
commit		3a2f2ac9b96f9a9f5538396a212d3b9fb543bfc5 (patch)
tree		294c2f340b11584e58cea90adfc4182ac8742348 /arch/x86/lib
parent		4e79e182b419172e35936a47f098509092d69817 (diff)
parent		f4eafd8bcd5229e998aa252627703b8462c3b90f (diff)
Merge branch 'x86/urgent' into x86/asm, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--	arch/x86/lib/copy_user_64.S	142
1 file changed, 101 insertions, 41 deletions
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index fba343062055..2b0ef26da0bd 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -232,17 +232,31 @@ ENDPROC(copy_user_enhanced_fast_string)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
+ *  - Require 4-byte alignment when size is 4 bytes.
  */
 ENTRY(__copy_user_nocache)
 	ASM_STAC
+
+	/* If size is less than 8 bytes, go to 4-byte copy */
 	cmpl $8,%edx
-	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	jb .L_4b_nocache_copy_entry
+
+	/* If destination is not 8-byte aligned, "cache" copy to align it */
 	ALIGN_DESTINATION
+
+	/* Set 4x8-byte copy count and remainder */
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -262,60 +276,106 @@ ENTRY(__copy_user_nocache)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
 	decl %ecx
-	jnz 1b
-17:	movl %edx,%ecx
+	jnz .L_4x8b_nocache_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
-	jz 20f
-18:	movq (%rsi),%r8
-19:	movnti %r8,(%rdi)
+	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:	movq (%rsi),%r8
+21:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
 	leaq 8(%rdi),%rdi
 	decl %ecx
-	jnz 18b
-20:	andl %edx,%edx
-	jz 23f
+	jnz .L_8b_nocache_copy_loop
+
+	/* If no byte left, we're done */
+.L_4b_nocache_copy_entry:
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* If destination is not 4-byte aligned, go to byte copy: */
+	movl %edi,%ecx
+	andl $3,%ecx
+	jnz .L_1b_cache_copy_entry
+
+	/* Set 4-byte copy count (1 or 0) and remainder */
 	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
+	andl $3,%edx
+	shrl $2,%ecx
+	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4-byte nocache copy: */
+30:	movl (%rsi),%r8d
+31:	movnti %r8d,(%rdi)
+	leaq 4(%rsi),%rsi
+	leaq 4(%rdi),%rdi
+
+	/* If no bytes left, we're done: */
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* Perform byte "cache" loop-copy for the remainder */
+.L_1b_cache_copy_entry:
+	movl %edx,%ecx
+.L_1b_cache_copy_loop:
+40:	movb (%rsi),%al
+41:	movb %al,(%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz 21b
-23:	xorl %eax,%eax
+	jnz .L_1b_cache_copy_loop
+
+	/* Finished copying; fence the prior stores */
+.L_finish_copy:
+	xorl %eax,%eax
 	ASM_CLAC
 	sfence
 	ret
 
 	.section .fixup,"ax"
-30:	shll $6,%ecx
+.L_fixup_4x8b_copy:
+	shll $6,%ecx
 	addl %ecx,%edx
-	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
-	jmp 60f
-50:	movl %ecx,%edx
-60:	sfence
+	jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+	lea (%rdx,%rcx,8),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_4b_copy:
+	lea (%rdx,%rcx,4),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+	movl %ecx,%edx
+.L_fixup_handle_tail:
+	sfence
 	jmp copy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE(1b,30b)
-	_ASM_EXTABLE(2b,30b)
-	_ASM_EXTABLE(3b,30b)
-	_ASM_EXTABLE(4b,30b)
-	_ASM_EXTABLE(5b,30b)
-	_ASM_EXTABLE(6b,30b)
-	_ASM_EXTABLE(7b,30b)
-	_ASM_EXTABLE(8b,30b)
-	_ASM_EXTABLE(9b,30b)
-	_ASM_EXTABLE(10b,30b)
-	_ASM_EXTABLE(11b,30b)
-	_ASM_EXTABLE(12b,30b)
-	_ASM_EXTABLE(13b,30b)
-	_ASM_EXTABLE(14b,30b)
-	_ASM_EXTABLE(15b,30b)
-	_ASM_EXTABLE(16b,30b)
-	_ASM_EXTABLE(18b,40b)
-	_ASM_EXTABLE(19b,40b)
-	_ASM_EXTABLE(21b,50b)
-	_ASM_EXTABLE(22b,50b)
+	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
+	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
+	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
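
The comment block added at the top of __copy_user_nocache() spells out when the routine falls back to cached copies: non-temporal (movnti) stores are used only for 8-byte chunks on an 8-byte-aligned destination and for a single 4-byte chunk on a 4-byte-aligned destination; everything else goes through ordinary cached byte copies. A rough C sketch of that dispatch follows. It is an illustration only, not kernel code: the nocache_store_* helpers are stand-ins for the movnti instructions, the 4x8-byte and 8-byte loops of the assembly are collapsed into one loop, and the user-access and fault handling (STAC/CLAC, exception table) is omitted.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Stand-ins for the movnti non-temporal stores; plain stores here. */
static void nocache_store_8(void *dst, uint64_t v) { memcpy(dst, &v, 8); }
static void nocache_store_4(void *dst, uint32_t v) { memcpy(dst, &v, 4); }

static void copy_nocache_sketch(char *dst, const char *src, size_t len)
{
	if (len >= 8) {
		/* ALIGN_DESTINATION: cached byte copy until dst is 8-byte aligned */
		while (((uintptr_t)dst & 7) && len) {
			*dst++ = *src++;
			len--;
		}
		/* 8-byte non-temporal stores (asm: 4x8-byte loop + 8-byte loop) */
		for (; len >= 8; len -= 8, src += 8, dst += 8) {
			uint64_t v;
			memcpy(&v, src, 8);
			nocache_store_8(dst, v);
		}
	}
	if (!len)
		return;
	/* At most one 4-byte non-temporal store, only if dst is 4-byte aligned */
	if (!((uintptr_t)dst & 3) && len >= 4) {
		uint32_t v;
		memcpy(&v, src, 4);
		nocache_store_4(dst, v);
		src += 4;
		dst += 4;
		len -= 4;
	}
	/* Remaining tail: plain cached byte copy */
	while (len--)
		*dst++ = *src++;
}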
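On the exception-table side, each faulting instruction label (1-16 for the 4x8-byte loop, 20/21 for the 8-byte loop, 30/31 for the 4-byte copy, 40/41 for the byte copy) is mapped to a fixup that recomputes how many bytes were still left to copy, issues sfence, and jumps to copy_user_handle_tail; at fault time %ecx holds the loop count for the current copy width and %edx holds the leftover byte remainder. A small illustration of that arithmetic, with the enum and function names invented for this sketch:

/* Bytes left to copy at fault time, per fixup path (names are made up). */
enum fixup_path { FIXUP_4X8B, FIXUP_8B, FIXUP_4B, FIXUP_1B };

static unsigned int remaining_bytes(enum fixup_path path,
				    unsigned int ecx, unsigned int edx)
{
	switch (path) {
	case FIXUP_4X8B: return edx + ecx * 64; /* shll $6,%ecx; addl %ecx,%edx */
	case FIXUP_8B:   return edx + ecx * 8;  /* lea (%rdx,%rcx,8),%rdx */
	case FIXUP_4B:   return edx + ecx * 4;  /* lea (%rdx,%rcx,4),%rdx */
	case FIXUP_1B:   return ecx;            /* movl %ecx,%edx */
	}
	return 0;
}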