author     Ingo Molnar <mingo@kernel.org>    2016-02-18 03:28:03 -0500
committer  Ingo Molnar <mingo@kernel.org>    2016-02-18 03:28:03 -0500
commit     3a2f2ac9b96f9a9f5538396a212d3b9fb543bfc5 (patch)
tree       294c2f340b11584e58cea90adfc4182ac8742348 /arch/x86/lib
parent     4e79e182b419172e35936a47f098509092d69817 (diff)
parent     f4eafd8bcd5229e998aa252627703b8462c3b90f (diff)
Merge branch 'x86/urgent' into x86/asm, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--  arch/x86/lib/copy_user_64.S | 142
1 file changed, 101 insertions, 41 deletions
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index fba343062055..2b0ef26da0bd 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -232,17 +232,31 @@ ENDPROC(copy_user_enhanced_fast_string)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
+ *  - Require 4-byte alignment when size is 4 bytes.
  */
 ENTRY(__copy_user_nocache)
 	ASM_STAC
+
+	/* If size is less than 8 bytes, go to 4-byte copy */
 	cmpl $8,%edx
-	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	jb .L_4b_nocache_copy_entry
+
+	/* If destination is not 8-byte aligned, "cache" copy to align it */
 	ALIGN_DESTINATION
+
+	/* Set 4x8-byte copy count and remainder */
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -262,60 +276,106 @@ ENTRY(__copy_user_nocache)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
 	decl %ecx
-	jnz 1b
-17:	movl %edx,%ecx
+	jnz .L_4x8b_nocache_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
-	jz 20f
-18:	movq (%rsi),%r8
-19:	movnti %r8,(%rdi)
+	jz .L_4b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:	movq (%rsi),%r8
+21:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
 	leaq 8(%rdi),%rdi
 	decl %ecx
-	jnz 18b
-20:	andl %edx,%edx
-	jz 23f
+	jnz .L_8b_nocache_copy_loop
+
+	/* If no byte left, we're done */
+.L_4b_nocache_copy_entry:
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* If destination is not 4-byte aligned, go to byte copy: */
+	movl %edi,%ecx
+	andl $3,%ecx
+	jnz .L_1b_cache_copy_entry
+
+	/* Set 4-byte copy count (1 or 0) and remainder */
 	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
+	andl $3,%edx
+	shrl $2,%ecx
+	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4-byte nocache copy: */
+30:	movl (%rsi),%r8d
+31:	movnti %r8d,(%rdi)
+	leaq 4(%rsi),%rsi
+	leaq 4(%rdi),%rdi
+
+	/* If no bytes left, we're done: */
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* Perform byte "cache" loop-copy for the remainder */
+.L_1b_cache_copy_entry:
+	movl %edx,%ecx
+.L_1b_cache_copy_loop:
+40:	movb (%rsi),%al
+41:	movb %al,(%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz 21b
-23:	xorl %eax,%eax
+	jnz .L_1b_cache_copy_loop
+
+	/* Finished copying; fence the prior stores */
+.L_finish_copy:
+	xorl %eax,%eax
 	ASM_CLAC
 	sfence
 	ret
 
 	.section .fixup,"ax"
-30:	shll $6,%ecx
+.L_fixup_4x8b_copy:
+	shll $6,%ecx
 	addl %ecx,%edx
-	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
-	jmp 60f
-50:	movl %ecx,%edx
-60:	sfence
+	jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+	lea (%rdx,%rcx,8),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_4b_copy:
+	lea (%rdx,%rcx,4),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+	movl %ecx,%edx
+.L_fixup_handle_tail:
+	sfence
 	jmp copy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE(1b,30b)
-	_ASM_EXTABLE(2b,30b)
-	_ASM_EXTABLE(3b,30b)
-	_ASM_EXTABLE(4b,30b)
-	_ASM_EXTABLE(5b,30b)
-	_ASM_EXTABLE(6b,30b)
-	_ASM_EXTABLE(7b,30b)
-	_ASM_EXTABLE(8b,30b)
-	_ASM_EXTABLE(9b,30b)
-	_ASM_EXTABLE(10b,30b)
-	_ASM_EXTABLE(11b,30b)
-	_ASM_EXTABLE(12b,30b)
-	_ASM_EXTABLE(13b,30b)
-	_ASM_EXTABLE(14b,30b)
-	_ASM_EXTABLE(15b,30b)
-	_ASM_EXTABLE(16b,30b)
-	_ASM_EXTABLE(18b,40b)
-	_ASM_EXTABLE(19b,40b)
-	_ASM_EXTABLE(21b,50b)
-	_ASM_EXTABLE(22b,50b)
+	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(30b,.L_fixup_4b_copy)
+	_ASM_EXTABLE(31b,.L_fixup_4b_copy)
+	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
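
For reference, the copy policy the new labels implement amounts to: move 64-byte (4x8) and 8-byte chunks with non-temporal stores while the destination is 8-byte aligned, use a single 4-byte non-temporal store only when the destination is 4-byte aligned, fall back to ordinary cached byte stores for whatever remains, and fence before returning. Below is a rough user-space C sketch of that policy, illustrative only: the function name nocache_copy_sketch is made up, it assumes an x86-64 compiler with SSE2 intrinsics, and it does not model ALIGN_DESTINATION or the _ASM_EXTABLE fault handling that the real routine needs for user-space accesses.

/*
 * Illustrative sketch only, not kernel code. Where the asm byte-copies to
 * 8-byte-align the destination first (ALIGN_DESTINATION), this sketch simply
 * skips the non-temporal path when the destination is unaligned.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <emmintrin.h>	/* _mm_stream_si32, _mm_stream_si64, _mm_sfence */

static void nocache_copy_sketch(void *dst, const void *src, size_t len)
{
	char *d = dst;
	const char *s = src;

	/* 8-byte non-temporal stores while the destination is 8-byte
	 * aligned and at least 8 bytes remain (the asm does this as an
	 * unrolled 4x8-byte loop followed by single 8-byte steps). */
	if (((uintptr_t)d & 7) == 0) {
		while (len >= 8) {
			long long tmp;
			memcpy(&tmp, s, 8);
			_mm_stream_si64((long long *)d, tmp);
			s += 8; d += 8; len -= 8;
		}
	}

	/* At most one 4-byte non-temporal store, and only if the
	 * destination is 4-byte aligned -- the case this fix adds. */
	if (len >= 4 && ((uintptr_t)d & 3) == 0) {
		int tmp;
		memcpy(&tmp, s, 4);
		_mm_stream_si32((int *)d, tmp);
		s += 4; d += 4; len -= 4;
	}

	/* Remaining bytes go through ordinary ("cached") stores. */
	while (len--)
		*d++ = *s++;

	/* Order the non-temporal stores, like the asm's sfence. */
	_mm_sfence();
}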