diff options
| author | Ingo Molnar <mingo@kernel.org> | 2016-02-18 03:28:03 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2016-02-18 03:28:03 -0500 |
| commit | 3a2f2ac9b96f9a9f5538396a212d3b9fb543bfc5 (patch) | |
| tree | 294c2f340b11584e58cea90adfc4182ac8742348 /arch/x86/lib | |
| parent | 4e79e182b419172e35936a47f098509092d69817 (diff) | |
| parent | f4eafd8bcd5229e998aa252627703b8462c3b90f (diff) | |
Merge branch 'x86/urgent' into x86/asm, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/lib')
| -rw-r--r-- | arch/x86/lib/copy_user_64.S | 142 |
1 file changed, 101 insertions, 41 deletions
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index fba343062055..2b0ef26da0bd 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
| @@ -232,17 +232,31 @@ ENDPROC(copy_user_enhanced_fast_string) | |||
| 232 | 232 | ||
| 233 | /* | 233 | /* |
| 234 | * copy_user_nocache - Uncached memory copy with exception handling | 234 | * copy_user_nocache - Uncached memory copy with exception handling |
| 235 | * This will force destination/source out of cache for more performance. | 235 | * This will force destination out of cache for more performance. |
| 236 | * | ||
| 237 | * Note: Cached memory copy is used when destination or size is not | ||
| 238 | * naturally aligned. That is: | ||
| 239 | * - Require 8-byte alignment when size is 8 bytes or larger. | ||
| 240 | * - Require 4-byte alignment when size is 4 bytes. | ||
| 236 | */ | 241 | */ |
| 237 | ENTRY(__copy_user_nocache) | 242 | ENTRY(__copy_user_nocache) |
| 238 | ASM_STAC | 243 | ASM_STAC |
| 244 | |||
| 245 | /* If size is less than 8 bytes, go to 4-byte copy */ | ||
| 239 | cmpl $8,%edx | 246 | cmpl $8,%edx |
| 240 | jb 20f /* less then 8 bytes, go to byte copy loop */ | 247 | jb .L_4b_nocache_copy_entry |
| 248 | |||
| 249 | /* If destination is not 8-byte aligned, "cache" copy to align it */ | ||
| 241 | ALIGN_DESTINATION | 250 | ALIGN_DESTINATION |
| 251 | |||
| 252 | /* Set 4x8-byte copy count and remainder */ | ||
| 242 | movl %edx,%ecx | 253 | movl %edx,%ecx |
| 243 | andl $63,%edx | 254 | andl $63,%edx |
| 244 | shrl $6,%ecx | 255 | shrl $6,%ecx |
| 245 | jz 17f | 256 | jz .L_8b_nocache_copy_entry /* jump if count is 0 */ |
| 257 | |||
| 258 | /* Perform 4x8-byte nocache loop-copy */ | ||
| 259 | .L_4x8b_nocache_copy_loop: | ||
| 246 | 1: movq (%rsi),%r8 | 260 | 1: movq (%rsi),%r8 |
| 247 | 2: movq 1*8(%rsi),%r9 | 261 | 2: movq 1*8(%rsi),%r9 |
| 248 | 3: movq 2*8(%rsi),%r10 | 262 | 3: movq 2*8(%rsi),%r10 |
| @@ -262,60 +276,106 @@ ENTRY(__copy_user_nocache) | |||
| 262 | leaq 64(%rsi),%rsi | 276 | leaq 64(%rsi),%rsi |
| 263 | leaq 64(%rdi),%rdi | 277 | leaq 64(%rdi),%rdi |
| 264 | decl %ecx | 278 | decl %ecx |
| 265 | jnz 1b | 279 | jnz .L_4x8b_nocache_copy_loop |
| 266 | 17: movl %edx,%ecx | 280 | |
| 281 | /* Set 8-byte copy count and remainder */ | ||
| 282 | .L_8b_nocache_copy_entry: | ||
| 283 | movl %edx,%ecx | ||
| 267 | andl $7,%edx | 284 | andl $7,%edx |
| 268 | shrl $3,%ecx | 285 | shrl $3,%ecx |
| 269 | jz 20f | 286 | jz .L_4b_nocache_copy_entry /* jump if count is 0 */ |
| 270 | 18: movq (%rsi),%r8 | 287 | |
| 271 | 19: movnti %r8,(%rdi) | 288 | /* Perform 8-byte nocache loop-copy */ |
| 289 | .L_8b_nocache_copy_loop: | ||
| 290 | 20: movq (%rsi),%r8 | ||
| 291 | 21: movnti %r8,(%rdi) | ||
| 272 | leaq 8(%rsi),%rsi | 292 | leaq 8(%rsi),%rsi |
| 273 | leaq 8(%rdi),%rdi | 293 | leaq 8(%rdi),%rdi |
| 274 | decl %ecx | 294 | decl %ecx |
| 275 | jnz 18b | 295 | jnz .L_8b_nocache_copy_loop |
| 276 | 20: andl %edx,%edx | 296 | |
| 277 | jz 23f | 297 | /* If no byte left, we're done */ |
| 298 | .L_4b_nocache_copy_entry: | ||
| 299 | andl %edx,%edx | ||
| 300 | jz .L_finish_copy | ||
| 301 | |||
| 302 | /* If destination is not 4-byte aligned, go to byte copy: */ | ||
| 303 | movl %edi,%ecx | ||
| 304 | andl $3,%ecx | ||
| 305 | jnz .L_1b_cache_copy_entry | ||
| 306 | |||
| 307 | /* Set 4-byte copy count (1 or 0) and remainder */ | ||
| 278 | movl %edx,%ecx | 308 | movl %edx,%ecx |
| 279 | 21: movb (%rsi),%al | 309 | andl $3,%edx |
| 280 | 22: movb %al,(%rdi) | 310 | shrl $2,%ecx |
| 311 | jz .L_1b_cache_copy_entry /* jump if count is 0 */ | ||
| 312 | |||
| 313 | /* Perform 4-byte nocache copy: */ | ||
| 314 | 30: movl (%rsi),%r8d | ||
| 315 | 31: movnti %r8d,(%rdi) | ||
| 316 | leaq 4(%rsi),%rsi | ||
| 317 | leaq 4(%rdi),%rdi | ||
| 318 | |||
| 319 | /* If no bytes left, we're done: */ | ||
| 320 | andl %edx,%edx | ||
| 321 | jz .L_finish_copy | ||
| 322 | |||
| 323 | /* Perform byte "cache" loop-copy for the remainder */ | ||
| 324 | .L_1b_cache_copy_entry: | ||
| 325 | movl %edx,%ecx | ||
| 326 | .L_1b_cache_copy_loop: | ||
| 327 | 40: movb (%rsi),%al | ||
| 328 | 41: movb %al,(%rdi) | ||
| 281 | incq %rsi | 329 | incq %rsi |
| 282 | incq %rdi | 330 | incq %rdi |
| 283 | decl %ecx | 331 | decl %ecx |
| 284 | jnz 21b | 332 | jnz .L_1b_cache_copy_loop |
| 285 | 23: xorl %eax,%eax | 333 | |
| 334 | /* Finished copying; fence the prior stores */ | ||
| 335 | .L_finish_copy: | ||
| 336 | xorl %eax,%eax | ||
| 286 | ASM_CLAC | 337 | ASM_CLAC |
| 287 | sfence | 338 | sfence |
| 288 | ret | 339 | ret |
| 289 | 340 | ||
| 290 | .section .fixup,"ax" | 341 | .section .fixup,"ax" |
| 291 | 30: shll $6,%ecx | 342 | .L_fixup_4x8b_copy: |
| 343 | shll $6,%ecx | ||
| 292 | addl %ecx,%edx | 344 | addl %ecx,%edx |
| 293 | jmp 60f | 345 | jmp .L_fixup_handle_tail |
| 294 | 40: lea (%rdx,%rcx,8),%rdx | 346 | .L_fixup_8b_copy: |
| 295 | jmp 60f | 347 | lea (%rdx,%rcx,8),%rdx |
| 296 | 50: movl %ecx,%edx | 348 | jmp .L_fixup_handle_tail |
| 297 | 60: sfence | 349 | .L_fixup_4b_copy: |
| 350 | lea (%rdx,%rcx,4),%rdx | ||
| 351 | jmp .L_fixup_handle_tail | ||
| 352 | .L_fixup_1b_copy: | ||
| 353 | movl %ecx,%edx | ||
| 354 | .L_fixup_handle_tail: | ||
| 355 | sfence | ||
| 298 | jmp copy_user_handle_tail | 356 | jmp copy_user_handle_tail |
| 299 | .previous | 357 | .previous |
| 300 | 358 | ||
| 301 | _ASM_EXTABLE(1b,30b) | 359 | _ASM_EXTABLE(1b,.L_fixup_4x8b_copy) |
| 302 | _ASM_EXTABLE(2b,30b) | 360 | _ASM_EXTABLE(2b,.L_fixup_4x8b_copy) |
| 303 | _ASM_EXTABLE(3b,30b) | 361 | _ASM_EXTABLE(3b,.L_fixup_4x8b_copy) |
| 304 | _ASM_EXTABLE(4b,30b) | 362 | _ASM_EXTABLE(4b,.L_fixup_4x8b_copy) |
| 305 | _ASM_EXTABLE(5b,30b) | 363 | _ASM_EXTABLE(5b,.L_fixup_4x8b_copy) |
| 306 | _ASM_EXTABLE(6b,30b) | 364 | _ASM_EXTABLE(6b,.L_fixup_4x8b_copy) |
| 307 | _ASM_EXTABLE(7b,30b) | 365 | _ASM_EXTABLE(7b,.L_fixup_4x8b_copy) |
| 308 | _ASM_EXTABLE(8b,30b) | 366 | _ASM_EXTABLE(8b,.L_fixup_4x8b_copy) |
| 309 | _ASM_EXTABLE(9b,30b) | 367 | _ASM_EXTABLE(9b,.L_fixup_4x8b_copy) |
| 310 | _ASM_EXTABLE(10b,30b) | 368 | _ASM_EXTABLE(10b,.L_fixup_4x8b_copy) |
| 311 | _ASM_EXTABLE(11b,30b) | 369 | _ASM_EXTABLE(11b,.L_fixup_4x8b_copy) |
| 312 | _ASM_EXTABLE(12b,30b) | 370 | _ASM_EXTABLE(12b,.L_fixup_4x8b_copy) |
| 313 | _ASM_EXTABLE(13b,30b) | 371 | _ASM_EXTABLE(13b,.L_fixup_4x8b_copy) |
| 314 | _ASM_EXTABLE(14b,30b) | 372 | _ASM_EXTABLE(14b,.L_fixup_4x8b_copy) |
| 315 | _ASM_EXTABLE(15b,30b) | 373 | _ASM_EXTABLE(15b,.L_fixup_4x8b_copy) |
| 316 | _ASM_EXTABLE(16b,30b) | 374 | _ASM_EXTABLE(16b,.L_fixup_4x8b_copy) |
| 317 | _ASM_EXTABLE(18b,40b) | 375 | _ASM_EXTABLE(20b,.L_fixup_8b_copy) |
| 318 | _ASM_EXTABLE(19b,40b) | 376 | _ASM_EXTABLE(21b,.L_fixup_8b_copy) |
| 319 | _ASM_EXTABLE(21b,50b) | 377 | _ASM_EXTABLE(30b,.L_fixup_4b_copy) |
| 320 | _ASM_EXTABLE(22b,50b) | 378 | _ASM_EXTABLE(31b,.L_fixup_4b_copy) |
| 379 | _ASM_EXTABLE(40b,.L_fixup_1b_copy) | ||
| 380 | _ASM_EXTABLE(41b,.L_fixup_1b_copy) | ||
| 321 | ENDPROC(__copy_user_nocache) | 381 | ENDPROC(__copy_user_nocache) |
