diff options
| -rw-r--r-- | arch/x86/lib/copy_user_64.S | 114 |
1 file changed, 73 insertions(+), 41 deletions(-)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 982ce34f4a9b..a644aad1f112 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
| @@ -232,17 +232,30 @@ ENDPROC(copy_user_enhanced_fast_string) | |||
| 232 | 232 | ||
| 233 | /* | 233 | /* |
| 234 | * copy_user_nocache - Uncached memory copy with exception handling | 234 | * copy_user_nocache - Uncached memory copy with exception handling |
| 235 | * This will force destination/source out of cache for more performance. | 235 | * This will force destination out of cache for more performance. |
| 236 | * | ||
| 237 | * Note: Cached memory copy is used when destination or size is not | ||
| 238 | * naturally aligned. That is: | ||
| 239 | * - Require 8-byte alignment when size is 8 bytes or larger. | ||
| 236 | */ | 240 | */ |
| 237 | ENTRY(__copy_user_nocache) | 241 | ENTRY(__copy_user_nocache) |
| 238 | ASM_STAC | 242 | ASM_STAC |
| 243 | |||
| 244 | /* If size is less than 8 bytes, go to byte copy */ | ||
| 239 | cmpl $8,%edx | 245 | cmpl $8,%edx |
| 240 | jb 20f /* less then 8 bytes, go to byte copy loop */ | 246 | jb .L_1b_cache_copy_entry |
| 247 | |||
| 248 | /* If destination is not 8-byte aligned, "cache" copy to align it */ | ||
| 241 | ALIGN_DESTINATION | 249 | ALIGN_DESTINATION |
| 250 | |||
| 251 | /* Set 4x8-byte copy count and remainder */ | ||
| 242 | movl %edx,%ecx | 252 | movl %edx,%ecx |
| 243 | andl $63,%edx | 253 | andl $63,%edx |
| 244 | shrl $6,%ecx | 254 | shrl $6,%ecx |
| 245 | jz 17f | 255 | jz .L_8b_nocache_copy_entry /* jump if count is 0 */ |
| 256 | |||
| 257 | /* Perform 4x8-byte nocache loop-copy */ | ||
| 258 | .L_4x8b_nocache_copy_loop: | ||
| 246 | 1: movq (%rsi),%r8 | 259 | 1: movq (%rsi),%r8 |
| 247 | 2: movq 1*8(%rsi),%r9 | 260 | 2: movq 1*8(%rsi),%r9 |
| 248 | 3: movq 2*8(%rsi),%r10 | 261 | 3: movq 2*8(%rsi),%r10 |
| @@ -262,60 +275,79 @@ ENTRY(__copy_user_nocache) | |||
| 262 | leaq 64(%rsi),%rsi | 275 | leaq 64(%rsi),%rsi |
| 263 | leaq 64(%rdi),%rdi | 276 | leaq 64(%rdi),%rdi |
| 264 | decl %ecx | 277 | decl %ecx |
| 265 | jnz 1b | 278 | jnz .L_4x8b_nocache_copy_loop |
| 266 | 17: movl %edx,%ecx | 279 | |
| 280 | /* Set 8-byte copy count and remainder */ | ||
| 281 | .L_8b_nocache_copy_entry: | ||
| 282 | movl %edx,%ecx | ||
| 267 | andl $7,%edx | 283 | andl $7,%edx |
| 268 | shrl $3,%ecx | 284 | shrl $3,%ecx |
| 269 | jz 20f | 285 | jz .L_1b_cache_copy_entry /* jump if count is 0 */ |
| 270 | 18: movq (%rsi),%r8 | 286 | |
| 271 | 19: movnti %r8,(%rdi) | 287 | /* Perform 8-byte nocache loop-copy */ |
| 288 | .L_8b_nocache_copy_loop: | ||
| 289 | 20: movq (%rsi),%r8 | ||
| 290 | 21: movnti %r8,(%rdi) | ||
| 272 | leaq 8(%rsi),%rsi | 291 | leaq 8(%rsi),%rsi |
| 273 | leaq 8(%rdi),%rdi | 292 | leaq 8(%rdi),%rdi |
| 274 | decl %ecx | 293 | decl %ecx |
| 275 | jnz 18b | 294 | jnz .L_8b_nocache_copy_loop |
| 276 | 20: andl %edx,%edx | 295 | |
| 277 | jz 23f | 296 | /* If no byte left, we're done */ |
| 297 | .L_1b_cache_copy_entry: | ||
| 298 | andl %edx,%edx | ||
| 299 | jz .L_finish_copy | ||
| 300 | |||
| 301 | /* Perform byte "cache" loop-copy for the remainder */ | ||
| 278 | movl %edx,%ecx | 302 | movl %edx,%ecx |
| 279 | 21: movb (%rsi),%al | 303 | .L_1b_cache_copy_loop: |
| 280 | 22: movb %al,(%rdi) | 304 | 40: movb (%rsi),%al |
| 305 | 41: movb %al,(%rdi) | ||
| 281 | incq %rsi | 306 | incq %rsi |
| 282 | incq %rdi | 307 | incq %rdi |
| 283 | decl %ecx | 308 | decl %ecx |
| 284 | jnz 21b | 309 | jnz .L_1b_cache_copy_loop |
| 285 | 23: xorl %eax,%eax | 310 | |
| 311 | /* Finished copying; fence the prior stores */ | ||
| 312 | .L_finish_copy: | ||
| 313 | xorl %eax,%eax | ||
| 286 | ASM_CLAC | 314 | ASM_CLAC |
| 287 | sfence | 315 | sfence |
| 288 | ret | 316 | ret |
| 289 | 317 | ||
| 290 | .section .fixup,"ax" | 318 | .section .fixup,"ax" |
| 291 | 30: shll $6,%ecx | 319 | .L_fixup_4x8b_copy: |
| 320 | shll $6,%ecx | ||
| 292 | addl %ecx,%edx | 321 | addl %ecx,%edx |
| 293 | jmp 60f | 322 | jmp .L_fixup_handle_tail |
| 294 | 40: lea (%rdx,%rcx,8),%rdx | 323 | .L_fixup_8b_copy: |
| 295 | jmp 60f | 324 | lea (%rdx,%rcx,8),%rdx |
| 296 | 50: movl %ecx,%edx | 325 | jmp .L_fixup_handle_tail |
| 297 | 60: sfence | 326 | .L_fixup_1b_copy: |
| 327 | movl %ecx,%edx | ||
| 328 | .L_fixup_handle_tail: | ||
| 329 | sfence | ||
| 298 | jmp copy_user_handle_tail | 330 | jmp copy_user_handle_tail |
| 299 | .previous | 331 | .previous |
| 300 | 332 | ||
| 301 | _ASM_EXTABLE(1b,30b) | 333 | _ASM_EXTABLE(1b,.L_fixup_4x8b_copy) |
| 302 | _ASM_EXTABLE(2b,30b) | 334 | _ASM_EXTABLE(2b,.L_fixup_4x8b_copy) |
| 303 | _ASM_EXTABLE(3b,30b) | 335 | _ASM_EXTABLE(3b,.L_fixup_4x8b_copy) |
| 304 | _ASM_EXTABLE(4b,30b) | 336 | _ASM_EXTABLE(4b,.L_fixup_4x8b_copy) |
| 305 | _ASM_EXTABLE(5b,30b) | 337 | _ASM_EXTABLE(5b,.L_fixup_4x8b_copy) |
| 306 | _ASM_EXTABLE(6b,30b) | 338 | _ASM_EXTABLE(6b,.L_fixup_4x8b_copy) |
| 307 | _ASM_EXTABLE(7b,30b) | 339 | _ASM_EXTABLE(7b,.L_fixup_4x8b_copy) |
| 308 | _ASM_EXTABLE(8b,30b) | 340 | _ASM_EXTABLE(8b,.L_fixup_4x8b_copy) |
| 309 | _ASM_EXTABLE(9b,30b) | 341 | _ASM_EXTABLE(9b,.L_fixup_4x8b_copy) |
| 310 | _ASM_EXTABLE(10b,30b) | 342 | _ASM_EXTABLE(10b,.L_fixup_4x8b_copy) |
| 311 | _ASM_EXTABLE(11b,30b) | 343 | _ASM_EXTABLE(11b,.L_fixup_4x8b_copy) |
| 312 | _ASM_EXTABLE(12b,30b) | 344 | _ASM_EXTABLE(12b,.L_fixup_4x8b_copy) |
| 313 | _ASM_EXTABLE(13b,30b) | 345 | _ASM_EXTABLE(13b,.L_fixup_4x8b_copy) |
| 314 | _ASM_EXTABLE(14b,30b) | 346 | _ASM_EXTABLE(14b,.L_fixup_4x8b_copy) |
| 315 | _ASM_EXTABLE(15b,30b) | 347 | _ASM_EXTABLE(15b,.L_fixup_4x8b_copy) |
| 316 | _ASM_EXTABLE(16b,30b) | 348 | _ASM_EXTABLE(16b,.L_fixup_4x8b_copy) |
| 317 | _ASM_EXTABLE(18b,40b) | 349 | _ASM_EXTABLE(20b,.L_fixup_8b_copy) |
| 318 | _ASM_EXTABLE(19b,40b) | 350 | _ASM_EXTABLE(21b,.L_fixup_8b_copy) |
| 319 | _ASM_EXTABLE(21b,50b) | 351 | _ASM_EXTABLE(40b,.L_fixup_1b_copy) |
| 320 | _ASM_EXTABLE(22b,50b) | 352 | _ASM_EXTABLE(41b,.L_fixup_1b_copy) |
| 321 | ENDPROC(__copy_user_nocache) | 353 | ENDPROC(__copy_user_nocache) |
