author     Toshi Kani <toshi.kani@hpe.com>    2016-02-11 16:24:16 -0500
committer  Ingo Molnar <mingo@kernel.org>     2016-02-17 03:10:22 -0500
commit     ee9737c924706aaa72c2ead93e3ad5644681dc1c
tree       7e08386e0b8211dcc576959b6ecc6ec52b479e93
parent     02a5f765c961ea778e30a8356f32528e65f34942
x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable
Add comments to __copy_user_nocache() to clarify its procedures
and alignment requirements.
Also change numeric branch target labels to named local labels.
No code changed:
arch/x86/lib/copy_user_64.o:
text data bss dec hex filename
1239 0 0 1239 4d7 copy_user_64.o.before
1239 0 0 1239 4d7 copy_user_64.o.after
md5:
58bed94c2db98c1ca9a2d46d0680aaae copy_user_64.o.before.asm
58bed94c2db98c1ca9a2d46d0680aaae copy_user_64.o.after.asm
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: <stable@vger.kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: brian.boylston@hpe.com
Cc: dan.j.williams@intel.com
Cc: linux-nvdimm@lists.01.org
Cc: micah.parrish@hpe.com
Cc: ross.zwisler@linux.intel.com
Cc: vishal.l.verma@intel.com
Link: http://lkml.kernel.org/r/1455225857-12039-2-git-send-email-toshi.kani@hpe.com
[ Small readability edits and added object file comparison. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--   arch/x86/lib/copy_user_64.S   114
1 file changed, 73 insertions(+), 41 deletions(-)
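
Before the assembly diff, here is a minimal user-space C sketch of the copy structure that the new labels describe. This is a hypothetical illustration, not the kernel routine: it omits STAC/CLAC and the _ASM_EXTABLE fault handling, assumes the destination is already 8-byte aligned (which ALIGN_DESTINATION establishes in the real code), and the function name is made up.

#include <stddef.h>
#include <string.h>
#include <emmintrin.h>  /* _mm_stream_si64() ~ movnti, _mm_sfence() ~ sfence */

/*
 * Hypothetical sketch only; the real __copy_user_nocache() also returns
 * the number of bytes left uncopied when a fault occurs.
 */
static void nocache_copy_sketch(void *dst, const void *src, size_t len)
{
        long long *d = dst;
        const long long *s = src;

        /* .L_4x8b_nocache_copy_loop: 64-byte blocks, non-temporal stores */
        while (len >= 64) {
                for (int i = 0; i < 8; i++)
                        _mm_stream_si64(&d[i], s[i]);
                d += 8;
                s += 8;
                len -= 64;
        }

        /* .L_8b_nocache_copy_loop: remaining 8-byte words, still non-temporal */
        while (len >= 8) {
                _mm_stream_si64(d++, *s++);
                len -= 8;
        }

        /* .L_1b_cache_copy_loop: byte tail uses ordinary cached stores */
        memcpy(d, s, len);

        /* .L_finish_copy: fence the weakly ordered streaming stores */
        _mm_sfence();
}

The trailing _mm_sfence() mirrors the sfence in both the normal-exit and fixup paths of the assembly: non-temporal stores are weakly ordered, so they must be fenced before the copy is reported complete.
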
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 982ce34f4a9b..a644aad1f112 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -232,17 +232,30 @@ ENDPROC(copy_user_enhanced_fast_string)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
  */
 ENTRY(__copy_user_nocache)
         ASM_STAC
+
+        /* If size is less than 8 bytes, go to byte copy */
         cmpl $8,%edx
-        jb 20f          /* less then 8 bytes, go to byte copy loop */
+        jb .L_1b_cache_copy_entry
+
+        /* If destination is not 8-byte aligned, "cache" copy to align it */
         ALIGN_DESTINATION
+
+        /* Set 4x8-byte copy count and remainder */
         movl %edx,%ecx
         andl $63,%edx
         shrl $6,%ecx
-        jz 17f
+        jz .L_8b_nocache_copy_entry     /* jump if count is 0 */
+
+        /* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:      movq (%rsi),%r8
 2:      movq 1*8(%rsi),%r9
 3:      movq 2*8(%rsi),%r10
@@ -262,60 +275,79 @@ ENTRY(__copy_user_nocache)
         leaq 64(%rsi),%rsi
         leaq 64(%rdi),%rdi
         decl %ecx
-        jnz 1b
-17:     movl %edx,%ecx
+        jnz .L_4x8b_nocache_copy_loop
+
+        /* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+        movl %edx,%ecx
         andl $7,%edx
         shrl $3,%ecx
-        jz 20f
-18:     movq (%rsi),%r8
-19:     movnti %r8,(%rdi)
+        jz .L_1b_cache_copy_entry       /* jump if count is 0 */
+
+        /* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:     movq (%rsi),%r8
+21:     movnti %r8,(%rdi)
         leaq 8(%rsi),%rsi
         leaq 8(%rdi),%rdi
         decl %ecx
-        jnz 18b
-20:     andl %edx,%edx
-        jz 23f
+        jnz .L_8b_nocache_copy_loop
+
+        /* If no byte left, we're done */
+.L_1b_cache_copy_entry:
+        andl %edx,%edx
+        jz .L_finish_copy
+
+        /* Perform byte "cache" loop-copy for the remainder */
         movl %edx,%ecx
-21:     movb (%rsi),%al
-22:     movb %al,(%rdi)
+.L_1b_cache_copy_loop:
+40:     movb (%rsi),%al
+41:     movb %al,(%rdi)
         incq %rsi
         incq %rdi
         decl %ecx
-        jnz 21b
-23:     xorl %eax,%eax
+        jnz .L_1b_cache_copy_loop
+
+        /* Finished copying; fence the prior stores */
+.L_finish_copy:
+        xorl %eax,%eax
         ASM_CLAC
         sfence
         ret
 
         .section .fixup,"ax"
-30:     shll $6,%ecx
+.L_fixup_4x8b_copy:
+        shll $6,%ecx
         addl %ecx,%edx
-        jmp 60f
-40:     lea (%rdx,%rcx,8),%rdx
-        jmp 60f
-50:     movl %ecx,%edx
-60:     sfence
+        jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+        lea (%rdx,%rcx,8),%rdx
+        jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+        movl %ecx,%edx
+.L_fixup_handle_tail:
+        sfence
         jmp copy_user_handle_tail
         .previous
 
-        _ASM_EXTABLE(1b,30b)
-        _ASM_EXTABLE(2b,30b)
-        _ASM_EXTABLE(3b,30b)
-        _ASM_EXTABLE(4b,30b)
-        _ASM_EXTABLE(5b,30b)
-        _ASM_EXTABLE(6b,30b)
-        _ASM_EXTABLE(7b,30b)
-        _ASM_EXTABLE(8b,30b)
-        _ASM_EXTABLE(9b,30b)
-        _ASM_EXTABLE(10b,30b)
-        _ASM_EXTABLE(11b,30b)
-        _ASM_EXTABLE(12b,30b)
-        _ASM_EXTABLE(13b,30b)
-        _ASM_EXTABLE(14b,30b)
-        _ASM_EXTABLE(15b,30b)
-        _ASM_EXTABLE(16b,30b)
-        _ASM_EXTABLE(18b,40b)
-        _ASM_EXTABLE(19b,40b)
-        _ASM_EXTABLE(21b,50b)
-        _ASM_EXTABLE(22b,50b)
+        _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+        _ASM_EXTABLE(20b,.L_fixup_8b_copy)
+        _ASM_EXTABLE(21b,.L_fixup_8b_copy)
+        _ASM_EXTABLE(40b,.L_fixup_1b_copy)
+        _ASM_EXTABLE(41b,.L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
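
The renamed .fixup handlers each rebuild a remaining-byte count in %edx before falling through to the shared sfence and the jump to copy_user_handle_tail. Restated in C-like pseudocode with hypothetical variable names (ecx is the remaining loop count, edx the remaining sub-loop byte count at the time of the fault):

        /* .L_fixup_4x8b_copy: each loop iteration covers 64 bytes */
        bytes_left = (ecx << 6) + edx;
        /* .L_fixup_8b_copy: each loop iteration covers 8 bytes */
        bytes_left = edx + ecx * 8;
        /* .L_fixup_1b_copy: each loop iteration covers 1 byte */
        bytes_left = ecx;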