author      Toshi Kani <toshi.kani@hpe.com>        2016-02-11 16:24:16 -0500
committer   Ingo Molnar <mingo@kernel.org>         2016-02-17 03:10:22 -0500
commit      ee9737c924706aaa72c2ead93e3ad5644681dc1c (patch)
tree        7e08386e0b8211dcc576959b6ecc6ec52b479e93
parent      02a5f765c961ea778e30a8356f32528e65f34942 (diff)
x86/uaccess/64: Make the __copy_user_nocache() assembly code more readable
Add comments to __copy_user_nocache() to clarify its procedures and
alignment requirements.

Also change numeric branch target labels to named local labels.

No code changed:

 arch/x86/lib/copy_user_64.o:

    text    data     bss     dec     hex filename
    1239       0       0    1239     4d7 copy_user_64.o.before
    1239       0       0    1239     4d7 copy_user_64.o.after

 md5:
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.before.asm
    58bed94c2db98c1ca9a2d46d0680aaae  copy_user_64.o.after.asm

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: <stable@vger.kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: brian.boylston@hpe.com
Cc: dan.j.williams@intel.com
Cc: linux-nvdimm@lists.01.org
Cc: micah.parrish@hpe.com
Cc: ross.zwisler@linux.intel.com
Cc: vishal.l.verma@intel.com
Link: http://lkml.kernel.org/r/1455225857-12039-2-git-send-email-toshi.kani@hpe.com
[ Small readability edits and added object file comparison. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
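The control flow that the new .L_* labels document can be sketched in plain C. The sketch below is illustrative only (userspace, my own function name, no kernel API): plain assignments and memcpy() stand in for the MOVQ/MOVNTI pairs, the trailing SFENCE, the STAC/CLAC user-access window, and the exception-table fixups of the real routine.

/*
 * Hypothetical model of __copy_user_nocache()'s three copy stages:
 * a 4x8-byte main loop, an 8-byte loop for the tail, and a byte
 * loop for whatever is left.  Not kernel code.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void copy_nocache_model(void *dst, const void *src, size_t size)
{
	unsigned char *d = dst;
	const unsigned char *s = src;

	if (size >= 8) {		/* cmpl $8,%edx; jb .L_1b_cache_copy_entry */
		/* ALIGN_DESTINATION: "cache" byte-copy until dst is 8-byte aligned */
		while (((uintptr_t)d & 7) && size) {
			*d++ = *s++;
			size--;
		}
		while (size >= 64) {	/* .L_4x8b_nocache_copy_loop */
			memcpy(d, s, 64);	/* 8 x movq + movnti in the real code */
			d += 64; s += 64; size -= 64;
		}
		while (size >= 8) {	/* .L_8b_nocache_copy_loop */
			memcpy(d, s, 8);	/* movq + movnti in the real code */
			d += 8; s += 8; size -= 8;
		}
	}
	while (size--)			/* .L_1b_cache_copy_loop */
		*d++ = *s++;
	/* .L_finish_copy: the real code issues sfence to order the NT stores */
}

int main(void)
{
	char src[100], dst[100];

	for (int i = 0; i < 100; i++)
		src[i] = (char)i;
	copy_nocache_model(dst + 1, src, 99);	/* deliberately unaligned dst */
	printf("%s\n", memcmp(dst + 1, src, 99) ? "mismatch" : "ok");
	return 0;
}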
-rw-r--r--   arch/x86/lib/copy_user_64.S   114
1 file changed, 73 insertions(+), 41 deletions(-)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 982ce34f4a9b..a644aad1f112 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -232,17 +232,30 @@ ENDPROC(copy_user_enhanced_fast_string)
 
 /*
  * copy_user_nocache - Uncached memory copy with exception handling
- * This will force destination/source out of cache for more performance.
+ * This will force destination out of cache for more performance.
+ *
+ * Note: Cached memory copy is used when destination or size is not
+ * naturally aligned. That is:
+ *  - Require 8-byte alignment when size is 8 bytes or larger.
  */
 ENTRY(__copy_user_nocache)
 	ASM_STAC
+
+	/* If size is less than 8 bytes, go to byte copy */
 	cmpl $8,%edx
-	jb 20f		/* less then 8 bytes, go to byte copy loop */
+	jb .L_1b_cache_copy_entry
+
+	/* If destination is not 8-byte aligned, "cache" copy to align it */
 	ALIGN_DESTINATION
+
+	/* Set 4x8-byte copy count and remainder */
 	movl %edx,%ecx
 	andl $63,%edx
 	shrl $6,%ecx
-	jz 17f
+	jz .L_8b_nocache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte nocache loop-copy */
+.L_4x8b_nocache_copy_loop:
 1:	movq (%rsi),%r8
 2:	movq 1*8(%rsi),%r9
 3:	movq 2*8(%rsi),%r10
@@ -262,60 +275,79 @@ ENTRY(__copy_user_nocache)
 	leaq 64(%rsi),%rsi
 	leaq 64(%rdi),%rdi
 	decl %ecx
-	jnz 1b
-17:	movl %edx,%ecx
+	jnz .L_4x8b_nocache_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_nocache_copy_entry:
+	movl %edx,%ecx
 	andl $7,%edx
 	shrl $3,%ecx
-	jz 20f
-18:	movq (%rsi),%r8
-19:	movnti %r8,(%rdi)
+	jz .L_1b_cache_copy_entry	/* jump if count is 0 */
+
+	/* Perform 8-byte nocache loop-copy */
+.L_8b_nocache_copy_loop:
+20:	movq (%rsi),%r8
+21:	movnti %r8,(%rdi)
 	leaq 8(%rsi),%rsi
 	leaq 8(%rdi),%rdi
 	decl %ecx
-	jnz 18b
-20:	andl %edx,%edx
-	jz 23f
+	jnz .L_8b_nocache_copy_loop
+
+	/* If no byte left, we're done */
+.L_1b_cache_copy_entry:
+	andl %edx,%edx
+	jz .L_finish_copy
+
+	/* Perform byte "cache" loop-copy for the remainder */
 	movl %edx,%ecx
-21:	movb (%rsi),%al
-22:	movb %al,(%rdi)
+.L_1b_cache_copy_loop:
+40:	movb (%rsi),%al
+41:	movb %al,(%rdi)
 	incq %rsi
 	incq %rdi
 	decl %ecx
-	jnz 21b
-23:	xorl %eax,%eax
+	jnz .L_1b_cache_copy_loop
+
+	/* Finished copying; fence the prior stores */
+.L_finish_copy:
+	xorl %eax,%eax
 	ASM_CLAC
 	sfence
 	ret
 
 	.section .fixup,"ax"
-30:	shll $6,%ecx
+.L_fixup_4x8b_copy:
+	shll $6,%ecx
 	addl %ecx,%edx
-	jmp 60f
-40:	lea (%rdx,%rcx,8),%rdx
-	jmp 60f
-50:	movl %ecx,%edx
-60:	sfence
+	jmp .L_fixup_handle_tail
+.L_fixup_8b_copy:
+	lea (%rdx,%rcx,8),%rdx
+	jmp .L_fixup_handle_tail
+.L_fixup_1b_copy:
+	movl %ecx,%edx
+.L_fixup_handle_tail:
+	sfence
 	jmp copy_user_handle_tail
 	.previous
 
-	_ASM_EXTABLE(1b,30b)
-	_ASM_EXTABLE(2b,30b)
-	_ASM_EXTABLE(3b,30b)
-	_ASM_EXTABLE(4b,30b)
-	_ASM_EXTABLE(5b,30b)
-	_ASM_EXTABLE(6b,30b)
-	_ASM_EXTABLE(7b,30b)
-	_ASM_EXTABLE(8b,30b)
-	_ASM_EXTABLE(9b,30b)
-	_ASM_EXTABLE(10b,30b)
-	_ASM_EXTABLE(11b,30b)
-	_ASM_EXTABLE(12b,30b)
-	_ASM_EXTABLE(13b,30b)
-	_ASM_EXTABLE(14b,30b)
-	_ASM_EXTABLE(15b,30b)
-	_ASM_EXTABLE(16b,30b)
-	_ASM_EXTABLE(18b,40b)
-	_ASM_EXTABLE(19b,40b)
-	_ASM_EXTABLE(21b,50b)
-	_ASM_EXTABLE(22b,50b)
+	_ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
+	_ASM_EXTABLE(20b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(21b,.L_fixup_8b_copy)
+	_ASM_EXTABLE(40b,.L_fixup_1b_copy)
+	_ASM_EXTABLE(41b,.L_fixup_1b_copy)
 ENDPROC(__copy_user_nocache)
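The byte counts that the renamed .fixup targets hand to copy_user_handle_tail follow directly from the loop structure: 64 bytes per outstanding chunk in the 4x8-byte path, 8 in the 8-byte path, 1 in the byte path, plus whatever remainder was left for the later stages. A minimal arithmetic sketch in plain C (illustrative names mirroring the register roles above; not kernel code):

/* Hypothetical sketch of the not-yet-copied byte counts computed in .fixup. */
#include <stdio.h>

/* .L_fixup_4x8b_copy: shll $6,%ecx; addl %ecx,%edx */
static unsigned int fixup_4x8b(unsigned int ecx, unsigned int edx)
{
	return (ecx << 6) + edx;	/* 64 bytes per outstanding 4x8 chunk + tail */
}

/* .L_fixup_8b_copy: lea (%rdx,%rcx,8),%rdx */
static unsigned int fixup_8b(unsigned int ecx, unsigned int edx)
{
	return edx + ecx * 8;		/* 8 bytes per outstanding chunk + tail */
}

/* .L_fixup_1b_copy: movl %ecx,%edx */
static unsigned int fixup_1b(unsigned int ecx)
{
	return ecx;			/* each remaining iteration is one byte */
}

int main(void)
{
	/* e.g. a fault with 2 chunks still pending and a 13-byte remainder */
	printf("%u %u %u\n", fixup_4x8b(2, 13), fixup_8b(2, 13), fixup_1b(13));
	return 0;
}

All three paths fall through to .L_fixup_handle_tail, which still issues sfence before the tail handler so the non-temporal stores already performed are ordered.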