aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86_64/lib/copy_user.S
diff options
context:
space:
mode:
author: Andi Kleen <ak@suse.de> 2006-09-26 04:52:39 -0400
committer: Andi Kleen <andi@basil.nowhere.org> 2006-09-26 04:52:39 -0400
commit: 3022d734a54cbd2b65eea9a024564821101b4a9a (patch)
tree: a1445aeaf8fbdf84898c8cdc13dd98c550b5be0f /arch/x86_64/lib/copy_user.S
parent: f0f4c3432e5e1087b3a8c0e6bd4113d3c37497ff (diff)
[PATCH] Fix zeroing on exception in copy_*_user
- Don't zero for __copy_from_user_inatomic, following i386. This will prevent spurious zeros for parallel file system writers when one of them takes an exception.
- The string instruction version didn't zero the output on exception. Oops.

Also I cleaned up the code a bit while I was at it and added a minor optimization to the string instruction path.

Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/x86_64/lib/copy_user.S')
-rw-r--r-- arch/x86_64/lib/copy_user.S 124
1 files changed, 79 insertions, 45 deletions
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index 962f3a693c5e..70bebd310408 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -9,10 +9,29 @@
9 9
10#define FIX_ALIGNMENT 1 10#define FIX_ALIGNMENT 1
11 11
12 #include <asm/current.h> 12#include <asm/current.h>
13 #include <asm/asm-offsets.h> 13#include <asm/asm-offsets.h>
14 #include <asm/thread_info.h> 14#include <asm/thread_info.h>
15 #include <asm/cpufeature.h> 15#include <asm/cpufeature.h>
16
17 .macro ALTERNATIVE_JUMP feature,orig,alt /* emit a 5-byte jump to \orig plus an .altinstructions record naming a replacement jump to \alt for \feature */
180: /* 0: first byte of the default jump; its address is recorded by ".quad 0b" below */
19 .byte 0xe9 /* opcode byte of the default jump; a 32-bit displacement follows */
20 .long \orig-1f /* by default jump to orig; displacement is measured from label 1 just below */
211: /* 1: first byte after the default jump; both displacements in this macro are relative to here */
22 .section .altinstr_replacement,"ax"
232: .byte 0xe9 /* replacement jump: same opcode, again followed by a 32-bit displacement */
24 .long \alt-1b /* displacement to alt, measured from 1b (the end of the default jump) rather than from this copy -- presumably because these bytes are copied over the jump at 0b before they run; confirm against the alternatives patching code */
25 .previous
26 .section .altinstructions,"a"
27 .align 8
28 .quad 0b /* address of the default jump emitted above */
29 .quad 2b /* address of the replacement jump in .altinstr_replacement */
30 .byte \feature /* feature number that selects the replacement ("when feature is set") */
31 .byte 5 /* 5 = 1 opcode byte + 4 displacement bytes; presumably the length of the original instruction -- confirm against the .altinstructions record layout */
32 .byte 5 /* presumably the length of the replacement instruction -- confirm against the .altinstructions record layout */
33 .previous
34 .endm
16 35
17/* Standard copy_to_user with segment limit checking */ 36/* Standard copy_to_user with segment limit checking */
18ENTRY(copy_to_user) 37ENTRY(copy_to_user)
@@ -23,25 +42,21 @@ ENTRY(copy_to_user)
23 jc bad_to_user 42 jc bad_to_user
24 cmpq threadinfo_addr_limit(%rax),%rcx 43 cmpq threadinfo_addr_limit(%rax),%rcx
25 jae bad_to_user 44 jae bad_to_user
262: 45 xorl %eax,%eax /* clear zero flag */
27 .byte 0xe9 /* 32bit jump */ 46 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
28 .long .Lcug-1f
291:
30 CFI_ENDPROC 47 CFI_ENDPROC
31ENDPROC(copy_to_user)
32 48
33 .section .altinstr_replacement,"ax" 49ENTRY(copy_user_generic)
343: .byte 0xe9 /* replacement jmp with 32 bit immediate */ 50 CFI_STARTPROC
35 .long copy_user_generic_c-1b /* offset */ 51 movl $1,%ecx /* set zero flag */
36 .previous 52 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
37 .section .altinstructions,"a" 53 CFI_ENDPROC
38 .align 8 54
39 .quad 2b 55ENTRY(__copy_from_user_inatomic)
40 .quad 3b 56 CFI_STARTPROC
41 .byte X86_FEATURE_REP_GOOD 57 xorl %ecx,%ecx /* clear zero flag */
42 .byte 5 58 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
43 .byte 5 59 CFI_ENDPROC
44 .previous
45 60
46/* Standard copy_from_user with segment limit checking */ 61/* Standard copy_from_user with segment limit checking */
47ENTRY(copy_from_user) 62ENTRY(copy_from_user)
@@ -52,7 +67,8 @@ ENTRY(copy_from_user)
52 jc bad_from_user 67 jc bad_from_user
53 cmpq threadinfo_addr_limit(%rax),%rcx 68 cmpq threadinfo_addr_limit(%rax),%rcx
54 jae bad_from_user 69 jae bad_from_user
55 /* FALL THROUGH to copy_user_generic */ 70 movl $1,%ecx /* set zero flag */
71 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
56 CFI_ENDPROC 72 CFI_ENDPROC
57ENDPROC(copy_from_user) 73ENDPROC(copy_from_user)
58 74
@@ -73,37 +89,26 @@ END(bad_from_user)
73 89
74 90
75/* 91/*
76 * copy_user_generic - memory copy with exception handling. 92 * copy_user_generic_unrolled - memory copy with exception handling.
93 * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
77 * 94 *
78 * Input: 95 * Input:
79 * rdi destination 96 * rdi destination
80 * rsi source 97 * rsi source
81 * rdx count 98 * rdx count
99 * ecx zero flag -- if true zero destination on error
82 * 100 *
83 * Output: 101 * Output:
84 * eax uncopied bytes or 0 if successful. 102 * eax uncopied bytes or 0 if successful.
85 */ 103 */
86ENTRY(copy_user_generic) 104ENTRY(copy_user_generic_unrolled)
87 CFI_STARTPROC 105 CFI_STARTPROC
88 .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
89 .byte 0x66,0x90
901:
91 .section .altinstr_replacement,"ax"
922: .byte 0xe9 /* near jump with 32bit immediate */
93 .long copy_user_generic_c-1b /* offset */
94 .previous
95 .section .altinstructions,"a"
96 .align 8
97 .quad copy_user_generic
98 .quad 2b
99 .byte X86_FEATURE_REP_GOOD
100 .byte 5
101 .byte 5
102 .previous
103.Lcug:
104 pushq %rbx 106 pushq %rbx
105 CFI_ADJUST_CFA_OFFSET 8 107 CFI_ADJUST_CFA_OFFSET 8
106 CFI_REL_OFFSET rbx, 0 108 CFI_REL_OFFSET rbx, 0
109 pushq %rcx
110 CFI_ADJUST_CFA_OFFSET 8
111 CFI_REL_OFFSET rcx, 0
107 xorl %eax,%eax /*zero for the exception handler */ 112 xorl %eax,%eax /*zero for the exception handler */
108 113
109#ifdef FIX_ALIGNMENT 114#ifdef FIX_ALIGNMENT
@@ -179,6 +184,9 @@ ENTRY(copy_user_generic)
179 184
180 CFI_REMEMBER_STATE 185 CFI_REMEMBER_STATE
181.Lende: 186.Lende:
187 popq %rcx
188 CFI_ADJUST_CFA_OFFSET -8
189 CFI_RESTORE rcx
182 popq %rbx 190 popq %rbx
183 CFI_ADJUST_CFA_OFFSET -8 191 CFI_ADJUST_CFA_OFFSET -8
184 CFI_RESTORE rbx 192 CFI_RESTORE rbx
@@ -265,6 +273,8 @@ ENTRY(copy_user_generic)
265 addl %ecx,%edx 273 addl %ecx,%edx
266 /* edx: bytes to zero, rdi: dest, eax:zero */ 274 /* edx: bytes to zero, rdi: dest, eax:zero */
267.Lzero_rest: 275.Lzero_rest:
276 cmpl $0,(%rsp)
277 jz .Le_zero
268 movq %rdx,%rcx 278 movq %rdx,%rcx
269.Le_byte: 279.Le_byte:
270 xorl %eax,%eax 280 xorl %eax,%eax
@@ -286,6 +296,7 @@ ENDPROC(copy_user_generic)
286 /* rdi destination 296 /* rdi destination
287 * rsi source 297 * rsi source
288 * rdx count 298 * rdx count
299 * ecx zero flag
289 * 300 *
290 * Output: 301 * Output:
291 * eax uncopied bytes or 0 if successful. 302 * eax uncopied bytes or 0 if successful.
@@ -296,25 +307,48 @@ ENDPROC(copy_user_generic)
296 * And more would be dangerous because both Intel and AMD have 307 * And more would be dangerous because both Intel and AMD have
297 * errata with rep movsq > 4GB. If someone feels the need to fix 308 * errata with rep movsq > 4GB. If someone feels the need to fix
298 * this please consider this. 309 * this please consider this.
299 */ 310 */
300copy_user_generic_c: 311ENTRY(copy_user_generic_string)
301 CFI_STARTPROC 312 CFI_STARTPROC
313 movl %ecx,%r8d /* save zero flag */
302 movl %edx,%ecx 314 movl %edx,%ecx
303 shrl $3,%ecx 315 shrl $3,%ecx
304 andl $7,%edx 316 andl $7,%edx
317 jz 10f
3051: rep 3181: rep
306 movsq 319 movsq
307 movl %edx,%ecx 320 movl %edx,%ecx
3082: rep 3212: rep
309 movsb 322 movsb
3104: movl %ecx,%eax 3239: movl %ecx,%eax
311 ret 324 ret
3123: lea (%rdx,%rcx,8),%rax 325
326 /* multiple of 8 byte */
32710: rep
328 movsq
329 xor %eax,%eax
313 ret 330 ret
331
332 /* exception handling */
3333: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
334 jmp 6f
3355: movl %ecx,%eax /* exception on byte loop */
336 /* eax: left over bytes */
3376: testl %r8d,%r8d /* zero flag set? */
338 jz 7f
339 movl %eax,%ecx /* initialize x86 loop counter */
340 push %rax
341 xorl %eax,%eax
3428: rep
343 stosb /* zero the rest */
34411: pop %rax
3457: ret
314 CFI_ENDPROC 346 CFI_ENDPROC
315END(copy_user_generic_c) 347END(copy_user_generic_c)
316 348
317 .section __ex_table,"a" 349 .section __ex_table,"a"
318 .quad 1b,3b 350 .quad 1b,3b
319 .quad 2b,4b 351 .quad 2b,5b
352 .quad 8b,11b
353 .quad 10b,3b
320 .previous 354 .previous