Diffstat (limited to 'arch/x86_64/lib/copy_user.S')
-rw-r--r--  arch/x86_64/lib/copy_user.S  153
1 file changed, 103 insertions, 50 deletions
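Both copy_to_user and copy_from_user in the hunks below open with the same segment-limit check: add the length to the pointer, reject on carry (wrap-around), and reject when the end reaches thread_info->addr_limit. A minimal C sketch of that test (user_range_ok is a hypothetical helper name, illustration only):

static int user_range_ok(unsigned long ptr, unsigned long len,
			 unsigned long addr_limit)
{
	unsigned long end = ptr + len;

	if (end < ptr)			/* addq set the carry flag: wrapped */
		return 0;		/* jc bad_*_user */
	return end < addr_limit;	/* jae bad_*_user fires on end >= limit */
}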
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b54..70bebd310408 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,56 +4,78 @@
4 | * Functions to copy from and to user space. | 4 | * Functions to copy from and to user space. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/linkage.h> | ||
8 | #include <asm/dwarf2.h> | ||
9 | |||
7 | #define FIX_ALIGNMENT 1 | 10 | #define FIX_ALIGNMENT 1 |
8 | 11 | ||
9 | #include <asm/current.h> | 12 | #include <asm/current.h> |
10 | #include <asm/asm-offsets.h> | 13 | #include <asm/asm-offsets.h> |
11 | #include <asm/thread_info.h> | 14 | #include <asm/thread_info.h> |
12 | #include <asm/cpufeature.h> | 15 | #include <asm/cpufeature.h> |
13 | 16 | ||
14 | /* Standard copy_to_user with segment limit checking */ | 17 | .macro ALTERNATIVE_JUMP feature,orig,alt |
15 | .globl copy_to_user | 18 | 0: |
16 | .p2align 4 | ||
17 | copy_to_user: | ||
18 | GET_THREAD_INFO(%rax) | ||
19 | movq %rdi,%rcx | ||
20 | addq %rdx,%rcx | ||
21 | jc bad_to_user | ||
22 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
23 | jae bad_to_user | ||
24 | 2: | ||
25 | .byte 0xe9 /* 32bit jump */ | 19 | .byte 0xe9 /* 32bit jump */ |
26 | .long .Lcug-1f | 20 | .long \orig-1f /* by default jump to orig */ |
27 | 1: | 21 | 1: |
28 | |||
29 | .section .altinstr_replacement,"ax" | 22 | .section .altinstr_replacement,"ax" |
30 | 3: .byte 0xe9 /* replacement jmp with 8 bit immediate */ | 23 | 2: .byte 0xe9 /* near jump with 32bit immediate */ |
31 | .long copy_user_generic_c-1b /* offset */ | 24 | .long \alt-1b /* offset */ /* or alternatively to alt */ |
32 | .previous | 25 | .previous |
33 | .section .altinstructions,"a" | 26 | .section .altinstructions,"a" |
34 | .align 8 | 27 | .align 8 |
28 | .quad 0b | ||
35 | .quad 2b | 29 | .quad 2b |
36 | .quad 3b | 30 | .byte \feature /* when feature is set */ |
37 | .byte X86_FEATURE_REP_GOOD | ||
38 | .byte 5 | 31 | .byte 5 |
39 | .byte 5 | 32 | .byte 5 |
40 | .previous | 33 | .previous |
34 | .endm | ||
35 | |||
36 | /* Standard copy_to_user with segment limit checking */ | ||
37 | ENTRY(copy_to_user) | ||
38 | CFI_STARTPROC | ||
39 | GET_THREAD_INFO(%rax) | ||
40 | movq %rdi,%rcx | ||
41 | addq %rdx,%rcx | ||
42 | jc bad_to_user | ||
43 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
44 | jae bad_to_user | ||
45 | xorl %eax,%eax /* clear zero flag */ | ||
46 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
47 | CFI_ENDPROC | ||
48 | |||
49 | ENTRY(copy_user_generic) | ||
50 | CFI_STARTPROC | ||
51 | movl $1,%ecx /* set zero flag */ | ||
52 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
53 | CFI_ENDPROC | ||
54 | |||
55 | ENTRY(__copy_from_user_inatomic) | ||
56 | CFI_STARTPROC | ||
57 | xorl %ecx,%ecx /* clear zero flag */ | ||
58 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
59 | CFI_ENDPROC | ||
41 | 60 | ||
42 | /* Standard copy_from_user with segment limit checking */ | 61 | /* Standard copy_from_user with segment limit checking */ |
43 | .globl copy_from_user | 62 | ENTRY(copy_from_user) |
44 | .p2align 4 | 63 | CFI_STARTPROC |
45 | copy_from_user: | ||
46 | GET_THREAD_INFO(%rax) | 64 | GET_THREAD_INFO(%rax) |
47 | movq %rsi,%rcx | 65 | movq %rsi,%rcx |
48 | addq %rdx,%rcx | 66 | addq %rdx,%rcx |
49 | jc bad_from_user | 67 | jc bad_from_user |
50 | cmpq threadinfo_addr_limit(%rax),%rcx | 68 | cmpq threadinfo_addr_limit(%rax),%rcx |
51 | jae bad_from_user | 69 | jae bad_from_user |
52 | /* FALL THROUGH to copy_user_generic */ | 70 | movl $1,%ecx /* set zero flag */ |
71 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
72 | CFI_ENDPROC | ||
73 | ENDPROC(copy_from_user) | ||
53 | 74 | ||
54 | .section .fixup,"ax" | 75 | .section .fixup,"ax" |
55 | /* must zero dest */ | 76 | /* must zero dest */ |
56 | bad_from_user: | 77 | bad_from_user: |
78 | CFI_STARTPROC | ||
57 | movl %edx,%ecx | 79 | movl %edx,%ecx |
58 | xorl %eax,%eax | 80 | xorl %eax,%eax |
59 | rep | 81 | rep |
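The ALTERNATIVE_JUMP macro added above emits a 5-byte near jump to \orig plus an .altinstructions record; at boot the alternatives code rewrites that jump to target \alt on CPUs whose feature bit is set. A hedged C sketch of the net effect for the entry points above (pick_copy_user is hypothetical, and the prototypes are simplified: the real routines also receive the "zero the tail" flag in %ecx, outside the C calling convention):

extern unsigned long copy_user_generic_unrolled(void *dst, const void *src,
						unsigned len);
extern unsigned long copy_user_generic_string(void *dst, const void *src,
					      unsigned len);

typedef unsigned long (*copy_fn)(void *dst, const void *src, unsigned len);

static copy_fn pick_copy_user(int cpu_has_rep_good)
{
	/* With X86_FEATURE_REP_GOOD the patched jump lands in the
	 * string version; otherwise it keeps pointing at the
	 * unrolled loop. */
	return cpu_has_rep_good ? copy_user_generic_string
				: copy_user_generic_unrolled;
}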
@@ -61,40 +83,32 @@ bad_from_user:
61 | bad_to_user: | 83 | bad_to_user: |
62 | movl %edx,%eax | 84 | movl %edx,%eax |
63 | ret | 85 | ret |
86 | CFI_ENDPROC | ||
87 | END(bad_from_user) | ||
64 | .previous | 88 | .previous |
65 | 89 | ||
66 | 90 | ||
67 | /* | 91 | /* |
68 | * copy_user_generic - memory copy with exception handling. | 92 | * copy_user_generic_unrolled - memory copy with exception handling. |
93 | * This version is for CPUs like P4 that don't have efficient micro code for rep movsq | ||
69 | * | 94 | * |
70 | * Input: | 95 | * Input: |
71 | * rdi destination | 96 | * rdi destination |
72 | * rsi source | 97 | * rsi source |
73 | * rdx count | 98 | * rdx count |
99 | * ecx zero flag -- if true zero destination on error | ||
74 | * | 100 | * |
75 | * Output: | 101 | * Output: |
76 | * eax uncopied bytes or 0 if successful. | 102 | * eax uncopied bytes or 0 if successful. |
77 | */ | 103 | */ |
78 | .globl copy_user_generic | 104 | ENTRY(copy_user_generic_unrolled) |
79 | .p2align 4 | 105 | CFI_STARTPROC |
80 | copy_user_generic: | ||
81 | .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ | ||
82 | .byte 0x66,0x90 | ||
83 | 1: | ||
84 | .section .altinstr_replacement,"ax" | ||
85 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | ||
86 | .long copy_user_generic_c-1b /* offset */ | ||
87 | .previous | ||
88 | .section .altinstructions,"a" | ||
89 | .align 8 | ||
90 | .quad copy_user_generic | ||
91 | .quad 2b | ||
92 | .byte X86_FEATURE_REP_GOOD | ||
93 | .byte 5 | ||
94 | .byte 5 | ||
95 | .previous | ||
96 | .Lcug: | ||
97 | pushq %rbx | 106 | pushq %rbx |
107 | CFI_ADJUST_CFA_OFFSET 8 | ||
108 | CFI_REL_OFFSET rbx, 0 | ||
109 | pushq %rcx | ||
110 | CFI_ADJUST_CFA_OFFSET 8 | ||
111 | CFI_REL_OFFSET rcx, 0 | ||
98 | xorl %eax,%eax /*zero for the exception handler */ | 112 | xorl %eax,%eax /*zero for the exception handler */ |
99 | 113 | ||
100 | #ifdef FIX_ALIGNMENT | 114 | #ifdef FIX_ALIGNMENT |
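The comment block above defines the new calling convention: %ecx carries a "zero the destination on error" flag and %eax returns the number of bytes left uncopied. A minimal C sketch of that contract (copy_user_contract and its copied/zero_tail parameters are modeling devices, not kernel code):

#include <string.h>

static unsigned long copy_user_contract(void *dst, unsigned long len,
					unsigned long copied, int zero_tail)
{
	unsigned long left = len - copied;	/* bytes hit by the fault */

	if (left && zero_tail)
		memset((char *)dst + copied, 0, left);	/* no stale kernel data */
	return left;				/* 0 on full success */
}

In the entry points above, copy_from_user and copy_user_generic set the flag (movl $1,%ecx) while copy_to_user and __copy_from_user_inatomic clear it (xorl).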
@@ -168,9 +182,16 @@ copy_user_generic:
168 | decl %ecx | 182 | decl %ecx |
169 | jnz .Lloop_1 | 183 | jnz .Lloop_1 |
170 | 184 | ||
185 | CFI_REMEMBER_STATE | ||
171 | .Lende: | 186 | .Lende: |
187 | popq %rcx | ||
188 | CFI_ADJUST_CFA_OFFSET -8 | ||
189 | CFI_RESTORE rcx | ||
172 | popq %rbx | 190 | popq %rbx |
191 | CFI_ADJUST_CFA_OFFSET -8 | ||
192 | CFI_RESTORE rbx | ||
173 | ret | 193 | ret |
194 | CFI_RESTORE_STATE | ||
174 | 195 | ||
175 | #ifdef FIX_ALIGNMENT | 196 | #ifdef FIX_ALIGNMENT |
176 | /* align destination */ | 197 | /* align destination */ |
@@ -252,6 +273,8 @@ copy_user_generic:
252 | addl %ecx,%edx | 273 | addl %ecx,%edx |
253 | /* edx: bytes to zero, rdi: dest, eax:zero */ | 274 | /* edx: bytes to zero, rdi: dest, eax:zero */ |
254 | .Lzero_rest: | 275 | .Lzero_rest: |
276 | cmpl $0,(%rsp) | ||
277 | jz .Le_zero | ||
255 | movq %rdx,%rcx | 278 | movq %rdx,%rcx |
256 | .Le_byte: | 279 | .Le_byte: |
257 | xorl %eax,%eax | 280 | xorl %eax,%eax |
@@ -261,6 +284,9 @@ copy_user_generic:
261 | .Le_zero: | 284 | .Le_zero: |
262 | movq %rdx,%rax | 285 | movq %rdx,%rax |
263 | jmp .Lende | 286 | jmp .Lende |
287 | CFI_ENDPROC | ||
288 | ENDPROC(copy_user_generic) | ||
289 | |||
264 | 290 | ||
265 | /* Some CPUs run faster using the string copy instructions. | 291 | /* Some CPUs run faster using the string copy instructions. |
266 | This is also a lot simpler. Use them when possible. | 292 | This is also a lot simpler. Use them when possible. |
@@ -270,6 +296,7 @@ copy_user_generic:
270 | /* rdi destination | 296 | /* rdi destination |
271 | * rsi source | 297 | * rsi source |
272 | * rdx count | 298 | * rdx count |
299 | * ecx zero flag | ||
273 | * | 300 | * |
274 | * Output: | 301 | * Output: |
275 | * eax uncopied bytes or 0 if successfull. | 302 | * eax uncopied bytes or 0 if successfull. |
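The string variant that follows splits the count into count/8 quadword moves (rep movsq) and count%8 trailing bytes (rep movsb), with a new shortcut (label 10) when the length is already a multiple of eight. A hedged C rendering of the fault-free path only (copy_string_sketch is illustrative; the real code also parks the zero flag in %r8d for the fixup handlers):

#include <string.h>

static unsigned long copy_string_sketch(void *dst, const void *src,
					unsigned long count)
{
	unsigned long quads = count >> 3;	/* shrl $3,%ecx */
	unsigned long tail  = count & 7;	/* andl $7,%edx */

	memcpy(dst, src, quads * 8);		/* 1:/10: rep movsq */
	if (tail)				/* jz 10f skips the byte loop */
		memcpy((char *)dst + quads * 8,
		       (const char *)src + quads * 8, tail);	/* 2: rep movsb */
	return 0;				/* nothing left uncopied */
}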
@@ -280,22 +307,48 @@ copy_user_generic:
280 | * And more would be dangerous because both Intel and AMD have | 307 | * And more would be dangerous because both Intel and AMD have |
281 | * errata with rep movsq > 4GB. If someone feels the need to fix | 308 | * errata with rep movsq > 4GB. If someone feels the need to fix |
282 | * this please consider this. | 309 | * this please consider this. |
283 | */ | 310 | */ |
284 | copy_user_generic_c: | 311 | ENTRY(copy_user_generic_string) |
312 | CFI_STARTPROC | ||
313 | movl %ecx,%r8d /* save zero flag */ | ||
285 | movl %edx,%ecx | 314 | movl %edx,%ecx |
286 | shrl $3,%ecx | 315 | shrl $3,%ecx |
287 | andl $7,%edx | 316 | andl $7,%edx |
317 | jz 10f | ||
288 | 1: rep | 318 | 1: rep |
289 | movsq | 319 | movsq |
290 | movl %edx,%ecx | 320 | movl %edx,%ecx |
291 | 2: rep | 321 | 2: rep |
292 | movsb | 322 | movsb |
293 | 4: movl %ecx,%eax | 323 | 9: movl %ecx,%eax |
294 | ret | 324 | ret |
295 | 3: lea (%rdx,%rcx,8),%rax | 325 | |
326 | /* multiple of 8 byte */ | ||
327 | 10: rep | ||
328 | movsq | ||
329 | xor %eax,%eax | ||
296 | ret | 330 | ret |
297 | 331 | ||
332 | /* exception handling */ | ||
333 | 3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ | ||
334 | jmp 6f | ||
335 | 5: movl %ecx,%eax /* exception on byte loop */ | ||
336 | /* eax: left over bytes */ | ||
337 | 6: testl %r8d,%r8d /* zero flag set? */ | ||
338 | jz 7f | ||
339 | movl %eax,%ecx /* initialize x86 loop counter */ | ||
340 | push %rax | ||
341 | xorl %eax,%eax | ||
342 | 8: rep | ||
343 | stosb /* zero the rest */ | ||
344 | 11: pop %rax | ||
345 | 7: ret | ||
346 | CFI_ENDPROC | ||
347 | END(copy_user_generic_c) | ||
348 | |||
298 | .section __ex_table,"a" | 349 | .section __ex_table,"a" |
299 | .quad 1b,3b | 350 | .quad 1b,3b |
300 | .quad 2b,4b | 351 | .quad 2b,5b |
352 | .quad 8b,11b | ||
353 | .quad 10b,3b | ||
301 | .previous | 354 | .previous |
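Each .quad pair in the __ex_table section above maps an instruction that may fault on a user address (labels 1, 2, 8, 10) to the fixup that takes over (labels 3, 5, 11), which then computes the leftover byte count for %eax. A hedged sketch of the record and lookup (field layout as on x86-64 of this era; search_fixup is a stand-in, the kernel actually sorts the table and binary-searches it from the page-fault handler):

struct exception_table_entry {
	unsigned long insn;	/* address of the faulting instruction */
	unsigned long fixup;	/* address execution resumes at */
};

static unsigned long search_fixup(const struct exception_table_entry *tbl,
				  unsigned long n, unsigned long rip)
{
	unsigned long i;

	for (i = 0; i < n; i++)
		if (tbl[i].insn == rip)
			return tbl[i].fixup;	/* new %rip: jump to the fixup */
	return 0;				/* not a user-copy fault */
}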