diff options
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/copy_user_64.S | 427 | ||||
-rw-r--r-- | arch/x86/lib/copy_user_nocache_64.S | 283 |
2 files changed, 275 insertions, 435 deletions
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 7eaaf0123b4d..e5afb4ad3f1c 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -1,8 +1,10 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | 1 | /* |
2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> | ||
3 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | 4 | * Subject to the GNU Public License v2. |
3 | * | 5 | * |
4 | * Functions to copy from and to user space. | 6 | * Functions to copy from and to user space. |
5 | */ | 7 | */ |
6 | 8 | ||
7 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
8 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
@@ -20,60 +22,88 @@ | |||
20 | .long \orig-1f /* by default jump to orig */ | 22 | .long \orig-1f /* by default jump to orig */ |
21 | 1: | 23 | 1: |
22 | .section .altinstr_replacement,"ax" | 24 | .section .altinstr_replacement,"ax" |
23 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | 25 | 2: .byte 0xe9 /* near jump with 32bit immediate */ |
24 | .long \alt-1b /* offset */ /* or alternatively to alt */ | 26 | .long \alt-1b /* offset */ /* or alternatively to alt */ |
25 | .previous | 27 | .previous |
26 | .section .altinstructions,"a" | 28 | .section .altinstructions,"a" |
27 | .align 8 | 29 | .align 8 |
28 | .quad 0b | 30 | .quad 0b |
29 | .quad 2b | 31 | .quad 2b |
30 | .byte \feature /* when feature is set */ | 32 | .byte \feature /* when feature is set */ |
31 | .byte 5 | 33 | .byte 5 |
32 | .byte 5 | 34 | .byte 5 |
33 | .previous | 35 | .previous |
34 | .endm | 36 | .endm |
35 | 37 | ||
36 | /* Standard copy_to_user with segment limit checking */ | 38 | .macro ALIGN_DESTINATION |
39 | #ifdef FIX_ALIGNMENT | ||
40 | /* check for bad alignment of destination */ | ||
41 | movl %edi,%ecx | ||
42 | andl $7,%ecx | ||
43 | jz 102f /* already aligned */ | ||
44 | subl $8,%ecx | ||
45 | negl %ecx | ||
46 | subl %ecx,%edx | ||
47 | 100: movb (%rsi),%al | ||
48 | 101: movb %al,(%rdi) | ||
49 | incq %rsi | ||
50 | incq %rdi | ||
51 | decl %ecx | ||
52 | jnz 100b | ||
53 | 102: | ||
54 | .section .fixup,"ax" | ||
55 | 103: addl %r8d,%edx /* ecx is zerorest also */ | ||
56 | jmp copy_user_handle_tail | ||
57 | .previous | ||
58 | |||
59 | .section __ex_table,"a" | ||
60 | .align 8 | ||
61 | .quad 100b,103b | ||
62 | .quad 101b,103b | ||
63 | .previous | ||
64 | #endif | ||
65 | .endm | ||
66 | |||
67 | /* Standard copy_to_user with segment limit checking */ | ||
37 | ENTRY(copy_to_user) | 68 | ENTRY(copy_to_user) |
38 | CFI_STARTPROC | 69 | CFI_STARTPROC |
39 | GET_THREAD_INFO(%rax) | 70 | GET_THREAD_INFO(%rax) |
40 | movq %rdi,%rcx | 71 | movq %rdi,%rcx |
41 | addq %rdx,%rcx | 72 | addq %rdx,%rcx |
42 | jc bad_to_user | 73 | jc bad_to_user |
43 | cmpq TI_addr_limit(%rax),%rcx | 74 | cmpq TI_addr_limit(%rax),%rcx |
44 | jae bad_to_user | 75 | jae bad_to_user |
45 | xorl %eax,%eax /* clear zero flag */ | ||
46 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
47 | CFI_ENDPROC | 77 | CFI_ENDPROC |
48 | 78 | ||
49 | ENTRY(copy_user_generic) | 79 | /* Standard copy_from_user with segment limit checking */ |
80 | ENTRY(copy_from_user) | ||
50 | CFI_STARTPROC | 81 | CFI_STARTPROC |
51 | movl $1,%ecx /* set zero flag */ | 82 | GET_THREAD_INFO(%rax) |
83 | movq %rsi,%rcx | ||
84 | addq %rdx,%rcx | ||
85 | jc bad_from_user | ||
86 | cmpq TI_addr_limit(%rax),%rcx | ||
87 | jae bad_from_user | ||
52 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 88 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
53 | CFI_ENDPROC | 89 | CFI_ENDPROC |
90 | ENDPROC(copy_from_user) | ||
54 | 91 | ||
55 | ENTRY(__copy_from_user_inatomic) | 92 | ENTRY(copy_user_generic) |
56 | CFI_STARTPROC | 93 | CFI_STARTPROC |
57 | xorl %ecx,%ecx /* clear zero flag */ | ||
58 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 94 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
59 | CFI_ENDPROC | 95 | CFI_ENDPROC |
96 | ENDPROC(copy_user_generic) | ||
60 | 97 | ||
61 | /* Standard copy_from_user with segment limit checking */ | 98 | ENTRY(__copy_from_user_inatomic) |
62 | ENTRY(copy_from_user) | ||
63 | CFI_STARTPROC | 99 | CFI_STARTPROC |
64 | GET_THREAD_INFO(%rax) | ||
65 | movq %rsi,%rcx | ||
66 | addq %rdx,%rcx | ||
67 | jc bad_from_user | ||
68 | cmpq TI_addr_limit(%rax),%rcx | ||
69 | jae bad_from_user | ||
70 | movl $1,%ecx /* set zero flag */ | ||
71 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 100 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
72 | CFI_ENDPROC | 101 | CFI_ENDPROC |
73 | ENDPROC(copy_from_user) | 102 | ENDPROC(__copy_from_user_inatomic) |
74 | 103 | ||
75 | .section .fixup,"ax" | 104 | .section .fixup,"ax" |
76 | /* must zero dest */ | 105 | /* must zero dest */ |
106 | ENTRY(bad_from_user) | ||
77 | bad_from_user: | 107 | bad_from_user: |
78 | CFI_STARTPROC | 108 | CFI_STARTPROC |
79 | movl %edx,%ecx | 109 | movl %edx,%ecx |
@@ -81,271 +111,158 @@ bad_from_user: | |||
81 | rep | 111 | rep |
82 | stosb | 112 | stosb |
83 | bad_to_user: | 113 | bad_to_user: |
84 | movl %edx,%eax | 114 | movl %edx,%eax |
85 | ret | 115 | ret |
86 | CFI_ENDPROC | 116 | CFI_ENDPROC |
87 | END(bad_from_user) | 117 | ENDPROC(bad_from_user) |
88 | .previous | 118 | .previous |
89 | 119 | ||
90 | |||
91 | /* | 120 | /* |
92 | * copy_user_generic_unrolled - memory copy with exception handling. | 121 | * copy_user_generic_unrolled - memory copy with exception handling. |
93 | * This version is for CPUs like P4 that don't have efficient micro code for rep movsq | 122 | * This version is for CPUs like P4 that don't have efficient micro |
94 | * | 123 | * code for rep movsq |
95 | * Input: | 124 | * |
125 | * Input: | ||
96 | * rdi destination | 126 | * rdi destination |
97 | * rsi source | 127 | * rsi source |
98 | * rdx count | 128 | * rdx count |
99 | * ecx zero flag -- if true zero destination on error | ||
100 | * | 129 | * |
101 | * Output: | 130 | * Output: |
102 | * eax uncopied bytes or 0 if successful. | 131 | * eax uncopied bytes or 0 if successful. |
103 | */ | 132 | */ |
104 | ENTRY(copy_user_generic_unrolled) | 133 | ENTRY(copy_user_generic_unrolled) |
105 | CFI_STARTPROC | 134 | CFI_STARTPROC |
106 | pushq %rbx | 135 | cmpl $8,%edx |
107 | CFI_ADJUST_CFA_OFFSET 8 | 136 | jb 20f /* less than 8 bytes, go to byte copy loop */ |
108 | CFI_REL_OFFSET rbx, 0 | 137 | ALIGN_DESTINATION |
109 | pushq %rcx | 138 | movl %edx,%ecx |
110 | CFI_ADJUST_CFA_OFFSET 8 | 139 | andl $63,%edx |
111 | CFI_REL_OFFSET rcx, 0 | 140 | shrl $6,%ecx |
112 | xorl %eax,%eax /*zero for the exception handler */ | 141 | jz 17f |
113 | 142 | 1: movq (%rsi),%r8 | |
114 | #ifdef FIX_ALIGNMENT | 143 | 2: movq 1*8(%rsi),%r9 |
115 | /* check for bad alignment of destination */ | 144 | 3: movq 2*8(%rsi),%r10 |
116 | movl %edi,%ecx | 145 | 4: movq 3*8(%rsi),%r11 |
117 | andl $7,%ecx | 146 | 5: movq %r8,(%rdi) |
118 | jnz .Lbad_alignment | 147 | 6: movq %r9,1*8(%rdi) |
119 | .Lafter_bad_alignment: | 148 | 7: movq %r10,2*8(%rdi) |
120 | #endif | 149 | 8: movq %r11,3*8(%rdi) |
121 | 150 | 9: movq 4*8(%rsi),%r8 | |
122 | movq %rdx,%rcx | 151 | 10: movq 5*8(%rsi),%r9 |
123 | 152 | 11: movq 6*8(%rsi),%r10 | |
124 | movl $64,%ebx | 153 | 12: movq 7*8(%rsi),%r11 |
125 | shrq $6,%rdx | 154 | 13: movq %r8,4*8(%rdi) |
126 | decq %rdx | 155 | 14: movq %r9,5*8(%rdi) |
127 | js .Lhandle_tail | 156 | 15: movq %r10,6*8(%rdi) |
128 | 157 | 16: movq %r11,7*8(%rdi) | |
129 | .p2align 4 | ||
130 | .Lloop: | ||
131 | .Ls1: movq (%rsi),%r11 | ||
132 | .Ls2: movq 1*8(%rsi),%r8 | ||
133 | .Ls3: movq 2*8(%rsi),%r9 | ||
134 | .Ls4: movq 3*8(%rsi),%r10 | ||
135 | .Ld1: movq %r11,(%rdi) | ||
136 | .Ld2: movq %r8,1*8(%rdi) | ||
137 | .Ld3: movq %r9,2*8(%rdi) | ||
138 | .Ld4: movq %r10,3*8(%rdi) | ||
139 | |||
140 | .Ls5: movq 4*8(%rsi),%r11 | ||
141 | .Ls6: movq 5*8(%rsi),%r8 | ||
142 | .Ls7: movq 6*8(%rsi),%r9 | ||
143 | .Ls8: movq 7*8(%rsi),%r10 | ||
144 | .Ld5: movq %r11,4*8(%rdi) | ||
145 | .Ld6: movq %r8,5*8(%rdi) | ||
146 | .Ld7: movq %r9,6*8(%rdi) | ||
147 | .Ld8: movq %r10,7*8(%rdi) | ||
148 | |||
149 | decq %rdx | ||
150 | |||
151 | leaq 64(%rsi),%rsi | 158 | leaq 64(%rsi),%rsi |
152 | leaq 64(%rdi),%rdi | 159 | leaq 64(%rdi),%rdi |
153 | |||
154 | jns .Lloop | ||
155 | |||
156 | .p2align 4 | ||
157 | .Lhandle_tail: | ||
158 | movl %ecx,%edx | ||
159 | andl $63,%ecx | ||
160 | shrl $3,%ecx | ||
161 | jz .Lhandle_7 | ||
162 | movl $8,%ebx | ||
163 | .p2align 4 | ||
164 | .Lloop_8: | ||
165 | .Ls9: movq (%rsi),%r8 | ||
166 | .Ld9: movq %r8,(%rdi) | ||
167 | decl %ecx | 160 | decl %ecx |
168 | leaq 8(%rdi),%rdi | 161 | jnz 1b |
162 | 17: movl %edx,%ecx | ||
163 | andl $7,%edx | ||
164 | shrl $3,%ecx | ||
165 | jz 20f | ||
166 | 18: movq (%rsi),%r8 | ||
167 | 19: movq %r8,(%rdi) | ||
169 | leaq 8(%rsi),%rsi | 168 | leaq 8(%rsi),%rsi |
170 | jnz .Lloop_8 | 169 | leaq 8(%rdi),%rdi |
171 | 170 | decl %ecx | |
172 | .Lhandle_7: | 171 | jnz 18b |
172 | 20: andl %edx,%edx | ||
173 | jz 23f | ||
173 | movl %edx,%ecx | 174 | movl %edx,%ecx |
174 | andl $7,%ecx | 175 | 21: movb (%rsi),%al |
175 | jz .Lende | 176 | 22: movb %al,(%rdi) |
176 | .p2align 4 | ||
177 | .Lloop_1: | ||
178 | .Ls10: movb (%rsi),%bl | ||
179 | .Ld10: movb %bl,(%rdi) | ||
180 | incq %rdi | ||
181 | incq %rsi | 177 | incq %rsi |
178 | incq %rdi | ||
182 | decl %ecx | 179 | decl %ecx |
183 | jnz .Lloop_1 | 180 | jnz 21b |
184 | 181 | 23: xor %eax,%eax | |
185 | CFI_REMEMBER_STATE | ||
186 | .Lende: | ||
187 | popq %rcx | ||
188 | CFI_ADJUST_CFA_OFFSET -8 | ||
189 | CFI_RESTORE rcx | ||
190 | popq %rbx | ||
191 | CFI_ADJUST_CFA_OFFSET -8 | ||
192 | CFI_RESTORE rbx | ||
193 | ret | 182 | ret |
194 | CFI_RESTORE_STATE | ||
195 | 183 | ||
196 | #ifdef FIX_ALIGNMENT | 184 | .section .fixup,"ax" |
197 | /* align destination */ | 185 | 30: shll $6,%ecx |
198 | .p2align 4 | 186 | addl %ecx,%edx |
199 | .Lbad_alignment: | 187 | jmp 60f |
200 | movl $8,%r9d | 188 | 40: leal (%edx,%ecx,8),%edx |
201 | subl %ecx,%r9d | 189 | jmp 60f |
202 | movl %r9d,%ecx | 190 | 50: movl %ecx,%edx |
203 | cmpq %r9,%rdx | 191 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ |
204 | jz .Lhandle_7 | 192 | .previous |
205 | js .Lhandle_7 | ||
206 | .Lalign_1: | ||
207 | .Ls11: movb (%rsi),%bl | ||
208 | .Ld11: movb %bl,(%rdi) | ||
209 | incq %rsi | ||
210 | incq %rdi | ||
211 | decl %ecx | ||
212 | jnz .Lalign_1 | ||
213 | subq %r9,%rdx | ||
214 | jmp .Lafter_bad_alignment | ||
215 | #endif | ||
216 | 193 | ||
217 | /* table sorted by exception address */ | ||
218 | .section __ex_table,"a" | 194 | .section __ex_table,"a" |
219 | .align 8 | 195 | .align 8 |
220 | .quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */ | 196 | .quad 1b,30b |
221 | .quad .Ls2,.Ls1e | 197 | .quad 2b,30b |
222 | .quad .Ls3,.Ls1e | 198 | .quad 3b,30b |
223 | .quad .Ls4,.Ls1e | 199 | .quad 4b,30b |
224 | .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */ | 200 | .quad 5b,30b |
225 | .quad .Ld2,.Ls2e | 201 | .quad 6b,30b |
226 | .quad .Ld3,.Ls3e | 202 | .quad 7b,30b |
227 | .quad .Ld4,.Ls4e | 203 | .quad 8b,30b |
228 | .quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */ | 204 | .quad 9b,30b |
229 | .quad .Ls6,.Ls5e | 205 | .quad 10b,30b |
230 | .quad .Ls7,.Ls5e | 206 | .quad 11b,30b |
231 | .quad .Ls8,.Ls5e | 207 | .quad 12b,30b |
232 | .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */ | 208 | .quad 13b,30b |
233 | .quad .Ld6,.Ls6e | 209 | .quad 14b,30b |
234 | .quad .Ld7,.Ls7e | 210 | .quad 15b,30b |
235 | .quad .Ld8,.Ls8e | 211 | .quad 16b,30b |
236 | .quad .Ls9,.Le_quad | 212 | .quad 18b,40b |
237 | .quad .Ld9,.Le_quad | 213 | .quad 19b,40b |
238 | .quad .Ls10,.Le_byte | 214 | .quad 21b,50b |
239 | .quad .Ld10,.Le_byte | 215 | .quad 22b,50b |
240 | #ifdef FIX_ALIGNMENT | ||
241 | .quad .Ls11,.Lzero_rest | ||
242 | .quad .Ld11,.Lzero_rest | ||
243 | #endif | ||
244 | .quad .Le5,.Le_zero | ||
245 | .previous | 216 | .previous |
246 | |||
247 | /* eax: zero, ebx: 64 */ | ||
248 | .Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */ | ||
249 | .Ls2e: addl $8,%eax | ||
250 | .Ls3e: addl $8,%eax | ||
251 | .Ls4e: addl $8,%eax | ||
252 | .Ls5e: addl $8,%eax | ||
253 | .Ls6e: addl $8,%eax | ||
254 | .Ls7e: addl $8,%eax | ||
255 | .Ls8e: addl $8,%eax | ||
256 | addq %rbx,%rdi /* +64 */ | ||
257 | subq %rax,%rdi /* correct destination with computed offset */ | ||
258 | |||
259 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
260 | addq %rax,%rdx /* add offset to loopcnt */ | ||
261 | andl $63,%ecx /* remaining bytes */ | ||
262 | addq %rcx,%rdx /* add them */ | ||
263 | jmp .Lzero_rest | ||
264 | |||
265 | /* exception on quad word loop in tail handling */ | ||
266 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
267 | .Le_quad: | ||
268 | shll $3,%ecx | ||
269 | andl $7,%edx | ||
270 | addl %ecx,%edx | ||
271 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
272 | .Lzero_rest: | ||
273 | cmpl $0,(%rsp) | ||
274 | jz .Le_zero | ||
275 | movq %rdx,%rcx | ||
276 | .Le_byte: | ||
277 | xorl %eax,%eax | ||
278 | .Le5: rep | ||
279 | stosb | ||
280 | /* when there is another exception while zeroing the rest just return */ | ||
281 | .Le_zero: | ||
282 | movq %rdx,%rax | ||
283 | jmp .Lende | ||
284 | CFI_ENDPROC | 217 | CFI_ENDPROC |
285 | ENDPROC(copy_user_generic) | 218 | ENDPROC(copy_user_generic_unrolled) |
286 | 219 | ||
287 | 220 | /* Some CPUs run faster using the string copy instructions. | |
288 | /* Some CPUs run faster using the string copy instructions. | 221 | * This is also a lot simpler. Use them when possible. |
289 | This is also a lot simpler. Use them when possible. | 222 | * |
290 | Patch in jmps to this code instead of copying it fully | 223 | * Only 4GB of copy is supported. This shouldn't be a problem |
291 | to avoid unwanted aliasing in the exception tables. */ | 224 | * because the kernel normally only writes from/to page sized chunks |
292 | 225 | * even if user space passed a longer buffer. | |
293 | /* rdi destination | 226 | * And more would be dangerous because both Intel and AMD have |
294 | * rsi source | 227 | * errata with rep movsq > 4GB. If someone feels the need to fix |
295 | * rdx count | 228 | * this please consider this. |
296 | * ecx zero flag | 229 | * |
297 | * | 230 | * Input: |
298 | * Output: | 231 | * rdi destination |
299 | * eax uncopied bytes or 0 if successful. | 230 | * Input: |
300 | * | 233 | * rdx count |
301 | * Only 4GB of copy is supported. This shouldn't be a problem | 234 | * |
302 | * because the kernel normally only writes from/to page sized chunks | 235 | * Output: |
303 | * even if user space passed a longer buffer. | 236 | * eax uncopied bytes or 0 if successful. |
304 | * And more would be dangerous because both Intel and AMD have | 237 | */ |
305 | * errata with rep movsq > 4GB. If someone feels the need to fix | ||
306 | * this please consider this. | ||
307 | */ | ||
308 | ENTRY(copy_user_generic_string) | 238 | ENTRY(copy_user_generic_string) |
309 | CFI_STARTPROC | 239 | CFI_STARTPROC |
310 | movl %ecx,%r8d /* save zero flag */ | 240 | andl %edx,%edx |
241 | jz 4f | ||
242 | cmpl $8,%edx | ||
243 | jb 2f /* less than 8 bytes, go to byte copy loop */ | ||
244 | ALIGN_DESTINATION | ||
311 | movl %edx,%ecx | 245 | movl %edx,%ecx |
312 | shrl $3,%ecx | 246 | shrl $3,%ecx |
313 | andl $7,%edx | 247 | andl $7,%edx |
314 | jz 10f | 248 | 1: rep |
315 | 1: rep | ||
316 | movsq | ||
317 | movl %edx,%ecx | ||
318 | 2: rep | ||
319 | movsb | ||
320 | 9: movl %ecx,%eax | ||
321 | ret | ||
322 | |||
323 | /* multiple of 8 byte */ | ||
324 | 10: rep | ||
325 | movsq | 249 | movsq |
326 | xor %eax,%eax | 250 | 2: movl %edx,%ecx |
251 | 3: rep | ||
252 | movsb | ||
253 | 4: xorl %eax,%eax | ||
327 | ret | 254 | ret |
328 | 255 | ||
329 | /* exception handling */ | 256 | .section .fixup,"ax" |
330 | 3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ | 257 | 11: leal (%edx,%ecx,8),%ecx |
331 | jmp 6f | 258 | 12: movl %ecx,%edx /* ecx is zerorest also */ |
332 | 5: movl %ecx,%eax /* exception on byte loop */ | 259 | jmp copy_user_handle_tail |
333 | /* eax: left over bytes */ | 260 | .previous |
334 | 6: testl %r8d,%r8d /* zero flag set? */ | ||
335 | jz 7f | ||
336 | movl %eax,%ecx /* initialize x86 loop counter */ | ||
337 | push %rax | ||
338 | xorl %eax,%eax | ||
339 | 8: rep | ||
340 | stosb /* zero the rest */ | ||
341 | 11: pop %rax | ||
342 | 7: ret | ||
343 | CFI_ENDPROC | ||
344 | END(copy_user_generic_c) | ||
345 | 261 | ||
346 | .section __ex_table,"a" | 262 | .section __ex_table,"a" |
347 | .quad 1b,3b | 263 | .align 8 |
348 | .quad 2b,5b | 264 | .quad 1b,11b |
349 | .quad 8b,11b | 265 | .quad 3b,12b |
350 | .quad 10b,3b | ||
351 | .previous | 266 | .previous |
267 | CFI_ENDPROC | ||
268 | ENDPROC(copy_user_generic_string) | ||
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 9d3d1ab83763..93353d6a5263 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S | |||
@@ -1,4 +1,6 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | 1 | /* |
2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> | ||
3 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | 4 | * Subject to the GNU Public License v2. |
3 | * | 5 | * |
4 | * Functions to copy from and to user space. | 6 | * Functions to copy from and to user space. |
@@ -12,204 +14,125 @@ | |||
12 | #include <asm/current.h> | 14 | #include <asm/current.h> |
13 | #include <asm/asm-offsets.h> | 15 | #include <asm/asm-offsets.h> |
14 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
15 | #include <asm/cpufeature.h> | ||
16 | |||
17 | /* | ||
18 | * copy_user_nocache - Uncached memory copy with exception handling | ||
19 | * This will force destination/source out of cache for more performance. | ||
20 | * | ||
21 | * Input: | ||
22 | * rdi destination | ||
23 | * rsi source | ||
24 | * rdx count | ||
25 | * rcx zero flag when 1 zero on exception | ||
26 | * | ||
27 | * Output: | ||
28 | * eax uncopied bytes or 0 if successful. | ||
29 | */ | ||
30 | ENTRY(__copy_user_nocache) | ||
31 | CFI_STARTPROC | ||
32 | pushq %rbx | ||
33 | CFI_ADJUST_CFA_OFFSET 8 | ||
34 | CFI_REL_OFFSET rbx, 0 | ||
35 | pushq %rcx /* save zero flag */ | ||
36 | CFI_ADJUST_CFA_OFFSET 8 | ||
37 | CFI_REL_OFFSET rcx, 0 | ||
38 | |||
39 | xorl %eax,%eax /* zero for the exception handler */ | ||
40 | 17 | ||
18 | .macro ALIGN_DESTINATION | ||
41 | #ifdef FIX_ALIGNMENT | 19 | #ifdef FIX_ALIGNMENT |
42 | /* check for bad alignment of destination */ | 20 | /* check for bad alignment of destination */ |
43 | movl %edi,%ecx | 21 | movl %edi,%ecx |
44 | andl $7,%ecx | 22 | andl $7,%ecx |
45 | jnz .Lbad_alignment | 23 | jz 102f /* already aligned */ |
46 | .Lafter_bad_alignment: | 24 | subl $8,%ecx |
47 | #endif | 25 | negl %ecx |
48 | 26 | subl %ecx,%edx | |
49 | movq %rdx,%rcx | 27 | 100: movb (%rsi),%al |
50 | 28 | 101: movb %al,(%rdi) | |
51 | movl $64,%ebx | 29 | incq %rsi |
52 | shrq $6,%rdx | 30 | incq %rdi |
53 | decq %rdx | 31 | decl %ecx |
54 | js .Lhandle_tail | 32 | jnz 100b |
55 | 33 | 102: | |
56 | .p2align 4 | 34 | .section .fixup,"ax" |
57 | .Lloop: | 35 | 103: addl %r8d,%edx /* ecx is zerorest also */ |
58 | .Ls1: movq (%rsi),%r11 | 36 | jmp copy_user_handle_tail |
59 | .Ls2: movq 1*8(%rsi),%r8 | 37 | .previous |
60 | .Ls3: movq 2*8(%rsi),%r9 | ||
61 | .Ls4: movq 3*8(%rsi),%r10 | ||
62 | .Ld1: movnti %r11,(%rdi) | ||
63 | .Ld2: movnti %r8,1*8(%rdi) | ||
64 | .Ld3: movnti %r9,2*8(%rdi) | ||
65 | .Ld4: movnti %r10,3*8(%rdi) | ||
66 | |||
67 | .Ls5: movq 4*8(%rsi),%r11 | ||
68 | .Ls6: movq 5*8(%rsi),%r8 | ||
69 | .Ls7: movq 6*8(%rsi),%r9 | ||
70 | .Ls8: movq 7*8(%rsi),%r10 | ||
71 | .Ld5: movnti %r11,4*8(%rdi) | ||
72 | .Ld6: movnti %r8,5*8(%rdi) | ||
73 | .Ld7: movnti %r9,6*8(%rdi) | ||
74 | .Ld8: movnti %r10,7*8(%rdi) | ||
75 | 38 | ||
76 | dec %rdx | 39 | .section __ex_table,"a" |
40 | .align 8 | ||
41 | .quad 100b,103b | ||
42 | .quad 101b,103b | ||
43 | .previous | ||
44 | #endif | ||
45 | .endm | ||
77 | 46 | ||
47 | /* | ||
48 | * copy_user_nocache - Uncached memory copy with exception handling | ||
49 | * This will force destination/source out of cache for more performance. | ||
50 | */ | ||
51 | ENTRY(__copy_user_nocache) | ||
52 | CFI_STARTPROC | ||
53 | cmpl $8,%edx | ||
54 | jb 20f /* less than 8 bytes, go to byte copy loop */ | ||
55 | ALIGN_DESTINATION | ||
56 | movl %edx,%ecx | ||
57 | andl $63,%edx | ||
58 | shrl $6,%ecx | ||
59 | jz 17f | ||
60 | 1: movq (%rsi),%r8 | ||
61 | 2: movq 1*8(%rsi),%r9 | ||
62 | 3: movq 2*8(%rsi),%r10 | ||
63 | 4: movq 3*8(%rsi),%r11 | ||
64 | 5: movnti %r8,(%rdi) | ||
65 | 6: movnti %r9,1*8(%rdi) | ||
66 | 7: movnti %r10,2*8(%rdi) | ||
67 | 8: movnti %r11,3*8(%rdi) | ||
68 | 9: movq 4*8(%rsi),%r8 | ||
69 | 10: movq 5*8(%rsi),%r9 | ||
70 | 11: movq 6*8(%rsi),%r10 | ||
71 | 12: movq 7*8(%rsi),%r11 | ||
72 | 13: movnti %r8,4*8(%rdi) | ||
73 | 14: movnti %r9,5*8(%rdi) | ||
74 | 15: movnti %r10,6*8(%rdi) | ||
75 | 16: movnti %r11,7*8(%rdi) | ||
78 | leaq 64(%rsi),%rsi | 76 | leaq 64(%rsi),%rsi |
79 | leaq 64(%rdi),%rdi | 77 | leaq 64(%rdi),%rdi |
80 | |||
81 | jns .Lloop | ||
82 | |||
83 | .p2align 4 | ||
84 | .Lhandle_tail: | ||
85 | movl %ecx,%edx | ||
86 | andl $63,%ecx | ||
87 | shrl $3,%ecx | ||
88 | jz .Lhandle_7 | ||
89 | movl $8,%ebx | ||
90 | .p2align 4 | ||
91 | .Lloop_8: | ||
92 | .Ls9: movq (%rsi),%r8 | ||
93 | .Ld9: movnti %r8,(%rdi) | ||
94 | decl %ecx | 78 | decl %ecx |
95 | leaq 8(%rdi),%rdi | 79 | jnz 1b |
80 | 17: movl %edx,%ecx | ||
81 | andl $7,%edx | ||
82 | shrl $3,%ecx | ||
83 | jz 20f | ||
84 | 18: movq (%rsi),%r8 | ||
85 | 19: movnti %r8,(%rdi) | ||
96 | leaq 8(%rsi),%rsi | 86 | leaq 8(%rsi),%rsi |
97 | jnz .Lloop_8 | 87 | leaq 8(%rdi),%rdi |
98 | 88 | decl %ecx | |
99 | .Lhandle_7: | 89 | jnz 18b |
90 | 20: andl %edx,%edx | ||
91 | jz 23f | ||
100 | movl %edx,%ecx | 92 | movl %edx,%ecx |
101 | andl $7,%ecx | 93 | 21: movb (%rsi),%al |
102 | jz .Lende | 94 | 22: movb %al,(%rdi) |
103 | .p2align 4 | ||
104 | .Lloop_1: | ||
105 | .Ls10: movb (%rsi),%bl | ||
106 | .Ld10: movb %bl,(%rdi) | ||
107 | incq %rdi | ||
108 | incq %rsi | 95 | incq %rsi |
96 | incq %rdi | ||
109 | decl %ecx | 97 | decl %ecx |
110 | jnz .Lloop_1 | 98 | jnz 21b |
111 | 99 | 23: xorl %eax,%eax | |
112 | CFI_REMEMBER_STATE | ||
113 | .Lende: | ||
114 | popq %rcx | ||
115 | CFI_ADJUST_CFA_OFFSET -8 | ||
116 | CFI_RESTORE %rcx | ||
117 | popq %rbx | ||
118 | CFI_ADJUST_CFA_OFFSET -8 | ||
119 | CFI_RESTORE rbx | ||
120 | sfence | 100 | sfence |
121 | ret | 101 | ret |
122 | CFI_RESTORE_STATE | ||
123 | 102 | ||
124 | #ifdef FIX_ALIGNMENT | 103 | .section .fixup,"ax" |
125 | /* align destination */ | 104 | 30: shll $6,%ecx |
126 | .p2align 4 | 105 | addl %ecx,%edx |
127 | .Lbad_alignment: | 106 | jmp 60f |
128 | movl $8,%r9d | 107 | 40: leal (%edx,%ecx,8),%edx |
129 | subl %ecx,%r9d | 108 | jmp 60f |
130 | movl %r9d,%ecx | 109 | 50: movl %ecx,%edx |
131 | cmpq %r9,%rdx | 110 | 60: sfence |
132 | jz .Lhandle_7 | 111 | movl %r8d,%ecx |
133 | js .Lhandle_7 | 112 | jmp copy_user_handle_tail |
134 | .Lalign_1: | 113 | .previous |
135 | .Ls11: movb (%rsi),%bl | ||
136 | .Ld11: movb %bl,(%rdi) | ||
137 | incq %rsi | ||
138 | incq %rdi | ||
139 | decl %ecx | ||
140 | jnz .Lalign_1 | ||
141 | subq %r9,%rdx | ||
142 | jmp .Lafter_bad_alignment | ||
143 | #endif | ||
144 | 114 | ||
145 | /* table sorted by exception address */ | ||
146 | .section __ex_table,"a" | 115 | .section __ex_table,"a" |
147 | .align 8 | 116 | .quad 1b,30b |
148 | .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */ | 117 | .quad 2b,30b |
149 | .quad .Ls2,.Ls1e | 118 | .quad 3b,30b |
150 | .quad .Ls3,.Ls1e | 119 | .quad 4b,30b |
151 | .quad .Ls4,.Ls1e | 120 | .quad 5b,30b |
152 | .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes copied */ | 121 | .quad 6b,30b |
153 | .quad .Ld2,.Ls2e | 122 | .quad 7b,30b |
154 | .quad .Ld3,.Ls3e | 123 | .quad 8b,30b |
155 | .quad .Ld4,.Ls4e | 124 | .quad 9b,30b |
156 | .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */ | 125 | .quad 10b,30b |
157 | .quad .Ls6,.Ls5e | 126 | .quad 11b,30b |
158 | .quad .Ls7,.Ls5e | 127 | .quad 12b,30b |
159 | .quad .Ls8,.Ls5e | 128 | .quad 13b,30b |
160 | .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */ | 129 | .quad 14b,30b |
161 | .quad .Ld6,.Ls6e | 130 | .quad 15b,30b |
162 | .quad .Ld7,.Ls7e | 131 | .quad 16b,30b |
163 | .quad .Ld8,.Ls8e | 132 | .quad 18b,40b |
164 | .quad .Ls9,.Le_quad | 133 | .quad 19b,40b |
165 | .quad .Ld9,.Le_quad | 134 | .quad 21b,50b |
166 | .quad .Ls10,.Le_byte | 135 | .quad 22b,50b |
167 | .quad .Ld10,.Le_byte | ||
168 | #ifdef FIX_ALIGNMENT | ||
169 | .quad .Ls11,.Lzero_rest | ||
170 | .quad .Ld11,.Lzero_rest | ||
171 | #endif | ||
172 | .quad .Le5,.Le_zero | ||
173 | .previous | 136 | .previous |
174 | |||
175 | /* eax: zero, ebx: 64 */ | ||
176 | .Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */ | ||
177 | .Ls2e: addl $8,%eax | ||
178 | .Ls3e: addl $8,%eax | ||
179 | .Ls4e: addl $8,%eax | ||
180 | .Ls5e: addl $8,%eax | ||
181 | .Ls6e: addl $8,%eax | ||
182 | .Ls7e: addl $8,%eax | ||
183 | .Ls8e: addl $8,%eax | ||
184 | addq %rbx,%rdi /* +64 */ | ||
185 | subq %rax,%rdi /* correct destination with computed offset */ | ||
186 | |||
187 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
188 | addq %rax,%rdx /* add offset to loopcnt */ | ||
189 | andl $63,%ecx /* remaining bytes */ | ||
190 | addq %rcx,%rdx /* add them */ | ||
191 | jmp .Lzero_rest | ||
192 | |||
193 | /* exception on quad word loop in tail handling */ | ||
194 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
195 | .Le_quad: | ||
196 | shll $3,%ecx | ||
197 | andl $7,%edx | ||
198 | addl %ecx,%edx | ||
199 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
200 | .Lzero_rest: | ||
201 | cmpl $0,(%rsp) /* zero flag set? */ | ||
202 | jz .Le_zero | ||
203 | movq %rdx,%rcx | ||
204 | .Le_byte: | ||
205 | xorl %eax,%eax | ||
206 | .Le5: rep | ||
207 | stosb | ||
208 | /* when there is another exception while zeroing the rest just return */ | ||
209 | .Le_zero: | ||
210 | movq %rdx,%rax | ||
211 | jmp .Lende | ||
212 | CFI_ENDPROC | 137 | CFI_ENDPROC |
213 | ENDPROC(__copy_user_nocache) | 138 | ENDPROC(__copy_user_nocache) |
214 | |||
215 | |||