author		Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:17:08 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2007-10-11 05:17:08 -0400
commit		185f3d38900f750a4566f87cde6a178f3595a115 (patch)
tree		d463f6da1af452b1bbdf476828ea88427087f255 /arch/x86/lib/copy_user_64.S
parent		51b2833060f26258ea2da091c7b9c6a358ac9dd2 (diff)
x86_64: move lib
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/lib/copy_user_64.S')
-rw-r--r--	arch/x86/lib/copy_user_64.S	354
1 files changed, 354 insertions, 0 deletions

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
new file mode 100644
index 000000000000..70bebd310408
--- /dev/null
+++ b/arch/x86/lib/copy_user_64.S
@@ -0,0 +1,354 @@
/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

	.macro ALTERNATIVE_JUMP feature,orig,alt
0:
	.byte 0xe9	/* 32bit jump */
	.long \orig-1f	/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9	/* near jump with 32bit immediate */
	.long \alt-1b	/* offset */  /* or alternatively to alt */
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad 0b
	.quad 2b
	.byte \feature	/* when feature is set */
	.byte 5
	.byte 5
	.previous
	.endm
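
The macro above plants a 5-byte `jmp rel32` to the default (unrolled) routine and records the site in `.altinstructions`, so the kernel's alternatives patching can retarget the jump to the string routine on CPUs that advertise the feature bit. As a rough illustration of the displacement arithmetic only (this is not the kernel's patching code, and `patch_jmp_rel32` is a made-up helper name), the rel32 field is the target address minus the end of the 5-byte instruction, which is why the macro encodes `\orig-1f` with label `1:` placed right after the immediate:

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: write a 5-byte "jmp rel32" at `site` that jumps
 * to `target`.  The displacement is relative to the first byte after
 * the instruction (site + 5). */
static void patch_jmp_rel32(uint8_t *site, const uint8_t *target)
{
	int32_t rel = (int32_t)(target - (site + 5));	/* target - end of jmp */
	site[0] = 0xe9;					/* opcode: jmp rel32 */
	memcpy(site + 1, &rel, sizeof(rel));		/* 4-byte displacement */
}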

/* Standard copy_to_user with segment limit checking */
ENTRY(copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc   bad_to_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae  bad_to_user
	xorl %eax,%eax	/* clear zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

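copy_to_user first verifies that the user range neither wraps around nor crosses the thread's address limit before dispatching to a copy routine. A C sketch of the same check follows; `user_range_ok` and the explicit `addr_limit` parameter are illustrative (the assembly reads the limit from the current thread_info), and the comparisons mirror the `jc`/`jae` branches above:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch of the range check performed before the copy is dispatched. */
static bool user_range_ok(uintptr_t addr, size_t len, uintptr_t addr_limit)
{
	uintptr_t end = addr + len;

	if (end < addr)			/* "jc bad_*_user": the add wrapped */
		return false;
	if (end >= addr_limit)		/* "jae bad_*_user": past the limit */
		return false;
	return true;
}
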
ENTRY(copy_user_generic)
	CFI_STARTPROC
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

ENTRY(__copy_from_user_inatomic)
	CFI_STARTPROC
	xorl %ecx,%ecx	/* clear zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC

/* Standard copy_from_user with segment limit checking */
ENTRY(copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc   bad_from_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae  bad_from_user
	movl $1,%ecx	/* set zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(copy_from_user)

	.section .fixup,"ax"
	/* must zero dest */
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	CFI_ENDPROC
END(bad_from_user)
	.previous
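
On a rejected copy_from_user range the fixup above must not leave uninitialized kernel memory visible: bad_from_user zeroes the whole destination with `rep stosb` and then, like bad_to_user, returns the full byte count as uncopied. A minimal C sketch of that contract (the function name is made up; real faults mid-copy zero only the remaining bytes via the exception paths further down):

#include <stddef.h>
#include <string.h>

/* Sketch of the bad_from_user fixup: zero the destination and report
 * every byte as not copied. */
static size_t copy_from_user_failed_sketch(void *dst, size_t len)
{
	memset(dst, 0, len);	/* "rep stosb" with %eax == 0 */
	return len;		/* bad_to_user: %eax = byte count */
}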


/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if true zero destination on error
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0
	xorl %eax,%eax	/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx

	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js   .Lhandle_tail

	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)

	decq %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns  .Lloop

	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz   .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz  .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz   .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz  .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz   .Lhandle_7
	js   .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz  .Lalign_1
	subq %r9,%rdx
	jmp  .Lafter_bad_alignment
#endif
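
This prologue byte-copies until %rdi reaches an 8-byte boundary so the unrolled quadword loop runs on an aligned destination; when the total count is no larger than the fixup it branches straight to the byte-tail path instead. A simplified C sketch (the name `align_dst_sketch` is invented, and unlike the assembly it simply clamps the fixup to the remaining length):

#include <stddef.h>
#include <stdint.h>

/* Copy single bytes until `dst` is 8-byte aligned (or `len` runs out)
 * and return how many bytes were consumed; the caller advances both
 * pointers and shrinks the count, as "subq %r9,%rdx" does above. */
static size_t align_dst_sketch(uint8_t *dst, const uint8_t *src, size_t len)
{
	size_t fixup = (8 - ((uintptr_t)dst & 7)) & 7;	/* bytes to the boundary */

	if (fixup > len)
		fixup = len;
	for (size_t i = 0; i < fixup; i++)		/* .Lalign_1 byte loop */
		dst[i] = src[i];
	return fixup;
}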

	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
#endif
	.quad .Le5,.Le_zero
	.previous

	/* compute 64-offset for main loop. 8 bytes accuracy with error on the
	   pessimistic side. this is gross. it would be better to fix the
	   interface. */
	/* eax: zero, ebx: 64 */
.Ls1e:	addl $8,%eax
.Ls2e:	addl $8,%eax
.Ls3e:	addl $8,%eax
.Ls4e:	addl $8,%eax
.Ls5e:	addl $8,%eax
.Ls6e:	addl $8,%eax
.Ls7e:	addl $8,%eax
.Ls8e:	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi	/* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
	jmp  .Lzero_rest

	/* exception on quad word loop in tail handling */
	/* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
	cmpl $0,(%rsp)
	jz   .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest just return */
.Le_zero:
	movq %rdx,%rax
	jmp  .Lende
	CFI_ENDPROC
ENDPROC(copy_user_generic)
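
The unrolled routine moves 64 bytes per iteration as eight quadwords, then drains the remainder with an 8-byte loop and a byte loop; on a fault the code above works out how many bytes were left and, if the zero flag was set, clears them. A C sketch of just the copy structure (fault handling omitted; `copy_unrolled_sketch` is an illustrative name, not a kernel function):

#include <stddef.h>
#include <stdint.h>

/* 64 bytes per pass, then an 8-byte tail loop, then a byte tail loop.
 * Returns 0 to mirror the "eax = uncopied bytes or 0" convention. */
static size_t copy_unrolled_sketch(void *dst, const void *src, size_t len)
{
	uint64_t *d = dst;
	const uint64_t *s = src;

	for (size_t blocks = len >> 6; blocks; blocks--) {	/* .Lloop */
		d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
		d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
		d += 8; s += 8;
	}
	for (size_t q = (len & 63) >> 3; q; q--)		/* .Lloop_8 */
		*d++ = *s++;

	uint8_t *db = (uint8_t *)d;
	const uint8_t *sb = (const uint8_t *)s;
	for (size_t b = len & 7; b; b--)			/* .Lloop_1 */
		*db++ = *sb++;

	return 0;
}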


/* Some CPUs run faster using the string copy instructions.
   This is also a lot simpler. Use them when possible.
   Patch in jmps to this code instead of copying it fully
   to avoid unwanted aliasing in the exception tables. */

/* rdi	destination
 * rsi	source
 * rdx	count
 * ecx	zero flag
 *
 * Output:
 * eax	uncopied bytes or 0 if successful.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	movl %ecx,%r8d		/* save zero flag */
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
	jz   10f
1:	rep
	movsq
	movl %edx,%ecx
2:	rep
	movsb
9:	movl %ecx,%eax
	ret

	/* multiple of 8 byte */
10:	rep
	movsq
	xor  %eax,%eax
	ret

	/* exception handling */
3:	lea (%rdx,%rcx,8),%rax	/* exception on quad loop */
	jmp 6f
5:	movl %ecx,%eax		/* exception on byte loop */
	/* eax: left over bytes */
6:	testl %r8d,%r8d		/* zero flag set? */
	jz 7f
	movl %eax,%ecx		/* initialize x86 loop counter */
	push %rax
	xorl %eax,%eax
8:	rep
	stosb			/* zero the rest */
11:	pop %rax
7:	ret
	CFI_ENDPROC
END(copy_user_generic_c)
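
The string variant does the bulk of the copy with `rep movsq` and the sub-quadword remainder with `rep movsb`, using 32-bit counts, which is why the comment above limits it to 4GB. A C sketch of the split (fault handling and the zero-on-error path omitted; `copy_string_sketch` is an illustrative name):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Whole quadwords first ("rep movsq"), then the 0-7 byte tail
 * ("rep movsb").  Returns 0: nothing left over. */
static size_t copy_string_sketch(void *dst, const void *src, size_t len)
{
	size_t qwords = len >> 3;	/* shrl $3,%ecx */
	size_t tail   = len & 7;	/* andl $7,%edx */

	memcpy(dst, src, qwords * 8);
	memcpy((uint8_t *)dst + qwords * 8,
	       (const uint8_t *)src + qwords * 8, tail);
	return 0;
}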

	.section __ex_table,"a"
	.quad 1b,3b
	.quad 2b,5b
	.quad 8b,11b
	.quad 10b,3b
	.previous
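
Both `__ex_table` sections consist of address pairs: the instruction that may fault on a user access and the fixup code to resume at. A hedged C sketch of the entry layout and a linear lookup (`struct ex_entry` and `find_fixup` are illustrative names; the kernel keeps the table sorted and searches it more efficiently):

#include <stddef.h>
#include <stdint.h>

/* One exception-table entry: faulting instruction and its fixup. */
struct ex_entry {
	uint64_t insn;		/* e.g. the address behind .Ls1 */
	uint64_t fixup;		/* e.g. the address behind .Ls1e */
};

/* Given the faulting instruction pointer, find where to resume. */
static const struct ex_entry *find_fixup(const struct ex_entry *tbl,
					 size_t n, uint64_t fault_ip)
{
	for (size_t i = 0; i < n; i++)
		if (tbl[i].insn == fault_ip)
			return &tbl[i];
	return NULL;		/* no fixup: a genuine kernel fault */
}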