diff options
Diffstat (limited to 'arch/x86_64/lib/copy_user.S')
-rw-r--r-- | arch/x86_64/lib/copy_user.S | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S new file mode 100644 index 000000000000..bd556c804248 --- /dev/null +++ b/arch/x86_64/lib/copy_user.S | |||
@@ -0,0 +1,294 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | ||
3 | * | ||
4 | * Functions to copy from and to user space. | ||
5 | */ | ||
6 | |||
7 | #define FIX_ALIGNMENT 1 | ||
8 | |||
9 | #include <asm/current.h> | ||
10 | #include <asm/offset.h> | ||
11 | #include <asm/thread_info.h> | ||
12 | #include <asm/cpufeature.h> | ||
13 | |||
14 | /* Standard copy_to_user with segment limit checking. In: rdi = destination, rsi = source, rdx = count. A failed range check returns the low 32 bits of the count in eax (see bad_to_user). */ | ||
15 | .globl copy_to_user | ||
16 | .p2align 4 | ||
17 | copy_to_user: | ||
18 | GET_THREAD_INFO(%rax) /* presumably rax = current thread_info; macro is defined outside this file - confirm */ | ||
19 | movq %rdi,%rcx /* rcx = destination start */ | ||
20 | addq %rdx,%rcx /* rcx = destination start + count */ | ||
21 | jc bad_to_user /* the addition wrapped around */ | ||
22 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
23 | jae bad_to_user /* end address is at or above the address limit */ | ||
24 | 2: | ||
25 | .byte 0xe9 /* 32bit jump, hand-encoded so it is always 5 bytes long */ | ||
26 | .long .Lcug-1f /* rel32 displacement, measured from the end of the jump (label 1) */ | ||
27 | 1: | ||
28 | |||
29 | .section .altinstr_replacement,"ax" | ||
30 | 3: .byte 0xe9 /* replacement jmp with 32 bit immediate */ | ||
31 | .long copy_user_generic_c-1b /* offset, relative to the end of the original jump at 1b, not to this section */ | ||
32 | .previous | ||
33 | .section .altinstructions,"a" | ||
34 | .align 8 | ||
35 | .quad 2b /* address of the original jump */ | ||
36 | .quad 3b /* address of the replacement jump */ | ||
37 | .byte X86_FEATURE_K8_C /* presumably the CPU feature that selects the replacement - confirm against the alternatives record layout */ | ||
38 | .byte 5 /* both jumps are 5 bytes (opcode + rel32); presumably the original length - confirm */ | ||
39 | .byte 5 /* presumably the replacement length - confirm */ | ||
40 | .previous | ||
41 | |||
42 | /* Standard copy_from_user with segment limit checking. In: rdi = destination, rsi = source, rdx = count. A failed range check zeroes the destination and returns the low 32 bits of the count in eax. */ | ||
43 | .globl copy_from_user | ||
44 | .p2align 4 | ||
45 | copy_from_user: | ||
46 | GET_THREAD_INFO(%rax) /* presumably rax = current thread_info; macro is defined outside this file - confirm */ | ||
47 | movq %rsi,%rcx /* rcx = source start */ | ||
48 | addq %rdx,%rcx /* rcx = source start + count */ | ||
49 | jc bad_from_user /* the addition wrapped around */ | ||
50 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
51 | jae bad_from_user /* end address is at or above the address limit */ | ||
52 | /* FALL THROUGH to copy_user_generic: the code below is emitted into .fixup, so the next .text bytes are copy_user_generic */ | ||
53 | |||
54 | .section .fixup,"ax" | ||
55 | /* must zero dest */ | ||
56 | bad_from_user: | ||
57 | movl %edx,%ecx /* ecx = low 32 bits of the count */ | ||
58 | xorl %eax,%eax /* al = 0, the fill byte for stosb */ | ||
59 | rep | ||
60 | stosb /* zero ecx bytes starting at rdi */ | ||
61 | bad_to_user: | ||
62 | movl %edx,%eax /* report the whole (32-bit) count as uncopied */ | ||
63 | ret | ||
64 | .previous | ||
65 | |||
66 | |||
67 | /* | ||
68 | * copy_user_generic - memory copy with exception handling. | ||
69 | * | ||
70 | * Input: | ||
71 | * rdi destination | ||
72 | * rsi source | ||
73 | * rdx count | ||
74 | * | ||
75 | * Output: | ||
76 | * eax uncopied bytes or 0 if successful. | ||
77 | */ | ||
78 | .globl copy_user_generic | ||
79 | .p2align 4 | ||
80 | copy_user_generic: | ||
81 | .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ | ||
82 | .byte 0x66,0x90 | ||
83 | 1: | ||
84 | .section .altinstr_replacement,"ax" | ||
85 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | ||
86 | .long copy_user_generic_c-1b /* offset, relative to the end of the 5 byte nop (label 1) */ | ||
87 | .previous | ||
88 | .section .altinstructions,"a" | ||
89 | .align 8 | ||
90 | .quad copy_user_generic /* address of the nop that may be replaced */ | ||
91 | .quad 2b /* address of the replacement jump */ | ||
92 | .byte X86_FEATURE_K8_C /* presumably the CPU feature that selects the replacement - confirm */ | ||
93 | .byte 5 /* nop and jump are both 5 bytes; presumably the original length - confirm */ | ||
94 | .byte 5 /* presumably the replacement length - confirm */ | ||
95 | .previous | ||
96 | .Lcug: | ||
97 | pushq %rbx /* rbx/bl is used as scratch below; restored at .Lende */ | ||
98 | xorl %eax,%eax /*zero for the exception handler; also the return value on success */ | ||
99 | |||
100 | #ifdef FIX_ALIGNMENT | ||
101 | /* check for bad alignment of destination */ | ||
102 | movl %edi,%ecx | ||
103 | andl $7,%ecx /* ecx = destination offset within its 8 byte word */ | ||
104 | jnz .Lbad_alignment | ||
105 | .Lafter_bad_alignment: | ||
106 | #endif | ||
107 | |||
108 | movq %rdx,%rcx /* rcx keeps the byte count for the tail code */ | ||
109 | |||
110 | movl $64,%ebx /* ebx = 64: block size, read by the main loop fault handlers */ | ||
111 | shrq $6,%rdx /* rdx = number of whole 64 byte blocks */ | ||
112 | decq %rdx /* loop counter is blocks - 1 */ | ||
113 | js .Lhandle_tail /* negative: fewer than 64 bytes to copy */ | ||
114 | |||
115 | .p2align 4 | ||
116 | .Lloop: /* copy one 64 byte block per iteration as two groups of four loads then four stores */ | ||
117 | .Ls1: movq (%rsi),%r11 | ||
118 | .Ls2: movq 1*8(%rsi),%r8 | ||
119 | .Ls3: movq 2*8(%rsi),%r9 | ||
120 | .Ls4: movq 3*8(%rsi),%r10 | ||
121 | .Ld1: movq %r11,(%rdi) | ||
122 | .Ld2: movq %r8,1*8(%rdi) | ||
123 | .Ld3: movq %r9,2*8(%rdi) | ||
124 | .Ld4: movq %r10,3*8(%rdi) | ||
125 | |||
126 | .Ls5: movq 4*8(%rsi),%r11 | ||
127 | .Ls6: movq 5*8(%rsi),%r8 | ||
128 | .Ls7: movq 6*8(%rsi),%r9 | ||
129 | .Ls8: movq 7*8(%rsi),%r10 | ||
130 | .Ld5: movq %r11,4*8(%rdi) | ||
131 | .Ld6: movq %r8,5*8(%rdi) | ||
132 | .Ld7: movq %r9,6*8(%rdi) | ||
133 | .Ld8: movq %r10,7*8(%rdi) | ||
134 | |||
135 | decq %rdx /* sets the sign flag tested by jns below */ | ||
136 | |||
137 | leaq 64(%rsi),%rsi /* lea advances the pointers without touching the flags */ | ||
138 | leaq 64(%rdi),%rdi | ||
139 | |||
140 | jns .Lloop /* loop while the block counter is still >= 0 */ | ||
141 | |||
142 | .p2align 4 | ||
143 | .Lhandle_tail: | ||
144 | movl %ecx,%edx /* edx = low 32 bits of the byte count */ | ||
145 | andl $63,%ecx /* bytes left after the 64 byte blocks */ | ||
146 | shrl $3,%ecx /* ecx = whole quadwords left */ | ||
147 | jz .Lhandle_7 | ||
148 | movl $8,%ebx | ||
149 | .p2align 4 | ||
150 | .Lloop_8: /* copy the remaining quadwords one at a time */ | ||
151 | .Ls9: movq (%rsi),%r8 | ||
152 | .Ld9: movq %r8,(%rdi) | ||
153 | decl %ecx /* sets the zero flag tested by jnz below */ | ||
154 | leaq 8(%rdi),%rdi /* lea advances the pointers without touching the flags */ | ||
155 | leaq 8(%rsi),%rsi | ||
156 | jnz .Lloop_8 | ||
157 | |||
158 | .Lhandle_7: | ||
159 | movl %edx,%ecx | ||
160 | andl $7,%ecx /* ecx = trailing bytes (count mod 8) */ | ||
161 | jz .Lende | ||
162 | .p2align 4 | ||
163 | .Lloop_1: /* copy the trailing bytes one at a time through bl */ | ||
164 | .Ls10: movb (%rsi),%bl | ||
165 | .Ld10: movb %bl,(%rdi) | ||
166 | incq %rdi | ||
167 | incq %rsi | ||
168 | decl %ecx | ||
169 | jnz .Lloop_1 | ||
170 | |||
171 | .Lende: /* common exit; the fault handlers also jump here with eax already set */ | ||
172 | popq %rbx | ||
173 | ret | ||
174 | |||
#ifdef FIX_ALIGNMENT
	/*
	 * Align the destination to an 8 byte boundary.
	 *
	 * Entered from copy_user_generic with:
	 *   ecx  destination address & 7 (non-zero)
	 *   rdx  byte count
	 *   rdi  destination, rsi source
	 *
	 * Copies 8 - ecx bytes one at a time, subtracts them from rdx and
	 * resumes at .Lafter_bad_alignment.  When the whole request is no
	 * longer than the alignment prefix it is handed to the byte tail
	 * loop (.Lhandle_7) instead.
	 *
	 * While the loop runs r9 holds the prefix length and ecx the prefix
	 * bytes still to be copied; faults at .Ls11/.Ld11 are resolved through
	 * the exception table.
	 */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d		/* r9 = bytes needed to reach alignment (1..7) */
	movl %r9d,%ecx		/* ecx = loop counter for the prefix copy */
	cmpq %r9,%rdx
	/* The count is unsigned, so use an unsigned test: count <= prefix. */
	jbe  .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx		/* the prefix has been copied */
	jmp .Lafter_bad_alignment
#endif
195 | |||
/*
 * Exception table for copy_user_generic: (faulting instruction, fixup)
 * pairs.  The table is sorted by exception address.
 */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Le_align
	.quad .Ld11,.Le_align
#endif
	.quad .Le5,.Le_zero
	.previous

	/*
	 * Fault in the unrolled 64 byte loop.
	 *
	 * On entry eax is zero and ebx is 64.  Entering at .LsNe falls
	 * through the remaining adds, leaving eax = 8 * (9 - N): the part of
	 * the current block counted as not copied.  This is accurate to
	 * 8 bytes and errs on the pessimistic side.
	 *
	 * rdi still points at the start of the current block, so
	 * rdi + 64 - eax is the first byte that was not stored.
	 */
.Ls1e:	addl $8,%eax
.Ls2e:	addl $8,%eax
.Ls3e:	addl $8,%eax
.Ls4e:	addl $8,%eax
.Ls5e:	addl $8,%eax
.Ls6e:	addl $8,%eax
.Ls7e:	addl $8,%eax
.Ls8e:	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi	/* correct destination with computed offset */

	shlq $6,%rdx	/* blocks still to go * 64 (stride length) */
	addq %rax,%rdx	/* plus the unfinished part of this block */
	andl $63,%ecx	/* bytes that were left for the tail code */
	addq %rcx,%rdx	/* add them */
	jmp .Lzero_rest

#ifdef FIX_ALIGNMENT
	/*
	 * Fault in the destination alignment loop.
	 *
	 * r9: length of the alignment prefix, ecx: prefix bytes not yet
	 * copied, rdx: full byte count, rdi: correct.
	 *
	 * rdi has already moved past the r9 - ecx bytes that were copied, so
	 * they must be taken off the count.  Otherwise the zeroing below
	 * would run past the end of the destination and the return value
	 * would be too large.
	 */
.Le_align:
	subq %r9,%rdx
	addq %rcx,%rdx	/* rdx = count - (prefix - bytes left in prefix) */
	jmp .Lzero_rest
#endif

	/* exception on quad word loop in tail handling */
	/* ecx: quadwords left, edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx	/* bytes in the quadwords not yet copied */
	andl $7,%edx	/* plus the trailing bytes */
	addl %ecx,%edx
	/* rdx: bytes to zero, rdi: dest */
.Lzero_rest:
	movq %rdx,%rcx
	/* exception on byte loop in tail handling */
	/* ecx: bytes left, rdi: correct */
.Le_byte:
	movq %rcx,%rdx	/* return exactly the bytes left, not the whole length */
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest just return */
.Le_zero:
	movq %rdx,%rax	/* eax: uncopied bytes */
	jmp .Lende
264 | |||
265 | /* C stepping K8 run faster using the string copy instructions. | ||
266 | This is also a lot simpler. Use them when possible. | ||
267 | Patch in jmps to this code instead of copying it fully | ||
268 | to avoid unwanted aliasing in the exception tables. */ | ||
269 | |||
270 | /* rdi destination | ||
271 | * rsi source | ||
272 | * rdx count | ||
273 | * | ||
274 | * Output: | ||
275 | * eax uncopied bytes or 0 if successful. | ||
276 | */ | ||
277 | copy_user_generic_c: | ||
278 | movl %edx,%ecx /* NOTE(review): only the low 32 bits of the count in rdx are used */ | ||
279 | shrl $3,%ecx /* ecx = number of whole quadwords */ | ||
280 | andl $7,%edx /* edx = trailing bytes (count mod 8) */ | ||
281 | 1: rep | ||
282 | movsq /* copy the quadwords */ | ||
283 | movl %edx,%ecx | ||
284 | 2: rep | ||
285 | movsb /* copy the trailing bytes */ | ||
286 | 4: movl %ecx,%eax /* success: ecx is 0 here; fault in movsb: ecx = bytes left */ | ||
287 | ret | ||
288 | 3: lea (%rdx,%rcx,8),%rax /* fault in movsq: trailing bytes + 8 * quadwords left */ | ||
289 | ret /* NOTE(review): neither fault path zeroes the rest of the destination, unlike the .Lzero_rest path of copy_user_generic - confirm this is intended */ | ||
290 | |||
291 | .section __ex_table,"a" | ||
292 | .quad 1b,3b /* fault at the rep movsq resumes at 3 */ | ||
293 | .quad 2b,4b /* fault at the rep movsb resumes at 4 */ | ||
294 | .previous | ||