aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/lib')
-rw-r--r--arch/x86/lib/Makefile5
-rw-r--r--arch/x86/lib/copy_user_64.S432
-rw-r--r--arch/x86/lib/copy_user_nocache_64.S286
-rw-r--r--arch/x86/lib/delay.c (renamed from arch/x86/lib/delay_32.c)38
-rw-r--r--arch/x86/lib/delay_64.c85
-rw-r--r--arch/x86/lib/getuser.S (renamed from arch/x86/lib/getuser_64.S)87
-rw-r--r--arch/x86/lib/getuser_32.S78
-rw-r--r--arch/x86/lib/msr-on-cpu.c8
-rw-r--r--arch/x86/lib/putuser.S (renamed from arch/x86/lib/putuser_32.S)73
-rw-r--r--arch/x86/lib/putuser_64.S106
-rw-r--r--arch/x86/lib/thunk_32.S47
-rw-r--r--arch/x86/lib/thunk_64.S19
-rw-r--r--arch/x86/lib/usercopy_64.c23
13 files changed, 469 insertions, 818 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 76f60f52a885..aa3fa4119424 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -4,8 +4,9 @@
4 4
5obj-$(CONFIG_SMP) := msr-on-cpu.o 5obj-$(CONFIG_SMP) := msr-on-cpu.o
6 6
7lib-y := delay_$(BITS).o 7lib-y := delay.o
8lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o 8lib-y += thunk_$(BITS).o
9lib-y += usercopy_$(BITS).o getuser.o putuser.o
9lib-y += memcpy_$(BITS).o 10lib-y += memcpy_$(BITS).o
10 11
11ifeq ($(CONFIG_X86_32),y) 12ifeq ($(CONFIG_X86_32),y)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 70bebd310408..dfdf428975c0 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -1,8 +1,10 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs. 1/*
2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
3 * Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2. 4 * Subject to the GNU Public License v2.
3 * 5 *
4 * Functions to copy from and to user space. 6 * Functions to copy from and to user space.
5 */ 7 */
6 8
7#include <linux/linkage.h> 9#include <linux/linkage.h>
8#include <asm/dwarf2.h> 10#include <asm/dwarf2.h>
@@ -20,60 +22,88 @@
20 .long \orig-1f /* by default jump to orig */ 22 .long \orig-1f /* by default jump to orig */
211: 231:
22 .section .altinstr_replacement,"ax" 24 .section .altinstr_replacement,"ax"
232: .byte 0xe9 /* near jump with 32bit immediate */ 252: .byte 0xe9 /* near jump with 32bit immediate */
24 .long \alt-1b /* offset */ /* or alternatively to alt */ 26 .long \alt-1b /* offset */ /* or alternatively to alt */
25 .previous 27 .previous
26 .section .altinstructions,"a" 28 .section .altinstructions,"a"
27 .align 8 29 .align 8
28 .quad 0b 30 .quad 0b
29 .quad 2b 31 .quad 2b
30 .byte \feature /* when feature is set */ 32 .byte \feature /* when feature is set */
31 .byte 5 33 .byte 5
32 .byte 5 34 .byte 5
33 .previous 35 .previous
34 .endm 36 .endm
35 37
36/* Standard copy_to_user with segment limit checking */ 38 .macro ALIGN_DESTINATION
39#ifdef FIX_ALIGNMENT
40 /* check for bad alignment of destination */
41 movl %edi,%ecx
42 andl $7,%ecx
43 jz 102f /* already aligned */
44 subl $8,%ecx
45 negl %ecx
46 subl %ecx,%edx
47100: movb (%rsi),%al
48101: movb %al,(%rdi)
49 incq %rsi
50 incq %rdi
51 decl %ecx
52 jnz 100b
53102:
54 .section .fixup,"ax"
55103: addl %r8d,%edx /* ecx is zerorest also */
56 jmp copy_user_handle_tail
57 .previous
58
59 .section __ex_table,"a"
60 .align 8
61 .quad 100b,103b
62 .quad 101b,103b
63 .previous
64#endif
65 .endm
66
67/* Standard copy_to_user with segment limit checking */
37ENTRY(copy_to_user) 68ENTRY(copy_to_user)
38 CFI_STARTPROC 69 CFI_STARTPROC
39 GET_THREAD_INFO(%rax) 70 GET_THREAD_INFO(%rax)
40 movq %rdi,%rcx 71 movq %rdi,%rcx
41 addq %rdx,%rcx 72 addq %rdx,%rcx
42 jc bad_to_user 73 jc bad_to_user
43 cmpq threadinfo_addr_limit(%rax),%rcx 74 cmpq TI_addr_limit(%rax),%rcx
44 jae bad_to_user 75 jae bad_to_user
45 xorl %eax,%eax /* clear zero flag */
46 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 76 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
47 CFI_ENDPROC 77 CFI_ENDPROC
48 78
49ENTRY(copy_user_generic) 79/* Standard copy_from_user with segment limit checking */
80ENTRY(copy_from_user)
50 CFI_STARTPROC 81 CFI_STARTPROC
51 movl $1,%ecx /* set zero flag */ 82 GET_THREAD_INFO(%rax)
83 movq %rsi,%rcx
84 addq %rdx,%rcx
85 jc bad_from_user
86 cmpq TI_addr_limit(%rax),%rcx
87 jae bad_from_user
52 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 88 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
53 CFI_ENDPROC 89 CFI_ENDPROC
90ENDPROC(copy_from_user)
54 91
55ENTRY(__copy_from_user_inatomic) 92ENTRY(copy_user_generic)
56 CFI_STARTPROC 93 CFI_STARTPROC
57 xorl %ecx,%ecx /* clear zero flag */
58 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 94 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
59 CFI_ENDPROC 95 CFI_ENDPROC
96ENDPROC(copy_user_generic)
60 97
61/* Standard copy_from_user with segment limit checking */ 98ENTRY(__copy_from_user_inatomic)
62ENTRY(copy_from_user)
63 CFI_STARTPROC 99 CFI_STARTPROC
64 GET_THREAD_INFO(%rax)
65 movq %rsi,%rcx
66 addq %rdx,%rcx
67 jc bad_from_user
68 cmpq threadinfo_addr_limit(%rax),%rcx
69 jae bad_from_user
70 movl $1,%ecx /* set zero flag */
71 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string 100 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
72 CFI_ENDPROC 101 CFI_ENDPROC
73ENDPROC(copy_from_user) 102ENDPROC(__copy_from_user_inatomic)
74 103
75 .section .fixup,"ax" 104 .section .fixup,"ax"
76 /* must zero dest */ 105 /* must zero dest */
106ENTRY(bad_from_user)
77bad_from_user: 107bad_from_user:
78 CFI_STARTPROC 108 CFI_STARTPROC
79 movl %edx,%ecx 109 movl %edx,%ecx
@@ -81,274 +111,158 @@ bad_from_user:
81 rep 111 rep
82 stosb 112 stosb
83bad_to_user: 113bad_to_user:
84 movl %edx,%eax 114 movl %edx,%eax
85 ret 115 ret
86 CFI_ENDPROC 116 CFI_ENDPROC
87END(bad_from_user) 117ENDPROC(bad_from_user)
88 .previous 118 .previous
89 119
90
91/* 120/*
92 * copy_user_generic_unrolled - memory copy with exception handling. 121 * copy_user_generic_unrolled - memory copy with exception handling.
93 * This version is for CPUs like P4 that don't have efficient micro code for rep movsq 122 * This version is for CPUs like P4 that don't have efficient micro
94 * 123 * code for rep movsq
95 * Input: 124 *
125 * Input:
96 * rdi destination 126 * rdi destination
97 * rsi source 127 * rsi source
98 * rdx count 128 * rdx count
99 * ecx zero flag -- if true zero destination on error
100 * 129 *
101 * Output: 130 * Output:
102 * eax uncopied bytes or 0 if successful. 131 * eax uncopied bytes or 0 if successfull.
103 */ 132 */
104ENTRY(copy_user_generic_unrolled) 133ENTRY(copy_user_generic_unrolled)
105 CFI_STARTPROC 134 CFI_STARTPROC
106 pushq %rbx 135 cmpl $8,%edx
107 CFI_ADJUST_CFA_OFFSET 8 136 jb 20f /* less then 8 bytes, go to byte copy loop */
108 CFI_REL_OFFSET rbx, 0 137 ALIGN_DESTINATION
109 pushq %rcx 138 movl %edx,%ecx
110 CFI_ADJUST_CFA_OFFSET 8 139 andl $63,%edx
111 CFI_REL_OFFSET rcx, 0 140 shrl $6,%ecx
112 xorl %eax,%eax /*zero for the exception handler */ 141 jz 17f
113 1421: movq (%rsi),%r8
114#ifdef FIX_ALIGNMENT 1432: movq 1*8(%rsi),%r9
115 /* check for bad alignment of destination */ 1443: movq 2*8(%rsi),%r10
116 movl %edi,%ecx 1454: movq 3*8(%rsi),%r11
117 andl $7,%ecx 1465: movq %r8,(%rdi)
118 jnz .Lbad_alignment 1476: movq %r9,1*8(%rdi)
119.Lafter_bad_alignment: 1487: movq %r10,2*8(%rdi)
120#endif 1498: movq %r11,3*8(%rdi)
121 1509: movq 4*8(%rsi),%r8
122 movq %rdx,%rcx 15110: movq 5*8(%rsi),%r9
123 15211: movq 6*8(%rsi),%r10
124 movl $64,%ebx 15312: movq 7*8(%rsi),%r11
125 shrq $6,%rdx 15413: movq %r8,4*8(%rdi)
126 decq %rdx 15514: movq %r9,5*8(%rdi)
127 js .Lhandle_tail 15615: movq %r10,6*8(%rdi)
128 15716: movq %r11,7*8(%rdi)
129 .p2align 4
130.Lloop:
131.Ls1: movq (%rsi),%r11
132.Ls2: movq 1*8(%rsi),%r8
133.Ls3: movq 2*8(%rsi),%r9
134.Ls4: movq 3*8(%rsi),%r10
135.Ld1: movq %r11,(%rdi)
136.Ld2: movq %r8,1*8(%rdi)
137.Ld3: movq %r9,2*8(%rdi)
138.Ld4: movq %r10,3*8(%rdi)
139
140.Ls5: movq 4*8(%rsi),%r11
141.Ls6: movq 5*8(%rsi),%r8
142.Ls7: movq 6*8(%rsi),%r9
143.Ls8: movq 7*8(%rsi),%r10
144.Ld5: movq %r11,4*8(%rdi)
145.Ld6: movq %r8,5*8(%rdi)
146.Ld7: movq %r9,6*8(%rdi)
147.Ld8: movq %r10,7*8(%rdi)
148
149 decq %rdx
150
151 leaq 64(%rsi),%rsi 158 leaq 64(%rsi),%rsi
152 leaq 64(%rdi),%rdi 159 leaq 64(%rdi),%rdi
153
154 jns .Lloop
155
156 .p2align 4
157.Lhandle_tail:
158 movl %ecx,%edx
159 andl $63,%ecx
160 shrl $3,%ecx
161 jz .Lhandle_7
162 movl $8,%ebx
163 .p2align 4
164.Lloop_8:
165.Ls9: movq (%rsi),%r8
166.Ld9: movq %r8,(%rdi)
167 decl %ecx 160 decl %ecx
168 leaq 8(%rdi),%rdi 161 jnz 1b
16217: movl %edx,%ecx
163 andl $7,%edx
164 shrl $3,%ecx
165 jz 20f
16618: movq (%rsi),%r8
16719: movq %r8,(%rdi)
169 leaq 8(%rsi),%rsi 168 leaq 8(%rsi),%rsi
170 jnz .Lloop_8 169 leaq 8(%rdi),%rdi
171 170 decl %ecx
172.Lhandle_7: 171 jnz 18b
17220: andl %edx,%edx
173 jz 23f
173 movl %edx,%ecx 174 movl %edx,%ecx
174 andl $7,%ecx 17521: movb (%rsi),%al
175 jz .Lende 17622: movb %al,(%rdi)
176 .p2align 4
177.Lloop_1:
178.Ls10: movb (%rsi),%bl
179.Ld10: movb %bl,(%rdi)
180 incq %rdi
181 incq %rsi 177 incq %rsi
178 incq %rdi
182 decl %ecx 179 decl %ecx
183 jnz .Lloop_1 180 jnz 21b
184 18123: xor %eax,%eax
185 CFI_REMEMBER_STATE
186.Lende:
187 popq %rcx
188 CFI_ADJUST_CFA_OFFSET -8
189 CFI_RESTORE rcx
190 popq %rbx
191 CFI_ADJUST_CFA_OFFSET -8
192 CFI_RESTORE rbx
193 ret 182 ret
194 CFI_RESTORE_STATE
195 183
196#ifdef FIX_ALIGNMENT 184 .section .fixup,"ax"
197 /* align destination */ 18530: shll $6,%ecx
198 .p2align 4 186 addl %ecx,%edx
199.Lbad_alignment: 187 jmp 60f
200 movl $8,%r9d 18840: lea (%rdx,%rcx,8),%rdx
201 subl %ecx,%r9d 189 jmp 60f
202 movl %r9d,%ecx 19050: movl %ecx,%edx
203 cmpq %r9,%rdx 19160: jmp copy_user_handle_tail /* ecx is zerorest also */
204 jz .Lhandle_7 192 .previous
205 js .Lhandle_7
206.Lalign_1:
207.Ls11: movb (%rsi),%bl
208.Ld11: movb %bl,(%rdi)
209 incq %rsi
210 incq %rdi
211 decl %ecx
212 jnz .Lalign_1
213 subq %r9,%rdx
214 jmp .Lafter_bad_alignment
215#endif
216 193
217 /* table sorted by exception address */
218 .section __ex_table,"a" 194 .section __ex_table,"a"
219 .align 8 195 .align 8
220 .quad .Ls1,.Ls1e 196 .quad 1b,30b
221 .quad .Ls2,.Ls2e 197 .quad 2b,30b
222 .quad .Ls3,.Ls3e 198 .quad 3b,30b
223 .quad .Ls4,.Ls4e 199 .quad 4b,30b
224 .quad .Ld1,.Ls1e 200 .quad 5b,30b
225 .quad .Ld2,.Ls2e 201 .quad 6b,30b
226 .quad .Ld3,.Ls3e 202 .quad 7b,30b
227 .quad .Ld4,.Ls4e 203 .quad 8b,30b
228 .quad .Ls5,.Ls5e 204 .quad 9b,30b
229 .quad .Ls6,.Ls6e 205 .quad 10b,30b
230 .quad .Ls7,.Ls7e 206 .quad 11b,30b
231 .quad .Ls8,.Ls8e 207 .quad 12b,30b
232 .quad .Ld5,.Ls5e 208 .quad 13b,30b
233 .quad .Ld6,.Ls6e 209 .quad 14b,30b
234 .quad .Ld7,.Ls7e 210 .quad 15b,30b
235 .quad .Ld8,.Ls8e 211 .quad 16b,30b
236 .quad .Ls9,.Le_quad 212 .quad 18b,40b
237 .quad .Ld9,.Le_quad 213 .quad 19b,40b
238 .quad .Ls10,.Le_byte 214 .quad 21b,50b
239 .quad .Ld10,.Le_byte 215 .quad 22b,50b
240#ifdef FIX_ALIGNMENT
241 .quad .Ls11,.Lzero_rest
242 .quad .Ld11,.Lzero_rest
243#endif
244 .quad .Le5,.Le_zero
245 .previous 216 .previous
246
247 /* compute 64-offset for main loop. 8 bytes accuracy with error on the
248 pessimistic side. this is gross. it would be better to fix the
249 interface. */
250 /* eax: zero, ebx: 64 */
251.Ls1e: addl $8,%eax
252.Ls2e: addl $8,%eax
253.Ls3e: addl $8,%eax
254.Ls4e: addl $8,%eax
255.Ls5e: addl $8,%eax
256.Ls6e: addl $8,%eax
257.Ls7e: addl $8,%eax
258.Ls8e: addl $8,%eax
259 addq %rbx,%rdi /* +64 */
260 subq %rax,%rdi /* correct destination with computed offset */
261
262 shlq $6,%rdx /* loop counter * 64 (stride length) */
263 addq %rax,%rdx /* add offset to loopcnt */
264 andl $63,%ecx /* remaining bytes */
265 addq %rcx,%rdx /* add them */
266 jmp .Lzero_rest
267
268 /* exception on quad word loop in tail handling */
269 /* ecx: loopcnt/8, %edx: length, rdi: correct */
270.Le_quad:
271 shll $3,%ecx
272 andl $7,%edx
273 addl %ecx,%edx
274 /* edx: bytes to zero, rdi: dest, eax:zero */
275.Lzero_rest:
276 cmpl $0,(%rsp)
277 jz .Le_zero
278 movq %rdx,%rcx
279.Le_byte:
280 xorl %eax,%eax
281.Le5: rep
282 stosb
283 /* when there is another exception while zeroing the rest just return */
284.Le_zero:
285 movq %rdx,%rax
286 jmp .Lende
287 CFI_ENDPROC 217 CFI_ENDPROC
288ENDPROC(copy_user_generic) 218ENDPROC(copy_user_generic_unrolled)
289 219
290 220/* Some CPUs run faster using the string copy instructions.
291 /* Some CPUs run faster using the string copy instructions. 221 * This is also a lot simpler. Use them when possible.
292 This is also a lot simpler. Use them when possible. 222 *
293 Patch in jmps to this code instead of copying it fully 223 * Only 4GB of copy is supported. This shouldn't be a problem
294 to avoid unwanted aliasing in the exception tables. */ 224 * because the kernel normally only writes from/to page sized chunks
295 225 * even if user space passed a longer buffer.
296 /* rdi destination 226 * And more would be dangerous because both Intel and AMD have
297 * rsi source 227 * errata with rep movsq > 4GB. If someone feels the need to fix
298 * rdx count 228 * this please consider this.
299 * ecx zero flag 229 *
300 * 230 * Input:
301 * Output: 231 * rdi destination
302 * eax uncopied bytes or 0 if successfull. 232 * rsi source
303 * 233 * rdx count
304 * Only 4GB of copy is supported. This shouldn't be a problem 234 *
305 * because the kernel normally only writes from/to page sized chunks 235 * Output:
306 * even if user space passed a longer buffer. 236 * eax uncopied bytes or 0 if successful.
307 * And more would be dangerous because both Intel and AMD have 237 */
308 * errata with rep movsq > 4GB. If someone feels the need to fix
309 * this please consider this.
310 */
311ENTRY(copy_user_generic_string) 238ENTRY(copy_user_generic_string)
312 CFI_STARTPROC 239 CFI_STARTPROC
313 movl %ecx,%r8d /* save zero flag */ 240 andl %edx,%edx
241 jz 4f
242 cmpl $8,%edx
243 jb 2f /* less than 8 bytes, go to byte copy loop */
244 ALIGN_DESTINATION
314 movl %edx,%ecx 245 movl %edx,%ecx
315 shrl $3,%ecx 246 shrl $3,%ecx
316 andl $7,%edx 247 andl $7,%edx
317 jz 10f 2481: rep
3181: rep
319 movsq
320 movl %edx,%ecx
3212: rep
322 movsb
3239: movl %ecx,%eax
324 ret
325
326 /* multiple of 8 byte */
32710: rep
328 movsq 249 movsq
329 xor %eax,%eax 2502: movl %edx,%ecx
2513: rep
252 movsb
2534: xorl %eax,%eax
330 ret 254 ret
331 255
332 /* exception handling */ 256 .section .fixup,"ax"
3333: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ 25711: lea (%rdx,%rcx,8),%rcx
334 jmp 6f 25812: movl %ecx,%edx /* ecx is zerorest also */
3355: movl %ecx,%eax /* exception on byte loop */ 259 jmp copy_user_handle_tail
336 /* eax: left over bytes */ 260 .previous
3376: testl %r8d,%r8d /* zero flag set? */
338 jz 7f
339 movl %eax,%ecx /* initialize x86 loop counter */
340 push %rax
341 xorl %eax,%eax
3428: rep
343 stosb /* zero the rest */
34411: pop %rax
3457: ret
346 CFI_ENDPROC
347END(copy_user_generic_c)
348 261
349 .section __ex_table,"a" 262 .section __ex_table,"a"
350 .quad 1b,3b 263 .align 8
351 .quad 2b,5b 264 .quad 1b,11b
352 .quad 8b,11b 265 .quad 3b,12b
353 .quad 10b,3b
354 .previous 266 .previous
267 CFI_ENDPROC
268ENDPROC(copy_user_generic_string)
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 5196762b3b0e..40e0e309d27e 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -1,4 +1,6 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs. 1/*
2 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
3 * Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2. 4 * Subject to the GNU Public License v2.
3 * 5 *
4 * Functions to copy from and to user space. 6 * Functions to copy from and to user space.
@@ -12,207 +14,125 @@
12#include <asm/current.h> 14#include <asm/current.h>
13#include <asm/asm-offsets.h> 15#include <asm/asm-offsets.h>
14#include <asm/thread_info.h> 16#include <asm/thread_info.h>
15#include <asm/cpufeature.h>
16
17/*
18 * copy_user_nocache - Uncached memory copy with exception handling
19 * This will force destination/source out of cache for more performance.
20 *
21 * Input:
22 * rdi destination
23 * rsi source
24 * rdx count
25 * rcx zero flag when 1 zero on exception
26 *
27 * Output:
28 * eax uncopied bytes or 0 if successful.
29 */
30ENTRY(__copy_user_nocache)
31 CFI_STARTPROC
32 pushq %rbx
33 CFI_ADJUST_CFA_OFFSET 8
34 CFI_REL_OFFSET rbx, 0
35 pushq %rcx /* save zero flag */
36 CFI_ADJUST_CFA_OFFSET 8
37 CFI_REL_OFFSET rcx, 0
38
39 xorl %eax,%eax /* zero for the exception handler */
40 17
18 .macro ALIGN_DESTINATION
41#ifdef FIX_ALIGNMENT 19#ifdef FIX_ALIGNMENT
42 /* check for bad alignment of destination */ 20 /* check for bad alignment of destination */
43 movl %edi,%ecx 21 movl %edi,%ecx
44 andl $7,%ecx 22 andl $7,%ecx
45 jnz .Lbad_alignment 23 jz 102f /* already aligned */
46.Lafter_bad_alignment: 24 subl $8,%ecx
47#endif 25 negl %ecx
48 26 subl %ecx,%edx
49 movq %rdx,%rcx 27100: movb (%rsi),%al
50 28101: movb %al,(%rdi)
51 movl $64,%ebx 29 incq %rsi
52 shrq $6,%rdx 30 incq %rdi
53 decq %rdx 31 decl %ecx
54 js .Lhandle_tail 32 jnz 100b
55 33102:
56 .p2align 4 34 .section .fixup,"ax"
57.Lloop: 35103: addl %r8d,%edx /* ecx is zerorest also */
58.Ls1: movq (%rsi),%r11 36 jmp copy_user_handle_tail
59.Ls2: movq 1*8(%rsi),%r8 37 .previous
60.Ls3: movq 2*8(%rsi),%r9
61.Ls4: movq 3*8(%rsi),%r10
62.Ld1: movnti %r11,(%rdi)
63.Ld2: movnti %r8,1*8(%rdi)
64.Ld3: movnti %r9,2*8(%rdi)
65.Ld4: movnti %r10,3*8(%rdi)
66
67.Ls5: movq 4*8(%rsi),%r11
68.Ls6: movq 5*8(%rsi),%r8
69.Ls7: movq 6*8(%rsi),%r9
70.Ls8: movq 7*8(%rsi),%r10
71.Ld5: movnti %r11,4*8(%rdi)
72.Ld6: movnti %r8,5*8(%rdi)
73.Ld7: movnti %r9,6*8(%rdi)
74.Ld8: movnti %r10,7*8(%rdi)
75 38
76 dec %rdx 39 .section __ex_table,"a"
40 .align 8
41 .quad 100b,103b
42 .quad 101b,103b
43 .previous
44#endif
45 .endm
77 46
47/*
48 * copy_user_nocache - Uncached memory copy with exception handling
49 * This will force destination/source out of cache for more performance.
50 */
51ENTRY(__copy_user_nocache)
52 CFI_STARTPROC
53 cmpl $8,%edx
54 jb 20f /* less then 8 bytes, go to byte copy loop */
55 ALIGN_DESTINATION
56 movl %edx,%ecx
57 andl $63,%edx
58 shrl $6,%ecx
59 jz 17f
601: movq (%rsi),%r8
612: movq 1*8(%rsi),%r9
623: movq 2*8(%rsi),%r10
634: movq 3*8(%rsi),%r11
645: movnti %r8,(%rdi)
656: movnti %r9,1*8(%rdi)
667: movnti %r10,2*8(%rdi)
678: movnti %r11,3*8(%rdi)
689: movq 4*8(%rsi),%r8
6910: movq 5*8(%rsi),%r9
7011: movq 6*8(%rsi),%r10
7112: movq 7*8(%rsi),%r11
7213: movnti %r8,4*8(%rdi)
7314: movnti %r9,5*8(%rdi)
7415: movnti %r10,6*8(%rdi)
7516: movnti %r11,7*8(%rdi)
78 leaq 64(%rsi),%rsi 76 leaq 64(%rsi),%rsi
79 leaq 64(%rdi),%rdi 77 leaq 64(%rdi),%rdi
80
81 jns .Lloop
82
83 .p2align 4
84.Lhandle_tail:
85 movl %ecx,%edx
86 andl $63,%ecx
87 shrl $3,%ecx
88 jz .Lhandle_7
89 movl $8,%ebx
90 .p2align 4
91.Lloop_8:
92.Ls9: movq (%rsi),%r8
93.Ld9: movnti %r8,(%rdi)
94 decl %ecx 78 decl %ecx
95 leaq 8(%rdi),%rdi 79 jnz 1b
8017: movl %edx,%ecx
81 andl $7,%edx
82 shrl $3,%ecx
83 jz 20f
8418: movq (%rsi),%r8
8519: movnti %r8,(%rdi)
96 leaq 8(%rsi),%rsi 86 leaq 8(%rsi),%rsi
97 jnz .Lloop_8 87 leaq 8(%rdi),%rdi
98 88 decl %ecx
99.Lhandle_7: 89 jnz 18b
9020: andl %edx,%edx
91 jz 23f
100 movl %edx,%ecx 92 movl %edx,%ecx
101 andl $7,%ecx 9321: movb (%rsi),%al
102 jz .Lende 9422: movb %al,(%rdi)
103 .p2align 4
104.Lloop_1:
105.Ls10: movb (%rsi),%bl
106.Ld10: movb %bl,(%rdi)
107 incq %rdi
108 incq %rsi 95 incq %rsi
96 incq %rdi
109 decl %ecx 97 decl %ecx
110 jnz .Lloop_1 98 jnz 21b
111 9923: xorl %eax,%eax
112 CFI_REMEMBER_STATE
113.Lende:
114 popq %rcx
115 CFI_ADJUST_CFA_OFFSET -8
116 CFI_RESTORE %rcx
117 popq %rbx
118 CFI_ADJUST_CFA_OFFSET -8
119 CFI_RESTORE rbx
120 sfence 100 sfence
121 ret 101 ret
122 CFI_RESTORE_STATE
123 102
124#ifdef FIX_ALIGNMENT 103 .section .fixup,"ax"
125 /* align destination */ 10430: shll $6,%ecx
126 .p2align 4 105 addl %ecx,%edx
127.Lbad_alignment: 106 jmp 60f
128 movl $8,%r9d 10740: lea (%rdx,%rcx,8),%rdx
129 subl %ecx,%r9d 108 jmp 60f
130 movl %r9d,%ecx 10950: movl %ecx,%edx
131 cmpq %r9,%rdx 11060: sfence
132 jz .Lhandle_7 111 movl %r8d,%ecx
133 js .Lhandle_7 112 jmp copy_user_handle_tail
134.Lalign_1: 113 .previous
135.Ls11: movb (%rsi),%bl
136.Ld11: movb %bl,(%rdi)
137 incq %rsi
138 incq %rdi
139 decl %ecx
140 jnz .Lalign_1
141 subq %r9,%rdx
142 jmp .Lafter_bad_alignment
143#endif
144 114
145 /* table sorted by exception address */
146 .section __ex_table,"a" 115 .section __ex_table,"a"
147 .align 8 116 .quad 1b,30b
148 .quad .Ls1,.Ls1e 117 .quad 2b,30b
149 .quad .Ls2,.Ls2e 118 .quad 3b,30b
150 .quad .Ls3,.Ls3e 119 .quad 4b,30b
151 .quad .Ls4,.Ls4e 120 .quad 5b,30b
152 .quad .Ld1,.Ls1e 121 .quad 6b,30b
153 .quad .Ld2,.Ls2e 122 .quad 7b,30b
154 .quad .Ld3,.Ls3e 123 .quad 8b,30b
155 .quad .Ld4,.Ls4e 124 .quad 9b,30b
156 .quad .Ls5,.Ls5e 125 .quad 10b,30b
157 .quad .Ls6,.Ls6e 126 .quad 11b,30b
158 .quad .Ls7,.Ls7e 127 .quad 12b,30b
159 .quad .Ls8,.Ls8e 128 .quad 13b,30b
160 .quad .Ld5,.Ls5e 129 .quad 14b,30b
161 .quad .Ld6,.Ls6e 130 .quad 15b,30b
162 .quad .Ld7,.Ls7e 131 .quad 16b,30b
163 .quad .Ld8,.Ls8e 132 .quad 18b,40b
164 .quad .Ls9,.Le_quad 133 .quad 19b,40b
165 .quad .Ld9,.Le_quad 134 .quad 21b,50b
166 .quad .Ls10,.Le_byte 135 .quad 22b,50b
167 .quad .Ld10,.Le_byte
168#ifdef FIX_ALIGNMENT
169 .quad .Ls11,.Lzero_rest
170 .quad .Ld11,.Lzero_rest
171#endif
172 .quad .Le5,.Le_zero
173 .previous 136 .previous
174
175 /* compute 64-offset for main loop. 8 bytes accuracy with error on the
176 pessimistic side. this is gross. it would be better to fix the
177 interface. */
178 /* eax: zero, ebx: 64 */
179.Ls1e: addl $8,%eax
180.Ls2e: addl $8,%eax
181.Ls3e: addl $8,%eax
182.Ls4e: addl $8,%eax
183.Ls5e: addl $8,%eax
184.Ls6e: addl $8,%eax
185.Ls7e: addl $8,%eax
186.Ls8e: addl $8,%eax
187 addq %rbx,%rdi /* +64 */
188 subq %rax,%rdi /* correct destination with computed offset */
189
190 shlq $6,%rdx /* loop counter * 64 (stride length) */
191 addq %rax,%rdx /* add offset to loopcnt */
192 andl $63,%ecx /* remaining bytes */
193 addq %rcx,%rdx /* add them */
194 jmp .Lzero_rest
195
196 /* exception on quad word loop in tail handling */
197 /* ecx: loopcnt/8, %edx: length, rdi: correct */
198.Le_quad:
199 shll $3,%ecx
200 andl $7,%edx
201 addl %ecx,%edx
202 /* edx: bytes to zero, rdi: dest, eax:zero */
203.Lzero_rest:
204 cmpl $0,(%rsp) /* zero flag set? */
205 jz .Le_zero
206 movq %rdx,%rcx
207.Le_byte:
208 xorl %eax,%eax
209.Le5: rep
210 stosb
211 /* when there is another exception while zeroing the rest just return */
212.Le_zero:
213 movq %rdx,%rax
214 jmp .Lende
215 CFI_ENDPROC 137 CFI_ENDPROC
216ENDPROC(__copy_user_nocache) 138ENDPROC(__copy_user_nocache)
217
218
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay.c
index d710f2d167bb..f4568605d7d5 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay.c
@@ -3,6 +3,7 @@
3 * 3 *
4 * Copyright (C) 1993 Linus Torvalds 4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> 5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 * Copyright (C) 2008 Jiri Hladky <hladky _dot_ jiri _at_ gmail _dot_ com>
6 * 7 *
7 * The __delay function must _NOT_ be inlined as its execution time 8 * The __delay function must _NOT_ be inlined as its execution time
8 * depends wildly on alignment on many x86 processors. The additional 9 * depends wildly on alignment on many x86 processors. The additional
@@ -28,16 +29,22 @@
28/* simple loop based delay: */ 29/* simple loop based delay: */
29static void delay_loop(unsigned long loops) 30static void delay_loop(unsigned long loops)
30{ 31{
31 int d0; 32 asm volatile(
32 33 " test %0,%0 \n"
33 __asm__ __volatile__( 34 " jz 3f \n"
34 "\tjmp 1f\n" 35 " jmp 1f \n"
35 ".align 16\n" 36
36 "1:\tjmp 2f\n" 37 ".align 16 \n"
37 ".align 16\n" 38 "1: jmp 2f \n"
38 "2:\tdecl %0\n\tjns 2b" 39
39 :"=&a" (d0) 40 ".align 16 \n"
40 :"0" (loops)); 41 "2: dec %0 \n"
42 " jnz 2b \n"
43 "3: dec %0 \n"
44
45 : /* we don't need output */
46 :"a" (loops)
47 );
41} 48}
42 49
43/* TSC based delay: */ 50/* TSC based delay: */
@@ -91,7 +98,7 @@ void use_tsc_delay(void)
91int __devinit read_current_timer(unsigned long *timer_val) 98int __devinit read_current_timer(unsigned long *timer_val)
92{ 99{
93 if (delay_fn == delay_tsc) { 100 if (delay_fn == delay_tsc) {
94 rdtscl(*timer_val); 101 rdtscll(*timer_val);
95 return 0; 102 return 0;
96 } 103 }
97 return -1; 104 return -1;
@@ -101,31 +108,30 @@ void __delay(unsigned long loops)
101{ 108{
102 delay_fn(loops); 109 delay_fn(loops);
103} 110}
111EXPORT_SYMBOL(__delay);
104 112
105inline void __const_udelay(unsigned long xloops) 113inline void __const_udelay(unsigned long xloops)
106{ 114{
107 int d0; 115 int d0;
108 116
109 xloops *= 4; 117 xloops *= 4;
110 __asm__("mull %0" 118 asm("mull %%edx"
111 :"=d" (xloops), "=&a" (d0) 119 :"=d" (xloops), "=&a" (d0)
112 :"1" (xloops), "0" 120 :"1" (xloops), "0"
113 (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); 121 (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4)));
114 122
115 __delay(++xloops); 123 __delay(++xloops);
116} 124}
125EXPORT_SYMBOL(__const_udelay);
117 126
118void __udelay(unsigned long usecs) 127void __udelay(unsigned long usecs)
119{ 128{
120 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ 129 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
121} 130}
131EXPORT_SYMBOL(__udelay);
122 132
123void __ndelay(unsigned long nsecs) 133void __ndelay(unsigned long nsecs)
124{ 134{
125 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ 135 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
126} 136}
127
128EXPORT_SYMBOL(__delay);
129EXPORT_SYMBOL(__const_udelay);
130EXPORT_SYMBOL(__udelay);
131EXPORT_SYMBOL(__ndelay); 137EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
deleted file mode 100644
index 4c441be92641..000000000000
--- a/arch/x86/lib/delay_64.c
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * Precise Delay Loops for x86-64
3 *
4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 *
7 * The __delay function must _NOT_ be inlined as its execution time
8 * depends wildly on alignment on many x86 processors.
9 */
10
11#include <linux/module.h>
12#include <linux/sched.h>
13#include <linux/timex.h>
14#include <linux/preempt.h>
15#include <linux/delay.h>
16#include <linux/init.h>
17
18#include <asm/delay.h>
19#include <asm/msr.h>
20
21#ifdef CONFIG_SMP
22#include <asm/smp.h>
23#endif
24
25int __devinit read_current_timer(unsigned long *timer_value)
26{
27 rdtscll(*timer_value);
28 return 0;
29}
30
31void __delay(unsigned long loops)
32{
33 unsigned bclock, now;
34 int cpu;
35
36 preempt_disable();
37 cpu = smp_processor_id();
38 rdtscl(bclock);
39 for (;;) {
40 rdtscl(now);
41 if ((now - bclock) >= loops)
42 break;
43
44 /* Allow RT tasks to run */
45 preempt_enable();
46 rep_nop();
47 preempt_disable();
48
49 /*
50 * It is possible that we moved to another CPU, and
51 * since TSC's are per-cpu we need to calculate
52 * that. The delay must guarantee that we wait "at
53 * least" the amount of time. Being moved to another
54 * CPU could make the wait longer but we just need to
55 * make sure we waited long enough. Rebalance the
56 * counter for this CPU.
57 */
58 if (unlikely(cpu != smp_processor_id())) {
59 loops -= (now - bclock);
60 cpu = smp_processor_id();
61 rdtscl(bclock);
62 }
63 }
64 preempt_enable();
65}
66EXPORT_SYMBOL(__delay);
67
68inline void __const_udelay(unsigned long xloops)
69{
70 __delay(((xloops * HZ *
71 cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1);
72}
73EXPORT_SYMBOL(__const_udelay);
74
75void __udelay(unsigned long usecs)
76{
77 __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
78}
79EXPORT_SYMBOL(__udelay);
80
81void __ndelay(unsigned long nsecs)
82{
83 __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
84}
85EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser.S
index 5448876261f8..ad374003742f 100644
--- a/arch/x86/lib/getuser_64.S
+++ b/arch/x86/lib/getuser.S
@@ -3,6 +3,7 @@
3 * 3 *
4 * (C) Copyright 1998 Linus Torvalds 4 * (C) Copyright 1998 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen 5 * (C) Copyright 2005 Andi Kleen
6 * (C) Copyright 2008 Glauber Costa
6 * 7 *
7 * These functions have a non-standard call interface 8 * These functions have a non-standard call interface
8 * to make them more efficient, especially as they 9 * to make them more efficient, especially as they
@@ -13,14 +14,13 @@
13/* 14/*
14 * __get_user_X 15 * __get_user_X
15 * 16 *
16 * Inputs: %rcx contains the address. 17 * Inputs: %[r|e]ax contains the address.
17 * The register is modified, but all changes are undone 18 * The register is modified, but all changes are undone
18 * before returning because the C code doesn't know about it. 19 * before returning because the C code doesn't know about it.
19 * 20 *
20 * Outputs: %rax is error code (0 or -EFAULT) 21 * Outputs: %[r|e]ax is error code (0 or -EFAULT)
21 * %rdx contains zero-extended value 22 * %[r|e]dx contains zero-extended value
22 * 23 *
23 * %r8 is destroyed.
24 * 24 *
25 * These functions should not modify any other registers, 25 * These functions should not modify any other registers,
26 * as they get called from within inline assembly. 26 * as they get called from within inline assembly.
@@ -32,78 +32,73 @@
32#include <asm/errno.h> 32#include <asm/errno.h>
33#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
34#include <asm/thread_info.h> 34#include <asm/thread_info.h>
35#include <asm/asm.h>
35 36
36 .text 37 .text
37ENTRY(__get_user_1) 38ENTRY(__get_user_1)
38 CFI_STARTPROC 39 CFI_STARTPROC
39 GET_THREAD_INFO(%r8) 40 GET_THREAD_INFO(%_ASM_DX)
40 cmpq threadinfo_addr_limit(%r8),%rcx 41 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
41 jae bad_get_user 42 jae bad_get_user
421: movzb (%rcx),%edx 431: movzb (%_ASM_AX),%edx
43 xorl %eax,%eax 44 xor %eax,%eax
44 ret 45 ret
45 CFI_ENDPROC 46 CFI_ENDPROC
46ENDPROC(__get_user_1) 47ENDPROC(__get_user_1)
47 48
48ENTRY(__get_user_2) 49ENTRY(__get_user_2)
49 CFI_STARTPROC 50 CFI_STARTPROC
50 GET_THREAD_INFO(%r8) 51 add $1,%_ASM_AX
51 addq $1,%rcx 52 jc bad_get_user
52 jc 20f 53 GET_THREAD_INFO(%_ASM_DX)
53 cmpq threadinfo_addr_limit(%r8),%rcx 54 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
54 jae 20f 55 jae bad_get_user
55 decq %rcx 562: movzwl -1(%_ASM_AX),%edx
562: movzwl (%rcx),%edx 57 xor %eax,%eax
57 xorl %eax,%eax
58 ret 58 ret
5920: decq %rcx
60 jmp bad_get_user
61 CFI_ENDPROC 59 CFI_ENDPROC
62ENDPROC(__get_user_2) 60ENDPROC(__get_user_2)
63 61
64ENTRY(__get_user_4) 62ENTRY(__get_user_4)
65 CFI_STARTPROC 63 CFI_STARTPROC
66 GET_THREAD_INFO(%r8) 64 add $3,%_ASM_AX
67 addq $3,%rcx 65 jc bad_get_user
68 jc 30f 66 GET_THREAD_INFO(%_ASM_DX)
69 cmpq threadinfo_addr_limit(%r8),%rcx 67 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
70 jae 30f 68 jae bad_get_user
71 subq $3,%rcx 693: mov -3(%_ASM_AX),%edx
723: movl (%rcx),%edx 70 xor %eax,%eax
73 xorl %eax,%eax
74 ret 71 ret
7530: subq $3,%rcx
76 jmp bad_get_user
77 CFI_ENDPROC 72 CFI_ENDPROC
78ENDPROC(__get_user_4) 73ENDPROC(__get_user_4)
79 74
75#ifdef CONFIG_X86_64
80ENTRY(__get_user_8) 76ENTRY(__get_user_8)
81 CFI_STARTPROC 77 CFI_STARTPROC
82 GET_THREAD_INFO(%r8) 78 add $7,%_ASM_AX
83 addq $7,%rcx 79 jc bad_get_user
84 jc 40f 80 GET_THREAD_INFO(%_ASM_DX)
85 cmpq threadinfo_addr_limit(%r8),%rcx 81 cmp TI_addr_limit(%_ASM_DX),%_ASM_AX
86 jae 40f 82 jae bad_get_user
87 subq $7,%rcx 834: movq -7(%_ASM_AX),%_ASM_DX
884: movq (%rcx),%rdx 84 xor %eax,%eax
89 xorl %eax,%eax
90 ret 85 ret
9140: subq $7,%rcx
92 jmp bad_get_user
93 CFI_ENDPROC 86 CFI_ENDPROC
94ENDPROC(__get_user_8) 87ENDPROC(__get_user_8)
88#endif
95 89
96bad_get_user: 90bad_get_user:
97 CFI_STARTPROC 91 CFI_STARTPROC
98 xorl %edx,%edx 92 xor %edx,%edx
99 movq $(-EFAULT),%rax 93 mov $(-EFAULT),%_ASM_AX
100 ret 94 ret
101 CFI_ENDPROC 95 CFI_ENDPROC
102END(bad_get_user) 96END(bad_get_user)
103 97
104.section __ex_table,"a" 98.section __ex_table,"a"
105 .quad 1b,bad_get_user 99 _ASM_PTR 1b,bad_get_user
106 .quad 2b,bad_get_user 100 _ASM_PTR 2b,bad_get_user
107 .quad 3b,bad_get_user 101 _ASM_PTR 3b,bad_get_user
108 .quad 4b,bad_get_user 102#ifdef CONFIG_X86_64
109.previous 103 _ASM_PTR 4b,bad_get_user
104#endif
diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S
deleted file mode 100644
index 6d84b53f12a2..000000000000
--- a/arch/x86/lib/getuser_32.S
+++ /dev/null
@@ -1,78 +0,0 @@
1/*
2 * __get_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 *
6 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they
8 * return an error value in addition to the "real"
9 * return value.
10 */
11#include <linux/linkage.h>
12#include <asm/dwarf2.h>
13#include <asm/thread_info.h>
14
15
16/*
17 * __get_user_X
18 *
19 * Inputs: %eax contains the address
20 *
21 * Outputs: %eax is error code (0 or -EFAULT)
22 * %edx contains zero-extended value
23 *
24 * These functions should not modify any other registers,
25 * as they get called from within inline assembly.
26 */
27
28.text
29ENTRY(__get_user_1)
30 CFI_STARTPROC
31 GET_THREAD_INFO(%edx)
32 cmpl TI_addr_limit(%edx),%eax
33 jae bad_get_user
341: movzbl (%eax),%edx
35 xorl %eax,%eax
36 ret
37 CFI_ENDPROC
38ENDPROC(__get_user_1)
39
40ENTRY(__get_user_2)
41 CFI_STARTPROC
42 addl $1,%eax
43 jc bad_get_user
44 GET_THREAD_INFO(%edx)
45 cmpl TI_addr_limit(%edx),%eax
46 jae bad_get_user
472: movzwl -1(%eax),%edx
48 xorl %eax,%eax
49 ret
50 CFI_ENDPROC
51ENDPROC(__get_user_2)
52
53ENTRY(__get_user_4)
54 CFI_STARTPROC
55 addl $3,%eax
56 jc bad_get_user
57 GET_THREAD_INFO(%edx)
58 cmpl TI_addr_limit(%edx),%eax
59 jae bad_get_user
603: movl -3(%eax),%edx
61 xorl %eax,%eax
62 ret
63 CFI_ENDPROC
64ENDPROC(__get_user_4)
65
66bad_get_user:
67 CFI_STARTPROC
68 xorl %edx,%edx
69 movl $-14,%eax
70 ret
71 CFI_ENDPROC
72END(bad_get_user)
73
74.section __ex_table,"a"
75 .long 1b,bad_get_user
76 .long 2b,bad_get_user
77 .long 3b,bad_get_user
78.previous
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c
index 57d043fa893e..d5a2b39f882b 100644
--- a/arch/x86/lib/msr-on-cpu.c
+++ b/arch/x86/lib/msr-on-cpu.c
@@ -30,10 +30,10 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
30 30
31 rv.msr_no = msr_no; 31 rv.msr_no = msr_no;
32 if (safe) { 32 if (safe) {
33 smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1); 33 smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
34 err = rv.err; 34 err = rv.err;
35 } else { 35 } else {
36 smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); 36 smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
37 } 37 }
38 *l = rv.l; 38 *l = rv.l;
39 *h = rv.h; 39 *h = rv.h;
@@ -64,10 +64,10 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
64 rv.l = l; 64 rv.l = l;
65 rv.h = h; 65 rv.h = h;
66 if (safe) { 66 if (safe) {
67 smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1); 67 smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
68 err = rv.err; 68 err = rv.err;
69 } else { 69 } else {
70 smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); 70 smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
71 } 71 }
72 72
73 return err; 73 return err;
diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser.S
index f58fba109d18..36b0d15ae6e9 100644
--- a/arch/x86/lib/putuser_32.S
+++ b/arch/x86/lib/putuser.S
@@ -2,6 +2,8 @@
2 * __put_user functions. 2 * __put_user functions.
3 * 3 *
4 * (C) Copyright 2005 Linus Torvalds 4 * (C) Copyright 2005 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen
6 * (C) Copyright 2008 Glauber Costa
5 * 7 *
6 * These functions have a non-standard call interface 8 * These functions have a non-standard call interface
7 * to make them more efficient, especially as they 9 * to make them more efficient, especially as they
@@ -11,6 +13,8 @@
11#include <linux/linkage.h> 13#include <linux/linkage.h>
12#include <asm/dwarf2.h> 14#include <asm/dwarf2.h>
13#include <asm/thread_info.h> 15#include <asm/thread_info.h>
16#include <asm/errno.h>
17#include <asm/asm.h>
14 18
15 19
16/* 20/*
@@ -26,73 +30,68 @@
26 */ 30 */
27 31
28#define ENTER CFI_STARTPROC ; \ 32#define ENTER CFI_STARTPROC ; \
29 pushl %ebx ; \ 33 GET_THREAD_INFO(%_ASM_BX)
30 CFI_ADJUST_CFA_OFFSET 4 ; \ 34#define EXIT ret ; \
31 CFI_REL_OFFSET ebx, 0 ; \
32 GET_THREAD_INFO(%ebx)
33#define EXIT popl %ebx ; \
34 CFI_ADJUST_CFA_OFFSET -4 ; \
35 CFI_RESTORE ebx ; \
36 ret ; \
37 CFI_ENDPROC 35 CFI_ENDPROC
38 36
39.text 37.text
40ENTRY(__put_user_1) 38ENTRY(__put_user_1)
41 ENTER 39 ENTER
42 cmpl TI_addr_limit(%ebx),%ecx 40 cmp TI_addr_limit(%_ASM_BX),%_ASM_CX
43 jae bad_put_user 41 jae bad_put_user
441: movb %al,(%ecx) 421: movb %al,(%_ASM_CX)
45 xorl %eax,%eax 43 xor %eax,%eax
46 EXIT 44 EXIT
47ENDPROC(__put_user_1) 45ENDPROC(__put_user_1)
48 46
49ENTRY(__put_user_2) 47ENTRY(__put_user_2)
50 ENTER 48 ENTER
51 movl TI_addr_limit(%ebx),%ebx 49 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
52 subl $1,%ebx 50 sub $1,%_ASM_BX
53 cmpl %ebx,%ecx 51 cmp %_ASM_BX,%_ASM_CX
54 jae bad_put_user 52 jae bad_put_user
552: movw %ax,(%ecx) 532: movw %ax,(%_ASM_CX)
56 xorl %eax,%eax 54 xor %eax,%eax
57 EXIT 55 EXIT
58ENDPROC(__put_user_2) 56ENDPROC(__put_user_2)
59 57
60ENTRY(__put_user_4) 58ENTRY(__put_user_4)
61 ENTER 59 ENTER
62 movl TI_addr_limit(%ebx),%ebx 60 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
63 subl $3,%ebx 61 sub $3,%_ASM_BX
64 cmpl %ebx,%ecx 62 cmp %_ASM_BX,%_ASM_CX
65 jae bad_put_user 63 jae bad_put_user
663: movl %eax,(%ecx) 643: movl %eax,(%_ASM_CX)
67 xorl %eax,%eax 65 xor %eax,%eax
68 EXIT 66 EXIT
69ENDPROC(__put_user_4) 67ENDPROC(__put_user_4)
70 68
71ENTRY(__put_user_8) 69ENTRY(__put_user_8)
72 ENTER 70 ENTER
73 movl TI_addr_limit(%ebx),%ebx 71 mov TI_addr_limit(%_ASM_BX),%_ASM_BX
74 subl $7,%ebx 72 sub $7,%_ASM_BX
75 cmpl %ebx,%ecx 73 cmp %_ASM_BX,%_ASM_CX
76 jae bad_put_user 74 jae bad_put_user
774: movl %eax,(%ecx) 754: mov %_ASM_AX,(%_ASM_CX)
785: movl %edx,4(%ecx) 76#ifdef CONFIG_X86_32
79 xorl %eax,%eax 775: movl %edx,4(%_ASM_CX)
78#endif
79 xor %eax,%eax
80 EXIT 80 EXIT
81ENDPROC(__put_user_8) 81ENDPROC(__put_user_8)
82 82
83bad_put_user: 83bad_put_user:
84 CFI_STARTPROC simple 84 CFI_STARTPROC
85 CFI_DEF_CFA esp, 2*4 85 movl $-EFAULT,%eax
86 CFI_OFFSET eip, -1*4
87 CFI_OFFSET ebx, -2*4
88 movl $-14,%eax
89 EXIT 86 EXIT
90END(bad_put_user) 87END(bad_put_user)
91 88
92.section __ex_table,"a" 89.section __ex_table,"a"
93 .long 1b,bad_put_user 90 _ASM_PTR 1b,bad_put_user
94 .long 2b,bad_put_user 91 _ASM_PTR 2b,bad_put_user
95 .long 3b,bad_put_user 92 _ASM_PTR 3b,bad_put_user
96 .long 4b,bad_put_user 93 _ASM_PTR 4b,bad_put_user
97 .long 5b,bad_put_user 94#ifdef CONFIG_X86_32
95 _ASM_PTR 5b,bad_put_user
96#endif
98.previous 97.previous
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
deleted file mode 100644
index 4989f5a8fa9b..000000000000
--- a/arch/x86/lib/putuser_64.S
+++ /dev/null
@@ -1,106 +0,0 @@
1/*
2 * __put_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen
6 *
7 * These functions have a non-standard call interface
8 * to make them more efficient, especially as they
9 * return an error value in addition to the "real"
10 * return value.
11 */
12
13/*
14 * __put_user_X
15 *
16 * Inputs: %rcx contains the address
17 * %rdx contains new value
18 *
19 * Outputs: %rax is error code (0 or -EFAULT)
20 *
21 * %r8 is destroyed.
22 *
23 * These functions should not modify any other registers,
24 * as they get called from within inline assembly.
25 */
26
27#include <linux/linkage.h>
28#include <asm/dwarf2.h>
29#include <asm/page.h>
30#include <asm/errno.h>
31#include <asm/asm-offsets.h>
32#include <asm/thread_info.h>
33
34 .text
35ENTRY(__put_user_1)
36 CFI_STARTPROC
37 GET_THREAD_INFO(%r8)
38 cmpq threadinfo_addr_limit(%r8),%rcx
39 jae bad_put_user
401: movb %dl,(%rcx)
41 xorl %eax,%eax
42 ret
43 CFI_ENDPROC
44ENDPROC(__put_user_1)
45
46ENTRY(__put_user_2)
47 CFI_STARTPROC
48 GET_THREAD_INFO(%r8)
49 addq $1,%rcx
50 jc 20f
51 cmpq threadinfo_addr_limit(%r8),%rcx
52 jae 20f
53 decq %rcx
542: movw %dx,(%rcx)
55 xorl %eax,%eax
56 ret
5720: decq %rcx
58 jmp bad_put_user
59 CFI_ENDPROC
60ENDPROC(__put_user_2)
61
62ENTRY(__put_user_4)
63 CFI_STARTPROC
64 GET_THREAD_INFO(%r8)
65 addq $3,%rcx
66 jc 30f
67 cmpq threadinfo_addr_limit(%r8),%rcx
68 jae 30f
69 subq $3,%rcx
703: movl %edx,(%rcx)
71 xorl %eax,%eax
72 ret
7330: subq $3,%rcx
74 jmp bad_put_user
75 CFI_ENDPROC
76ENDPROC(__put_user_4)
77
78ENTRY(__put_user_8)
79 CFI_STARTPROC
80 GET_THREAD_INFO(%r8)
81 addq $7,%rcx
82 jc 40f
83 cmpq threadinfo_addr_limit(%r8),%rcx
84 jae 40f
85 subq $7,%rcx
864: movq %rdx,(%rcx)
87 xorl %eax,%eax
88 ret
8940: subq $7,%rcx
90 jmp bad_put_user
91 CFI_ENDPROC
92ENDPROC(__put_user_8)
93
94bad_put_user:
95 CFI_STARTPROC
96 movq $(-EFAULT),%rax
97 ret
98 CFI_ENDPROC
99END(bad_put_user)
100
101.section __ex_table,"a"
102 .quad 1b,bad_put_user
103 .quad 2b,bad_put_user
104 .quad 3b,bad_put_user
105 .quad 4b,bad_put_user
106.previous
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
new file mode 100644
index 000000000000..650b11e00ecc
--- /dev/null
+++ b/arch/x86/lib/thunk_32.S
@@ -0,0 +1,47 @@
1/*
2 * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
3 * Copyright 2008 by Steven Rostedt, Red Hat, Inc
4 * (inspired by Andi Kleen's thunk_64.S)
5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */
7
8 #include <linux/linkage.h>
9
10#define ARCH_TRACE_IRQS_ON \
11 pushl %eax; \
12 pushl %ecx; \
13 pushl %edx; \
14 call trace_hardirqs_on; \
15 popl %edx; \
16 popl %ecx; \
17 popl %eax;
18
19#define ARCH_TRACE_IRQS_OFF \
20 pushl %eax; \
21 pushl %ecx; \
22 pushl %edx; \
23 call trace_hardirqs_off; \
24 popl %edx; \
25 popl %ecx; \
26 popl %eax;
27
28#ifdef CONFIG_TRACE_IRQFLAGS
29 /* put return address in eax (arg1) */
30 .macro thunk_ra name,func
31 .globl \name
32\name:
33 pushl %eax
34 pushl %ecx
35 pushl %edx
36 /* Place EIP in the arg1 */
37 movl 3*4(%esp), %eax
38 call \func
39 popl %edx
40 popl %ecx
41 popl %eax
42 ret
43 .endm
44
45 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
46 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index e009251d4e9f..bf9a7d5a5428 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -2,6 +2,7 @@
2 * Save registers before calling assembly functions. This avoids 2 * Save registers before calling assembly functions. This avoids
3 * disturbance of register allocation in some inline assembly constructs. 3 * disturbance of register allocation in some inline assembly constructs.
4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs. 4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
5 * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
5 * Subject to the GNU public license, v.2. No warranty of any kind. 6 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */ 7 */
7 8
@@ -42,8 +43,22 @@
42#endif 43#endif
43 44
44#ifdef CONFIG_TRACE_IRQFLAGS 45#ifdef CONFIG_TRACE_IRQFLAGS
45 thunk trace_hardirqs_on_thunk,trace_hardirqs_on 46 /* put return address in rdi (arg1) */
46 thunk trace_hardirqs_off_thunk,trace_hardirqs_off 47 .macro thunk_ra name,func
48 .globl \name
49\name:
50 CFI_STARTPROC
51 SAVE_ARGS
52 /* SAVE_ARGS pushs 9 elements */
53 /* the next element would be the rip */
54 movq 9*8(%rsp), %rdi
55 call \func
56 jmp restore
57 CFI_ENDPROC
58 .endm
59
60 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
61 thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller
47#endif 62#endif
48 63
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 0c89d1bb0287..f4df6e7c718b 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -158,3 +158,26 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
158} 158}
159EXPORT_SYMBOL(copy_in_user); 159EXPORT_SYMBOL(copy_in_user);
160 160
161/*
162 * Try to copy last bytes and clear the rest if needed.
163 * Since protection fault in copy_from/to_user is not a normal situation,
164 * it is not necessary to optimize tail handling.
165 */
166unsigned long
167copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
168{
169 char c;
170 unsigned zero_len;
171
172 for (; len; --len) {
173 if (__get_user_nocheck(c, from++, sizeof(char)))
174 break;
175 if (__put_user_nocheck(c, to++, sizeof(char)))
176 break;
177 }
178
179 for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
180 if (__put_user_nocheck(c, to++, sizeof(char)))
181 break;
182 return len;
183}