Diffstat (limited to 'arch/x86_64/lib')
-rw-r--r--  arch/x86_64/lib/Makefile      |   2
-rw-r--r--  arch/x86_64/lib/clear_page.S  |  47
-rw-r--r--  arch/x86_64/lib/copy_page.S   |  53
-rw-r--r--  arch/x86_64/lib/copy_user.S   | 153
-rw-r--r--  arch/x86_64/lib/csum-copy.S   |  26
-rw-r--r--  arch/x86_64/lib/getuser.S     |  32
-rw-r--r--  arch/x86_64/lib/iomap_copy.S  |  10
-rw-r--r--  arch/x86_64/lib/memcpy.S      |  69
-rw-r--r--  arch/x86_64/lib/memset.S      |  79
-rw-r--r--  arch/x86_64/lib/putuser.S     |  32
-rw-r--r--  arch/x86_64/lib/rwlock.S      |  38
-rw-r--r--  arch/x86_64/lib/thunk.S       |  43
12 files changed, 363 insertions(+), 221 deletions(-)
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
index ccef6ae747a3..b78d4170fce2 100644
--- a/arch/x86_64/lib/Makefile
+++ b/arch/x86_64/lib/Makefile
@@ -9,4 +9,4 @@ obj-y := io.o iomap_copy.o
 lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \
 	usercopy.o getuser.o putuser.o \
 	thunk.o clear_page.o copy_page.o bitstr.o bitops.o
-lib-y += memcpy.o memmove.o memset.o copy_user.o
+lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 1f81b79b796c..9a10a78bb4a4 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * Zero a page.
  * rdi	page
  */
-	.globl clear_page
-	.p2align 4
-clear_page:
+	ALIGN
+clear_page_c:
+	CFI_STARTPROC
+	movl $4096/8,%ecx
+	xorl %eax,%eax
+	rep stosq
+	ret
+	CFI_ENDPROC
+ENDPROC(clear_page)
+
+ENTRY(clear_page)
+	CFI_STARTPROC
 	xorl %eax,%eax
 	movl $4096/64,%ecx
 	.p2align 4
@@ -23,28 +35,25 @@ clear_page:
 	jnz	.Lloop
 	nop
 	ret
-clear_page_end:
+	CFI_ENDPROC
+.Lclear_page_end:
+ENDPROC(clear_page)
 
 	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb					/* jmp <disp8> */
+	.byte (clear_page_c - clear_page) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad clear_page
-	.quad clear_page_c
+	.quad 1b
 	.byte X86_FEATURE_REP_GOOD
-	.byte clear_page_end-clear_page
-	.byte clear_page_c_end-clear_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-clear_page_c:
-	movl $4096/8,%ecx
-	xorl %eax,%eax
-	rep
-	stosq
-	ret
-clear_page_c_end:
+	.byte .Lclear_page_end - clear_page
+	.byte 2b - 1b
 	.previous
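
Note on the clear_page conversion: the unrolled loop stays the default, and on CPUs flagged X86_FEATURE_REP_GOOD apply_alternatives() patches the two-byte jump from .altinstr_replacement over the start of clear_page, so the rep-stosq variant runs instead. The two strategies being chosen between amount to the following user-space C sketch (illustrative names, not kernel code):

#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>

#define PAGE_SIZE 4096

/* What the patched-in clear_page_c does: "movl $4096/8,%ecx; xorl %eax,%eax; rep stosq". */
static void clear_page_string(void *page)
{
	uint64_t *p = page;

	for (size_t i = 0; i < PAGE_SIZE / 8; i++)
		p[i] = 0;
}

/* The default unrolled path: 64 bytes of zero stores per iteration, as in .Lloop. */
static void clear_page_unrolled(void *page)
{
	uint64_t *p = page;

	for (size_t i = 0; i < PAGE_SIZE / 64; i++, p += 8) {
		p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
		p[4] = 0; p[5] = 0; p[6] = 0; p[7] = 0;
	}
}

/* Boot-time alternative selection, reduced to a runtime branch for illustration. */
static void clear_page_model(void *page, bool cpu_has_rep_good)
{
	if (cpu_has_rep_good)		/* X86_FEATURE_REP_GOOD */
		clear_page_string(page);
	else
		clear_page_unrolled(page);
}
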
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 8fa19d96a7ee..0ebb03b60e79 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
 
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
+	ALIGN
+copy_page_c:
+	CFI_STARTPROC
+	movl $4096/8,%ecx
+	rep movsq
+	ret
+	CFI_ENDPROC
+ENDPROC(copy_page_c)
+
 /* Don't use streaming store because it's better when the target
    ends up in cache. */
 
 /* Could vary the prefetch distance based on SMP/UP */
 
-	.globl copy_page
-	.p2align 4
-copy_page:
+ENTRY(copy_page)
+	CFI_STARTPROC
 	subq	$3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 3*8
 	movq	%rbx,(%rsp)
+	CFI_REL_OFFSET rbx, 0
 	movq	%r12,1*8(%rsp)
+	CFI_REL_OFFSET r12, 1*8
 	movq	%r13,2*8(%rsp)
+	CFI_REL_OFFSET r13, 2*8
 
 	movl	$(4096/64)-5,%ecx
 	.p2align 4
@@ -72,30 +88,33 @@ copy_page:
 	jnz	.Loop2
 
 	movq	(%rsp),%rbx
+	CFI_RESTORE rbx
 	movq	1*8(%rsp),%r12
+	CFI_RESTORE r12
 	movq	2*8(%rsp),%r13
+	CFI_RESTORE r13
 	addq	$3*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -3*8
 	ret
+.Lcopy_page_end:
+	CFI_ENDPROC
+ENDPROC(copy_page)
 
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb					/* jmp <disp8> */
+	.byte (copy_page_c - copy_page) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad copy_page
-	.quad copy_page_c
+	.quad 1b
 	.byte X86_FEATURE_REP_GOOD
-	.byte copy_page_c_end-copy_page_c
-	.byte copy_page_c_end-copy_page_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
-copy_page_c:
-	movl $4096/8,%ecx
-	rep
-	movsq
-	ret
-copy_page_c_end:
+	.byte .Lcopy_page_end - copy_page
+	.byte 2b - 1b
 	.previous
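
copy_page, clear_page, memcpy and memset all switch to the same replacement shape: .altinstr_replacement now carries only a two-byte short jump (0xeb plus an 8-bit displacement) to the *_c variant, and the displacement is written as (target - original) - (2f - 1b) because the replacement bytes are copied over the start of the original function, so the jump executes at original+2. A sketch of that displacement arithmetic (illustrative addresses; assumes the offset fits in a signed byte):

#include <stdint.h>
#include <assert.h>

/* The patched-in "jmp rel8" ends up at address `orig`; it must reach `alt`.
 * rel8 is relative to the end of the two-byte instruction, i.e. orig + 2,
 * and (2f - 1b) in the assembly is exactly that instruction length. */
static int8_t short_jmp_disp(uintptr_t orig, uintptr_t alt)
{
	intptr_t rel = (intptr_t)(alt - orig) - 2;   /* (alt - orig) - (2f - 1b) */

	assert(rel >= INT8_MIN && rel <= INT8_MAX);  /* must fit in a disp8 */
	return (int8_t)rel;
}

int main(void)
{
	/* e.g. a string variant placed 0x40 bytes before the original: jump backwards */
	return short_jmp_disp(0x1040, 0x1000) == -66 ? 0 : 1;
}
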
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b54..70bebd310408 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,56 +4,78 @@
  * Functions to copy from and to user space.
  */
 
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 #define FIX_ALIGNMENT 1
 
-	#include <asm/current.h>
-	#include <asm/asm-offsets.h>
-	#include <asm/thread_info.h>
-	#include <asm/cpufeature.h>
+#include <asm/current.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/cpufeature.h>
 
-/* Standard copy_to_user with segment limit checking */
-	.globl copy_to_user
-	.p2align 4
-copy_to_user:
-	GET_THREAD_INFO(%rax)
-	movq %rdi,%rcx
-	addq %rdx,%rcx
-	jc bad_to_user
-	cmpq threadinfo_addr_limit(%rax),%rcx
-	jae bad_to_user
-2:
+	.macro ALTERNATIVE_JUMP feature,orig,alt
+0:
 	.byte 0xe9	/* 32bit jump */
-	.long .Lcug-1f
+	.long \orig-1f	/* by default jump to orig */
 1:
-
 	.section .altinstr_replacement,"ax"
-3:	.byte 0xe9	/* replacement jmp with 8 bit immediate */
-	.long copy_user_generic_c-1b	/* offset */
+2:	.byte 0xe9	/* near jump with 32bit immediate */
+	.long \alt-1b	/* offset */   /* or alternatively to alt */
 	.previous
 	.section .altinstructions,"a"
 	.align 8
+	.quad 0b
 	.quad 2b
-	.quad 3b
-	.byte X86_FEATURE_REP_GOOD
+	.byte \feature			/* when feature is set */
 	.byte 5
 	.byte 5
 	.previous
+	.endm
+
+/* Standard copy_to_user with segment limit checking */
+ENTRY(copy_to_user)
+	CFI_STARTPROC
+	GET_THREAD_INFO(%rax)
+	movq %rdi,%rcx
+	addq %rdx,%rcx
+	jc bad_to_user
+	cmpq threadinfo_addr_limit(%rax),%rcx
+	jae bad_to_user
+	xorl %eax,%eax	/* clear zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	CFI_ENDPROC
+
+ENTRY(copy_user_generic)
+	CFI_STARTPROC
+	movl $1,%ecx	/* set zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	CFI_ENDPROC
+
+ENTRY(__copy_from_user_inatomic)
+	CFI_STARTPROC
+	xorl %ecx,%ecx	/* clear zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	CFI_ENDPROC
 
 /* Standard copy_from_user with segment limit checking */
-	.globl copy_from_user
-	.p2align 4
-copy_from_user:
+ENTRY(copy_from_user)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%rax)
 	movq %rsi,%rcx
 	addq %rdx,%rcx
 	jc bad_from_user
 	cmpq threadinfo_addr_limit(%rax),%rcx
 	jae bad_from_user
-	/* FALL THROUGH to copy_user_generic */
+	movl $1,%ecx	/* set zero flag */
+	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
+	CFI_ENDPROC
+ENDPROC(copy_from_user)
 
 	.section .fixup,"ax"
 	/* must zero dest */
 bad_from_user:
+	CFI_STARTPROC
 	movl %edx,%ecx
 	xorl %eax,%eax
 	rep
@@ -61,40 +83,32 @@ bad_from_user:
 bad_to_user:
 	movl %edx,%eax
 	ret
+	CFI_ENDPROC
+END(bad_from_user)
 	.previous
 
 
 /*
- * copy_user_generic - memory copy with exception handling.
+ * copy_user_generic_unrolled - memory copy with exception handling.
+ * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
  *
  * Input:
  * rdi destination
  * rsi source
  * rdx count
+ * ecx zero flag -- if true zero destination on error
  *
  * Output:
  * eax uncopied bytes or 0 if successful.
  */
-	.globl copy_user_generic
-	.p2align 4
-copy_user_generic:
-	.byte 0x66,0x66,0x90	/* 5 byte nop for replacement jump */
-	.byte 0x66,0x90
-1:
-	.section .altinstr_replacement,"ax"
-2:	.byte 0xe9	/* near jump with 32bit immediate */
-	.long copy_user_generic_c-1b	/* offset */
-	.previous
-	.section .altinstructions,"a"
-	.align 8
-	.quad copy_user_generic
-	.quad 2b
-	.byte X86_FEATURE_REP_GOOD
-	.byte 5
-	.byte 5
-	.previous
-.Lcug:
+ENTRY(copy_user_generic_unrolled)
+	CFI_STARTPROC
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
+	pushq %rcx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rcx, 0
 	xorl %eax,%eax	/*zero for the exception handler */
 
 #ifdef FIX_ALIGNMENT
@@ -168,9 +182,16 @@ copy_user_generic:
 	decl %ecx
 	jnz .Lloop_1
 
+	CFI_REMEMBER_STATE
 .Lende:
+	popq %rcx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rcx
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
+	CFI_RESTORE_STATE
 
 #ifdef FIX_ALIGNMENT
 	/* align destination */
@@ -252,6 +273,8 @@ copy_user_generic:
 	addl %ecx,%edx
 	/* edx: bytes to zero, rdi: dest, eax:zero */
 .Lzero_rest:
+	cmpl $0,(%rsp)
+	jz   .Le_zero
 	movq %rdx,%rcx
 .Le_byte:
 	xorl %eax,%eax
@@ -261,6 +284,9 @@ copy_user_generic:
 .Le_zero:
 	movq %rdx,%rax
 	jmp .Lende
+	CFI_ENDPROC
+ENDPROC(copy_user_generic)
+
 
 	/* Some CPUs run faster using the string copy instructions.
 	   This is also a lot simpler. Use them when possible.
@@ -270,6 +296,7 @@ copy_user_generic:
 /* rdi	destination
  * rsi source
  * rdx count
+ * ecx zero flag
  *
  * Output:
  * eax uncopied bytes or 0 if successfull.
@@ -280,22 +307,48 @@ copy_user_generic:
  * And more would be dangerous because both Intel and AMD have
  * errata with rep movsq > 4GB. If someone feels the need to fix
  * this please consider this.
  */
-copy_user_generic_c:
+ENTRY(copy_user_generic_string)
+	CFI_STARTPROC
+	movl %ecx,%r8d	/* save zero flag */
 	movl %edx,%ecx
 	shrl $3,%ecx
 	andl $7,%edx
+	jz   10f
 1:	rep
 	movsq
 	movl %edx,%ecx
 2:	rep
 	movsb
-4:	movl %ecx,%eax
+9:	movl %ecx,%eax
 	ret
-3:	lea (%rdx,%rcx,8),%rax
+
+	/* multiple of 8 byte */
+10:	rep
+	movsq
+	xor %eax,%eax
 	ret
 
+	/* exception handling */
+3:	lea (%rdx,%rcx,8),%rax	/* exception on quad loop */
+	jmp 6f
+5:	movl %ecx,%eax		/* exception on byte loop */
+	/* eax: left over bytes */
+6:	testl %r8d,%r8d		/* zero flag set? */
+	jz 7f
+	movl %eax,%ecx		/* initialize x86 loop counter */
+	push %rax
+	xorl %eax,%eax
+8:	rep
+	stosb			/* zero the rest */
+11:	pop %rax
+7:	ret
+	CFI_ENDPROC
+END(copy_user_generic_c)
+
 	.section __ex_table,"a"
 	.quad 1b,3b
-	.quad 2b,4b
+	.quad 2b,5b
+	.quad 8b,11b
+	.quad 10b,3b
 	.previous
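
The copy_user rework routes copy_to_user, copy_from_user, copy_user_generic and the new __copy_from_user_inatomic through one ALTERNATIVE_JUMP macro that picks copy_user_generic_unrolled or copy_user_generic_string at boot, and threads a "zero flag" through %ecx: when set, the uncopied tail of the destination is zeroed after a fault; __copy_from_user_inatomic clears it and skips the zeroing. The caller-visible contract is roughly this C sketch (illustrative only; the faulting_offset parameter is a stand-in for wherever the user access faults):

#include <stddef.h>
#include <string.h>

/* Sketch of the fault-handling contract, not the kernel implementation.
 * Returns the number of bytes NOT copied (0 on success), as the asm does in %eax;
 * faulting_offset models where the access faults (== len means no fault). */
static size_t copy_with_zero_flag(void *dst, const void *src, size_t len,
				  size_t faulting_offset, int zero_flag)
{
	size_t copied = faulting_offset < len ? faulting_offset : len;

	memcpy(dst, src, copied);			/* the part that succeeded */
	if (copied == len)
		return 0;

	if (zero_flag)					/* copy_from_user(): don't leak */
		memset((char *)dst + copied, 0, len - copied);	/* stale kernel memory */
	/* __copy_from_user_inatomic(): zero_flag == 0, the tail is left untouched */

	return len - copied;
}
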
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55ee896e..f0dba36578ea 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
  * License. See the file COPYING in the main directory of this archive
  * for more details. No warranty for anything given at all.
  */
- #include <linux/linkage.h>
- #include <asm/errno.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/errno.h>
 
 /*
  * Checksum copy with exception handling.
@@ -53,19 +54,24 @@
 	.endm
 
 
-	.globl csum_partial_copy_generic
-	.p2align 4
-csum_partial_copy_generic:
+ENTRY(csum_partial_copy_generic)
+	CFI_STARTPROC
 	cmpl $3*64,%edx
 	jle .Lignore
 
 .Lignore:
 	subq $7*8,%rsp
+	CFI_ADJUST_CFA_OFFSET 7*8
 	movq %rbx,2*8(%rsp)
+	CFI_REL_OFFSET rbx, 2*8
 	movq %r12,3*8(%rsp)
+	CFI_REL_OFFSET r12, 3*8
 	movq %r14,4*8(%rsp)
+	CFI_REL_OFFSET r14, 4*8
 	movq %r13,5*8(%rsp)
+	CFI_REL_OFFSET r13, 5*8
 	movq %rbp,6*8(%rsp)
+	CFI_REL_OFFSET rbp, 6*8
 
 	movq %r8,(%rsp)
 	movq %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
 	addl %ebx,%eax
 	adcl %r9d,%eax	/* carry */
 
+	CFI_REMEMBER_STATE
 .Lende:
 	movq 2*8(%rsp),%rbx
+	CFI_RESTORE rbx
 	movq 3*8(%rsp),%r12
+	CFI_RESTORE r12
 	movq 4*8(%rsp),%r14
+	CFI_RESTORE r14
 	movq 5*8(%rsp),%r13
+	CFI_RESTORE r13
 	movq 6*8(%rsp),%rbp
+	CFI_RESTORE rbp
 	addq $7*8,%rsp
+	CFI_ADJUST_CFA_OFFSET -7*8
 	ret
+	CFI_RESTORE_STATE
 
 	/* Exception handlers. Very simple, zeroing is done in the wrappers */
 .Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
 	jz .Lende
 	movl $-EFAULT,(%rax)
 	jmp .Lende
+	CFI_ENDPROC
+ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
index 3844d5e885a4..5448876261f8 100644
--- a/arch/x86_64/lib/getuser.S
+++ b/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/dwarf2.h>
 #include <asm/page.h>
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
 	.text
-	.p2align 4
-.globl __get_user_1
-__get_user_1:
+ENTRY(__get_user_1)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	cmpq threadinfo_addr_limit(%r8),%rcx
 	jae bad_get_user
 1:	movzb (%rcx),%edx
 	xorl %eax,%eax
 	ret
+	CFI_ENDPROC
+ENDPROC(__get_user_1)
 
-	.p2align 4
-.globl __get_user_2
-__get_user_2:
+ENTRY(__get_user_2)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $1,%rcx
 	jc 20f
@@ -57,10 +58,11 @@ __get_user_2:
 	ret
 20:	decq %rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_2)
 
-	.p2align 4
-.globl __get_user_4
-__get_user_4:
+ENTRY(__get_user_4)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $3,%rcx
 	jc 30f
@@ -72,10 +74,11 @@ __get_user_4:
 	ret
 30:	subq $3,%rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_4)
 
-	.p2align 4
-.globl __get_user_8
-__get_user_8:
+ENTRY(__get_user_8)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $7,%rcx
 	jc 40f
@@ -87,11 +90,16 @@ __get_user_8:
 	ret
 40:	subq $7,%rcx
 	jmp bad_get_user
+	CFI_ENDPROC
+ENDPROC(__get_user_8)
 
 bad_get_user:
+	CFI_STARTPROC
 	xorl %edx,%edx
 	movq $(-EFAULT),%rax
 	ret
+	CFI_ENDPROC
+END(bad_get_user)
 
 .section __ex_table,"a"
 	.quad 1b,bad_get_user
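
Each __get_user_N path is unchanged apart from the ENTRY/ENDPROC wrappers and CFI annotations: overflow-check the user address in %rcx, compare against the thread_info addr_limit, then do the access with an __ex_table fixup that lands in bad_get_user. In C the logic is approximately the sketch below (the real fault handling goes through the exception table, not a branch):

#include <stdint.h>
#include <errno.h>

struct thread_info_model { uintptr_t addr_limit; };	/* model only */

/* Roughly __get_user_4: returns 0 and fills *val, or -EFAULT.
 * In the asm: %rcx = user address, %rdx = value out, %rax = error code. */
static int get_user_4_model(const struct thread_info_model *ti,
			    uintptr_t uaddr, uint32_t *val)
{
	uintptr_t end = uaddr + 3;

	if (end < uaddr)		/* "addq $3,%rcx; jc 30f": address wrap-around */
		return -EFAULT;
	if (end >= ti->addr_limit)	/* "cmpq threadinfo_addr_limit(%r8),%rcx; jae bad_get_user" */
		return -EFAULT;

	/* The kernel load may still fault; the __ex_table entry then redirects
	 * execution to bad_get_user, which returns -EFAULT just like this path. */
	*val = *(const uint32_t *)uaddr;
	return 0;
}
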
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
index 8bbade5fea05..05a95e713da8 100644
--- a/arch/x86_64/lib/iomap_copy.S
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  */
 
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * override generic version in lib/iomap_copy.c
  */
-	.globl __iowrite32_copy
-	.p2align 4
-__iowrite32_copy:
+ENTRY(__iowrite32_copy)
+	CFI_STARTPROC
 	movl %edx,%ecx
 	rep movsd
 	ret
+	CFI_ENDPROC
+ENDPROC(__iowrite32_copy)
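
__iowrite32_copy still overrides the generic lib/iomap_copy.c version with a single rep movsd, copying %edx 32-bit words from %rsi to %rdi. Functionally it is close to this sketch:

#include <stddef.h>
#include <stdint.h>

/* Sketch of __iowrite32_copy semantics: copy `count` 32-bit words to a
 * memory-mapped I/O window; the asm is "movl %edx,%ecx; rep movsd". */
static void iowrite32_copy_model(volatile uint32_t *to, const uint32_t *from,
				 size_t count)
{
	for (size_t i = 0; i < count; i++)
		to[i] = from[i];
}
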
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b5554..967b22fa7d07 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
 /* Copyright 2002 Andi Kleen */
 
-	#include <asm/cpufeature.h>
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
+
 /*
  * memcpy - Copy a memory block.
  *
@@ -13,12 +17,26 @@
  * rax original destination
 
 
-	.globl __memcpy
-	.globl memcpy
-	.p2align 4
-__memcpy:
-memcpy:
+	ALIGN
+memcpy_c:
+	CFI_STARTPROC
+	movq %rdi,%rax
+	movl %edx,%ecx
+	shrl $3,%ecx
+	andl $7,%edx
+	rep movsq
+	movl %edx,%ecx
+	rep movsb
+	ret
+	CFI_ENDPROC
+ENDPROC(memcpy_c)
+
+ENTRY(__memcpy)
+ENTRY(memcpy)
+	CFI_STARTPROC
 	pushq %rbx
+	CFI_ADJUST_CFA_OFFSET 8
+	CFI_REL_OFFSET rbx, 0
 	movq %rdi,%rax
 
 	movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
 
 .Lende:
 	popq %rbx
+	CFI_ADJUST_CFA_OFFSET -8
+	CFI_RESTORE rbx
 	ret
 .Lfinal:
+	CFI_ENDPROC
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 	/* Some CPUs run faster using the string copy instructions.
 	   It is also a lot simpler. Use this when possible */
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb				/* jmp <disp8> */
+	.byte (memcpy_c - memcpy) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad memcpy
-	.quad memcpy_c
+	.quad 1b
 	.byte X86_FEATURE_REP_GOOD
-	.byte .Lfinal-memcpy
-	.byte memcpy_c_end-memcpy_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
- * rsi	source
- * rdx	count
- */
-memcpy_c:
-	movq %rdi,%rax
-	movl %edx,%ecx
-	shrl $3,%ecx
-	andl $7,%edx
-	rep
-	movsq
-	movl %edx,%ecx
-	rep
-	movsb
-	ret
-memcpy_c_end:
+	.byte .Lfinal - memcpy
+	.byte 2b - 1b
 	.previous
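
memcpy_c, the string variant now emitted ahead of the open-coded memcpy, moves eight bytes at a time with rep movsq and finishes the 0-7 byte tail with rep movsb, returning the original destination in %rax. Roughly, as a C sketch:

#include <stddef.h>
#include <stdint.h>

/* Sketch of the memcpy_c strategy: quadword copy plus byte tail, mirroring
 * "shrl $3,%ecx; rep movsq; movl %edx,%ecx; rep movsb". */
static void *memcpy_c_model(void *dst, const void *src, size_t len)
{
	uint64_t *d8 = dst;
	const uint64_t *s8 = src;
	size_t quads = len >> 3;		/* shrl $3,%ecx */
	size_t tail  = len & 7;			/* andl $7,%edx */

	while (quads--)				/* rep movsq */
		*d8++ = *s8++;

	unsigned char *d1 = (unsigned char *)d8;
	const unsigned char *s1 = (const unsigned char *)s8;

	while (tail--)				/* rep movsb */
		*d1++ = *s1++;

	return dst;				/* original destination, like %rax */
}
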
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index ad397f2c7de8..09ed1f6b0eaa 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@
 /* Copyright 2002 Andi Kleen, SuSE Labs */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/dwarf2.h>
+
 /*
  * ISO C memset - set a memory block to a byte value.
  *
@@ -8,11 +13,29 @@
  *
  * rax original destination
 
-	.globl __memset
-	.globl memset
-	.p2align 4
-memset:
-__memset:
+	ALIGN
+memset_c:
+	CFI_STARTPROC
+	movq %rdi,%r9
+	movl %edx,%r8d
+	andl $7,%r8d
+	movl %edx,%ecx
+	shrl $3,%ecx
+	/* expand byte value */
+	movzbl %sil,%esi
+	movabs $0x0101010101010101,%rax
+	mulq %rsi	/* with rax, clobbers rdx */
+	rep stosq
+	movl %r8d,%ecx
+	rep stosb
+	movq %r9,%rax
+	ret
+	CFI_ENDPROC
+ENDPROC(memset_c)
+
+ENTRY(memset)
+ENTRY(__memset)
+	CFI_STARTPROC
 	movq %rdi,%r10
 	movq %rdx,%r11
 
@@ -25,6 +48,7 @@ __memset:
 	movl %edi,%r9d
 	andl $7,%r9d
 	jnz .Lbad_alignment
+	CFI_REMEMBER_STATE
 .Lafter_bad_alignment:
 
 	movl %r11d,%ecx
@@ -75,6 +99,7 @@ __memset:
 	movq %r10,%rax
 	ret
 
+	CFI_RESTORE_STATE
 .Lbad_alignment:
 	cmpq $7,%r11
 	jbe .Lhandle_7
@@ -84,42 +109,26 @@ __memset:
 	addq %r8,%rdi
 	subq %r8,%r11
 	jmp .Lafter_bad_alignment
+.Lfinal:
+	CFI_ENDPROC
+ENDPROC(memset)
+ENDPROC(__memset)
 
 	/* Some CPUs run faster using the string instructions.
 	   It is also a lot simpler. Use this when possible */
 
 #include <asm/cpufeature.h>
 
+	.section .altinstr_replacement,"ax"
+1:	.byte 0xeb				/* jmp <disp8> */
+	.byte (memset_c - memset) - (2f - 1b)	/* offset */
+2:
+	.previous
 	.section .altinstructions,"a"
 	.align 8
 	.quad memset
-	.quad memset_c
+	.quad 1b
 	.byte X86_FEATURE_REP_GOOD
-	.byte memset_c_end-memset_c
-	.byte memset_c_end-memset_c
-	.previous
-
-	.section .altinstr_replacement,"ax"
- /* rdi	destination
- * rsi	value
- * rdx	count
- */
-memset_c:
-	movq %rdi,%r9
-	movl %edx,%r8d
-	andl $7,%r8d
-	movl %edx,%ecx
-	shrl $3,%ecx
-	/* expand byte value */
-	movzbl %sil,%esi
-	movabs $0x0101010101010101,%rax
-	mulq %rsi	/* with rax, clobbers rdx */
-	rep
-	stosq
-	movl %r8d,%ecx
-	rep
-	stosb
-	movq %r9,%rax
-	ret
-memset_c_end:
+	.byte .Lfinal - memset
+	.byte 2b - 1b
 	.previous
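
memset_c expands the fill byte into all eight lanes of %rax by multiplying the zero-extended byte with 0x0101010101010101 (for example 0xab * 0x0101010101010101 = 0xabababababababab), then stores quadwords with rep stosq and the 0-7 byte tail with rep stosb. A worked C sketch:

#include <stddef.h>
#include <stdint.h>

/* Sketch of memset_c: replicate the byte, then quadword plus byte-tail stores. */
static void *memset_c_model(void *dst, int c, size_t len)
{
	/* "movzbl %sil,%esi; movabs $0x0101010101010101,%rax; mulq %rsi" */
	uint64_t pattern = (uint64_t)(unsigned char)c * 0x0101010101010101ULL;
	uint64_t *d8 = dst;

	for (size_t i = 0; i < (len >> 3); i++)		/* rep stosq */
		d8[i] = pattern;

	unsigned char *d1 = (unsigned char *)dst + (len & ~(size_t)7);

	for (size_t i = 0; i < (len & 7); i++)		/* rep stosb */
		d1[i] = (unsigned char)c;

	return dst;					/* %r9 -> %rax: original destination */
}
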
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
index 7f5593974e2d..4989f5a8fa9b 100644
--- a/arch/x86_64/lib/putuser.S
+++ b/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/dwarf2.h>
 #include <asm/page.h>
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 
 	.text
-	.p2align 4
-.globl __put_user_1
-__put_user_1:
+ENTRY(__put_user_1)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	cmpq threadinfo_addr_limit(%r8),%rcx
 	jae bad_put_user
 1:	movb %dl,(%rcx)
 	xorl %eax,%eax
 	ret
+	CFI_ENDPROC
+ENDPROC(__put_user_1)
 
-	.p2align 4
-.globl __put_user_2
-__put_user_2:
+ENTRY(__put_user_2)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $1,%rcx
 	jc 20f
@@ -55,10 +56,11 @@ __put_user_2:
 	ret
 20:	decq %rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_2)
 
-	.p2align 4
-.globl __put_user_4
-__put_user_4:
+ENTRY(__put_user_4)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $3,%rcx
 	jc 30f
@@ -70,10 +72,11 @@ __put_user_4:
 	ret
 30:	subq $3,%rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_4)
 
-	.p2align 4
-.globl __put_user_8
-__put_user_8:
+ENTRY(__put_user_8)
+	CFI_STARTPROC
 	GET_THREAD_INFO(%r8)
 	addq $7,%rcx
 	jc 40f
@@ -85,10 +88,15 @@ __put_user_8:
 	ret
 40:	subq $7,%rcx
 	jmp bad_put_user
+	CFI_ENDPROC
+ENDPROC(__put_user_8)
 
 bad_put_user:
+	CFI_STARTPROC
 	movq $(-EFAULT),%rax
 	ret
+	CFI_ENDPROC
+END(bad_put_user)
 
 .section __ex_table,"a"
 	.quad 1b,bad_put_user
diff --git a/arch/x86_64/lib/rwlock.S b/arch/x86_64/lib/rwlock.S
new file mode 100644
index 000000000000..0cde1f807314
--- /dev/null
+++ b/arch/x86_64/lib/rwlock.S
@@ -0,0 +1,38 @@
+/* Slow paths of read/write spinlocks. */
+
+#include <linux/linkage.h>
+#include <asm/rwlock.h>
+#include <asm/alternative-asm.i>
+#include <asm/dwarf2.h>
+
+/* rdi:	pointer to rwlock_t */
+ENTRY(__write_lock_failed)
+	CFI_STARTPROC
+	LOCK_PREFIX
+	addl $RW_LOCK_BIAS,(%rdi)
+1:	rep
+	nop
+	cmpl $RW_LOCK_BIAS,(%rdi)
+	jne 1b
+	LOCK_PREFIX
+	subl $RW_LOCK_BIAS,(%rdi)
+	jnz __write_lock_failed
+	ret
+	CFI_ENDPROC
+END(__write_lock_failed)
+
+/* rdi:	pointer to rwlock_t */
+ENTRY(__read_lock_failed)
+	CFI_STARTPROC
+	LOCK_PREFIX
+	incl (%rdi)
+1:	rep
+	nop
+	cmpl $1,(%rdi)
+	js 1b
+	LOCK_PREFIX
+	decl (%rdi)
+	js __read_lock_failed
+	ret
+	CFI_ENDPROC
+END(__read_lock_failed)
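
The rwlock slow paths move out of thunk.S, take the lock pointer in %rdi instead of %rax, and use LOCK_PREFIX so the lock byte can be patched out on UP kernels. Their retry logic corresponds roughly to the following C sketch (a model of the algorithm only; RW_LOCK_BIAS is assumed to be the bias constant the x86 fast path uses):

#include <stdatomic.h>

#define RW_LOCK_BIAS 0x01000000		/* assumption: bias used by the x86 rwlock fast path */

/* __write_lock_failed: undo the failed bias subtraction, spin until the lock
 * reads as completely free, then retry the subtraction atomically. */
static void write_lock_failed_model(atomic_int *lock)
{
	for (;;) {
		atomic_fetch_add(lock, RW_LOCK_BIAS);		/* LOCK_PREFIX; addl */
		while (atomic_load(lock) != RW_LOCK_BIAS)	/* 1: rep nop; cmpl; jne 1b */
			;					/* cpu_relax() */
		if (atomic_fetch_sub(lock, RW_LOCK_BIAS) == RW_LOCK_BIAS)
			return;					/* subl left zero: lock taken */
	}
}

/* __read_lock_failed: undo the failed decrement, spin while a writer holds the
 * lock (count below one), then retry the decrement. */
static void read_lock_failed_model(atomic_int *lock)
{
	for (;;) {
		atomic_fetch_add(lock, 1);			/* LOCK_PREFIX; incl */
		while (atomic_load(lock) < 1)			/* 1: rep nop; cmpl $1; js 1b */
			;
		if (atomic_fetch_sub(lock, 1) > 0)		/* decl stayed non-negative */
			return;
	}
}
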
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index 332ea5dff916..0025535cac8d 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -1,10 +1,9 @@
- /*
+/*
  * Save registers before calling assembly functions. This avoids
  * disturbance of register allocation in some inline assembly constructs.
  * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
  * Subject to the GNU public license, v.2. No warranty of any kind.
- * $Id: thunk.S,v 1.2 2002/03/13 20:06:58 ak Exp $
- */
+ */
 
 	#include <linux/config.h>
 	#include <linux/linkage.h>
@@ -67,33 +66,3 @@ restore_norax:
 	RESTORE_ARGS 1
 	ret
 	CFI_ENDPROC
-
-#ifdef CONFIG_SMP
-/* Support for read/write spinlocks. */
-	.text
-/* rax:	pointer to rwlock_t */
-ENTRY(__write_lock_failed)
-	lock
-	addl $RW_LOCK_BIAS,(%rax)
-1:	rep
-	nop
-	cmpl $RW_LOCK_BIAS,(%rax)
-	jne 1b
-	lock
-	subl $RW_LOCK_BIAS,(%rax)
-	jnz __write_lock_failed
-	ret
-
-/* rax:	pointer to rwlock_t */
-ENTRY(__read_lock_failed)
-	lock
-	incl (%rax)
-1:	rep
-	nop
-	cmpl $1,(%rax)
-	js 1b
-	lock
-	decl (%rax)
-	js __read_lock_failed
-	ret
-#endif