Diffstat (limited to 'arch/x86_64/lib')
-rw-r--r--  arch/x86_64/lib/Makefile        2
-rw-r--r--  arch/x86_64/lib/clear_page.S   47
-rw-r--r--  arch/x86_64/lib/copy_page.S    53
-rw-r--r--  arch/x86_64/lib/copy_user.S   153
-rw-r--r--  arch/x86_64/lib/csum-copy.S    26
-rw-r--r--  arch/x86_64/lib/getuser.S      32
-rw-r--r--  arch/x86_64/lib/iomap_copy.S   10
-rw-r--r--  arch/x86_64/lib/memcpy.S       69
-rw-r--r--  arch/x86_64/lib/memset.S       79
-rw-r--r--  arch/x86_64/lib/putuser.S      32
-rw-r--r--  arch/x86_64/lib/rwlock.S       38
-rw-r--r--  arch/x86_64/lib/thunk.S        43
12 files changed, 363 insertions(+), 221 deletions(-)
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
index ccef6ae747a3..b78d4170fce2 100644
--- a/arch/x86_64/lib/Makefile
+++ b/arch/x86_64/lib/Makefile
@@ -9,4 +9,4 @@ obj-y := io.o iomap_copy.o | |||
9 | lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \ | 9 | lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \ |
10 | usercopy.o getuser.o putuser.o \ | 10 | usercopy.o getuser.o putuser.o \ |
11 | thunk.o clear_page.o copy_page.o bitstr.o bitops.o | 11 | thunk.o clear_page.o copy_page.o bitstr.o bitops.o |
12 | lib-y += memcpy.o memmove.o memset.o copy_user.o | 12 | lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o |
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 1f81b79b796c..9a10a78bb4a4 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/dwarf2.h> | ||
3 | |||
1 | /* | 4 | /* |
2 | * Zero a page. | 5 | * Zero a page. |
3 | * rdi page | 6 | * rdi page |
4 | */ | 7 | */ |
5 | .globl clear_page | 8 | ALIGN |
6 | .p2align 4 | 9 | clear_page_c: |
7 | clear_page: | 10 | CFI_STARTPROC |
11 | movl $4096/8,%ecx | ||
12 | xorl %eax,%eax | ||
13 | rep stosq | ||
14 | ret | ||
15 | CFI_ENDPROC | ||
16 | ENDPROC(clear_page) | ||
17 | |||
18 | ENTRY(clear_page) | ||
19 | CFI_STARTPROC | ||
8 | xorl %eax,%eax | 20 | xorl %eax,%eax |
9 | movl $4096/64,%ecx | 21 | movl $4096/64,%ecx |
10 | .p2align 4 | 22 | .p2align 4 |
@@ -23,28 +35,25 @@ clear_page: | |||
23 | jnz .Lloop | 35 | jnz .Lloop |
24 | nop | 36 | nop |
25 | ret | 37 | ret |
26 | clear_page_end: | 38 | CFI_ENDPROC |
39 | .Lclear_page_end: | ||
40 | ENDPROC(clear_page) | ||
27 | 41 | ||
28 | /* Some CPUs run faster using the string instructions. | 42 | /* Some CPUs run faster using the string instructions. |
29 | It is also a lot simpler. Use this when possible */ | 43 | It is also a lot simpler. Use this when possible */ |
30 | 44 | ||
31 | #include <asm/cpufeature.h> | 45 | #include <asm/cpufeature.h> |
32 | 46 | ||
47 | .section .altinstr_replacement,"ax" | ||
48 | 1: .byte 0xeb /* jmp <disp8> */ | ||
49 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ | ||
50 | 2: | ||
51 | .previous | ||
33 | .section .altinstructions,"a" | 52 | .section .altinstructions,"a" |
34 | .align 8 | 53 | .align 8 |
35 | .quad clear_page | 54 | .quad clear_page |
36 | .quad clear_page_c | 55 | .quad 1b |
37 | .byte X86_FEATURE_REP_GOOD | 56 | .byte X86_FEATURE_REP_GOOD |
38 | .byte clear_page_end-clear_page | 57 | .byte .Lclear_page_end - clear_page |
39 | .byte clear_page_c_end-clear_page_c | 58 | .byte 2b - 1b |
40 | .previous | ||
41 | |||
42 | .section .altinstr_replacement,"ax" | ||
43 | clear_page_c: | ||
44 | movl $4096/8,%ecx | ||
45 | xorl %eax,%eax | ||
46 | rep | ||
47 | stosq | ||
48 | ret | ||
49 | clear_page_c_end: | ||
50 | .previous | 59 | .previous |
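Side note: the .altinstructions records above (original address, replacement address, feature bit, original length, replacement length) are what lets the boot code patch in a jump to the rep-stos variant on CPUs with X86_FEATURE_REP_GOOD; the same pattern recurs in copy_page.S, memcpy.S and memset.S below. A rough C model of that record layout and of the patching step follows. Field and function names here are invented for illustration only; the real code lives in the kernel's alternative.h/alternative.c and uses optimized multi-byte NOPs for padding.

/*
 * Rough userspace model of an .altinstructions record and of what
 * apply_alternatives() conceptually does with it. Illustrative only.
 */
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

struct alt_instr_model {
	void    *instr;          /* .quad clear_page: start of original code */
	void    *replacement;    /* .quad 1b: the short "jmp clear_page_c"   */
	uint8_t  cpuid;          /* .byte X86_FEATURE_REP_GOOD               */
	uint8_t  instrlen;       /* .byte .Lclear_page_end - clear_page      */
	uint8_t  replacementlen; /* .byte 2b - 1b                            */
};

/* If the CPU has the feature, copy the replacement over the start of the
 * original and pad the remainder with NOPs (0x90 here; the kernel uses
 * smarter multi-byte NOP sequences). */
static void apply_one_alternative(struct alt_instr_model *a, bool cpu_has_feature)
{
	if (!cpu_has_feature)
		return;
	memcpy(a->instr, a->replacement, a->replacementlen);
	memset((uint8_t *)a->instr + a->replacementlen, 0x90,
	       a->instrlen - a->replacementlen);
}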
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 8fa19d96a7ee..0ebb03b60e79 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@ | |||
1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ | 1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
2 | 2 | ||
3 | #include <linux/config.h> | ||
4 | #include <linux/linkage.h> | ||
5 | #include <asm/dwarf2.h> | ||
6 | |||
7 | ALIGN | ||
8 | copy_page_c: | ||
9 | CFI_STARTPROC | ||
10 | movl $4096/8,%ecx | ||
11 | rep movsq | ||
12 | ret | ||
13 | CFI_ENDPROC | ||
14 | ENDPROC(copy_page_c) | ||
15 | |||
3 | /* Don't use streaming store because it's better when the target | 16 | /* Don't use streaming store because it's better when the target |
4 | ends up in cache. */ | 17 | ends up in cache. */ |
5 | 18 | ||
6 | /* Could vary the prefetch distance based on SMP/UP */ | 19 | /* Could vary the prefetch distance based on SMP/UP */ |
7 | 20 | ||
8 | .globl copy_page | 21 | ENTRY(copy_page) |
9 | .p2align 4 | 22 | CFI_STARTPROC |
10 | copy_page: | ||
11 | subq $3*8,%rsp | 23 | subq $3*8,%rsp |
24 | CFI_ADJUST_CFA_OFFSET 3*8 | ||
12 | movq %rbx,(%rsp) | 25 | movq %rbx,(%rsp) |
26 | CFI_REL_OFFSET rbx, 0 | ||
13 | movq %r12,1*8(%rsp) | 27 | movq %r12,1*8(%rsp) |
28 | CFI_REL_OFFSET r12, 1*8 | ||
14 | movq %r13,2*8(%rsp) | 29 | movq %r13,2*8(%rsp) |
30 | CFI_REL_OFFSET r13, 2*8 | ||
15 | 31 | ||
16 | movl $(4096/64)-5,%ecx | 32 | movl $(4096/64)-5,%ecx |
17 | .p2align 4 | 33 | .p2align 4 |
@@ -72,30 +88,33 @@ copy_page: | |||
72 | jnz .Loop2 | 88 | jnz .Loop2 |
73 | 89 | ||
74 | movq (%rsp),%rbx | 90 | movq (%rsp),%rbx |
91 | CFI_RESTORE rbx | ||
75 | movq 1*8(%rsp),%r12 | 92 | movq 1*8(%rsp),%r12 |
93 | CFI_RESTORE r12 | ||
76 | movq 2*8(%rsp),%r13 | 94 | movq 2*8(%rsp),%r13 |
95 | CFI_RESTORE r13 | ||
77 | addq $3*8,%rsp | 96 | addq $3*8,%rsp |
97 | CFI_ADJUST_CFA_OFFSET -3*8 | ||
78 | ret | 98 | ret |
99 | .Lcopy_page_end: | ||
100 | CFI_ENDPROC | ||
101 | ENDPROC(copy_page) | ||
79 | 102 | ||
80 | /* Some CPUs run faster using the string copy instructions. | 103 | /* Some CPUs run faster using the string copy instructions. |
81 | It is also a lot simpler. Use this when possible */ | 104 | It is also a lot simpler. Use this when possible */ |
82 | 105 | ||
83 | #include <asm/cpufeature.h> | 106 | #include <asm/cpufeature.h> |
84 | 107 | ||
108 | .section .altinstr_replacement,"ax" | ||
109 | 1: .byte 0xeb /* jmp <disp8> */ | ||
110 | .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ | ||
111 | 2: | ||
112 | .previous | ||
85 | .section .altinstructions,"a" | 113 | .section .altinstructions,"a" |
86 | .align 8 | 114 | .align 8 |
87 | .quad copy_page | 115 | .quad copy_page |
88 | .quad copy_page_c | 116 | .quad 1b |
89 | .byte X86_FEATURE_REP_GOOD | 117 | .byte X86_FEATURE_REP_GOOD |
90 | .byte copy_page_c_end-copy_page_c | 118 | .byte .Lcopy_page_end - copy_page |
91 | .byte copy_page_c_end-copy_page_c | 119 | .byte 2b - 1b |
92 | .previous | ||
93 | |||
94 | .section .altinstr_replacement,"ax" | ||
95 | copy_page_c: | ||
96 | movl $4096/8,%ecx | ||
97 | rep | ||
98 | movsq | ||
99 | ret | ||
100 | copy_page_c_end: | ||
101 | .previous | 120 | .previous |
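For reference, the copy_page_c path added above is simply a 4096/8 = 512 quadword string move, used on CPUs whose rep microcode beats the unrolled, prefetching loop. A minimal userspace C illustration of the same operation (not kernel code; assumes a GCC/Clang x86-64 target):

#include <stdint.h>

/* Copy one 4096-byte page with "rep movsq", as copy_page_c does. */
static void copy_page_rep_movsq(void *dst, const void *src)
{
	uint64_t cnt = 4096 / 8;

	asm volatile("rep movsq"
		     : "+D" (dst), "+S" (src), "+c" (cnt)
		     :
		     : "memory");
}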
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b54..70bebd310408 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,56 +4,78 @@ | |||
4 | * Functions to copy from and to user space. | 4 | * Functions to copy from and to user space. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/linkage.h> | ||
8 | #include <asm/dwarf2.h> | ||
9 | |||
7 | #define FIX_ALIGNMENT 1 | 10 | #define FIX_ALIGNMENT 1 |
8 | 11 | ||
9 | #include <asm/current.h> | 12 | #include <asm/current.h> |
10 | #include <asm/asm-offsets.h> | 13 | #include <asm/asm-offsets.h> |
11 | #include <asm/thread_info.h> | 14 | #include <asm/thread_info.h> |
12 | #include <asm/cpufeature.h> | 15 | #include <asm/cpufeature.h> |
13 | 16 | ||
14 | /* Standard copy_to_user with segment limit checking */ | 17 | .macro ALTERNATIVE_JUMP feature,orig,alt |
15 | .globl copy_to_user | 18 | 0: |
16 | .p2align 4 | ||
17 | copy_to_user: | ||
18 | GET_THREAD_INFO(%rax) | ||
19 | movq %rdi,%rcx | ||
20 | addq %rdx,%rcx | ||
21 | jc bad_to_user | ||
22 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
23 | jae bad_to_user | ||
24 | 2: | ||
25 | .byte 0xe9 /* 32bit jump */ | 19 | .byte 0xe9 /* 32bit jump */ |
26 | .long .Lcug-1f | 20 | .long \orig-1f /* by default jump to orig */ |
27 | 1: | 21 | 1: |
28 | |||
29 | .section .altinstr_replacement,"ax" | 22 | .section .altinstr_replacement,"ax" |
30 | 3: .byte 0xe9 /* replacement jmp with 8 bit immediate */ | 23 | 2: .byte 0xe9 /* near jump with 32bit immediate */ |
31 | .long copy_user_generic_c-1b /* offset */ | 24 | .long \alt-1b /* offset */ /* or alternatively to alt */ |
32 | .previous | 25 | .previous |
33 | .section .altinstructions,"a" | 26 | .section .altinstructions,"a" |
34 | .align 8 | 27 | .align 8 |
28 | .quad 0b | ||
35 | .quad 2b | 29 | .quad 2b |
36 | .quad 3b | 30 | .byte \feature /* when feature is set */ |
37 | .byte X86_FEATURE_REP_GOOD | ||
38 | .byte 5 | 31 | .byte 5 |
39 | .byte 5 | 32 | .byte 5 |
40 | .previous | 33 | .previous |
34 | .endm | ||
35 | |||
36 | /* Standard copy_to_user with segment limit checking */ | ||
37 | ENTRY(copy_to_user) | ||
38 | CFI_STARTPROC | ||
39 | GET_THREAD_INFO(%rax) | ||
40 | movq %rdi,%rcx | ||
41 | addq %rdx,%rcx | ||
42 | jc bad_to_user | ||
43 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
44 | jae bad_to_user | ||
45 | xorl %eax,%eax /* clear zero flag */ | ||
46 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
47 | CFI_ENDPROC | ||
48 | |||
49 | ENTRY(copy_user_generic) | ||
50 | CFI_STARTPROC | ||
51 | movl $1,%ecx /* set zero flag */ | ||
52 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
53 | CFI_ENDPROC | ||
54 | |||
55 | ENTRY(__copy_from_user_inatomic) | ||
56 | CFI_STARTPROC | ||
57 | xorl %ecx,%ecx /* clear zero flag */ | ||
58 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
59 | CFI_ENDPROC | ||
41 | 60 | ||
42 | /* Standard copy_from_user with segment limit checking */ | 61 | /* Standard copy_from_user with segment limit checking */ |
43 | .globl copy_from_user | 62 | ENTRY(copy_from_user) |
44 | .p2align 4 | 63 | CFI_STARTPROC |
45 | copy_from_user: | ||
46 | GET_THREAD_INFO(%rax) | 64 | GET_THREAD_INFO(%rax) |
47 | movq %rsi,%rcx | 65 | movq %rsi,%rcx |
48 | addq %rdx,%rcx | 66 | addq %rdx,%rcx |
49 | jc bad_from_user | 67 | jc bad_from_user |
50 | cmpq threadinfo_addr_limit(%rax),%rcx | 68 | cmpq threadinfo_addr_limit(%rax),%rcx |
51 | jae bad_from_user | 69 | jae bad_from_user |
52 | /* FALL THROUGH to copy_user_generic */ | 70 | movl $1,%ecx /* set zero flag */ |
71 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
72 | CFI_ENDPROC | ||
73 | ENDPROC(copy_from_user) | ||
53 | 74 | ||
54 | .section .fixup,"ax" | 75 | .section .fixup,"ax" |
55 | /* must zero dest */ | 76 | /* must zero dest */ |
56 | bad_from_user: | 77 | bad_from_user: |
78 | CFI_STARTPROC | ||
57 | movl %edx,%ecx | 79 | movl %edx,%ecx |
58 | xorl %eax,%eax | 80 | xorl %eax,%eax |
59 | rep | 81 | rep |
@@ -61,40 +83,32 @@ bad_from_user: | |||
61 | bad_to_user: | 83 | bad_to_user: |
62 | movl %edx,%eax | 84 | movl %edx,%eax |
63 | ret | 85 | ret |
86 | CFI_ENDPROC | ||
87 | END(bad_from_user) | ||
64 | .previous | 88 | .previous |
65 | 89 | ||
66 | 90 | ||
67 | /* | 91 | /* |
68 | * copy_user_generic - memory copy with exception handling. | 92 | * copy_user_generic_unrolled - memory copy with exception handling. |
93 | * This version is for CPUs like P4 that don't have efficient micro code for rep movsq | ||
69 | * | 94 | * |
70 | * Input: | 95 | * Input: |
71 | * rdi destination | 96 | * rdi destination |
72 | * rsi source | 97 | * rsi source |
73 | * rdx count | 98 | * rdx count |
99 | * ecx zero flag -- if true zero destination on error | ||
74 | * | 100 | * |
75 | * Output: | 101 | * Output: |
76 | * eax uncopied bytes or 0 if successful. | 102 | * eax uncopied bytes or 0 if successful. |
77 | */ | 103 | */ |
78 | .globl copy_user_generic | 104 | ENTRY(copy_user_generic_unrolled) |
79 | .p2align 4 | 105 | CFI_STARTPROC |
80 | copy_user_generic: | ||
81 | .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ | ||
82 | .byte 0x66,0x90 | ||
83 | 1: | ||
84 | .section .altinstr_replacement,"ax" | ||
85 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | ||
86 | .long copy_user_generic_c-1b /* offset */ | ||
87 | .previous | ||
88 | .section .altinstructions,"a" | ||
89 | .align 8 | ||
90 | .quad copy_user_generic | ||
91 | .quad 2b | ||
92 | .byte X86_FEATURE_REP_GOOD | ||
93 | .byte 5 | ||
94 | .byte 5 | ||
95 | .previous | ||
96 | .Lcug: | ||
97 | pushq %rbx | 106 | pushq %rbx |
107 | CFI_ADJUST_CFA_OFFSET 8 | ||
108 | CFI_REL_OFFSET rbx, 0 | ||
109 | pushq %rcx | ||
110 | CFI_ADJUST_CFA_OFFSET 8 | ||
111 | CFI_REL_OFFSET rcx, 0 | ||
98 | xorl %eax,%eax /*zero for the exception handler */ | 112 | xorl %eax,%eax /*zero for the exception handler */ |
99 | 113 | ||
100 | #ifdef FIX_ALIGNMENT | 114 | #ifdef FIX_ALIGNMENT |
@@ -168,9 +182,16 @@ copy_user_generic: | |||
168 | decl %ecx | 182 | decl %ecx |
169 | jnz .Lloop_1 | 183 | jnz .Lloop_1 |
170 | 184 | ||
185 | CFI_REMEMBER_STATE | ||
171 | .Lende: | 186 | .Lende: |
187 | popq %rcx | ||
188 | CFI_ADJUST_CFA_OFFSET -8 | ||
189 | CFI_RESTORE rcx | ||
172 | popq %rbx | 190 | popq %rbx |
191 | CFI_ADJUST_CFA_OFFSET -8 | ||
192 | CFI_RESTORE rbx | ||
173 | ret | 193 | ret |
194 | CFI_RESTORE_STATE | ||
174 | 195 | ||
175 | #ifdef FIX_ALIGNMENT | 196 | #ifdef FIX_ALIGNMENT |
176 | /* align destination */ | 197 | /* align destination */ |
@@ -252,6 +273,8 @@ copy_user_generic: | |||
252 | addl %ecx,%edx | 273 | addl %ecx,%edx |
253 | /* edx: bytes to zero, rdi: dest, eax:zero */ | 274 | /* edx: bytes to zero, rdi: dest, eax:zero */ |
254 | .Lzero_rest: | 275 | .Lzero_rest: |
276 | cmpl $0,(%rsp) | ||
277 | jz .Le_zero | ||
255 | movq %rdx,%rcx | 278 | movq %rdx,%rcx |
256 | .Le_byte: | 279 | .Le_byte: |
257 | xorl %eax,%eax | 280 | xorl %eax,%eax |
@@ -261,6 +284,9 @@ copy_user_generic: | |||
261 | .Le_zero: | 284 | .Le_zero: |
262 | movq %rdx,%rax | 285 | movq %rdx,%rax |
263 | jmp .Lende | 286 | jmp .Lende |
287 | CFI_ENDPROC | ||
288 | ENDPROC(copy_user_generic) | ||
289 | |||
264 | 290 | ||
265 | /* Some CPUs run faster using the string copy instructions. | 291 | /* Some CPUs run faster using the string copy instructions. |
266 | This is also a lot simpler. Use them when possible. | 292 | This is also a lot simpler. Use them when possible. |
@@ -270,6 +296,7 @@ copy_user_generic: | |||
270 | /* rdi destination | 296 | /* rdi destination |
271 | * rsi source | 297 | * rsi source |
272 | * rdx count | 298 | * rdx count |
299 | * ecx zero flag | ||
273 | * | 300 | * |
274 | * Output: | 301 | * Output: |
275 | * eax uncopied bytes or 0 if successfull. | 302 | * eax uncopied bytes or 0 if successfull. |
@@ -280,22 +307,48 @@ copy_user_generic: | |||
280 | * And more would be dangerous because both Intel and AMD have | 307 | * And more would be dangerous because both Intel and AMD have |
281 | * errata with rep movsq > 4GB. If someone feels the need to fix | 308 | * errata with rep movsq > 4GB. If someone feels the need to fix |
282 | * this please consider this. | 309 | * this please consider this. |
283 | */ | 310 | */ |
284 | copy_user_generic_c: | 311 | ENTRY(copy_user_generic_string) |
312 | CFI_STARTPROC | ||
313 | movl %ecx,%r8d /* save zero flag */ | ||
285 | movl %edx,%ecx | 314 | movl %edx,%ecx |
286 | shrl $3,%ecx | 315 | shrl $3,%ecx |
287 | andl $7,%edx | 316 | andl $7,%edx |
317 | jz 10f | ||
288 | 1: rep | 318 | 1: rep |
289 | movsq | 319 | movsq |
290 | movl %edx,%ecx | 320 | movl %edx,%ecx |
291 | 2: rep | 321 | 2: rep |
292 | movsb | 322 | movsb |
293 | 4: movl %ecx,%eax | 323 | 9: movl %ecx,%eax |
294 | ret | 324 | ret |
295 | 3: lea (%rdx,%rcx,8),%rax | 325 | |
326 | /* multiple of 8 byte */ | ||
327 | 10: rep | ||
328 | movsq | ||
329 | xor %eax,%eax | ||
296 | ret | 330 | ret |
297 | 331 | ||
332 | /* exception handling */ | ||
333 | 3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ | ||
334 | jmp 6f | ||
335 | 5: movl %ecx,%eax /* exception on byte loop */ | ||
336 | /* eax: left over bytes */ | ||
337 | 6: testl %r8d,%r8d /* zero flag set? */ | ||
338 | jz 7f | ||
339 | movl %eax,%ecx /* initialize x86 loop counter */ | ||
340 | push %rax | ||
341 | xorl %eax,%eax | ||
342 | 8: rep | ||
343 | stosb /* zero the rest */ | ||
344 | 11: pop %rax | ||
345 | 7: ret | ||
346 | CFI_ENDPROC | ||
347 | END(copy_user_generic_c) | ||
348 | |||
298 | .section __ex_table,"a" | 349 | .section __ex_table,"a" |
299 | .quad 1b,3b | 350 | .quad 1b,3b |
300 | .quad 2b,4b | 351 | .quad 2b,5b |
352 | .quad 8b,11b | ||
353 | .quad 10b,3b | ||
301 | .previous | 354 | .previous |
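The entry points added above funnel into the same two workers (unrolled and string) and differ mainly in the "zero flag" documented in the header comment as arriving in %ecx: if set, the destination tail is cleared when a fault cuts the copy short. A hedged C sketch of what that convention means to callers; the names and the userspace stand-in below are illustrative, not the kernel's real wrappers, and a userspace build cannot model the fault itself:

#include <stddef.h>
#include <string.h>

/* Stand-in for the assembly workers: returns bytes NOT copied (always 0
 * here, since userspace cannot fault the way the kernel path can). */
static size_t raw_user_copy(void *dst, const void *src, size_t len,
			    int zero_on_fault)
{
	(void)zero_on_fault;
	memcpy(dst, src, len);
	return 0;
}

static size_t copy_from_user_model(void *dst, const void *src, size_t len)
{
	/* copy_from_user sets the flag: a partially filled kernel buffer
	 * must not expose stale data, so the uncopied tail is zeroed. */
	return raw_user_copy(dst, src, len, 1);
}

static size_t copy_from_user_inatomic_model(void *dst, const void *src, size_t len)
{
	/* __copy_from_user_inatomic clears the flag: the caller deals with
	 * the short copy itself, so no zeroing pass is done on a fault. */
	return raw_user_copy(dst, src, len, 0);
}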
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55ee896e..f0dba36578ea 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@ | |||
5 | * License. See the file COPYING in the main directory of this archive | 5 | * License. See the file COPYING in the main directory of this archive |
6 | * for more details. No warranty for anything given at all. | 6 | * for more details. No warranty for anything given at all. |
7 | */ | 7 | */ |
8 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
9 | #include <asm/errno.h> | 9 | #include <asm/dwarf2.h> |
10 | #include <asm/errno.h> | ||
10 | 11 | ||
11 | /* | 12 | /* |
12 | * Checksum copy with exception handling. | 13 | * Checksum copy with exception handling. |
@@ -53,19 +54,24 @@ | |||
53 | .endm | 54 | .endm |
54 | 55 | ||
55 | 56 | ||
56 | .globl csum_partial_copy_generic | 57 | ENTRY(csum_partial_copy_generic) |
57 | .p2align 4 | 58 | CFI_STARTPROC |
58 | csum_partial_copy_generic: | ||
59 | cmpl $3*64,%edx | 59 | cmpl $3*64,%edx |
60 | jle .Lignore | 60 | jle .Lignore |
61 | 61 | ||
62 | .Lignore: | 62 | .Lignore: |
63 | subq $7*8,%rsp | 63 | subq $7*8,%rsp |
64 | CFI_ADJUST_CFA_OFFSET 7*8 | ||
64 | movq %rbx,2*8(%rsp) | 65 | movq %rbx,2*8(%rsp) |
66 | CFI_REL_OFFSET rbx, 2*8 | ||
65 | movq %r12,3*8(%rsp) | 67 | movq %r12,3*8(%rsp) |
68 | CFI_REL_OFFSET r12, 3*8 | ||
66 | movq %r14,4*8(%rsp) | 69 | movq %r14,4*8(%rsp) |
70 | CFI_REL_OFFSET r14, 4*8 | ||
67 | movq %r13,5*8(%rsp) | 71 | movq %r13,5*8(%rsp) |
72 | CFI_REL_OFFSET r13, 5*8 | ||
68 | movq %rbp,6*8(%rsp) | 73 | movq %rbp,6*8(%rsp) |
74 | CFI_REL_OFFSET rbp, 6*8 | ||
69 | 75 | ||
70 | movq %r8,(%rsp) | 76 | movq %r8,(%rsp) |
71 | movq %r9,1*8(%rsp) | 77 | movq %r9,1*8(%rsp) |
@@ -208,14 +214,22 @@ csum_partial_copy_generic: | |||
208 | addl %ebx,%eax | 214 | addl %ebx,%eax |
209 | adcl %r9d,%eax /* carry */ | 215 | adcl %r9d,%eax /* carry */ |
210 | 216 | ||
217 | CFI_REMEMBER_STATE | ||
211 | .Lende: | 218 | .Lende: |
212 | movq 2*8(%rsp),%rbx | 219 | movq 2*8(%rsp),%rbx |
220 | CFI_RESTORE rbx | ||
213 | movq 3*8(%rsp),%r12 | 221 | movq 3*8(%rsp),%r12 |
222 | CFI_RESTORE r12 | ||
214 | movq 4*8(%rsp),%r14 | 223 | movq 4*8(%rsp),%r14 |
224 | CFI_RESTORE r14 | ||
215 | movq 5*8(%rsp),%r13 | 225 | movq 5*8(%rsp),%r13 |
226 | CFI_RESTORE r13 | ||
216 | movq 6*8(%rsp),%rbp | 227 | movq 6*8(%rsp),%rbp |
228 | CFI_RESTORE rbp | ||
217 | addq $7*8,%rsp | 229 | addq $7*8,%rsp |
230 | CFI_ADJUST_CFA_OFFSET -7*8 | ||
218 | ret | 231 | ret |
232 | CFI_RESTORE_STATE | ||
219 | 233 | ||
220 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | 234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ |
221 | .Lbad_source: | 235 | .Lbad_source: |
@@ -231,3 +245,5 @@ csum_partial_copy_generic: | |||
231 | jz .Lende | 245 | jz .Lende |
232 | movl $-EFAULT,(%rax) | 246 | movl $-EFAULT,(%rax) |
233 | jmp .Lende | 247 | jmp .Lende |
248 | CFI_ENDPROC | ||
249 | ENDPROC(csum_partial_copy_generic) | ||
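csum_partial_copy_generic accumulates a one's-complement checksum with add/adc while it copies, and its exception handlers only report -EFAULT (zeroing is left to the C wrappers). As a reference point, a plain C version of the kind of checksum it computes, illustrative only and not the kernel implementation:

#include <stdint.h>
#include <stddef.h>

/* One's-complement sum over the buffer, folded down to 16 bits. */
static uint16_t csum_fold_model(const uint8_t *buf, size_t len)
{
	uint64_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint64_t)(buf[i] | (buf[i + 1] << 8));
	if (i < len)			/* trailing odd byte */
		sum += buf[i];

	while (sum >> 16)		/* fold carries back into low 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);

	return (uint16_t)~sum;
}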
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
index 3844d5e885a4..5448876261f8 100644
--- a/arch/x86_64/lib/getuser.S
+++ b/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@ | |||
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <linux/linkage.h> | 29 | #include <linux/linkage.h> |
30 | #include <asm/dwarf2.h> | ||
30 | #include <asm/page.h> | 31 | #include <asm/page.h> |
31 | #include <asm/errno.h> | 32 | #include <asm/errno.h> |
32 | #include <asm/asm-offsets.h> | 33 | #include <asm/asm-offsets.h> |
33 | #include <asm/thread_info.h> | 34 | #include <asm/thread_info.h> |
34 | 35 | ||
35 | .text | 36 | .text |
36 | .p2align 4 | 37 | ENTRY(__get_user_1) |
37 | .globl __get_user_1 | 38 | CFI_STARTPROC |
38 | __get_user_1: | ||
39 | GET_THREAD_INFO(%r8) | 39 | GET_THREAD_INFO(%r8) |
40 | cmpq threadinfo_addr_limit(%r8),%rcx | 40 | cmpq threadinfo_addr_limit(%r8),%rcx |
41 | jae bad_get_user | 41 | jae bad_get_user |
42 | 1: movzb (%rcx),%edx | 42 | 1: movzb (%rcx),%edx |
43 | xorl %eax,%eax | 43 | xorl %eax,%eax |
44 | ret | 44 | ret |
45 | CFI_ENDPROC | ||
46 | ENDPROC(__get_user_1) | ||
45 | 47 | ||
46 | .p2align 4 | 48 | ENTRY(__get_user_2) |
47 | .globl __get_user_2 | 49 | CFI_STARTPROC |
48 | __get_user_2: | ||
49 | GET_THREAD_INFO(%r8) | 50 | GET_THREAD_INFO(%r8) |
50 | addq $1,%rcx | 51 | addq $1,%rcx |
51 | jc 20f | 52 | jc 20f |
@@ -57,10 +58,11 @@ __get_user_2: | |||
57 | ret | 58 | ret |
58 | 20: decq %rcx | 59 | 20: decq %rcx |
59 | jmp bad_get_user | 60 | jmp bad_get_user |
61 | CFI_ENDPROC | ||
62 | ENDPROC(__get_user_2) | ||
60 | 63 | ||
61 | .p2align 4 | 64 | ENTRY(__get_user_4) |
62 | .globl __get_user_4 | 65 | CFI_STARTPROC |
63 | __get_user_4: | ||
64 | GET_THREAD_INFO(%r8) | 66 | GET_THREAD_INFO(%r8) |
65 | addq $3,%rcx | 67 | addq $3,%rcx |
66 | jc 30f | 68 | jc 30f |
@@ -72,10 +74,11 @@ __get_user_4: | |||
72 | ret | 74 | ret |
73 | 30: subq $3,%rcx | 75 | 30: subq $3,%rcx |
74 | jmp bad_get_user | 76 | jmp bad_get_user |
77 | CFI_ENDPROC | ||
78 | ENDPROC(__get_user_4) | ||
75 | 79 | ||
76 | .p2align 4 | 80 | ENTRY(__get_user_8) |
77 | .globl __get_user_8 | 81 | CFI_STARTPROC |
78 | __get_user_8: | ||
79 | GET_THREAD_INFO(%r8) | 82 | GET_THREAD_INFO(%r8) |
80 | addq $7,%rcx | 83 | addq $7,%rcx |
81 | jc 40f | 84 | jc 40f |
@@ -87,11 +90,16 @@ __get_user_8: | |||
87 | ret | 90 | ret |
88 | 40: subq $7,%rcx | 91 | 40: subq $7,%rcx |
89 | jmp bad_get_user | 92 | jmp bad_get_user |
93 | CFI_ENDPROC | ||
94 | ENDPROC(__get_user_8) | ||
90 | 95 | ||
91 | bad_get_user: | 96 | bad_get_user: |
97 | CFI_STARTPROC | ||
92 | xorl %edx,%edx | 98 | xorl %edx,%edx |
93 | movq $(-EFAULT),%rax | 99 | movq $(-EFAULT),%rax |
94 | ret | 100 | ret |
101 | CFI_ENDPROC | ||
102 | END(bad_get_user) | ||
95 | 103 | ||
96 | .section __ex_table,"a" | 104 | .section __ex_table,"a" |
97 | .quad 1b,bad_get_user | 105 | .quad 1b,bad_get_user |
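Each __get_user_N above performs the same limit check: advance %rcx to the last byte of the access, bail out on carry (the address wrapped), and bail out if it reaches thread_info->addr_limit. A small C model of that check; the limit constant below is illustrative, not the kernel's actual USER_DS value:

#include <stdint.h>
#include <stdbool.h>

#define ADDR_LIMIT_MODEL 0x00007ffffffff000ULL	/* illustrative user limit */

static bool access_ok_model(uint64_t addr, uint64_t size)
{
	uint64_t last = addr + (size - 1);	/* size is 1, 2, 4 or 8 */

	if (last < addr)			/* the "jc" case: wrapped */
		return false;
	return last < ADDR_LIMIT_MODEL;		/* "cmpq addr_limit / jae" */
}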
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
index 8bbade5fea05..05a95e713da8 100644
--- a/arch/x86_64/lib/iomap_copy.S
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@ | |||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | 15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/linkage.h> | ||
19 | #include <asm/dwarf2.h> | ||
20 | |||
18 | /* | 21 | /* |
19 | * override generic version in lib/iomap_copy.c | 22 | * override generic version in lib/iomap_copy.c |
20 | */ | 23 | */ |
21 | .globl __iowrite32_copy | 24 | ENTRY(__iowrite32_copy) |
22 | .p2align 4 | 25 | CFI_STARTPROC |
23 | __iowrite32_copy: | ||
24 | movl %edx,%ecx | 26 | movl %edx,%ecx |
25 | rep movsd | 27 | rep movsd |
26 | ret | 28 | ret |
29 | CFI_ENDPROC | ||
30 | ENDPROC(__iowrite32_copy) | ||
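__iowrite32_copy exists to guarantee that the MMIO target is written strictly in 32-bit units, which "rep movsd" provides and which some devices require. A portable C equivalent for comparison (a sketch, not the kernel routine):

#include <stdint.h>
#include <stddef.h>

/* Copy "count" 32-bit words to an MMIO region using 32-bit stores only. */
static void iowrite32_copy_model(volatile uint32_t *to, const uint32_t *from,
				 size_t count)
{
	while (count--)
		*to++ = *from++;
}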
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b5554..967b22fa7d07 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@ | |||
1 | /* Copyright 2002 Andi Kleen */ | 1 | /* Copyright 2002 Andi Kleen */ |
2 | 2 | ||
3 | #include <asm/cpufeature.h> | 3 | #include <linux/config.h> |
4 | #include <linux/linkage.h> | ||
5 | #include <asm/dwarf2.h> | ||
6 | #include <asm/cpufeature.h> | ||
7 | |||
4 | /* | 8 | /* |
5 | * memcpy - Copy a memory block. | 9 | * memcpy - Copy a memory block. |
6 | * | 10 | * |
@@ -13,12 +17,26 @@ | |||
13 | * rax original destination | 17 | * rax original destination |
14 | */ | 18 | */ |
15 | 19 | ||
16 | .globl __memcpy | 20 | ALIGN |
17 | .globl memcpy | 21 | memcpy_c: |
18 | .p2align 4 | 22 | CFI_STARTPROC |
19 | __memcpy: | 23 | movq %rdi,%rax |
20 | memcpy: | 24 | movl %edx,%ecx |
25 | shrl $3,%ecx | ||
26 | andl $7,%edx | ||
27 | rep movsq | ||
28 | movl %edx,%ecx | ||
29 | rep movsb | ||
30 | ret | ||
31 | CFI_ENDPROC | ||
32 | ENDPROC(memcpy_c) | ||
33 | |||
34 | ENTRY(__memcpy) | ||
35 | ENTRY(memcpy) | ||
36 | CFI_STARTPROC | ||
21 | pushq %rbx | 37 | pushq %rbx |
38 | CFI_ADJUST_CFA_OFFSET 8 | ||
39 | CFI_REL_OFFSET rbx, 0 | ||
22 | movq %rdi,%rax | 40 | movq %rdi,%rax |
23 | 41 | ||
24 | movl %edx,%ecx | 42 | movl %edx,%ecx |
@@ -86,36 +104,27 @@ memcpy: | |||
86 | 104 | ||
87 | .Lende: | 105 | .Lende: |
88 | popq %rbx | 106 | popq %rbx |
107 | CFI_ADJUST_CFA_OFFSET -8 | ||
108 | CFI_RESTORE rbx | ||
89 | ret | 109 | ret |
90 | .Lfinal: | 110 | .Lfinal: |
111 | CFI_ENDPROC | ||
112 | ENDPROC(memcpy) | ||
113 | ENDPROC(__memcpy) | ||
91 | 114 | ||
92 | /* Some CPUs run faster using the string copy instructions. | 115 | /* Some CPUs run faster using the string copy instructions. |
93 | It is also a lot simpler. Use this when possible */ | 116 | It is also a lot simpler. Use this when possible */ |
94 | 117 | ||
118 | .section .altinstr_replacement,"ax" | ||
119 | 1: .byte 0xeb /* jmp <disp8> */ | ||
120 | .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ | ||
121 | 2: | ||
122 | .previous | ||
95 | .section .altinstructions,"a" | 123 | .section .altinstructions,"a" |
96 | .align 8 | 124 | .align 8 |
97 | .quad memcpy | 125 | .quad memcpy |
98 | .quad memcpy_c | 126 | .quad 1b |
99 | .byte X86_FEATURE_REP_GOOD | 127 | .byte X86_FEATURE_REP_GOOD |
100 | .byte .Lfinal-memcpy | 128 | .byte .Lfinal - memcpy |
101 | .byte memcpy_c_end-memcpy_c | 129 | .byte 2b - 1b |
102 | .previous | ||
103 | |||
104 | .section .altinstr_replacement,"ax" | ||
105 | /* rdi destination | ||
106 | * rsi source | ||
107 | * rdx count | ||
108 | */ | ||
109 | memcpy_c: | ||
110 | movq %rdi,%rax | ||
111 | movl %edx,%ecx | ||
112 | shrl $3,%ecx | ||
113 | andl $7,%edx | ||
114 | rep | ||
115 | movsq | ||
116 | movl %edx,%ecx | ||
117 | rep | ||
118 | movsb | ||
119 | ret | ||
120 | memcpy_c_end: | ||
121 | .previous | 130 | .previous |
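The memcpy_c path moved above splits the count into len/8 quadwords handled by "rep movsq" and a 0-7 byte tail handled by "rep movsb". The same split expressed in plain C, for illustration only (the real fast path leans on the rep microcode rather than a compiled loop):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

static void *memcpy_quad_then_byte(void *dst, const void *src, size_t len)
{
	uint8_t *d = dst;
	const uint8_t *s = src;
	size_t quads = len >> 3;
	size_t tail  = len & 7;

	while (quads--) {
		uint64_t tmp;
		memcpy(&tmp, s, 8);	/* 8-byte chunk, alignment-safe */
		memcpy(d, &tmp, 8);
		s += 8;
		d += 8;
	}
	while (tail--)
		*d++ = *s++;

	return dst;	/* like memcpy, return the original destination */
}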
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index ad397f2c7de8..09ed1f6b0eaa 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ | 1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ |
2 | |||
3 | #include <linux/config.h> | ||
4 | #include <linux/linkage.h> | ||
5 | #include <asm/dwarf2.h> | ||
6 | |||
2 | /* | 7 | /* |
3 | * ISO C memset - set a memory block to a byte value. | 8 | * ISO C memset - set a memory block to a byte value. |
4 | * | 9 | * |
@@ -8,11 +13,29 @@ | |||
8 | * | 13 | * |
9 | * rax original destination | 14 | * rax original destination |
10 | */ | 15 | */ |
11 | .globl __memset | 16 | ALIGN |
12 | .globl memset | 17 | memset_c: |
13 | .p2align 4 | 18 | CFI_STARTPROC |
14 | memset: | 19 | movq %rdi,%r9 |
15 | __memset: | 20 | movl %edx,%r8d |
21 | andl $7,%r8d | ||
22 | movl %edx,%ecx | ||
23 | shrl $3,%ecx | ||
24 | /* expand byte value */ | ||
25 | movzbl %sil,%esi | ||
26 | movabs $0x0101010101010101,%rax | ||
27 | mulq %rsi /* with rax, clobbers rdx */ | ||
28 | rep stosq | ||
29 | movl %r8d,%ecx | ||
30 | rep stosb | ||
31 | movq %r9,%rax | ||
32 | ret | ||
33 | CFI_ENDPROC | ||
34 | ENDPROC(memset_c) | ||
35 | |||
36 | ENTRY(memset) | ||
37 | ENTRY(__memset) | ||
38 | CFI_STARTPROC | ||
16 | movq %rdi,%r10 | 39 | movq %rdi,%r10 |
17 | movq %rdx,%r11 | 40 | movq %rdx,%r11 |
18 | 41 | ||
@@ -25,6 +48,7 @@ __memset: | |||
25 | movl %edi,%r9d | 48 | movl %edi,%r9d |
26 | andl $7,%r9d | 49 | andl $7,%r9d |
27 | jnz .Lbad_alignment | 50 | jnz .Lbad_alignment |
51 | CFI_REMEMBER_STATE | ||
28 | .Lafter_bad_alignment: | 52 | .Lafter_bad_alignment: |
29 | 53 | ||
30 | movl %r11d,%ecx | 54 | movl %r11d,%ecx |
@@ -75,6 +99,7 @@ __memset: | |||
75 | movq %r10,%rax | 99 | movq %r10,%rax |
76 | ret | 100 | ret |
77 | 101 | ||
102 | CFI_RESTORE_STATE | ||
78 | .Lbad_alignment: | 103 | .Lbad_alignment: |
79 | cmpq $7,%r11 | 104 | cmpq $7,%r11 |
80 | jbe .Lhandle_7 | 105 | jbe .Lhandle_7 |
@@ -84,42 +109,26 @@ __memset: | |||
84 | addq %r8,%rdi | 109 | addq %r8,%rdi |
85 | subq %r8,%r11 | 110 | subq %r8,%r11 |
86 | jmp .Lafter_bad_alignment | 111 | jmp .Lafter_bad_alignment |
112 | .Lfinal: | ||
113 | CFI_ENDPROC | ||
114 | ENDPROC(memset) | ||
115 | ENDPROC(__memset) | ||
87 | 116 | ||
88 | /* Some CPUs run faster using the string instructions. | 117 | /* Some CPUs run faster using the string instructions. |
89 | It is also a lot simpler. Use this when possible */ | 118 | It is also a lot simpler. Use this when possible */ |
90 | 119 | ||
91 | #include <asm/cpufeature.h> | 120 | #include <asm/cpufeature.h> |
92 | 121 | ||
122 | .section .altinstr_replacement,"ax" | ||
123 | 1: .byte 0xeb /* jmp <disp8> */ | ||
124 | .byte (memset_c - memset) - (2f - 1b) /* offset */ | ||
125 | 2: | ||
126 | .previous | ||
93 | .section .altinstructions,"a" | 127 | .section .altinstructions,"a" |
94 | .align 8 | 128 | .align 8 |
95 | .quad memset | 129 | .quad memset |
96 | .quad memset_c | 130 | .quad 1b |
97 | .byte X86_FEATURE_REP_GOOD | 131 | .byte X86_FEATURE_REP_GOOD |
98 | .byte memset_c_end-memset_c | 132 | .byte .Lfinal - memset |
99 | .byte memset_c_end-memset_c | 133 | .byte 2b - 1b |
100 | .previous | ||
101 | |||
102 | .section .altinstr_replacement,"ax" | ||
103 | /* rdi destination | ||
104 | * rsi value | ||
105 | * rdx count | ||
106 | */ | ||
107 | memset_c: | ||
108 | movq %rdi,%r9 | ||
109 | movl %edx,%r8d | ||
110 | andl $7,%r8d | ||
111 | movl %edx,%ecx | ||
112 | shrl $3,%ecx | ||
113 | /* expand byte value */ | ||
114 | movzbl %sil,%esi | ||
115 | movabs $0x0101010101010101,%rax | ||
116 | mulq %rsi /* with rax, clobbers rdx */ | ||
117 | rep | ||
118 | stosq | ||
119 | movl %r8d,%ecx | ||
120 | rep | ||
121 | stosb | ||
122 | movq %r9,%rax | ||
123 | ret | ||
124 | memset_c_end: | ||
125 | .previous | 134 | .previous |
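memset_c replicates the fill byte into a full quadword by multiplying it with 0x0101010101010101, then stores len/8 quadwords followed by the len%8 byte tail. The same trick in plain C (a sketch, not the kernel code):

#include <stdint.h>
#include <stddef.h>
#include <string.h>

static void *memset_quad_model(void *dst, int c, size_t len)
{
	/* one copy of the byte in each of the eight byte lanes */
	uint64_t pattern = (uint8_t)c * 0x0101010101010101ULL;
	uint8_t *d = dst;
	size_t quads = len >> 3;
	size_t tail  = len & 7;

	while (quads--) {
		memcpy(d, &pattern, 8);	/* unaligned-safe 8-byte store */
		d += 8;
	}
	while (tail--)
		*d++ = (uint8_t)c;

	return dst;
}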
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
index 7f5593974e2d..4989f5a8fa9b 100644
--- a/arch/x86_64/lib/putuser.S
+++ b/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
28 | #include <asm/dwarf2.h> | ||
28 | #include <asm/page.h> | 29 | #include <asm/page.h> |
29 | #include <asm/errno.h> | 30 | #include <asm/errno.h> |
30 | #include <asm/asm-offsets.h> | 31 | #include <asm/asm-offsets.h> |
31 | #include <asm/thread_info.h> | 32 | #include <asm/thread_info.h> |
32 | 33 | ||
33 | .text | 34 | .text |
34 | .p2align 4 | 35 | ENTRY(__put_user_1) |
35 | .globl __put_user_1 | 36 | CFI_STARTPROC |
36 | __put_user_1: | ||
37 | GET_THREAD_INFO(%r8) | 37 | GET_THREAD_INFO(%r8) |
38 | cmpq threadinfo_addr_limit(%r8),%rcx | 38 | cmpq threadinfo_addr_limit(%r8),%rcx |
39 | jae bad_put_user | 39 | jae bad_put_user |
40 | 1: movb %dl,(%rcx) | 40 | 1: movb %dl,(%rcx) |
41 | xorl %eax,%eax | 41 | xorl %eax,%eax |
42 | ret | 42 | ret |
43 | CFI_ENDPROC | ||
44 | ENDPROC(__put_user_1) | ||
43 | 45 | ||
44 | .p2align 4 | 46 | ENTRY(__put_user_2) |
45 | .globl __put_user_2 | 47 | CFI_STARTPROC |
46 | __put_user_2: | ||
47 | GET_THREAD_INFO(%r8) | 48 | GET_THREAD_INFO(%r8) |
48 | addq $1,%rcx | 49 | addq $1,%rcx |
49 | jc 20f | 50 | jc 20f |
@@ -55,10 +56,11 @@ __put_user_2: | |||
55 | ret | 56 | ret |
56 | 20: decq %rcx | 57 | 20: decq %rcx |
57 | jmp bad_put_user | 58 | jmp bad_put_user |
59 | CFI_ENDPROC | ||
60 | ENDPROC(__put_user_2) | ||
58 | 61 | ||
59 | .p2align 4 | 62 | ENTRY(__put_user_4) |
60 | .globl __put_user_4 | 63 | CFI_STARTPROC |
61 | __put_user_4: | ||
62 | GET_THREAD_INFO(%r8) | 64 | GET_THREAD_INFO(%r8) |
63 | addq $3,%rcx | 65 | addq $3,%rcx |
64 | jc 30f | 66 | jc 30f |
@@ -70,10 +72,11 @@ __put_user_4: | |||
70 | ret | 72 | ret |
71 | 30: subq $3,%rcx | 73 | 30: subq $3,%rcx |
72 | jmp bad_put_user | 74 | jmp bad_put_user |
75 | CFI_ENDPROC | ||
76 | ENDPROC(__put_user_4) | ||
73 | 77 | ||
74 | .p2align 4 | 78 | ENTRY(__put_user_8) |
75 | .globl __put_user_8 | 79 | CFI_STARTPROC |
76 | __put_user_8: | ||
77 | GET_THREAD_INFO(%r8) | 80 | GET_THREAD_INFO(%r8) |
78 | addq $7,%rcx | 81 | addq $7,%rcx |
79 | jc 40f | 82 | jc 40f |
@@ -85,10 +88,15 @@ __put_user_8: | |||
85 | ret | 88 | ret |
86 | 40: subq $7,%rcx | 89 | 40: subq $7,%rcx |
87 | jmp bad_put_user | 90 | jmp bad_put_user |
91 | CFI_ENDPROC | ||
92 | ENDPROC(__put_user_8) | ||
88 | 93 | ||
89 | bad_put_user: | 94 | bad_put_user: |
95 | CFI_STARTPROC | ||
90 | movq $(-EFAULT),%rax | 96 | movq $(-EFAULT),%rax |
91 | ret | 97 | ret |
98 | CFI_ENDPROC | ||
99 | END(bad_put_user) | ||
92 | 100 | ||
93 | .section __ex_table,"a" | 101 | .section __ex_table,"a" |
94 | .quad 1b,bad_put_user | 102 | .quad 1b,bad_put_user |
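The __ex_table pairs above (".quad 1b,bad_put_user") record, for each tagged store, where execution should resume if that store faults; the page-fault handler looks the faulting address up in this table. A rough C model of such an entry and of the lookup, with names chosen here for illustration:

#include <stdint.h>
#include <stddef.h>

struct ex_table_entry_model {
	uint64_t insn;		/* address of the possibly-faulting mov */
	uint64_t fixup;		/* address to resume at, e.g. bad_put_user */
};

/* Conceptually what the fault handler does: find the faulting RIP in the
 * table and, if present, redirect execution to the fixup code (which
 * returns -EFAULT) instead of treating it as a fatal kernel fault. */
static uint64_t search_extable_model(const struct ex_table_entry_model *tbl,
				     size_t n, uint64_t faulting_rip)
{
	for (size_t i = 0; i < n; i++)
		if (tbl[i].insn == faulting_rip)
			return tbl[i].fixup;
	return 0;		/* no fixup entry: genuine bad access */
}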
diff --git a/arch/x86_64/lib/rwlock.S b/arch/x86_64/lib/rwlock.S
new file mode 100644
index 000000000000..0cde1f807314
--- /dev/null
+++ b/arch/x86_64/lib/rwlock.S
@@ -0,0 +1,38 @@ | |||
1 | /* Slow paths of read/write spinlocks. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <asm/rwlock.h> | ||
5 | #include <asm/alternative-asm.i> | ||
6 | #include <asm/dwarf2.h> | ||
7 | |||
8 | /* rdi: pointer to rwlock_t */ | ||
9 | ENTRY(__write_lock_failed) | ||
10 | CFI_STARTPROC | ||
11 | LOCK_PREFIX | ||
12 | addl $RW_LOCK_BIAS,(%rdi) | ||
13 | 1: rep | ||
14 | nop | ||
15 | cmpl $RW_LOCK_BIAS,(%rdi) | ||
16 | jne 1b | ||
17 | LOCK_PREFIX | ||
18 | subl $RW_LOCK_BIAS,(%rdi) | ||
19 | jnz __write_lock_failed | ||
20 | ret | ||
21 | CFI_ENDPROC | ||
22 | END(__write_lock_failed) | ||
23 | |||
24 | /* rdi: pointer to rwlock_t */ | ||
25 | ENTRY(__read_lock_failed) | ||
26 | CFI_STARTPROC | ||
27 | LOCK_PREFIX | ||
28 | incl (%rdi) | ||
29 | 1: rep | ||
30 | nop | ||
31 | cmpl $1,(%rdi) | ||
32 | js 1b | ||
33 | LOCK_PREFIX | ||
34 | decl (%rdi) | ||
35 | js __read_lock_failed | ||
36 | ret | ||
37 | CFI_ENDPROC | ||
38 | END(__read_lock_failed) | ||
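The new rwlock.S holds only the slow paths of the biased rwlock: the inlined fast path subtracts RW_LOCK_BIAS for a writer or 1 for a reader, and on failure these routines undo the change, spin with "rep; nop" (pause), and retry. A userspace C sketch of the overall scheme; the bias value and helpers are chosen here for illustration and this is not the kernel's lock implementation:

#define RW_LOCK_BIAS_MODEL 0x01000000

static void cpu_relax_model(void)
{
	asm volatile("rep; nop");	/* the pause the slow paths spin on */
}

static void write_lock_model(volatile int *lock)
{
	for (;;) {
		/* fast path: claim the whole bias in one atomic op */
		if (__sync_sub_and_fetch(lock, RW_LOCK_BIAS_MODEL) == 0)
			return;
		/* slow path (__write_lock_failed): undo, spin, retry */
		__sync_add_and_fetch(lock, RW_LOCK_BIAS_MODEL);
		while (*lock != RW_LOCK_BIAS_MODEL)
			cpu_relax_model();
	}
}

static void read_lock_model(volatile int *lock)
{
	for (;;) {
		/* fast path: readers just take one count */
		if (__sync_sub_and_fetch(lock, 1) >= 0)
			return;
		/* slow path (__read_lock_failed): undo, spin, retry */
		__sync_add_and_fetch(lock, 1);
		while (*lock <= 0)
			cpu_relax_model();
	}
}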
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index 332ea5dff916..0025535cac8d 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -1,10 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * Save registers before calling assembly functions. This avoids | 2 | * Save registers before calling assembly functions. This avoids |
3 | * disturbance of register allocation in some inline assembly constructs. | 3 | * disturbance of register allocation in some inline assembly constructs. |
4 | * Copyright 2001,2002 by Andi Kleen, SuSE Labs. | 4 | * Copyright 2001,2002 by Andi Kleen, SuSE Labs. |
5 | * Subject to the GNU public license, v.2. No warranty of any kind. | 5 | * Subject to the GNU public license, v.2. No warranty of any kind. |
6 | * $Id: thunk.S,v 1.2 2002/03/13 20:06:58 ak Exp $ | 6 | */ |
7 | */ | ||
8 | 7 | ||
9 | #include <linux/config.h> | 8 | #include <linux/config.h> |
10 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
@@ -67,33 +66,3 @@ restore_norax: | |||
67 | RESTORE_ARGS 1 | 66 | RESTORE_ARGS 1 |
68 | ret | 67 | ret |
69 | CFI_ENDPROC | 68 | CFI_ENDPROC |
70 | |||
71 | #ifdef CONFIG_SMP | ||
72 | /* Support for read/write spinlocks. */ | ||
73 | .text | ||
74 | /* rax: pointer to rwlock_t */ | ||
75 | ENTRY(__write_lock_failed) | ||
76 | lock | ||
77 | addl $RW_LOCK_BIAS,(%rax) | ||
78 | 1: rep | ||
79 | nop | ||
80 | cmpl $RW_LOCK_BIAS,(%rax) | ||
81 | jne 1b | ||
82 | lock | ||
83 | subl $RW_LOCK_BIAS,(%rax) | ||
84 | jnz __write_lock_failed | ||
85 | ret | ||
86 | |||
87 | /* rax: pointer to rwlock_t */ | ||
88 | ENTRY(__read_lock_failed) | ||
89 | lock | ||
90 | incl (%rax) | ||
91 | 1: rep | ||
92 | nop | ||
93 | cmpl $1,(%rax) | ||
94 | js 1b | ||
95 | lock | ||
96 | decl (%rax) | ||
97 | js __read_lock_failed | ||
98 | ret | ||
99 | #endif | ||