diff options
-rw-r--r-- | arch/x86_64/lib/clear_page.S | 47 | ||||
-rw-r--r-- | arch/x86_64/lib/copy_page.S | 53 | ||||
-rw-r--r-- | arch/x86_64/lib/copy_user.S | 39 | ||||
-rw-r--r-- | arch/x86_64/lib/csum-copy.S | 26 | ||||
-rw-r--r-- | arch/x86_64/lib/getuser.S | 32 | ||||
-rw-r--r-- | arch/x86_64/lib/iomap_copy.S | 10 | ||||
-rw-r--r-- | arch/x86_64/lib/memcpy.S | 69 | ||||
-rw-r--r-- | arch/x86_64/lib/memset.S | 79 | ||||
-rw-r--r-- | arch/x86_64/lib/putuser.S | 32 |
9 files changed, 244 insertions, 143 deletions
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S index 1f81b79b796..9a10a78bb4a 100644 --- a/arch/x86_64/lib/clear_page.S +++ b/arch/x86_64/lib/clear_page.S | |||
@@ -1,10 +1,22 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <asm/dwarf2.h> | ||
3 | |||
1 | /* | 4 | /* |
2 | * Zero a page. | 5 | * Zero a page. |
3 | * rdi page | 6 | * rdi page |
4 | */ | 7 | */ |
5 | .globl clear_page | 8 | ALIGN |
6 | .p2align 4 | 9 | clear_page_c: |
7 | clear_page: | 10 | CFI_STARTPROC |
11 | movl $4096/8,%ecx | ||
12 | xorl %eax,%eax | ||
13 | rep stosq | ||
14 | ret | ||
15 | CFI_ENDPROC | ||
16 | ENDPROC(clear_page) | ||
17 | |||
18 | ENTRY(clear_page) | ||
19 | CFI_STARTPROC | ||
8 | xorl %eax,%eax | 20 | xorl %eax,%eax |
9 | movl $4096/64,%ecx | 21 | movl $4096/64,%ecx |
10 | .p2align 4 | 22 | .p2align 4 |
@@ -23,28 +35,25 @@ clear_page: | |||
23 | jnz .Lloop | 35 | jnz .Lloop |
24 | nop | 36 | nop |
25 | ret | 37 | ret |
26 | clear_page_end: | 38 | CFI_ENDPROC |
39 | .Lclear_page_end: | ||
40 | ENDPROC(clear_page) | ||
27 | 41 | ||
28 | /* Some CPUs run faster using the string instructions. | 42 | /* Some CPUs run faster using the string instructions. |
29 | It is also a lot simpler. Use this when possible */ | 43 | It is also a lot simpler. Use this when possible */ |
30 | 44 | ||
31 | #include <asm/cpufeature.h> | 45 | #include <asm/cpufeature.h> |
32 | 46 | ||
47 | .section .altinstr_replacement,"ax" | ||
48 | 1: .byte 0xeb /* jmp <disp8> */ | ||
49 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ | ||
50 | 2: | ||
51 | .previous | ||
33 | .section .altinstructions,"a" | 52 | .section .altinstructions,"a" |
34 | .align 8 | 53 | .align 8 |
35 | .quad clear_page | 54 | .quad clear_page |
36 | .quad clear_page_c | 55 | .quad 1b |
37 | .byte X86_FEATURE_REP_GOOD | 56 | .byte X86_FEATURE_REP_GOOD |
38 | .byte clear_page_end-clear_page | 57 | .byte .Lclear_page_end - clear_page |
39 | .byte clear_page_c_end-clear_page_c | 58 | .byte 2b - 1b |
40 | .previous | ||
41 | |||
42 | .section .altinstr_replacement,"ax" | ||
43 | clear_page_c: | ||
44 | movl $4096/8,%ecx | ||
45 | xorl %eax,%eax | ||
46 | rep | ||
47 | stosq | ||
48 | ret | ||
49 | clear_page_c_end: | ||
50 | .previous | 59 | .previous |
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S index 8fa19d96a7e..0ebb03b60e7 100644 --- a/arch/x86_64/lib/copy_page.S +++ b/arch/x86_64/lib/copy_page.S | |||
@@ -1,17 +1,33 @@ | |||
1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ | 1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ |
2 | 2 | ||
3 | #include <linux/config.h> | ||
4 | #include <linux/linkage.h> | ||
5 | #include <asm/dwarf2.h> | ||
6 | |||
7 | ALIGN | ||
8 | copy_page_c: | ||
9 | CFI_STARTPROC | ||
10 | movl $4096/8,%ecx | ||
11 | rep movsq | ||
12 | ret | ||
13 | CFI_ENDPROC | ||
14 | ENDPROC(copy_page_c) | ||
15 | |||
3 | /* Don't use streaming store because it's better when the target | 16 | /* Don't use streaming store because it's better when the target |
4 | ends up in cache. */ | 17 | ends up in cache. */ |
5 | 18 | ||
6 | /* Could vary the prefetch distance based on SMP/UP */ | 19 | /* Could vary the prefetch distance based on SMP/UP */ |
7 | 20 | ||
8 | .globl copy_page | 21 | ENTRY(copy_page) |
9 | .p2align 4 | 22 | CFI_STARTPROC |
10 | copy_page: | ||
11 | subq $3*8,%rsp | 23 | subq $3*8,%rsp |
24 | CFI_ADJUST_CFA_OFFSET 3*8 | ||
12 | movq %rbx,(%rsp) | 25 | movq %rbx,(%rsp) |
26 | CFI_REL_OFFSET rbx, 0 | ||
13 | movq %r12,1*8(%rsp) | 27 | movq %r12,1*8(%rsp) |
28 | CFI_REL_OFFSET r12, 1*8 | ||
14 | movq %r13,2*8(%rsp) | 29 | movq %r13,2*8(%rsp) |
30 | CFI_REL_OFFSET r13, 2*8 | ||
15 | 31 | ||
16 | movl $(4096/64)-5,%ecx | 32 | movl $(4096/64)-5,%ecx |
17 | .p2align 4 | 33 | .p2align 4 |
@@ -72,30 +88,33 @@ copy_page: | |||
72 | jnz .Loop2 | 88 | jnz .Loop2 |
73 | 89 | ||
74 | movq (%rsp),%rbx | 90 | movq (%rsp),%rbx |
91 | CFI_RESTORE rbx | ||
75 | movq 1*8(%rsp),%r12 | 92 | movq 1*8(%rsp),%r12 |
93 | CFI_RESTORE r12 | ||
76 | movq 2*8(%rsp),%r13 | 94 | movq 2*8(%rsp),%r13 |
95 | CFI_RESTORE r13 | ||
77 | addq $3*8,%rsp | 96 | addq $3*8,%rsp |
97 | CFI_ADJUST_CFA_OFFSET -3*8 | ||
78 | ret | 98 | ret |
99 | .Lcopy_page_end: | ||
100 | CFI_ENDPROC | ||
101 | ENDPROC(copy_page) | ||
79 | 102 | ||
80 | /* Some CPUs run faster using the string copy instructions. | 103 | /* Some CPUs run faster using the string copy instructions. |
81 | It is also a lot simpler. Use this when possible */ | 104 | It is also a lot simpler. Use this when possible */ |
82 | 105 | ||
83 | #include <asm/cpufeature.h> | 106 | #include <asm/cpufeature.h> |
84 | 107 | ||
108 | .section .altinstr_replacement,"ax" | ||
109 | 1: .byte 0xeb /* jmp <disp8> */ | ||
110 | .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ | ||
111 | 2: | ||
112 | .previous | ||
85 | .section .altinstructions,"a" | 113 | .section .altinstructions,"a" |
86 | .align 8 | 114 | .align 8 |
87 | .quad copy_page | 115 | .quad copy_page |
88 | .quad copy_page_c | 116 | .quad 1b |
89 | .byte X86_FEATURE_REP_GOOD | 117 | .byte X86_FEATURE_REP_GOOD |
90 | .byte copy_page_c_end-copy_page_c | 118 | .byte .Lcopy_page_end - copy_page |
91 | .byte copy_page_c_end-copy_page_c | 119 | .byte 2b - 1b |
92 | .previous | ||
93 | |||
94 | .section .altinstr_replacement,"ax" | ||
95 | copy_page_c: | ||
96 | movl $4096/8,%ecx | ||
97 | rep | ||
98 | movsq | ||
99 | ret | ||
100 | copy_page_c_end: | ||
101 | .previous | 120 | .previous |
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S index f64569b83b5..962f3a693c5 100644 --- a/arch/x86_64/lib/copy_user.S +++ b/arch/x86_64/lib/copy_user.S | |||
@@ -4,6 +4,9 @@ | |||
4 | * Functions to copy from and to user space. | 4 | * Functions to copy from and to user space. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/linkage.h> | ||
8 | #include <asm/dwarf2.h> | ||
9 | |||
7 | #define FIX_ALIGNMENT 1 | 10 | #define FIX_ALIGNMENT 1 |
8 | 11 | ||
9 | #include <asm/current.h> | 12 | #include <asm/current.h> |
@@ -12,9 +15,8 @@ | |||
12 | #include <asm/cpufeature.h> | 15 | #include <asm/cpufeature.h> |
13 | 16 | ||
14 | /* Standard copy_to_user with segment limit checking */ | 17 | /* Standard copy_to_user with segment limit checking */ |
15 | .globl copy_to_user | 18 | ENTRY(copy_to_user) |
16 | .p2align 4 | 19 | CFI_STARTPROC |
17 | copy_to_user: | ||
18 | GET_THREAD_INFO(%rax) | 20 | GET_THREAD_INFO(%rax) |
19 | movq %rdi,%rcx | 21 | movq %rdi,%rcx |
20 | addq %rdx,%rcx | 22 | addq %rdx,%rcx |
@@ -25,9 +27,11 @@ copy_to_user: | |||
25 | .byte 0xe9 /* 32bit jump */ | 27 | .byte 0xe9 /* 32bit jump */ |
26 | .long .Lcug-1f | 28 | .long .Lcug-1f |
27 | 1: | 29 | 1: |
30 | CFI_ENDPROC | ||
31 | ENDPROC(copy_to_user) | ||
28 | 32 | ||
29 | .section .altinstr_replacement,"ax" | 33 | .section .altinstr_replacement,"ax" |
30 | 3: .byte 0xe9 /* replacement jmp with 8 bit immediate */ | 34 | 3: .byte 0xe9 /* replacement jmp with 32 bit immediate */ |
31 | .long copy_user_generic_c-1b /* offset */ | 35 | .long copy_user_generic_c-1b /* offset */ |
32 | .previous | 36 | .previous |
33 | .section .altinstructions,"a" | 37 | .section .altinstructions,"a" |
@@ -40,9 +44,8 @@ copy_to_user: | |||
40 | .previous | 44 | .previous |
41 | 45 | ||
42 | /* Standard copy_from_user with segment limit checking */ | 46 | /* Standard copy_from_user with segment limit checking */ |
43 | .globl copy_from_user | 47 | ENTRY(copy_from_user) |
44 | .p2align 4 | 48 | CFI_STARTPROC |
45 | copy_from_user: | ||
46 | GET_THREAD_INFO(%rax) | 49 | GET_THREAD_INFO(%rax) |
47 | movq %rsi,%rcx | 50 | movq %rsi,%rcx |
48 | addq %rdx,%rcx | 51 | addq %rdx,%rcx |
@@ -50,10 +53,13 @@ copy_from_user: | |||
50 | cmpq threadinfo_addr_limit(%rax),%rcx | 53 | cmpq threadinfo_addr_limit(%rax),%rcx |
51 | jae bad_from_user | 54 | jae bad_from_user |
52 | /* FALL THROUGH to copy_user_generic */ | 55 | /* FALL THROUGH to copy_user_generic */ |
56 | CFI_ENDPROC | ||
57 | ENDPROC(copy_from_user) | ||
53 | 58 | ||
54 | .section .fixup,"ax" | 59 | .section .fixup,"ax" |
55 | /* must zero dest */ | 60 | /* must zero dest */ |
56 | bad_from_user: | 61 | bad_from_user: |
62 | CFI_STARTPROC | ||
57 | movl %edx,%ecx | 63 | movl %edx,%ecx |
58 | xorl %eax,%eax | 64 | xorl %eax,%eax |
59 | rep | 65 | rep |
@@ -61,6 +67,8 @@ bad_from_user: | |||
61 | bad_to_user: | 67 | bad_to_user: |
62 | movl %edx,%eax | 68 | movl %edx,%eax |
63 | ret | 69 | ret |
70 | CFI_ENDPROC | ||
71 | END(bad_from_user) | ||
64 | .previous | 72 | .previous |
65 | 73 | ||
66 | 74 | ||
@@ -75,9 +83,8 @@ bad_to_user: | |||
75 | * Output: | 83 | * Output: |
76 | * eax uncopied bytes or 0 if successful. | 84 | * eax uncopied bytes or 0 if successful. |
77 | */ | 85 | */ |
78 | .globl copy_user_generic | 86 | ENTRY(copy_user_generic) |
79 | .p2align 4 | 87 | CFI_STARTPROC |
80 | copy_user_generic: | ||
81 | .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ | 88 | .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ |
82 | .byte 0x66,0x90 | 89 | .byte 0x66,0x90 |
83 | 1: | 90 | 1: |
@@ -95,6 +102,8 @@ copy_user_generic: | |||
95 | .previous | 102 | .previous |
96 | .Lcug: | 103 | .Lcug: |
97 | pushq %rbx | 104 | pushq %rbx |
105 | CFI_ADJUST_CFA_OFFSET 8 | ||
106 | CFI_REL_OFFSET rbx, 0 | ||
98 | xorl %eax,%eax /*zero for the exception handler */ | 107 | xorl %eax,%eax /*zero for the exception handler */ |
99 | 108 | ||
100 | #ifdef FIX_ALIGNMENT | 109 | #ifdef FIX_ALIGNMENT |
@@ -168,9 +177,13 @@ copy_user_generic: | |||
168 | decl %ecx | 177 | decl %ecx |
169 | jnz .Lloop_1 | 178 | jnz .Lloop_1 |
170 | 179 | ||
180 | CFI_REMEMBER_STATE | ||
171 | .Lende: | 181 | .Lende: |
172 | popq %rbx | 182 | popq %rbx |
183 | CFI_ADJUST_CFA_OFFSET -8 | ||
184 | CFI_RESTORE rbx | ||
173 | ret | 185 | ret |
186 | CFI_RESTORE_STATE | ||
174 | 187 | ||
175 | #ifdef FIX_ALIGNMENT | 188 | #ifdef FIX_ALIGNMENT |
176 | /* align destination */ | 189 | /* align destination */ |
@@ -261,6 +274,9 @@ copy_user_generic: | |||
261 | .Le_zero: | 274 | .Le_zero: |
262 | movq %rdx,%rax | 275 | movq %rdx,%rax |
263 | jmp .Lende | 276 | jmp .Lende |
277 | CFI_ENDPROC | ||
278 | ENDPROC(copy_user_generic) | ||
279 | |||
264 | 280 | ||
265 | /* Some CPUs run faster using the string copy instructions. | 281 | /* Some CPUs run faster using the string copy instructions. |
266 | This is also a lot simpler. Use them when possible. | 282 | This is also a lot simpler. Use them when possible. |
@@ -282,6 +298,7 @@ copy_user_generic: | |||
282 | * this please consider this. | 298 | * this please consider this. |
283 | */ | 299 | */ |
284 | copy_user_generic_c: | 300 | copy_user_generic_c: |
301 | CFI_STARTPROC | ||
285 | movl %edx,%ecx | 302 | movl %edx,%ecx |
286 | shrl $3,%ecx | 303 | shrl $3,%ecx |
287 | andl $7,%edx | 304 | andl $7,%edx |
@@ -294,6 +311,8 @@ copy_user_generic_c: | |||
294 | ret | 311 | ret |
295 | 3: lea (%rdx,%rcx,8),%rax | 312 | 3: lea (%rdx,%rcx,8),%rax |
296 | ret | 313 | ret |
314 | CFI_ENDPROC | ||
315 | END(copy_user_generic_c) | ||
297 | 316 | ||
298 | .section __ex_table,"a" | 317 | .section __ex_table,"a" |
299 | .quad 1b,3b | 318 | .quad 1b,3b |
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S index 72fd55ee896..f0dba36578e 100644 --- a/arch/x86_64/lib/csum-copy.S +++ b/arch/x86_64/lib/csum-copy.S | |||
@@ -5,8 +5,9 @@ | |||
5 | * License. See the file COPYING in the main directory of this archive | 5 | * License. See the file COPYING in the main directory of this archive |
6 | * for more details. No warranty for anything given at all. | 6 | * for more details. No warranty for anything given at all. |
7 | */ | 7 | */ |
8 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
9 | #include <asm/errno.h> | 9 | #include <asm/dwarf2.h> |
10 | #include <asm/errno.h> | ||
10 | 11 | ||
11 | /* | 12 | /* |
12 | * Checksum copy with exception handling. | 13 | * Checksum copy with exception handling. |
@@ -53,19 +54,24 @@ | |||
53 | .endm | 54 | .endm |
54 | 55 | ||
55 | 56 | ||
56 | .globl csum_partial_copy_generic | 57 | ENTRY(csum_partial_copy_generic) |
57 | .p2align 4 | 58 | CFI_STARTPROC |
58 | csum_partial_copy_generic: | ||
59 | cmpl $3*64,%edx | 59 | cmpl $3*64,%edx |
60 | jle .Lignore | 60 | jle .Lignore |
61 | 61 | ||
62 | .Lignore: | 62 | .Lignore: |
63 | subq $7*8,%rsp | 63 | subq $7*8,%rsp |
64 | CFI_ADJUST_CFA_OFFSET 7*8 | ||
64 | movq %rbx,2*8(%rsp) | 65 | movq %rbx,2*8(%rsp) |
66 | CFI_REL_OFFSET rbx, 2*8 | ||
65 | movq %r12,3*8(%rsp) | 67 | movq %r12,3*8(%rsp) |
68 | CFI_REL_OFFSET r12, 3*8 | ||
66 | movq %r14,4*8(%rsp) | 69 | movq %r14,4*8(%rsp) |
70 | CFI_REL_OFFSET r14, 4*8 | ||
67 | movq %r13,5*8(%rsp) | 71 | movq %r13,5*8(%rsp) |
72 | CFI_REL_OFFSET r13, 5*8 | ||
68 | movq %rbp,6*8(%rsp) | 73 | movq %rbp,6*8(%rsp) |
74 | CFI_REL_OFFSET rbp, 6*8 | ||
69 | 75 | ||
70 | movq %r8,(%rsp) | 76 | movq %r8,(%rsp) |
71 | movq %r9,1*8(%rsp) | 77 | movq %r9,1*8(%rsp) |
@@ -208,14 +214,22 @@ csum_partial_copy_generic: | |||
208 | addl %ebx,%eax | 214 | addl %ebx,%eax |
209 | adcl %r9d,%eax /* carry */ | 215 | adcl %r9d,%eax /* carry */ |
210 | 216 | ||
217 | CFI_REMEMBER_STATE | ||
211 | .Lende: | 218 | .Lende: |
212 | movq 2*8(%rsp),%rbx | 219 | movq 2*8(%rsp),%rbx |
220 | CFI_RESTORE rbx | ||
213 | movq 3*8(%rsp),%r12 | 221 | movq 3*8(%rsp),%r12 |
222 | CFI_RESTORE r12 | ||
214 | movq 4*8(%rsp),%r14 | 223 | movq 4*8(%rsp),%r14 |
224 | CFI_RESTORE r14 | ||
215 | movq 5*8(%rsp),%r13 | 225 | movq 5*8(%rsp),%r13 |
226 | CFI_RESTORE r13 | ||
216 | movq 6*8(%rsp),%rbp | 227 | movq 6*8(%rsp),%rbp |
228 | CFI_RESTORE rbp | ||
217 | addq $7*8,%rsp | 229 | addq $7*8,%rsp |
230 | CFI_ADJUST_CFA_OFFSET -7*8 | ||
218 | ret | 231 | ret |
232 | CFI_RESTORE_STATE | ||
219 | 233 | ||
220 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | 234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ |
221 | .Lbad_source: | 235 | .Lbad_source: |
@@ -231,3 +245,5 @@ csum_partial_copy_generic: | |||
231 | jz .Lende | 245 | jz .Lende |
232 | movl $-EFAULT,(%rax) | 246 | movl $-EFAULT,(%rax) |
233 | jmp .Lende | 247 | jmp .Lende |
248 | CFI_ENDPROC | ||
249 | ENDPROC(csum_partial_copy_generic) | ||
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S index 3844d5e885a..5448876261f 100644 --- a/arch/x86_64/lib/getuser.S +++ b/arch/x86_64/lib/getuser.S | |||
@@ -27,25 +27,26 @@ | |||
27 | */ | 27 | */ |
28 | 28 | ||
29 | #include <linux/linkage.h> | 29 | #include <linux/linkage.h> |
30 | #include <asm/dwarf2.h> | ||
30 | #include <asm/page.h> | 31 | #include <asm/page.h> |
31 | #include <asm/errno.h> | 32 | #include <asm/errno.h> |
32 | #include <asm/asm-offsets.h> | 33 | #include <asm/asm-offsets.h> |
33 | #include <asm/thread_info.h> | 34 | #include <asm/thread_info.h> |
34 | 35 | ||
35 | .text | 36 | .text |
36 | .p2align 4 | 37 | ENTRY(__get_user_1) |
37 | .globl __get_user_1 | 38 | CFI_STARTPROC |
38 | __get_user_1: | ||
39 | GET_THREAD_INFO(%r8) | 39 | GET_THREAD_INFO(%r8) |
40 | cmpq threadinfo_addr_limit(%r8),%rcx | 40 | cmpq threadinfo_addr_limit(%r8),%rcx |
41 | jae bad_get_user | 41 | jae bad_get_user |
42 | 1: movzb (%rcx),%edx | 42 | 1: movzb (%rcx),%edx |
43 | xorl %eax,%eax | 43 | xorl %eax,%eax |
44 | ret | 44 | ret |
45 | CFI_ENDPROC | ||
46 | ENDPROC(__get_user_1) | ||
45 | 47 | ||
46 | .p2align 4 | 48 | ENTRY(__get_user_2) |
47 | .globl __get_user_2 | 49 | CFI_STARTPROC |
48 | __get_user_2: | ||
49 | GET_THREAD_INFO(%r8) | 50 | GET_THREAD_INFO(%r8) |
50 | addq $1,%rcx | 51 | addq $1,%rcx |
51 | jc 20f | 52 | jc 20f |
@@ -57,10 +58,11 @@ __get_user_2: | |||
57 | ret | 58 | ret |
58 | 20: decq %rcx | 59 | 20: decq %rcx |
59 | jmp bad_get_user | 60 | jmp bad_get_user |
61 | CFI_ENDPROC | ||
62 | ENDPROC(__get_user_2) | ||
60 | 63 | ||
61 | .p2align 4 | 64 | ENTRY(__get_user_4) |
62 | .globl __get_user_4 | 65 | CFI_STARTPROC |
63 | __get_user_4: | ||
64 | GET_THREAD_INFO(%r8) | 66 | GET_THREAD_INFO(%r8) |
65 | addq $3,%rcx | 67 | addq $3,%rcx |
66 | jc 30f | 68 | jc 30f |
@@ -72,10 +74,11 @@ __get_user_4: | |||
72 | ret | 74 | ret |
73 | 30: subq $3,%rcx | 75 | 30: subq $3,%rcx |
74 | jmp bad_get_user | 76 | jmp bad_get_user |
77 | CFI_ENDPROC | ||
78 | ENDPROC(__get_user_4) | ||
75 | 79 | ||
76 | .p2align 4 | 80 | ENTRY(__get_user_8) |
77 | .globl __get_user_8 | 81 | CFI_STARTPROC |
78 | __get_user_8: | ||
79 | GET_THREAD_INFO(%r8) | 82 | GET_THREAD_INFO(%r8) |
80 | addq $7,%rcx | 83 | addq $7,%rcx |
81 | jc 40f | 84 | jc 40f |
@@ -87,11 +90,16 @@ __get_user_8: | |||
87 | ret | 90 | ret |
88 | 40: subq $7,%rcx | 91 | 40: subq $7,%rcx |
89 | jmp bad_get_user | 92 | jmp bad_get_user |
93 | CFI_ENDPROC | ||
94 | ENDPROC(__get_user_8) | ||
90 | 95 | ||
91 | bad_get_user: | 96 | bad_get_user: |
97 | CFI_STARTPROC | ||
92 | xorl %edx,%edx | 98 | xorl %edx,%edx |
93 | movq $(-EFAULT),%rax | 99 | movq $(-EFAULT),%rax |
94 | ret | 100 | ret |
101 | CFI_ENDPROC | ||
102 | END(bad_get_user) | ||
95 | 103 | ||
96 | .section __ex_table,"a" | 104 | .section __ex_table,"a" |
97 | .quad 1b,bad_get_user | 105 | .quad 1b,bad_get_user |
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S index 8bbade5fea0..05a95e713da 100644 --- a/arch/x86_64/lib/iomap_copy.S +++ b/arch/x86_64/lib/iomap_copy.S | |||
@@ -15,12 +15,16 @@ | |||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | 15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include <linux/linkage.h> | ||
19 | #include <asm/dwarf2.h> | ||
20 | |||
18 | /* | 21 | /* |
19 | * override generic version in lib/iomap_copy.c | 22 | * override generic version in lib/iomap_copy.c |
20 | */ | 23 | */ |
21 | .globl __iowrite32_copy | 24 | ENTRY(__iowrite32_copy) |
22 | .p2align 4 | 25 | CFI_STARTPROC |
23 | __iowrite32_copy: | ||
24 | movl %edx,%ecx | 26 | movl %edx,%ecx |
25 | rep movsd | 27 | rep movsd |
26 | ret | 28 | ret |
29 | CFI_ENDPROC | ||
30 | ENDPROC(__iowrite32_copy) | ||
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S index 5554948b555..967b22fa7d0 100644 --- a/arch/x86_64/lib/memcpy.S +++ b/arch/x86_64/lib/memcpy.S | |||
@@ -1,6 +1,10 @@ | |||
1 | /* Copyright 2002 Andi Kleen */ | 1 | /* Copyright 2002 Andi Kleen */ |
2 | 2 | ||
3 | #include <asm/cpufeature.h> | 3 | #include <linux/config.h> |
4 | #include <linux/linkage.h> | ||
5 | #include <asm/dwarf2.h> | ||
6 | #include <asm/cpufeature.h> | ||
7 | |||
4 | /* | 8 | /* |
5 | * memcpy - Copy a memory block. | 9 | * memcpy - Copy a memory block. |
6 | * | 10 | * |
@@ -13,12 +17,26 @@ | |||
13 | * rax original destination | 17 | * rax original destination |
14 | */ | 18 | */ |
15 | 19 | ||
16 | .globl __memcpy | 20 | ALIGN |
17 | .globl memcpy | 21 | memcpy_c: |
18 | .p2align 4 | 22 | CFI_STARTPROC |
19 | __memcpy: | 23 | movq %rdi,%rax |
20 | memcpy: | 24 | movl %edx,%ecx |
25 | shrl $3,%ecx | ||
26 | andl $7,%edx | ||
27 | rep movsq | ||
28 | movl %edx,%ecx | ||
29 | rep movsb | ||
30 | ret | ||
31 | CFI_ENDPROC | ||
32 | ENDPROC(memcpy_c) | ||
33 | |||
34 | ENTRY(__memcpy) | ||
35 | ENTRY(memcpy) | ||
36 | CFI_STARTPROC | ||
21 | pushq %rbx | 37 | pushq %rbx |
38 | CFI_ADJUST_CFA_OFFSET 8 | ||
39 | CFI_REL_OFFSET rbx, 0 | ||
22 | movq %rdi,%rax | 40 | movq %rdi,%rax |
23 | 41 | ||
24 | movl %edx,%ecx | 42 | movl %edx,%ecx |
@@ -86,36 +104,27 @@ memcpy: | |||
86 | 104 | ||
87 | .Lende: | 105 | .Lende: |
88 | popq %rbx | 106 | popq %rbx |
107 | CFI_ADJUST_CFA_OFFSET -8 | ||
108 | CFI_RESTORE rbx | ||
89 | ret | 109 | ret |
90 | .Lfinal: | 110 | .Lfinal: |
111 | CFI_ENDPROC | ||
112 | ENDPROC(memcpy) | ||
113 | ENDPROC(__memcpy) | ||
91 | 114 | ||
92 | /* Some CPUs run faster using the string copy instructions. | 115 | /* Some CPUs run faster using the string copy instructions. |
93 | It is also a lot simpler. Use this when possible */ | 116 | It is also a lot simpler. Use this when possible */ |
94 | 117 | ||
118 | .section .altinstr_replacement,"ax" | ||
119 | 1: .byte 0xeb /* jmp <disp8> */ | ||
120 | .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ | ||
121 | 2: | ||
122 | .previous | ||
95 | .section .altinstructions,"a" | 123 | .section .altinstructions,"a" |
96 | .align 8 | 124 | .align 8 |
97 | .quad memcpy | 125 | .quad memcpy |
98 | .quad memcpy_c | 126 | .quad 1b |
99 | .byte X86_FEATURE_REP_GOOD | 127 | .byte X86_FEATURE_REP_GOOD |
100 | .byte .Lfinal-memcpy | 128 | .byte .Lfinal - memcpy |
101 | .byte memcpy_c_end-memcpy_c | 129 | .byte 2b - 1b |
102 | .previous | ||
103 | |||
104 | .section .altinstr_replacement,"ax" | ||
105 | /* rdi destination | ||
106 | * rsi source | ||
107 | * rdx count | ||
108 | */ | ||
109 | memcpy_c: | ||
110 | movq %rdi,%rax | ||
111 | movl %edx,%ecx | ||
112 | shrl $3,%ecx | ||
113 | andl $7,%edx | ||
114 | rep | ||
115 | movsq | ||
116 | movl %edx,%ecx | ||
117 | rep | ||
118 | movsb | ||
119 | ret | ||
120 | memcpy_c_end: | ||
121 | .previous | 130 | .previous |
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S index ad397f2c7de..09ed1f6b0ea 100644 --- a/arch/x86_64/lib/memset.S +++ b/arch/x86_64/lib/memset.S | |||
@@ -1,4 +1,9 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ | 1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ |
2 | |||
3 | #include <linux/config.h> | ||
4 | #include <linux/linkage.h> | ||
5 | #include <asm/dwarf2.h> | ||
6 | |||
2 | /* | 7 | /* |
3 | * ISO C memset - set a memory block to a byte value. | 8 | * ISO C memset - set a memory block to a byte value. |
4 | * | 9 | * |
@@ -8,11 +13,29 @@ | |||
8 | * | 13 | * |
9 | * rax original destination | 14 | * rax original destination |
10 | */ | 15 | */ |
11 | .globl __memset | 16 | ALIGN |
12 | .globl memset | 17 | memset_c: |
13 | .p2align 4 | 18 | CFI_STARTPROC |
14 | memset: | 19 | movq %rdi,%r9 |
15 | __memset: | 20 | movl %edx,%r8d |
21 | andl $7,%r8d | ||
22 | movl %edx,%ecx | ||
23 | shrl $3,%ecx | ||
24 | /* expand byte value */ | ||
25 | movzbl %sil,%esi | ||
26 | movabs $0x0101010101010101,%rax | ||
27 | mulq %rsi /* with rax, clobbers rdx */ | ||
28 | rep stosq | ||
29 | movl %r8d,%ecx | ||
30 | rep stosb | ||
31 | movq %r9,%rax | ||
32 | ret | ||
33 | CFI_ENDPROC | ||
34 | ENDPROC(memset_c) | ||
35 | |||
36 | ENTRY(memset) | ||
37 | ENTRY(__memset) | ||
38 | CFI_STARTPROC | ||
16 | movq %rdi,%r10 | 39 | movq %rdi,%r10 |
17 | movq %rdx,%r11 | 40 | movq %rdx,%r11 |
18 | 41 | ||
@@ -25,6 +48,7 @@ __memset: | |||
25 | movl %edi,%r9d | 48 | movl %edi,%r9d |
26 | andl $7,%r9d | 49 | andl $7,%r9d |
27 | jnz .Lbad_alignment | 50 | jnz .Lbad_alignment |
51 | CFI_REMEMBER_STATE | ||
28 | .Lafter_bad_alignment: | 52 | .Lafter_bad_alignment: |
29 | 53 | ||
30 | movl %r11d,%ecx | 54 | movl %r11d,%ecx |
@@ -75,6 +99,7 @@ __memset: | |||
75 | movq %r10,%rax | 99 | movq %r10,%rax |
76 | ret | 100 | ret |
77 | 101 | ||
102 | CFI_RESTORE_STATE | ||
78 | .Lbad_alignment: | 103 | .Lbad_alignment: |
79 | cmpq $7,%r11 | 104 | cmpq $7,%r11 |
80 | jbe .Lhandle_7 | 105 | jbe .Lhandle_7 |
@@ -84,42 +109,26 @@ __memset: | |||
84 | addq %r8,%rdi | 109 | addq %r8,%rdi |
85 | subq %r8,%r11 | 110 | subq %r8,%r11 |
86 | jmp .Lafter_bad_alignment | 111 | jmp .Lafter_bad_alignment |
112 | .Lfinal: | ||
113 | CFI_ENDPROC | ||
114 | ENDPROC(memset) | ||
115 | ENDPROC(__memset) | ||
87 | 116 | ||
88 | /* Some CPUs run faster using the string instructions. | 117 | /* Some CPUs run faster using the string instructions. |
89 | It is also a lot simpler. Use this when possible */ | 118 | It is also a lot simpler. Use this when possible */ |
90 | 119 | ||
91 | #include <asm/cpufeature.h> | 120 | #include <asm/cpufeature.h> |
92 | 121 | ||
122 | .section .altinstr_replacement,"ax" | ||
123 | 1: .byte 0xeb /* jmp <disp8> */ | ||
124 | .byte (memset_c - memset) - (2f - 1b) /* offset */ | ||
125 | 2: | ||
126 | .previous | ||
93 | .section .altinstructions,"a" | 127 | .section .altinstructions,"a" |
94 | .align 8 | 128 | .align 8 |
95 | .quad memset | 129 | .quad memset |
96 | .quad memset_c | 130 | .quad 1b |
97 | .byte X86_FEATURE_REP_GOOD | 131 | .byte X86_FEATURE_REP_GOOD |
98 | .byte memset_c_end-memset_c | 132 | .byte .Lfinal - memset |
99 | .byte memset_c_end-memset_c | 133 | .byte 2b - 1b |
100 | .previous | ||
101 | |||
102 | .section .altinstr_replacement,"ax" | ||
103 | /* rdi destination | ||
104 | * rsi value | ||
105 | * rdx count | ||
106 | */ | ||
107 | memset_c: | ||
108 | movq %rdi,%r9 | ||
109 | movl %edx,%r8d | ||
110 | andl $7,%r8d | ||
111 | movl %edx,%ecx | ||
112 | shrl $3,%ecx | ||
113 | /* expand byte value */ | ||
114 | movzbl %sil,%esi | ||
115 | movabs $0x0101010101010101,%rax | ||
116 | mulq %rsi /* with rax, clobbers rdx */ | ||
117 | rep | ||
118 | stosq | ||
119 | movl %r8d,%ecx | ||
120 | rep | ||
121 | stosb | ||
122 | movq %r9,%rax | ||
123 | ret | ||
124 | memset_c_end: | ||
125 | .previous | 134 | .previous |
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S index 7f5593974e2..4989f5a8fa9 100644 --- a/arch/x86_64/lib/putuser.S +++ b/arch/x86_64/lib/putuser.S | |||
@@ -25,25 +25,26 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
28 | #include <asm/dwarf2.h> | ||
28 | #include <asm/page.h> | 29 | #include <asm/page.h> |
29 | #include <asm/errno.h> | 30 | #include <asm/errno.h> |
30 | #include <asm/asm-offsets.h> | 31 | #include <asm/asm-offsets.h> |
31 | #include <asm/thread_info.h> | 32 | #include <asm/thread_info.h> |
32 | 33 | ||
33 | .text | 34 | .text |
34 | .p2align 4 | 35 | ENTRY(__put_user_1) |
35 | .globl __put_user_1 | 36 | CFI_STARTPROC |
36 | __put_user_1: | ||
37 | GET_THREAD_INFO(%r8) | 37 | GET_THREAD_INFO(%r8) |
38 | cmpq threadinfo_addr_limit(%r8),%rcx | 38 | cmpq threadinfo_addr_limit(%r8),%rcx |
39 | jae bad_put_user | 39 | jae bad_put_user |
40 | 1: movb %dl,(%rcx) | 40 | 1: movb %dl,(%rcx) |
41 | xorl %eax,%eax | 41 | xorl %eax,%eax |
42 | ret | 42 | ret |
43 | CFI_ENDPROC | ||
44 | ENDPROC(__put_user_1) | ||
43 | 45 | ||
44 | .p2align 4 | 46 | ENTRY(__put_user_2) |
45 | .globl __put_user_2 | 47 | CFI_STARTPROC |
46 | __put_user_2: | ||
47 | GET_THREAD_INFO(%r8) | 48 | GET_THREAD_INFO(%r8) |
48 | addq $1,%rcx | 49 | addq $1,%rcx |
49 | jc 20f | 50 | jc 20f |
@@ -55,10 +56,11 @@ __put_user_2: | |||
55 | ret | 56 | ret |
56 | 20: decq %rcx | 57 | 20: decq %rcx |
57 | jmp bad_put_user | 58 | jmp bad_put_user |
59 | CFI_ENDPROC | ||
60 | ENDPROC(__put_user_2) | ||
58 | 61 | ||
59 | .p2align 4 | 62 | ENTRY(__put_user_4) |
60 | .globl __put_user_4 | 63 | CFI_STARTPROC |
61 | __put_user_4: | ||
62 | GET_THREAD_INFO(%r8) | 64 | GET_THREAD_INFO(%r8) |
63 | addq $3,%rcx | 65 | addq $3,%rcx |
64 | jc 30f | 66 | jc 30f |
@@ -70,10 +72,11 @@ __put_user_4: | |||
70 | ret | 72 | ret |
71 | 30: subq $3,%rcx | 73 | 30: subq $3,%rcx |
72 | jmp bad_put_user | 74 | jmp bad_put_user |
75 | CFI_ENDPROC | ||
76 | ENDPROC(__put_user_4) | ||
73 | 77 | ||
74 | .p2align 4 | 78 | ENTRY(__put_user_8) |
75 | .globl __put_user_8 | 79 | CFI_STARTPROC |
76 | __put_user_8: | ||
77 | GET_THREAD_INFO(%r8) | 80 | GET_THREAD_INFO(%r8) |
78 | addq $7,%rcx | 81 | addq $7,%rcx |
79 | jc 40f | 82 | jc 40f |
@@ -85,10 +88,15 @@ __put_user_8: | |||
85 | ret | 88 | ret |
86 | 40: subq $7,%rcx | 89 | 40: subq $7,%rcx |
87 | jmp bad_put_user | 90 | jmp bad_put_user |
91 | CFI_ENDPROC | ||
92 | ENDPROC(__put_user_8) | ||
88 | 93 | ||
89 | bad_put_user: | 94 | bad_put_user: |
95 | CFI_STARTPROC | ||
90 | movq $(-EFAULT),%rax | 96 | movq $(-EFAULT),%rax |
91 | ret | 97 | ret |
98 | CFI_ENDPROC | ||
99 | END(bad_put_user) | ||
92 | 100 | ||
93 | .section __ex_table,"a" | 101 | .section __ex_table,"a" |
94 | .quad 1b,bad_put_user | 102 | .quad 1b,bad_put_user |