aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/x86_64/lib/clear_page.S47
-rw-r--r--arch/x86_64/lib/copy_page.S53
-rw-r--r--arch/x86_64/lib/copy_user.S39
-rw-r--r--arch/x86_64/lib/csum-copy.S26
-rw-r--r--arch/x86_64/lib/getuser.S32
-rw-r--r--arch/x86_64/lib/iomap_copy.S10
-rw-r--r--arch/x86_64/lib/memcpy.S69
-rw-r--r--arch/x86_64/lib/memset.S79
-rw-r--r--arch/x86_64/lib/putuser.S32
9 files changed, 244 insertions, 143 deletions
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
index 1f81b79b796c..9a10a78bb4a4 100644
--- a/arch/x86_64/lib/clear_page.S
+++ b/arch/x86_64/lib/clear_page.S
@@ -1,10 +1,22 @@
1#include <linux/linkage.h>
2#include <asm/dwarf2.h>
3
1/* 4/*
2 * Zero a page. 5 * Zero a page.
3 * rdi page 6 * rdi page
4 */ 7 */
5 .globl clear_page 8 ALIGN
6 .p2align 4 9clear_page_c:
7clear_page: 10 CFI_STARTPROC
11 movl $4096/8,%ecx
12 xorl %eax,%eax
13 rep stosq
14 ret
15 CFI_ENDPROC
16ENDPROC(clear_page)
17
18ENTRY(clear_page)
19 CFI_STARTPROC
8 xorl %eax,%eax 20 xorl %eax,%eax
9 movl $4096/64,%ecx 21 movl $4096/64,%ecx
10 .p2align 4 22 .p2align 4
@@ -23,28 +35,25 @@ clear_page:
23 jnz .Lloop 35 jnz .Lloop
24 nop 36 nop
25 ret 37 ret
26clear_page_end: 38 CFI_ENDPROC
39.Lclear_page_end:
40ENDPROC(clear_page)
27 41
28 /* Some CPUs run faster using the string instructions. 42 /* Some CPUs run faster using the string instructions.
29 It is also a lot simpler. Use this when possible */ 43 It is also a lot simpler. Use this when possible */
30 44
31#include <asm/cpufeature.h> 45#include <asm/cpufeature.h>
32 46
47 .section .altinstr_replacement,"ax"
481: .byte 0xeb /* jmp <disp8> */
49 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
502:
51 .previous
33 .section .altinstructions,"a" 52 .section .altinstructions,"a"
34 .align 8 53 .align 8
35 .quad clear_page 54 .quad clear_page
36 .quad clear_page_c 55 .quad 1b
37 .byte X86_FEATURE_REP_GOOD 56 .byte X86_FEATURE_REP_GOOD
38 .byte clear_page_end-clear_page 57 .byte .Lclear_page_end - clear_page
39 .byte clear_page_c_end-clear_page_c 58 .byte 2b - 1b
40 .previous
41
42 .section .altinstr_replacement,"ax"
43clear_page_c:
44 movl $4096/8,%ecx
45 xorl %eax,%eax
46 rep
47 stosq
48 ret
49clear_page_c_end:
50 .previous 59 .previous
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
index 8fa19d96a7ee..0ebb03b60e79 100644
--- a/arch/x86_64/lib/copy_page.S
+++ b/arch/x86_64/lib/copy_page.S
@@ -1,17 +1,33 @@
1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ 1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
2 2
3#include <linux/config.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6
7 ALIGN
8copy_page_c:
9 CFI_STARTPROC
10 movl $4096/8,%ecx
11 rep movsq
12 ret
13 CFI_ENDPROC
14ENDPROC(copy_page_c)
15
3/* Don't use streaming store because it's better when the target 16/* Don't use streaming store because it's better when the target
4 ends up in cache. */ 17 ends up in cache. */
5 18
6/* Could vary the prefetch distance based on SMP/UP */ 19/* Could vary the prefetch distance based on SMP/UP */
7 20
8 .globl copy_page 21ENTRY(copy_page)
9 .p2align 4 22 CFI_STARTPROC
10copy_page:
11 subq $3*8,%rsp 23 subq $3*8,%rsp
24 CFI_ADJUST_CFA_OFFSET 3*8
12 movq %rbx,(%rsp) 25 movq %rbx,(%rsp)
26 CFI_REL_OFFSET rbx, 0
13 movq %r12,1*8(%rsp) 27 movq %r12,1*8(%rsp)
28 CFI_REL_OFFSET r12, 1*8
14 movq %r13,2*8(%rsp) 29 movq %r13,2*8(%rsp)
30 CFI_REL_OFFSET r13, 2*8
15 31
16 movl $(4096/64)-5,%ecx 32 movl $(4096/64)-5,%ecx
17 .p2align 4 33 .p2align 4
@@ -72,30 +88,33 @@ copy_page:
72 jnz .Loop2 88 jnz .Loop2
73 89
74 movq (%rsp),%rbx 90 movq (%rsp),%rbx
91 CFI_RESTORE rbx
75 movq 1*8(%rsp),%r12 92 movq 1*8(%rsp),%r12
93 CFI_RESTORE r12
76 movq 2*8(%rsp),%r13 94 movq 2*8(%rsp),%r13
95 CFI_RESTORE r13
77 addq $3*8,%rsp 96 addq $3*8,%rsp
97 CFI_ADJUST_CFA_OFFSET -3*8
78 ret 98 ret
99.Lcopy_page_end:
100 CFI_ENDPROC
101ENDPROC(copy_page)
79 102
80 /* Some CPUs run faster using the string copy instructions. 103 /* Some CPUs run faster using the string copy instructions.
81 It is also a lot simpler. Use this when possible */ 104 It is also a lot simpler. Use this when possible */
82 105
83#include <asm/cpufeature.h> 106#include <asm/cpufeature.h>
84 107
108 .section .altinstr_replacement,"ax"
1091: .byte 0xeb /* jmp <disp8> */
110 .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
1112:
112 .previous
85 .section .altinstructions,"a" 113 .section .altinstructions,"a"
86 .align 8 114 .align 8
87 .quad copy_page 115 .quad copy_page
88 .quad copy_page_c 116 .quad 1b
89 .byte X86_FEATURE_REP_GOOD 117 .byte X86_FEATURE_REP_GOOD
90 .byte copy_page_c_end-copy_page_c 118 .byte .Lcopy_page_end - copy_page
91 .byte copy_page_c_end-copy_page_c 119 .byte 2b - 1b
92 .previous
93
94 .section .altinstr_replacement,"ax"
95copy_page_c:
96 movl $4096/8,%ecx
97 rep
98 movsq
99 ret
100copy_page_c_end:
101 .previous 120 .previous
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
index f64569b83b54..962f3a693c5e 100644
--- a/arch/x86_64/lib/copy_user.S
+++ b/arch/x86_64/lib/copy_user.S
@@ -4,6 +4,9 @@
4 * Functions to copy from and to user space. 4 * Functions to copy from and to user space.
5 */ 5 */
6 6
7#include <linux/linkage.h>
8#include <asm/dwarf2.h>
9
7#define FIX_ALIGNMENT 1 10#define FIX_ALIGNMENT 1
8 11
9 #include <asm/current.h> 12 #include <asm/current.h>
@@ -12,9 +15,8 @@
12 #include <asm/cpufeature.h> 15 #include <asm/cpufeature.h>
13 16
14/* Standard copy_to_user with segment limit checking */ 17/* Standard copy_to_user with segment limit checking */
15 .globl copy_to_user 18ENTRY(copy_to_user)
16 .p2align 4 19 CFI_STARTPROC
17copy_to_user:
18 GET_THREAD_INFO(%rax) 20 GET_THREAD_INFO(%rax)
19 movq %rdi,%rcx 21 movq %rdi,%rcx
20 addq %rdx,%rcx 22 addq %rdx,%rcx
@@ -25,9 +27,11 @@ copy_to_user:
25 .byte 0xe9 /* 32bit jump */ 27 .byte 0xe9 /* 32bit jump */
26 .long .Lcug-1f 28 .long .Lcug-1f
271: 291:
30 CFI_ENDPROC
31ENDPROC(copy_to_user)
28 32
29 .section .altinstr_replacement,"ax" 33 .section .altinstr_replacement,"ax"
303: .byte 0xe9 /* replacement jmp with 8 bit immediate */ 343: .byte 0xe9 /* replacement jmp with 32 bit immediate */
31 .long copy_user_generic_c-1b /* offset */ 35 .long copy_user_generic_c-1b /* offset */
32 .previous 36 .previous
33 .section .altinstructions,"a" 37 .section .altinstructions,"a"
@@ -40,9 +44,8 @@ copy_to_user:
40 .previous 44 .previous
41 45
42/* Standard copy_from_user with segment limit checking */ 46/* Standard copy_from_user with segment limit checking */
43 .globl copy_from_user 47ENTRY(copy_from_user)
44 .p2align 4 48 CFI_STARTPROC
45copy_from_user:
46 GET_THREAD_INFO(%rax) 49 GET_THREAD_INFO(%rax)
47 movq %rsi,%rcx 50 movq %rsi,%rcx
48 addq %rdx,%rcx 51 addq %rdx,%rcx
@@ -50,10 +53,13 @@ copy_from_user:
50 cmpq threadinfo_addr_limit(%rax),%rcx 53 cmpq threadinfo_addr_limit(%rax),%rcx
51 jae bad_from_user 54 jae bad_from_user
52 /* FALL THROUGH to copy_user_generic */ 55 /* FALL THROUGH to copy_user_generic */
56 CFI_ENDPROC
57ENDPROC(copy_from_user)
53 58
54 .section .fixup,"ax" 59 .section .fixup,"ax"
55 /* must zero dest */ 60 /* must zero dest */
56bad_from_user: 61bad_from_user:
62 CFI_STARTPROC
57 movl %edx,%ecx 63 movl %edx,%ecx
58 xorl %eax,%eax 64 xorl %eax,%eax
59 rep 65 rep
@@ -61,6 +67,8 @@ bad_from_user:
61bad_to_user: 67bad_to_user:
62 movl %edx,%eax 68 movl %edx,%eax
63 ret 69 ret
70 CFI_ENDPROC
71END(bad_from_user)
64 .previous 72 .previous
65 73
66 74
@@ -75,9 +83,8 @@ bad_to_user:
75 * Output: 83 * Output:
76 * eax uncopied bytes or 0 if successful. 84 * eax uncopied bytes or 0 if successful.
77 */ 85 */
78 .globl copy_user_generic 86ENTRY(copy_user_generic)
79 .p2align 4 87 CFI_STARTPROC
80copy_user_generic:
81 .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ 88 .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
82 .byte 0x66,0x90 89 .byte 0x66,0x90
831: 901:
@@ -95,6 +102,8 @@ copy_user_generic:
95 .previous 102 .previous
96.Lcug: 103.Lcug:
97 pushq %rbx 104 pushq %rbx
105 CFI_ADJUST_CFA_OFFSET 8
106 CFI_REL_OFFSET rbx, 0
98 xorl %eax,%eax /*zero for the exception handler */ 107 xorl %eax,%eax /*zero for the exception handler */
99 108
100#ifdef FIX_ALIGNMENT 109#ifdef FIX_ALIGNMENT
@@ -168,9 +177,13 @@ copy_user_generic:
168 decl %ecx 177 decl %ecx
169 jnz .Lloop_1 178 jnz .Lloop_1
170 179
180 CFI_REMEMBER_STATE
171.Lende: 181.Lende:
172 popq %rbx 182 popq %rbx
183 CFI_ADJUST_CFA_OFFSET -8
184 CFI_RESTORE rbx
173 ret 185 ret
186 CFI_RESTORE_STATE
174 187
175#ifdef FIX_ALIGNMENT 188#ifdef FIX_ALIGNMENT
176 /* align destination */ 189 /* align destination */
@@ -261,6 +274,9 @@ copy_user_generic:
261.Le_zero: 274.Le_zero:
262 movq %rdx,%rax 275 movq %rdx,%rax
263 jmp .Lende 276 jmp .Lende
277 CFI_ENDPROC
278ENDPROC(copy_user_generic)
279
264 280
265 /* Some CPUs run faster using the string copy instructions. 281 /* Some CPUs run faster using the string copy instructions.
266 This is also a lot simpler. Use them when possible. 282 This is also a lot simpler. Use them when possible.
@@ -282,6 +298,7 @@ copy_user_generic:
282 * this please consider this. 298 * this please consider this.
283 */ 299 */
284copy_user_generic_c: 300copy_user_generic_c:
301 CFI_STARTPROC
285 movl %edx,%ecx 302 movl %edx,%ecx
286 shrl $3,%ecx 303 shrl $3,%ecx
287 andl $7,%edx 304 andl $7,%edx
@@ -294,6 +311,8 @@ copy_user_generic_c:
294 ret 311 ret
2953: lea (%rdx,%rcx,8),%rax 3123: lea (%rdx,%rcx,8),%rax
296 ret 313 ret
314 CFI_ENDPROC
315END(copy_user_generic_c)
297 316
298 .section __ex_table,"a" 317 .section __ex_table,"a"
299 .quad 1b,3b 318 .quad 1b,3b
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
index 72fd55ee896e..f0dba36578ea 100644
--- a/arch/x86_64/lib/csum-copy.S
+++ b/arch/x86_64/lib/csum-copy.S
@@ -5,8 +5,9 @@
5 * License. See the file COPYING in the main directory of this archive 5 * License. See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all. 6 * for more details. No warranty for anything given at all.
7 */ 7 */
8 #include <linux/linkage.h> 8#include <linux/linkage.h>
9 #include <asm/errno.h> 9#include <asm/dwarf2.h>
10#include <asm/errno.h>
10 11
11/* 12/*
12 * Checksum copy with exception handling. 13 * Checksum copy with exception handling.
@@ -53,19 +54,24 @@
53 .endm 54 .endm
54 55
55 56
56 .globl csum_partial_copy_generic 57ENTRY(csum_partial_copy_generic)
57 .p2align 4 58 CFI_STARTPROC
58csum_partial_copy_generic:
59 cmpl $3*64,%edx 59 cmpl $3*64,%edx
60 jle .Lignore 60 jle .Lignore
61 61
62.Lignore: 62.Lignore:
63 subq $7*8,%rsp 63 subq $7*8,%rsp
64 CFI_ADJUST_CFA_OFFSET 7*8
64 movq %rbx,2*8(%rsp) 65 movq %rbx,2*8(%rsp)
66 CFI_REL_OFFSET rbx, 2*8
65 movq %r12,3*8(%rsp) 67 movq %r12,3*8(%rsp)
68 CFI_REL_OFFSET r12, 3*8
66 movq %r14,4*8(%rsp) 69 movq %r14,4*8(%rsp)
70 CFI_REL_OFFSET r14, 4*8
67 movq %r13,5*8(%rsp) 71 movq %r13,5*8(%rsp)
72 CFI_REL_OFFSET r13, 5*8
68 movq %rbp,6*8(%rsp) 73 movq %rbp,6*8(%rsp)
74 CFI_REL_OFFSET rbp, 6*8
69 75
70 movq %r8,(%rsp) 76 movq %r8,(%rsp)
71 movq %r9,1*8(%rsp) 77 movq %r9,1*8(%rsp)
@@ -208,14 +214,22 @@ csum_partial_copy_generic:
208 addl %ebx,%eax 214 addl %ebx,%eax
209 adcl %r9d,%eax /* carry */ 215 adcl %r9d,%eax /* carry */
210 216
217 CFI_REMEMBER_STATE
211.Lende: 218.Lende:
212 movq 2*8(%rsp),%rbx 219 movq 2*8(%rsp),%rbx
220 CFI_RESTORE rbx
213 movq 3*8(%rsp),%r12 221 movq 3*8(%rsp),%r12
222 CFI_RESTORE r12
214 movq 4*8(%rsp),%r14 223 movq 4*8(%rsp),%r14
224 CFI_RESTORE r14
215 movq 5*8(%rsp),%r13 225 movq 5*8(%rsp),%r13
226 CFI_RESTORE r13
216 movq 6*8(%rsp),%rbp 227 movq 6*8(%rsp),%rbp
228 CFI_RESTORE rbp
217 addq $7*8,%rsp 229 addq $7*8,%rsp
230 CFI_ADJUST_CFA_OFFSET -7*8
218 ret 231 ret
232 CFI_RESTORE_STATE
219 233
220 /* Exception handlers. Very simple, zeroing is done in the wrappers */ 234 /* Exception handlers. Very simple, zeroing is done in the wrappers */
221.Lbad_source: 235.Lbad_source:
@@ -231,3 +245,5 @@ csum_partial_copy_generic:
231 jz .Lende 245 jz .Lende
232 movl $-EFAULT,(%rax) 246 movl $-EFAULT,(%rax)
233 jmp .Lende 247 jmp .Lende
248 CFI_ENDPROC
249ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
index 3844d5e885a4..5448876261f8 100644
--- a/arch/x86_64/lib/getuser.S
+++ b/arch/x86_64/lib/getuser.S
@@ -27,25 +27,26 @@
27 */ 27 */
28 28
29#include <linux/linkage.h> 29#include <linux/linkage.h>
30#include <asm/dwarf2.h>
30#include <asm/page.h> 31#include <asm/page.h>
31#include <asm/errno.h> 32#include <asm/errno.h>
32#include <asm/asm-offsets.h> 33#include <asm/asm-offsets.h>
33#include <asm/thread_info.h> 34#include <asm/thread_info.h>
34 35
35 .text 36 .text
36 .p2align 4 37ENTRY(__get_user_1)
37.globl __get_user_1 38 CFI_STARTPROC
38__get_user_1:
39 GET_THREAD_INFO(%r8) 39 GET_THREAD_INFO(%r8)
40 cmpq threadinfo_addr_limit(%r8),%rcx 40 cmpq threadinfo_addr_limit(%r8),%rcx
41 jae bad_get_user 41 jae bad_get_user
421: movzb (%rcx),%edx 421: movzb (%rcx),%edx
43 xorl %eax,%eax 43 xorl %eax,%eax
44 ret 44 ret
45 CFI_ENDPROC
46ENDPROC(__get_user_1)
45 47
46 .p2align 4 48ENTRY(__get_user_2)
47.globl __get_user_2 49 CFI_STARTPROC
48__get_user_2:
49 GET_THREAD_INFO(%r8) 50 GET_THREAD_INFO(%r8)
50 addq $1,%rcx 51 addq $1,%rcx
51 jc 20f 52 jc 20f
@@ -57,10 +58,11 @@ __get_user_2:
57 ret 58 ret
5820: decq %rcx 5920: decq %rcx
59 jmp bad_get_user 60 jmp bad_get_user
61 CFI_ENDPROC
62ENDPROC(__get_user_2)
60 63
61 .p2align 4 64ENTRY(__get_user_4)
62.globl __get_user_4 65 CFI_STARTPROC
63__get_user_4:
64 GET_THREAD_INFO(%r8) 66 GET_THREAD_INFO(%r8)
65 addq $3,%rcx 67 addq $3,%rcx
66 jc 30f 68 jc 30f
@@ -72,10 +74,11 @@ __get_user_4:
72 ret 74 ret
7330: subq $3,%rcx 7530: subq $3,%rcx
74 jmp bad_get_user 76 jmp bad_get_user
77 CFI_ENDPROC
78ENDPROC(__get_user_4)
75 79
76 .p2align 4 80ENTRY(__get_user_8)
77.globl __get_user_8 81 CFI_STARTPROC
78__get_user_8:
79 GET_THREAD_INFO(%r8) 82 GET_THREAD_INFO(%r8)
80 addq $7,%rcx 83 addq $7,%rcx
81 jc 40f 84 jc 40f
@@ -87,11 +90,16 @@ __get_user_8:
87 ret 90 ret
8840: subq $7,%rcx 9140: subq $7,%rcx
89 jmp bad_get_user 92 jmp bad_get_user
93 CFI_ENDPROC
94ENDPROC(__get_user_8)
90 95
91bad_get_user: 96bad_get_user:
97 CFI_STARTPROC
92 xorl %edx,%edx 98 xorl %edx,%edx
93 movq $(-EFAULT),%rax 99 movq $(-EFAULT),%rax
94 ret 100 ret
101 CFI_ENDPROC
102END(bad_get_user)
95 103
96.section __ex_table,"a" 104.section __ex_table,"a"
97 .quad 1b,bad_get_user 105 .quad 1b,bad_get_user
diff --git a/arch/x86_64/lib/iomap_copy.S b/arch/x86_64/lib/iomap_copy.S
index 8bbade5fea05..05a95e713da8 100644
--- a/arch/x86_64/lib/iomap_copy.S
+++ b/arch/x86_64/lib/iomap_copy.S
@@ -15,12 +15,16 @@
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
16 */ 16 */
17 17
18#include <linux/linkage.h>
19#include <asm/dwarf2.h>
20
18/* 21/*
19 * override generic version in lib/iomap_copy.c 22 * override generic version in lib/iomap_copy.c
20 */ 23 */
21 .globl __iowrite32_copy 24ENTRY(__iowrite32_copy)
22 .p2align 4 25 CFI_STARTPROC
23__iowrite32_copy:
24 movl %edx,%ecx 26 movl %edx,%ecx
25 rep movsd 27 rep movsd
26 ret 28 ret
29 CFI_ENDPROC
30ENDPROC(__iowrite32_copy)
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
index 5554948b5554..967b22fa7d07 100644
--- a/arch/x86_64/lib/memcpy.S
+++ b/arch/x86_64/lib/memcpy.S
@@ -1,6 +1,10 @@
1/* Copyright 2002 Andi Kleen */ 1/* Copyright 2002 Andi Kleen */
2 2
3 #include <asm/cpufeature.h> 3#include <linux/config.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6#include <asm/cpufeature.h>
7
4/* 8/*
5 * memcpy - Copy a memory block. 9 * memcpy - Copy a memory block.
6 * 10 *
@@ -13,12 +17,26 @@
13 * rax original destination 17 * rax original destination
14 */ 18 */
15 19
16 .globl __memcpy 20 ALIGN
17 .globl memcpy 21memcpy_c:
18 .p2align 4 22 CFI_STARTPROC
19__memcpy: 23 movq %rdi,%rax
20memcpy: 24 movl %edx,%ecx
25 shrl $3,%ecx
26 andl $7,%edx
27 rep movsq
28 movl %edx,%ecx
29 rep movsb
30 ret
31 CFI_ENDPROC
32ENDPROC(memcpy_c)
33
34ENTRY(__memcpy)
35ENTRY(memcpy)
36 CFI_STARTPROC
21 pushq %rbx 37 pushq %rbx
38 CFI_ADJUST_CFA_OFFSET 8
39 CFI_REL_OFFSET rbx, 0
22 movq %rdi,%rax 40 movq %rdi,%rax
23 41
24 movl %edx,%ecx 42 movl %edx,%ecx
@@ -86,36 +104,27 @@ memcpy:
86 104
87.Lende: 105.Lende:
88 popq %rbx 106 popq %rbx
107 CFI_ADJUST_CFA_OFFSET -8
108 CFI_RESTORE rbx
89 ret 109 ret
90.Lfinal: 110.Lfinal:
111 CFI_ENDPROC
112ENDPROC(memcpy)
113ENDPROC(__memcpy)
91 114
92 /* Some CPUs run faster using the string copy instructions. 115 /* Some CPUs run faster using the string copy instructions.
93 It is also a lot simpler. Use this when possible */ 116 It is also a lot simpler. Use this when possible */
94 117
118 .section .altinstr_replacement,"ax"
1191: .byte 0xeb /* jmp <disp8> */
120 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1212:
122 .previous
95 .section .altinstructions,"a" 123 .section .altinstructions,"a"
96 .align 8 124 .align 8
97 .quad memcpy 125 .quad memcpy
98 .quad memcpy_c 126 .quad 1b
99 .byte X86_FEATURE_REP_GOOD 127 .byte X86_FEATURE_REP_GOOD
100 .byte .Lfinal-memcpy 128 .byte .Lfinal - memcpy
101 .byte memcpy_c_end-memcpy_c 129 .byte 2b - 1b
102 .previous
103
104 .section .altinstr_replacement,"ax"
105 /* rdi destination
106 * rsi source
107 * rdx count
108 */
109memcpy_c:
110 movq %rdi,%rax
111 movl %edx,%ecx
112 shrl $3,%ecx
113 andl $7,%edx
114 rep
115 movsq
116 movl %edx,%ecx
117 rep
118 movsb
119 ret
120memcpy_c_end:
121 .previous 130 .previous
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
index ad397f2c7de8..09ed1f6b0eaa 100644
--- a/arch/x86_64/lib/memset.S
+++ b/arch/x86_64/lib/memset.S
@@ -1,4 +1,9 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs */ 1/* Copyright 2002 Andi Kleen, SuSE Labs */
2
3#include <linux/config.h>
4#include <linux/linkage.h>
5#include <asm/dwarf2.h>
6
2/* 7/*
3 * ISO C memset - set a memory block to a byte value. 8 * ISO C memset - set a memory block to a byte value.
4 * 9 *
@@ -8,11 +13,29 @@
8 * 13 *
9 * rax original destination 14 * rax original destination
10 */ 15 */
11 .globl __memset 16 ALIGN
12 .globl memset 17memset_c:
13 .p2align 4 18 CFI_STARTPROC
14memset: 19 movq %rdi,%r9
15__memset: 20 movl %edx,%r8d
21 andl $7,%r8d
22 movl %edx,%ecx
23 shrl $3,%ecx
24 /* expand byte value */
25 movzbl %sil,%esi
26 movabs $0x0101010101010101,%rax
27 mulq %rsi /* with rax, clobbers rdx */
28 rep stosq
29 movl %r8d,%ecx
30 rep stosb
31 movq %r9,%rax
32 ret
33 CFI_ENDPROC
34ENDPROC(memset_c)
35
36ENTRY(memset)
37ENTRY(__memset)
38 CFI_STARTPROC
16 movq %rdi,%r10 39 movq %rdi,%r10
17 movq %rdx,%r11 40 movq %rdx,%r11
18 41
@@ -25,6 +48,7 @@ __memset:
25 movl %edi,%r9d 48 movl %edi,%r9d
26 andl $7,%r9d 49 andl $7,%r9d
27 jnz .Lbad_alignment 50 jnz .Lbad_alignment
51 CFI_REMEMBER_STATE
28.Lafter_bad_alignment: 52.Lafter_bad_alignment:
29 53
30 movl %r11d,%ecx 54 movl %r11d,%ecx
@@ -75,6 +99,7 @@ __memset:
75 movq %r10,%rax 99 movq %r10,%rax
76 ret 100 ret
77 101
102 CFI_RESTORE_STATE
78.Lbad_alignment: 103.Lbad_alignment:
79 cmpq $7,%r11 104 cmpq $7,%r11
80 jbe .Lhandle_7 105 jbe .Lhandle_7
@@ -84,42 +109,26 @@ __memset:
84 addq %r8,%rdi 109 addq %r8,%rdi
85 subq %r8,%r11 110 subq %r8,%r11
86 jmp .Lafter_bad_alignment 111 jmp .Lafter_bad_alignment
112.Lfinal:
113 CFI_ENDPROC
114ENDPROC(memset)
115ENDPROC(__memset)
87 116
88 /* Some CPUs run faster using the string instructions. 117 /* Some CPUs run faster using the string instructions.
89 It is also a lot simpler. Use this when possible */ 118 It is also a lot simpler. Use this when possible */
90 119
91#include <asm/cpufeature.h> 120#include <asm/cpufeature.h>
92 121
122 .section .altinstr_replacement,"ax"
1231: .byte 0xeb /* jmp <disp8> */
124 .byte (memset_c - memset) - (2f - 1b) /* offset */
1252:
126 .previous
93 .section .altinstructions,"a" 127 .section .altinstructions,"a"
94 .align 8 128 .align 8
95 .quad memset 129 .quad memset
96 .quad memset_c 130 .quad 1b
97 .byte X86_FEATURE_REP_GOOD 131 .byte X86_FEATURE_REP_GOOD
98 .byte memset_c_end-memset_c 132 .byte .Lfinal - memset
99 .byte memset_c_end-memset_c 133 .byte 2b - 1b
100 .previous
101
102 .section .altinstr_replacement,"ax"
103 /* rdi destination
104 * rsi value
105 * rdx count
106 */
107memset_c:
108 movq %rdi,%r9
109 movl %edx,%r8d
110 andl $7,%r8d
111 movl %edx,%ecx
112 shrl $3,%ecx
113 /* expand byte value */
114 movzbl %sil,%esi
115 movabs $0x0101010101010101,%rax
116 mulq %rsi /* with rax, clobbers rdx */
117 rep
118 stosq
119 movl %r8d,%ecx
120 rep
121 stosb
122 movq %r9,%rax
123 ret
124memset_c_end:
125 .previous 134 .previous
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
index 7f5593974e2d..4989f5a8fa9b 100644
--- a/arch/x86_64/lib/putuser.S
+++ b/arch/x86_64/lib/putuser.S
@@ -25,25 +25,26 @@
25 */ 25 */
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/dwarf2.h>
28#include <asm/page.h> 29#include <asm/page.h>
29#include <asm/errno.h> 30#include <asm/errno.h>
30#include <asm/asm-offsets.h> 31#include <asm/asm-offsets.h>
31#include <asm/thread_info.h> 32#include <asm/thread_info.h>
32 33
33 .text 34 .text
34 .p2align 4 35ENTRY(__put_user_1)
35.globl __put_user_1 36 CFI_STARTPROC
36__put_user_1:
37 GET_THREAD_INFO(%r8) 37 GET_THREAD_INFO(%r8)
38 cmpq threadinfo_addr_limit(%r8),%rcx 38 cmpq threadinfo_addr_limit(%r8),%rcx
39 jae bad_put_user 39 jae bad_put_user
401: movb %dl,(%rcx) 401: movb %dl,(%rcx)
41 xorl %eax,%eax 41 xorl %eax,%eax
42 ret 42 ret
43 CFI_ENDPROC
44ENDPROC(__put_user_1)
43 45
44 .p2align 4 46ENTRY(__put_user_2)
45.globl __put_user_2 47 CFI_STARTPROC
46__put_user_2:
47 GET_THREAD_INFO(%r8) 48 GET_THREAD_INFO(%r8)
48 addq $1,%rcx 49 addq $1,%rcx
49 jc 20f 50 jc 20f
@@ -55,10 +56,11 @@ __put_user_2:
55 ret 56 ret
5620: decq %rcx 5720: decq %rcx
57 jmp bad_put_user 58 jmp bad_put_user
59 CFI_ENDPROC
60ENDPROC(__put_user_2)
58 61
59 .p2align 4 62ENTRY(__put_user_4)
60.globl __put_user_4 63 CFI_STARTPROC
61__put_user_4:
62 GET_THREAD_INFO(%r8) 64 GET_THREAD_INFO(%r8)
63 addq $3,%rcx 65 addq $3,%rcx
64 jc 30f 66 jc 30f
@@ -70,10 +72,11 @@ __put_user_4:
70 ret 72 ret
7130: subq $3,%rcx 7330: subq $3,%rcx
72 jmp bad_put_user 74 jmp bad_put_user
75 CFI_ENDPROC
76ENDPROC(__put_user_4)
73 77
74 .p2align 4 78ENTRY(__put_user_8)
75.globl __put_user_8 79 CFI_STARTPROC
76__put_user_8:
77 GET_THREAD_INFO(%r8) 80 GET_THREAD_INFO(%r8)
78 addq $7,%rcx 81 addq $7,%rcx
79 jc 40f 82 jc 40f
@@ -85,10 +88,15 @@ __put_user_8:
85 ret 88 ret
8640: subq $7,%rcx 8940: subq $7,%rcx
87 jmp bad_put_user 90 jmp bad_put_user
91 CFI_ENDPROC
92ENDPROC(__put_user_8)
88 93
89bad_put_user: 94bad_put_user:
95 CFI_STARTPROC
90 movq $(-EFAULT),%rax 96 movq $(-EFAULT),%rax
91 ret 97 ret
98 CFI_ENDPROC
99END(bad_put_user)
92 100
93.section __ex_table,"a" 101.section __ex_table,"a"
94 .quad 1b,bad_put_user 102 .quad 1b,bad_put_user