aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/lib/Makefile2
-rw-r--r--arch/x86/lib/Makefile_6413
-rw-r--r--arch/x86/lib/bitops_64.c175
-rw-r--r--arch/x86/lib/bitstr_64.c28
-rw-r--r--arch/x86/lib/clear_page_64.S59
-rw-r--r--arch/x86/lib/copy_page_64.S119
-rw-r--r--arch/x86/lib/copy_user_64.S354
-rw-r--r--arch/x86/lib/copy_user_nocache_64.S217
-rw-r--r--arch/x86/lib/csum-copy_64.S249
-rw-r--r--arch/x86/lib/csum-partial_64.c150
-rw-r--r--arch/x86/lib/csum-wrappers_64.c135
-rw-r--r--arch/x86/lib/delay_64.c57
-rw-r--r--arch/x86/lib/getuser_64.S109
-rw-r--r--arch/x86/lib/io_64.c23
-rw-r--r--arch/x86/lib/iomap_copy_64.S30
-rw-r--r--arch/x86/lib/memcpy_64.S131
-rw-r--r--arch/x86/lib/memmove_64.c21
-rw-r--r--arch/x86/lib/memset_64.S133
-rw-r--r--arch/x86/lib/putuser_64.S106
-rw-r--r--arch/x86/lib/rwlock_64.S38
-rw-r--r--arch/x86/lib/thunk_64.S67
-rw-r--r--arch/x86/lib/usercopy_64.c166
22 files changed, 2381 insertions, 1 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 2d7d724a2a6a..329da276c6f1 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -1,5 +1,5 @@
1ifeq ($(CONFIG_X86_32),y) 1ifeq ($(CONFIG_X86_32),y)
2include ${srctree}/arch/x86/lib/Makefile_32 2include ${srctree}/arch/x86/lib/Makefile_32
3else 3else
4include ${srctree}/arch/x86_64/lib/Makefile_64 4include ${srctree}/arch/x86/lib/Makefile_64
5endif 5endif
diff --git a/arch/x86/lib/Makefile_64 b/arch/x86/lib/Makefile_64
new file mode 100644
index 000000000000..bbabad3c9335
--- /dev/null
+++ b/arch/x86/lib/Makefile_64
@@ -0,0 +1,13 @@
1#
2# Makefile for x86_64-specific library files.
3#
4
5CFLAGS_csum-partial_64.o := -funroll-loops
6
7obj-y := io_64.o iomap_copy_64.o
8obj-$(CONFIG_SMP) += msr-on-cpu.o
9
10lib-y := csum-partial_64.o csum-copy_64.o csum-wrappers_64.o delay_64.o \
11 usercopy_64.o getuser_64.o putuser_64.o \
12 thunk_64.o clear_page_64.o copy_page_64.o bitstr_64.o bitops_64.o
13lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
new file mode 100644
index 000000000000..95b6d9639fba
--- /dev/null
+++ b/arch/x86/lib/bitops_64.c
@@ -0,0 +1,175 @@
1#include <linux/bitops.h>
2
3#undef find_first_zero_bit
4#undef find_next_zero_bit
5#undef find_first_bit
6#undef find_next_bit
7
/*
 * Scan 64-bit words starting at addr for the first zero bit.
 * size is a bit count; it is rounded up to whole 64-bit words before
 * scanning.  Returns the bit index relative to addr.  If every scanned
 * word is all-ones the result is the number of bits scanned (words * 64);
 * if size rounds to zero words the result is 0.
 */
8static inline long
9__find_first_zero_bit(const unsigned long * addr, unsigned long size)
10{
11 long d0, d1, d2;
12 long res;
13
14 /*
15 * We must test the size in words, not in bits, because
16 * otherwise incoming sizes in the range -63..-1 will not run
17 * any scasq instructions, and then the flags used by the je
18 * instruction will have whatever random value was in place
19 * before. Nobody should call us like that, but
20 * find_next_zero_bit() does when offset and size are at the
21 * same word and it fails to find a zero itself.
22 */
23 size += 63;
24 size >>= 6;
25 if (!size)
26 return 0;
/*
 * rax = -1, rcx = word count, rdi = addr, rdx = 0 on entry.
 * repe scasq skips words equal to rax (all ones).  On a mismatch,
 * xor with the word just passed leaves a 1 for every zero bit and bsfq
 * picks the lowest one into rdx.  At label 1 the byte distance from
 * addr is converted to bits and added to rdx.
 */
27 asm volatile(
28 " repe; scasq\n"
29 " je 1f\n"
30 " xorq -8(%%rdi),%%rax\n"
31 " subq $8,%%rdi\n"
32 " bsfq %%rax,%%rdx\n"
33 "1: subq %[addr],%%rdi\n"
34 " shlq $3,%%rdi\n"
35 " addq %%rdi,%%rdx"
36 :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
37 :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
38 [addr] "S" (addr) : "memory");
39 /*
40 * Any register would do for [addr] above, but GCC tends to
41 * prefer rbx over rsi, even though rsi is readily available
42 * and doesn't have to be saved.
43 */
44 return res;
45}
46
/**
 * find_first_zero_bit - locate the lowest-numbered clear bit in a bitmap
 * @addr: start of the bitmap to scan
 * @size: number of bits to examine
 *
 * The return value is a bit index counted from @addr, not the index of
 * the byte that holds the bit.
 */
long find_first_zero_bit(const unsigned long * addr, unsigned long size)
{
	long first_clear = __find_first_zero_bit(addr, size);

	return first_clear;
}
59
60/**
61 * find_next_zero_bit - find the first zero bit in a memory region
62 * @addr: The address to base the search on
63 * @offset: The bitnumber to start searching at
64 * @size: The maximum size to search
65 */
66long find_next_zero_bit (const unsigned long * addr, long size, long offset)
67{
68 const unsigned long * p = addr + (offset >> 6);
69 unsigned long set = 0;
70 unsigned long res, bit = offset&63;
71
72 if (bit) {
73 /*
74 * Look for zero in first word
75 */
76 asm("bsfq %1,%0\n\t"
77 "cmoveq %2,%0"
78 : "=r" (set)
79 : "r" (~(*p >> bit)), "r"(64L));
80 if (set < (64 - bit))
81 return set + offset;
82 set = 64 - bit;
83 p++;
84 }
85 /*
86 * No zero yet, search remaining full words for a zero
87 */
88 res = __find_first_zero_bit (p, size - 64 * (p - addr));
89
90 return (offset + set + res);
91}
92
/*
 * Scan 64-bit words starting at addr for the first set bit.
 * size is a bit count; it is rounded up to whole 64-bit words before
 * scanning.  Returns the bit index relative to addr.  If every scanned
 * word is zero the result is the number of bits scanned (words * 64);
 * if size rounds to zero words the result is 0.
 */
93static inline long
94__find_first_bit(const unsigned long * addr, unsigned long size)
95{
96 long d0, d1;
97 long res;
98
99 /*
100 * We must test the size in words, not in bits, because
101 * otherwise incoming sizes in the range -63..-1 will not run
102 * any scasq instructions, and then the flags used by the jz
103 * instruction will have whatever random value was in place
104 * before. Nobody should call us like that, but
105 * find_next_bit() does when offset and size are at the same
106 * word and it fails to find a one itself.
107 */
108 size += 63;
109 size >>= 6;
110 if (!size)
111 return 0;
/*
 * rax = 0, rcx = word count, rdi = addr on entry.  repe scasq skips
 * words equal to rax (zero).  On a mismatch rdi is stepped back to the
 * non-zero word and bsfq loads the index of its lowest set bit into rax.
 * At label 1 the byte distance from addr is converted to bits and added.
 */
112 asm volatile(
113 " repe; scasq\n"
114 " jz 1f\n"
115 " subq $8,%%rdi\n"
116 " bsfq (%%rdi),%%rax\n"
117 "1: subq %[addr],%%rdi\n"
118 " shlq $3,%%rdi\n"
119 " addq %%rdi,%%rax"
120 :"=a" (res), "=&c" (d0), "=&D" (d1)
121 :"0" (0ULL), "1" (size), "2" (addr),
122 [addr] "r" (addr) : "memory");
123 return res;
124}
125
/**
 * find_first_bit - locate the lowest-numbered set bit in a bitmap
 * @addr: start of the bitmap to scan
 * @size: number of bits to examine
 *
 * The return value is a bit index counted from @addr, not the index of
 * the byte that holds the bit.
 */
long find_first_bit(const unsigned long * addr, unsigned long size)
{
	long first_set = __find_first_bit(addr, size);

	return first_set;
}
138
139/**
140 * find_next_bit - find the first set bit in a memory region
141 * @addr: The address to base the search on
142 * @offset: The bitnumber to start searching at
143 * @size: The maximum size to search
144 */
145long find_next_bit(const unsigned long * addr, long size, long offset)
146{
147 const unsigned long * p = addr + (offset >> 6);
148 unsigned long set = 0, bit = offset & 63, res;
149
150 if (bit) {
151 /*
152 * Look for nonzero in the first 64 bits:
153 */
154 asm("bsfq %1,%0\n\t"
155 "cmoveq %2,%0\n\t"
156 : "=r" (set)
157 : "r" (*p >> bit), "r" (64L));
158 if (set < (64 - bit))
159 return set + offset;
160 set = 64 - bit;
161 p++;
162 }
163 /*
164 * No set bit yet, search remaining full words for a bit
165 */
166 res = __find_first_bit (p, size - 64 * (p - addr));
167 return (offset + set + res);
168}
169
170#include <linux/module.h>
171
172EXPORT_SYMBOL(find_next_bit);
173EXPORT_SYMBOL(find_first_bit);
174EXPORT_SYMBOL(find_first_zero_bit);
175EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/x86/lib/bitstr_64.c b/arch/x86/lib/bitstr_64.c
new file mode 100644
index 000000000000..24676609a6ac
--- /dev/null
+++ b/arch/x86/lib/bitstr_64.c
@@ -0,0 +1,28 @@
1#include <linux/module.h>
2#include <linux/bitops.h>
3
/*
 * Find string of zero bits in a bitmap.
 *
 * Searches @bitmap (@nbits bits long) from bit @start for @len
 * consecutive zero bits.  Returns the index of the first bit of the run,
 * or -1 (as unsigned long) if no run fits inside the bitmap.
 */
unsigned long
find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len)
{
	unsigned long n, end, i;

 again:
	n = find_next_zero_bit(bitmap, nbits, start);
	if (n == -1)
		return -1;

	/* could test bitsliced, but it's hardly worth it */
	end = n+len;
	/*
	 * The run occupies bits n .. end-1, so end == nbits is still
	 * inside the bitmap; only end > nbits is out of range.
	 */
	if (end > nbits)
		return -1;
	for (i = n+1; i < end; i++) {
		if (test_bit(i, bitmap)) {
			/* run interrupted: restart just past the set bit */
			start = i+1;
			goto again;
		}
	}
	return n;
}
27
28EXPORT_SYMBOL(find_next_zero_string);
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
new file mode 100644
index 000000000000..9a10a78bb4a4
--- /dev/null
+++ b/arch/x86/lib/clear_page_64.S
@@ -0,0 +1,59 @@
1#include <linux/linkage.h>
2#include <asm/dwarf2.h>
3
/*
 * Zero a page using the string store instruction.
 * rdi	page
 *
 * Stores 4096/8 quadwords of zero starting at rdi.
 */
	ALIGN
clear_page_c:
	CFI_STARTPROC
	movl $4096/8,%ecx	/* quadword count for rep stosq */
	xorl %eax,%eax		/* value to store */
	rep stosq
	ret
	CFI_ENDPROC
	/* close the symbol opened above, not clear_page */
ENDPROC(clear_page_c)
17
/*
 * clear_page - zero a 4096-byte page with an unrolled store loop.
 * rdi	page
 *
 * Each iteration stores eight zero quadwords (64 bytes); the loop runs
 * 4096/64 times.
 */
18ENTRY(clear_page)
19 CFI_STARTPROC
20 xorl %eax,%eax
21 movl $4096/64,%ecx
22 .p2align 4
23.Lloop:
/* decl sets ZF for the jnz at the bottom; the movq/leaq in between
   do not modify the flags */
24 decl %ecx
25#define PUT(x) movq %rax,x*8(%rdi)
26 movq %rax,(%rdi)
27 PUT(1)
28 PUT(2)
29 PUT(3)
30 PUT(4)
31 PUT(5)
32 PUT(6)
33 PUT(7)
34 leaq 64(%rdi),%rdi
35 jnz .Lloop
36 nop
37 ret
38 CFI_ENDPROC
/* end marker used for the length byte in the .altinstructions record below */
39.Lclear_page_end:
40ENDPROC(clear_page)
41
42 /* Some CPUs run faster using the string instructions.
43 It is also a lot simpler. Use this when possible */
44
45#include <asm/cpufeature.h>
46
/* Replacement code: a 2-byte short jump.  The displacement is computed
   as if the jump sat at the start of clear_page: target minus start
   minus the jump's own length (2f - 1b). */
47 .section .altinstr_replacement,"ax"
481: .byte 0xeb /* jmp <disp8> */
49 .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
502:
51 .previous
/* Patch record.  NOTE(review): the record layout is defined outside
   this file; the fields appear to be original address, replacement
   address, feature bit, original length, replacement length -- confirm
   against the alternatives header. */
52 .section .altinstructions,"a"
53 .align 8
54 .quad clear_page
55 .quad 1b
56 .byte X86_FEATURE_REP_GOOD
57 .byte .Lclear_page_end - clear_page
58 .byte 2b - 1b
59 .previous
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
new file mode 100644
index 000000000000..727a5d46d2fc
--- /dev/null
+++ b/arch/x86/lib/copy_page_64.S
@@ -0,0 +1,119 @@
1/* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
2
3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5
/*
 * copy_page_c - copy a 4096-byte page with rep movsq.
 * Copies 4096/8 quadwords from (%rsi) to (%rdi).
 */
6 ALIGN
7copy_page_c:
8 CFI_STARTPROC
9 movl $4096/8,%ecx
10 rep movsq
11 ret
12 CFI_ENDPROC
13ENDPROC(copy_page_c)
14
15/* Don't use streaming store because it's better when the target
16 ends up in cache. */
17
18/* Could vary the prefetch distance based on SMP/UP */
19
/*
 * copy_page - copy a 4096-byte page from (%rsi) to (%rdi) in 64-byte
 * blocks, eight quadword loads followed by eight quadword stores.
 *
 * The first loop handles (4096/64)-5 blocks and prefetches five blocks
 * ahead of the source pointer; the second loop copies the last five
 * blocks without prefetching (presumably so the prefetch never reaches
 * past the source page -- confirm).
 */
20ENTRY(copy_page)
21 CFI_STARTPROC
/* save the registers stored below in a 3-slot stack frame.
   NOTE(review): r13 is saved and restored but not otherwise used in
   this routine. */
22 subq $3*8,%rsp
23 CFI_ADJUST_CFA_OFFSET 3*8
24 movq %rbx,(%rsp)
25 CFI_REL_OFFSET rbx, 0
26 movq %r12,1*8(%rsp)
27 CFI_REL_OFFSET r12, 1*8
28 movq %r13,2*8(%rsp)
29 CFI_REL_OFFSET r13, 2*8
30
31 movl $(4096/64)-5,%ecx
32 .p2align 4
33.Loop64:
/* sets ZF for the jnz at the bottom; the mov/lea/prefetch instructions
   in between do not modify the flags */
34 dec %rcx
35
36 movq (%rsi), %rax
37 movq 8 (%rsi), %rbx
38 movq 16 (%rsi), %rdx
39 movq 24 (%rsi), %r8
40 movq 32 (%rsi), %r9
41 movq 40 (%rsi), %r10
42 movq 48 (%rsi), %r11
43 movq 56 (%rsi), %r12
44
45 prefetcht0 5*64(%rsi)
46
47 movq %rax, (%rdi)
48 movq %rbx, 8 (%rdi)
49 movq %rdx, 16 (%rdi)
50 movq %r8, 24 (%rdi)
51 movq %r9, 32 (%rdi)
52 movq %r10, 40 (%rdi)
53 movq %r11, 48 (%rdi)
54 movq %r12, 56 (%rdi)
55
56 leaq 64 (%rsi), %rsi
57 leaq 64 (%rdi), %rdi
58
59 jnz .Loop64
60
/* remaining five blocks, same copy without the prefetch */
61 movl $5,%ecx
62 .p2align 4
63.Loop2:
64 decl %ecx
65
66 movq (%rsi), %rax
67 movq 8 (%rsi), %rbx
68 movq 16 (%rsi), %rdx
69 movq 24 (%rsi), %r8
70 movq 32 (%rsi), %r9
71 movq 40 (%rsi), %r10
72 movq 48 (%rsi), %r11
73 movq 56 (%rsi), %r12
74
75 movq %rax, (%rdi)
76 movq %rbx, 8 (%rdi)
77 movq %rdx, 16 (%rdi)
78 movq %r8, 24 (%rdi)
79 movq %r9, 32 (%rdi)
80 movq %r10, 40 (%rdi)
81 movq %r11, 48 (%rdi)
82 movq %r12, 56 (%rdi)
83
84 leaq 64(%rdi),%rdi
85 leaq 64(%rsi),%rsi
86
87 jnz .Loop2
88
/* restore saved registers and drop the frame */
89 movq (%rsp),%rbx
90 CFI_RESTORE rbx
91 movq 1*8(%rsp),%r12
92 CFI_RESTORE r12
93 movq 2*8(%rsp),%r13
94 CFI_RESTORE r13
95 addq $3*8,%rsp
96 CFI_ADJUST_CFA_OFFSET -3*8
97 ret
/* end marker used for the length byte in the .altinstructions record below */
98.Lcopy_page_end:
99 CFI_ENDPROC
100ENDPROC(copy_page)
101
102 /* Some CPUs run faster using the string copy instructions.
103 It is also a lot simpler. Use this when possible */
104
105#include <asm/cpufeature.h>
106
/* Replacement code: a 2-byte short jump.  The displacement is computed
   as if the jump sat at the start of copy_page: target minus start
   minus the jump's own length (2f - 1b). */
107 .section .altinstr_replacement,"ax"
1081: .byte 0xeb /* jmp <disp8> */
109 .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
1102:
111 .previous
/* Patch record.  NOTE(review): the record layout is defined outside
   this file; the fields appear to be original address, replacement
   address, feature bit, original length, replacement length -- confirm
   against the alternatives header. */
112 .section .altinstructions,"a"
113 .align 8
114 .quad copy_page
115 .quad 1b
116 .byte X86_FEATURE_REP_GOOD
117 .byte .Lcopy_page_end - copy_page
118 .byte 2b - 1b
119 .previous
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
new file mode 100644
index 000000000000..70bebd310408
--- /dev/null
+++ b/arch/x86/lib/copy_user_64.S
@@ -0,0 +1,354 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2.
3 *
4 * Functions to copy from and to user space.
5 */
6
7#include <linux/linkage.h>
8#include <asm/dwarf2.h>
9
10#define FIX_ALIGNMENT 1
11
12#include <asm/current.h>
13#include <asm/asm-offsets.h>
14#include <asm/thread_info.h>
15#include <asm/cpufeature.h>
16
/*
 * ALTERNATIVE_JUMP feature,orig,alt
 *
 * Emits a 5-byte near jump to \orig and records a 5-byte replacement
 * jump to \alt that is meant to be patched in when \feature is set.
 * Both displacements are taken relative to label 1 (the end of the
 * emitted jump), because the replacement executes at the original
 * location once patched in.
 */
17 .macro ALTERNATIVE_JUMP feature,orig,alt
180:
19 .byte 0xe9 /* 32bit jump */
20 .long \orig-1f /* by default jump to orig */
211:
22 .section .altinstr_replacement,"ax"
232: .byte 0xe9 /* near jump with 32bit immediate */
24 .long \alt-1b /* offset */ /* or alternatively to alt */
25 .previous
/* patch record: original address, replacement address, feature, and two
   length bytes (5 each, the size of the jumps above) */
26 .section .altinstructions,"a"
27 .align 8
28 .quad 0b
29 .quad 2b
30 .byte \feature /* when feature is set */
31 .byte 5
32 .byte 5
33 .previous
34 .endm
35
/*
 * Standard copy_to_user with segment limit checking.
 *
 * Input:
 * rdi destination (user)
 * rsi source
 * rdx count
 *
 * Rejects the copy (jump to bad_to_user) when rdi+rdx wraps or reaches
 * the thread's address limit, otherwise tail-jumps to the generic copy
 * routine selected by ALTERNATIVE_JUMP.
 */
ENTRY(copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx		/* rcx = end of destination range */
	jc  bad_to_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae bad_to_user
	/*
	 * The copy routines take the "zero destination on error" flag in
	 * ecx, which at this point still holds rdi+rdx.  Clear ecx (not
	 * eax) so that a fault does not zero the user destination.
	 */
	xorl %ecx,%ecx	/* clear zero flag */
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
	CFI_ENDPROC
ENDPROC(copy_to_user)
48
/*
 * copy_user_generic - copy without an address-limit check.
 * Sets the zero flag (ecx = 1: zero the destination on error) and
 * tail-jumps to the copy routine selected by ALTERNATIVE_JUMP.
 * rdi/rsi/rdx are passed through unchanged.
 */
49ENTRY(copy_user_generic)
50 CFI_STARTPROC
51 movl $1,%ecx /* set zero flag */
52 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
53 CFI_ENDPROC
54
/*
 * __copy_from_user_inatomic - copy without an address-limit check and
 * without zeroing on error (ecx = 0), then tail-jump to the copy
 * routine selected by ALTERNATIVE_JUMP.
 * rdi/rsi/rdx are passed through unchanged.
 */
55ENTRY(__copy_from_user_inatomic)
56 CFI_STARTPROC
57 xorl %ecx,%ecx /* clear zero flag */
58 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
59 CFI_ENDPROC
60
61/* Standard copy_from_user with segment limit checking */
/*
 * rdi destination, rsi source (user), rdx count.
 * Jumps to bad_from_user when rsi+rdx wraps or reaches the thread's
 * address limit; otherwise sets the zero flag (ecx = 1) and tail-jumps
 * to the copy routine selected by ALTERNATIVE_JUMP.
 */
62ENTRY(copy_from_user)
63 CFI_STARTPROC
64 GET_THREAD_INFO(%rax)
65 movq %rsi,%rcx
66 addq %rdx,%rcx
67 jc bad_from_user
68 cmpq threadinfo_addr_limit(%rax),%rcx
69 jae bad_from_user
70 movl $1,%ecx /* set zero flag */
71 ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
72 CFI_ENDPROC
73ENDPROC(copy_from_user)
74
/*
 * Failure paths for the range checks in copy_from_user/copy_to_user.
 * bad_from_user zeroes edx bytes at rdi and falls through to
 * bad_to_user, which returns the count (edx) in eax as the number of
 * uncopied bytes.
 */
75 .section .fixup,"ax"
76 /* must zero dest */
77bad_from_user:
78 CFI_STARTPROC
79 movl %edx,%ecx
80 xorl %eax,%eax
81 rep
82 stosb
83bad_to_user:
84 movl %edx,%eax
85 ret
86 CFI_ENDPROC
87END(bad_from_user)
88 .previous
89
90
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if true zero destination on error
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Structure: optional byte loop to align the destination to 8 bytes,
 * a 64-byte unrolled main loop, an 8-byte tail loop and a final byte
 * loop.  Every user access carries a label with an __ex_table entry
 * pointing at a fixup that computes the remaining length.
 */
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx		/* zero flag, read back as (%rsp) in .Lzero_rest */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0
	xorl %eax,%eax		/*zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx		/* rcx: saved length */

	movl $64,%ebx
	shrq $6,%rdx		/* rdx: number of 64-byte blocks ... */
	decq %rdx		/* ... minus one; negative means none */
	js   .Lhandle_tail

	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)

	decq %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns  .Lloop

	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx		/* edx: length, kept for the byte loop */
	andl $63,%ecx
	shrl $3,%ecx		/* ecx: quadwords left in the tail */
	jz   .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx		/* ecx: bytes left */
	jz   .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d		/* r9: bytes needed to reach 8-byte alignment */
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz   .Lhandle_7		/* count <= alignment bytes: plain byte copy */
	js   .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx
	jmp .Lafter_bad_alignment
#endif

	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Le_align
	.quad .Ld11,.Le_align
#endif
	.quad .Le5,.Le_zero
	.previous

	/* compute 64-offset for main loop. 8 bytes accuracy with error on the
	   pessimistic side. this is gross. it would be better to fix the
	   interface. */
	/* eax: zero, ebx: 64 */
.Ls1e: 	addl $8,%eax
.Ls2e: 	addl $8,%eax
.Ls3e: 	addl $8,%eax
.Ls4e: 	addl $8,%eax
.Ls5e: 	addl $8,%eax
.Ls6e: 	addl $8,%eax
.Ls7e: 	addl $8,%eax
.Ls8e: 	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi  /* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
	jmp .Lzero_rest

#ifdef FIX_ALIGNMENT
	/* exception in the alignment byte loop */
	/* ecx: alignment bytes still to copy, r9: alignment bytes total,
	   rdx: full count, rdi: correct */
	/* rdx has not been reduced yet while rdi has already advanced, so
	   take off the bytes already copied; otherwise .Lzero_rest would
	   zero past the end of the destination. */
.Le_align:
	subl %ecx,%r9d	/* r9: bytes already copied */
	subq %r9,%rdx	/* rdx: bytes left from rdi to the end */
	jmp .Lzero_rest
#endif

	/* exception on quad word loop in tail handling */
	/* ecx:	loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
	cmpl $0,(%rsp)	/* zero flag pushed on entry */
	jz   .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende
	CFI_ENDPROC
	/* close the symbol opened by ENTRY above */
ENDPROC(copy_user_generic_unrolled)
289
290
/* Some CPUs run faster using the string copy instructions.
   This is also a lot simpler. Use them when possible.
   Patch in jmps to this code instead of copying it fully
   to avoid unwanted aliasing in the exception tables. */

/*
 * copy_user_generic_string - copy with rep movsq/movsb and exception
 * handling.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 */
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	movl %ecx,%r8d		/* save zero flag */
	movl %edx,%ecx
	shrl $3,%ecx		/* ecx: quadword count */
	andl $7,%edx		/* edx: trailing bytes */
	jz   10f
1:	rep
	movsq
	movl %edx,%ecx
2:	rep
	movsb
9:	movl %ecx,%eax
	ret

	/* multiple of 8 byte */
10:	rep
	movsq
	xor %eax,%eax
	ret

	/* exception handling */
3:	lea (%rdx,%rcx,8),%rax	/* exception on quad loop */
	jmp 6f
5:	movl %ecx,%eax		/* exception on byte loop */
	/* eax: left over bytes */
6:	testl %r8d,%r8d		/* zero flag set? */
	jz 7f
	movl %eax,%ecx		/* initialize x86 loop counter */
	push %rax		/* keep the return value across the zeroing */
	xorl %eax,%eax
8:	rep
	stosb 			/* zero the rest */
11:	pop %rax
7:	ret
	CFI_ENDPROC
	/* close the symbol opened by ENTRY above */
END(copy_user_generic_string)

	/* faulting instruction, fixup address */
	.section __ex_table,"a"
	.quad 1b,3b
	.quad 2b,5b
	.quad 8b,11b
	.quad 10b,3b
	.previous
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
new file mode 100644
index 000000000000..4620efb12f13
--- /dev/null
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -0,0 +1,217 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v2.
3 *
4 * Functions to copy from and to user space.
5 */
6
7#include <linux/linkage.h>
8#include <asm/dwarf2.h>
9
10#define FIX_ALIGNMENT 1
11
12#include <asm/current.h>
13#include <asm/asm-offsets.h>
14#include <asm/thread_info.h>
15#include <asm/cpufeature.h>
16
/*
 * copy_user_nocache - Uncached memory copy with exception handling
 * This will force destination/source out of cache for more performance.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * rcx zero flag	when 1 zero on exception
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Structure: optional byte loop to align the destination to 8 bytes,
 * a 64-byte unrolled main loop and an 8-byte tail loop using movnti
 * stores, and a final byte loop using ordinary stores.  Every user
 * access carries a label with an __ex_table entry pointing at a fixup
 * that computes the remaining length.
 */
ENTRY(__copy_user_nocache)
	CFI_STARTPROC
	pushq %rbx
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rbx, 0
	pushq %rcx		/* save zero flag */
	CFI_ADJUST_CFA_OFFSET 8
	CFI_REL_OFFSET rcx, 0

	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx		/* rcx: saved length */

	movl $64,%ebx
	shrq $6,%rdx		/* rdx: number of 64-byte blocks ... */
	decq %rdx		/* ... minus one; negative means none */
	js   .Lhandle_tail

	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movnti %r11,(%rdi)
.Ld2:	movnti %r8,1*8(%rdi)
.Ld3:	movnti %r9,2*8(%rdi)
.Ld4:	movnti %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movnti %r11,4*8(%rdi)
.Ld6:	movnti %r8,5*8(%rdi)
.Ld7:	movnti %r9,6*8(%rdi)
.Ld8:	movnti %r10,7*8(%rdi)

	dec  %rdx

	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi

	jns  .Lloop

	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx		/* edx: length, kept for the byte loop */
	andl $63,%ecx
	shrl $3,%ecx		/* ecx: quadwords left in the tail */
	jz   .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movnti %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8

.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx		/* ecx: bytes left */
	jz   .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

	CFI_REMEMBER_STATE
.Lende:
	popq %rcx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE %rcx
	popq %rbx
	CFI_ADJUST_CFA_OFFSET -8
	CFI_RESTORE rbx
	ret
	CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d		/* r9: bytes needed to reach 8-byte alignment */
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz   .Lhandle_7		/* count <= alignment bytes: plain byte copy */
	js   .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx
	jmp .Lafter_bad_alignment
#endif

	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Le_align
	.quad .Ld11,.Le_align
#endif
	.quad .Le5,.Le_zero
	.previous

	/* compute 64-offset for main loop. 8 bytes accuracy with error on the
	   pessimistic side. this is gross. it would be better to fix the
	   interface. */
	/* eax: zero, ebx: 64 */
.Ls1e: 	addl $8,%eax
.Ls2e: 	addl $8,%eax
.Ls3e: 	addl $8,%eax
.Ls4e: 	addl $8,%eax
.Ls5e: 	addl $8,%eax
.Ls6e: 	addl $8,%eax
.Ls7e: 	addl $8,%eax
.Ls8e: 	addl $8,%eax
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi  /* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
	jmp .Lzero_rest

#ifdef FIX_ALIGNMENT
	/* exception in the alignment byte loop */
	/* ecx: alignment bytes still to copy, r9: alignment bytes total,
	   rdx: full count, rdi: correct */
	/* rdx has not been reduced yet while rdi has already advanced, so
	   take off the bytes already copied; otherwise .Lzero_rest would
	   zero past the end of the destination. */
.Le_align:
	subl %ecx,%r9d	/* r9: bytes already copied */
	subq %r9,%rdx	/* rdx: bytes left from rdi to the end */
	jmp .Lzero_rest
#endif

	/* exception on quad word loop in tail handling */
	/* ecx:	loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
	cmpl $0,(%rsp)	/* zero flag set? */
	jz   .Le_zero
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende
	CFI_ENDPROC
ENDPROC(__copy_user_nocache)
216
217
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
new file mode 100644
index 000000000000..f0dba36578ea
--- /dev/null
+++ b/arch/x86/lib/csum-copy_64.S
@@ -0,0 +1,249 @@
1/*
2 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file COPYING in the main directory of this archive
6 * for more details. No warranty for anything given at all.
7 */
8#include <linux/linkage.h>
9#include <asm/dwarf2.h>
10#include <asm/errno.h>
11
12/*
13 * Checksum copy with exception handling.
14 * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the
15 * destination is zeroed.
16 *
17 * Input
18 * rdi source
19 * rsi destination
20 * edx len (32bit)
21 * ecx sum (32bit)
22 * r8 src_err_ptr (int)
23 * r9 dst_err_ptr (int)
24 *
25 * Output
26 * eax 32bit sum (folded from the 64bit accumulator). undefined in case of exception.
27 *
28 * Wrappers need to take care of valid exception sum and zeroing.
29 * They also should align source or destination to 8 bytes.
30 */
31
/*
 * Exception-table helpers.  Each macro drops a local label at the
 * current location -- i.e. on the instruction that follows the macro
 * invocation -- and adds an __ex_table entry pairing that address with
 * a fixup label.
 */
/* source: a fault on the next instruction goes to .Lbad_source */
32 .macro source
3310:
34 .section __ex_table,"a"
35 .align 8
36 .quad 10b,.Lbad_source
37 .previous
38 .endm
39
/* dest: a fault on the next instruction goes to .Lbad_dest */
40 .macro dest
4120:
42 .section __ex_table,"a"
43 .align 8
44 .quad 20b,.Lbad_dest
45 .previous
46 .endm
47
/* ignore: a fault on the next instruction resumes at label L
   (default .Lignore) */
48 .macro ignore L=.Lignore
4930:
50 .section __ex_table,"a"
51 .align 8
52 .quad 30b,\L
53 .previous
54 .endm
55
56
/*
 * csum_partial_copy_generic - copy from (%rdi) to (%rsi) while summing
 * the data into rax with add-with-carry.
 *
 * Phases: 64-byte main loop, 8-byte loop, fold of the 64-bit sum to 32
 * bits, 2-byte loop, final odd byte.  The carry flag is carried across
 * the adc chains, so only flag-neutral instructions (mov/lea) or ones
 * that leave CF alone (dec) sit between an adc and the next adc/jnz.
 * r9 is kept at zero and used to add in the final carry.
 */
57ENTRY(csum_partial_copy_generic)
58 CFI_STARTPROC
/* NOTE(review): both outcomes of this compare reach .Lignore, so the
   test has no effect as written */
59 cmpl $3*64,%edx
60 jle .Lignore
61
62.Lignore:
63 subq $7*8,%rsp
64 CFI_ADJUST_CFA_OFFSET 7*8
65 movq %rbx,2*8(%rsp)
66 CFI_REL_OFFSET rbx, 2*8
67 movq %r12,3*8(%rsp)
68 CFI_REL_OFFSET r12, 3*8
69 movq %r14,4*8(%rsp)
70 CFI_REL_OFFSET r14, 4*8
71 movq %r13,5*8(%rsp)
72 CFI_REL_OFFSET r13, 5*8
73 movq %rbp,6*8(%rsp)
74 CFI_REL_OFFSET rbp, 6*8
75
/* stack slot 0: src_err_ptr, slot 1: dst_err_ptr (read by the fault
   handlers at the end) */
76 movq %r8,(%rsp)
77 movq %r9,1*8(%rsp)
78
/* eax: running sum, ecx: saved length */
79 movl %ecx,%eax
80 movl %edx,%ecx
81
82 xorl %r9d,%r9d
83 movq %rcx,%r12
84
85 shrq $6,%r12
86 jz .Lhandle_tail /* < 64 */
87
88 clc
89
90 /* main loop. clear in 64 byte blocks */
91 /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
92 /* r11: temp3, rdx: temp4, r12 loopcnt */
93 /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */
94 .p2align 4
95.Lloop:
96 source
97 movq (%rdi),%rbx
98 source
99 movq 8(%rdi),%r8
100 source
101 movq 16(%rdi),%r11
102 source
103 movq 24(%rdi),%rdx
104
105 source
106 movq 32(%rdi),%r10
107 source
108 movq 40(%rdi),%rbp
109 source
110 movq 48(%rdi),%r14
111 source
112 movq 56(%rdi),%r13
113
/* a fault on the prefetch simply resumes at label 2 */
114 ignore 2f
115 prefetcht0 5*64(%rdi)
1162:
117 adcq %rbx,%rax
118 adcq %r8,%rax
119 adcq %r11,%rax
120 adcq %rdx,%rax
121 adcq %r10,%rax
122 adcq %rbp,%rax
123 adcq %r14,%rax
124 adcq %r13,%rax
125
/* decl sets ZF for the jnz below and leaves CF from the adc chain intact */
126 decl %r12d
127
128 dest
129 movq %rbx,(%rsi)
130 dest
131 movq %r8,8(%rsi)
132 dest
133 movq %r11,16(%rsi)
134 dest
135 movq %rdx,24(%rsi)
136
137 dest
138 movq %r10,32(%rsi)
139 dest
140 movq %rbp,40(%rsi)
141 dest
142 movq %r14,48(%rsi)
143 dest
144 movq %r13,56(%rsi)
145
1463:
147
148 leaq 64(%rdi),%rdi
149 leaq 64(%rsi),%rsi
150
151 jnz .Lloop
152
/* add in the carry left by the last adc */
153 adcq %r9,%rax
154
155 /* do last upto 56 bytes */
156.Lhandle_tail:
157 /* ecx: count */
158 movl %ecx,%r10d
159 andl $63,%ecx
160 shrl $3,%ecx
161 jz .Lfold
162 clc
163 .p2align 4
164.Lloop_8:
165 source
166 movq (%rdi),%rbx
167 adcq %rbx,%rax
168 decl %ecx
169 dest
170 movq %rbx,(%rsi)
171 leaq 8(%rsi),%rsi /* preserve carry */
172 leaq 8(%rdi),%rdi
173 jnz .Lloop_8
174 adcq %r9,%rax /* add in carry */
175
176.Lfold:
177 /* reduce checksum to 32bits */
178 movl %eax,%ebx
179 shrq $32,%rax
180 addl %ebx,%eax
181 adcl %r9d,%eax
182
183 /* do last upto 6 bytes */
184.Lhandle_7:
185 movl %r10d,%ecx
186 andl $7,%ecx
187 shrl $1,%ecx
188 jz .Lhandle_1
/* NOTE(review): edx does not appear to be read again after this load */
189 movl $2,%edx
190 xorl %ebx,%ebx
191 clc
192 .p2align 4
193.Lloop_1:
194 source
195 movw (%rdi),%bx
196 adcl %ebx,%eax
197 decl %ecx
198 dest
199 movw %bx,(%rsi)
200 leaq 2(%rdi),%rdi
201 leaq 2(%rsi),%rsi
202 jnz .Lloop_1
203 adcl %r9d,%eax /* add in carry */
204
205 /* handle last odd byte */
206.Lhandle_1:
207 testl $1,%r10d
208 jz .Lende
209 xorl %ebx,%ebx
210 source
211 movb (%rdi),%bl
212 dest
213 movb %bl,(%rsi)
214 addl %ebx,%eax
215 adcl %r9d,%eax /* carry */
216
217 CFI_REMEMBER_STATE
218.Lende:
219 movq 2*8(%rsp),%rbx
220 CFI_RESTORE rbx
221 movq 3*8(%rsp),%r12
222 CFI_RESTORE r12
223 movq 4*8(%rsp),%r14
224 CFI_RESTORE r14
225 movq 5*8(%rsp),%r13
226 CFI_RESTORE r13
227 movq 6*8(%rsp),%rbp
228 CFI_RESTORE rbp
229 addq $7*8,%rsp
230 CFI_ADJUST_CFA_OFFSET -7*8
231 ret
232 CFI_RESTORE_STATE
233
234 /* Exception handlers. Very simple, zeroing is done in the wrappers */
/* load the saved src_err_ptr; if non-NULL store -EFAULT through it */
235.Lbad_source:
236 movq (%rsp),%rax
237 testq %rax,%rax
238 jz .Lende
239 movl $-EFAULT,(%rax)
240 jmp .Lende
241
/* load the saved dst_err_ptr; if non-NULL store -EFAULT through it */
242.Lbad_dest:
243 movq 8(%rsp),%rax
244 testq %rax,%rax
245 jz .Lende
246 movl $-EFAULT,(%rax)
247 jmp .Lende
248 CFI_ENDPROC
249ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
new file mode 100644
index 000000000000..bc503f506903
--- /dev/null
+++ b/arch/x86/lib/csum-partial_64.c
@@ -0,0 +1,150 @@
1/*
2 * arch/x86/lib/csum-partial_64.c
3 *
4 * This file contains network checksum routines that are better done
5 * in an architecture-specific manner due to speed.
6 */
7
8#include <linux/compiler.h>
9#include <linux/module.h>
10#include <asm/checksum.h>
11
/*
 * Fold a 32-bit value into 16 bits with end-around carry: add the upper
 * and lower halves, then add the carry out of bit 15 back into the sum.
 */
static inline unsigned short from32to16(unsigned a)
{
	unsigned folded = (a >> 16) + (a & 0xffff);

	/* at most one carry bit; adding it back cannot overflow 16 bits */
	folded = (folded & 0xffff) + (folded >> 16);
	return (unsigned short)folded;
}
21
22/*
23 * Do a 64-bit checksum on an arbitrary memory area.
24 * Returns a 32bit checksum.
25 *
26 * This isn't as time critical as it used to be because many NICs
27 * do hardware checksumming these days.
28 *
29 * Things tried and found to not make it faster:
30 * Manual Prefetching
31 * Unrolling to an 128 bytes inner loop.
32 * Using interleaving with more registers to break the carry chains.
33 */
/*
 * Sum the bytes of buff[0..len) as a ones-complement style accumulation.
 * The buffer is consumed in growing, naturally aligned steps (1, 2, 4 and
 * then 8 bytes), the bulk is added 64 bytes at a time with a carry chain,
 * and the leftover 4/2/1 bytes are added at the end.  The 64-bit
 * accumulator is folded to 32 bits before returning.
 */
34static unsigned do_csum(const unsigned char *buff, unsigned len)
35{
36 unsigned odd, count;
37 unsigned long result = 0;
38
39 if (unlikely(len == 0))
40 return result;
/*
 * Odd start address: take the first byte on its own, shifted up by 8.
 * The byte swap at the bottom of the function undoes this shift.
 */
41 odd = 1 & (unsigned long) buff;
42 if (unlikely(odd)) {
43 result = *buff << 8;
44 len--;
45 buff++;
46 }
47 count = len >> 1; /* nr of 16-bit words.. */
48 if (count) {
/* consume one 16-bit word if that is needed to reach 4-byte alignment */
49 if (2 & (unsigned long) buff) {
50 result += *(unsigned short *)buff;
51 count--;
52 len -= 2;
53 buff += 2;
54 }
55 count >>= 1; /* nr of 32-bit words.. */
56 if (count) {
57 unsigned long zero;
58 unsigned count64;
/* consume one 32-bit word if that is needed to reach 8-byte alignment */
59 if (4 & (unsigned long) buff) {
60 result += *(unsigned int *) buff;
61 count--;
62 len -= 4;
63 buff += 4;
64 }
65 count >>= 1; /* nr of 64-bit words.. */
66
67 /* main loop using 64byte blocks */
68 zero = 0;
69 count64 = count >> 3;
70 while (count64) {
/*
 * Eight quadword adds chained through the carry flag; the final
 * adcq of zero adds the last carry back into the accumulator.
 */
71 asm("addq 0*8(%[src]),%[res]\n\t"
72 "adcq 1*8(%[src]),%[res]\n\t"
73 "adcq 2*8(%[src]),%[res]\n\t"
74 "adcq 3*8(%[src]),%[res]\n\t"
75 "adcq 4*8(%[src]),%[res]\n\t"
76 "adcq 5*8(%[src]),%[res]\n\t"
77 "adcq 6*8(%[src]),%[res]\n\t"
78 "adcq 7*8(%[src]),%[res]\n\t"
79 "adcq %[zero],%[res]"
80 : [res] "=r" (result)
81 : [src] "r" (buff), [zero] "r" (zero),
82 "[res]" (result));
83 buff += 64;
84 count64--;
85 }
86
87 /* up to 7 remaining 8-byte words, one add-with-carry pair each */
88 count %= 8;
89 while (count) {
90 asm("addq %1,%0\n\t"
91 "adcq %2,%0\n"
92 : "=r" (result)
93 : "m" (*(unsigned long *)buff),
94 "r" (zero), "0" (result));
95 --count;
96 buff += 8;
97 }
/* fold the 64-bit accumulator into 32 bits */
98 result = add32_with_carry(result>>32,
99 result&0xffffffff);
100
/* len now only matters modulo 8: add the trailing 4, 2 and 1 bytes */
101 if (len & 4) {
102 result += *(unsigned int *) buff;
103 buff += 4;
104 }
105 }
106 if (len & 2) {
107 result += *(unsigned short *) buff;
108 buff += 2;
109 }
110 }
111 if (len & 1)
112 result += *buff;
113 result = add32_with_carry(result>>32, result & 0xffffffff);
/*
 * For an odd start address every byte was summed in the opposite lane:
 * fold to 16 bits and swap the two bytes to compensate.
 */
114 if (unlikely(odd)) {
115 result = from32to16(result);
116 result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
117 }
118 return result;
119}
120
121/*
122 * computes the checksum of a memory block at buff, length len,
123 * and adds in "sum" (32-bit)
124 *
125 * returns a 32-bit number suitable for feeding into itself
126 * or csum_tcpudp_magic
127 *
128 * this function must be called with even lengths, except
129 * for the last fragment, which may be odd
130 *
131 * it's best to have buff aligned on a 64-bit boundary
132 */
133__wsum csum_partial(const void *buff, int len, __wsum sum)
134{
135 return (__force __wsum)add32_with_carry(do_csum(buff, len),
136 (__force u32)sum);
137}
138
139EXPORT_SYMBOL(csum_partial);
140
141/*
142 * this routine is used for miscellaneous IP-like checksums, mainly
143 * in icmp.c
144 */
145__sum16 ip_compute_csum(const void *buff, int len)
146{
147 return csum_fold(csum_partial(buff,len,0));
148}
149EXPORT_SYMBOL(ip_compute_csum);
150
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
new file mode 100644
index 000000000000..fd42a4a095fc
--- /dev/null
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -0,0 +1,135 @@
1/* Copyright 2002,2003 Andi Kleen, SuSE Labs.
2 * Subject to the GNU Public License v.2
3 *
4 * Wrappers of assembly checksum functions for x86-64.
5 */
6
7#include <asm/checksum.h>
8#include <linux/module.h>
9
10/**
11 * csum_partial_copy_from_user - Copy and checksum from user space.
12 * @src: source address (user space)
13 * @dst: destination address
14 * @len: number of bytes to be copied.
15 * @isum: initial sum that is added into the result (32bit unfolded)
16 * @errp: set to -EFAULT for a bad source address.
17 *
18 * Returns a 32bit unfolded checksum of the buffer.
19 * src and dst are best aligned to 64bits.
20 */
21__wsum
22csum_partial_copy_from_user(const void __user *src, void *dst,
23 int len, __wsum isum, int *errp)
24{
25 might_sleep();
26 *errp = 0;
27 if (likely(access_ok(VERIFY_READ,src, len))) {
28 /* Why 6, not 7? To handle odd addresses aligned we
29 would need to do considerable complications to fix the
30 checksum which is defined as an 16bit accumulator. The
31 fix alignment code is primarily for performance
32 compatibility with 32bit and that will handle odd
33 addresses slowly too. */
34 if (unlikely((unsigned long)src & 6)) {
35 while (((unsigned long)src & 6) && len >= 2) {
36 __u16 val16;
37 *errp = __get_user(val16, (const __u16 __user *)src);
38 if (*errp)
39 return isum;
40 *(__u16 *)dst = val16;
41 isum = (__force __wsum)add32_with_carry(
42 (__force unsigned)isum, val16);
43 src += 2;
44 dst += 2;
45 len -= 2;
46 }
47 }
48 isum = csum_partial_copy_generic((__force const void *)src,
49 dst, len, isum, errp, NULL);
50 if (likely(*errp == 0))
51 return isum;
52 }
53 *errp = -EFAULT;
54 memset(dst,0,len);
55 return isum;
56}
57
58EXPORT_SYMBOL(csum_partial_copy_from_user);
59
60/**
61 * csum_partial_copy_to_user - Copy and checksum to user space.
62 * @src: source address
63 * @dst: destination address (user space)
64 * @len: number of bytes to be copied.
65 * @isum: initial sum that is added into the result (32bit unfolded)
66 * @errp: set to -EFAULT for a bad destination address.
67 *
68 * Returns a 32bit unfolded checksum of the buffer.
69 * src and dst are best aligned to 64bits.
70 */
71__wsum
72csum_partial_copy_to_user(const void *src, void __user *dst,
73 int len, __wsum isum, int *errp)
74{
75 might_sleep();
76 if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
77 *errp = -EFAULT;
78 return 0;
79 }
80
81 if (unlikely((unsigned long)dst & 6)) {
82 while (((unsigned long)dst & 6) && len >= 2) {
83 __u16 val16 = *(__u16 *)src;
84 isum = (__force __wsum)add32_with_carry(
85 (__force unsigned)isum, val16);
86 *errp = __put_user(val16, (__u16 __user *)dst);
87 if (*errp)
88 return isum;
89 src += 2;
90 dst += 2;
91 len -= 2;
92 }
93 }
94
95 *errp = 0;
96 return csum_partial_copy_generic(src, (void __force *)dst,len,isum,NULL,errp);
97}
98
99EXPORT_SYMBOL(csum_partial_copy_to_user);
100
101/**
102 * csum_partial_copy_nocheck - Copy and checksum.
103 * @src: source address
104 * @dst: destination address
105 * @len: number of bytes to be copied.
106 * @isum: initial sum that is added into the result (32bit unfolded)
107 *
108 * Returns a 32bit unfolded checksum of the buffer.
109 */
110__wsum
111csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
112{
113 return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
114}
115EXPORT_SYMBOL(csum_partial_copy_nocheck);
116
117__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
118 const struct in6_addr *daddr,
119 __u32 len, unsigned short proto, __wsum sum)
120{
121 __u64 rest, sum64;
122
123 rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
124 (__force __u64)sum;
125 asm(" addq (%[saddr]),%[sum]\n"
126 " adcq 8(%[saddr]),%[sum]\n"
127 " adcq (%[daddr]),%[sum]\n"
128 " adcq 8(%[daddr]),%[sum]\n"
129 " adcq $0,%[sum]\n"
130 : [sum] "=r" (sum64)
131 : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr));
132 return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
133}
134
135EXPORT_SYMBOL(csum_ipv6_magic);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
new file mode 100644
index 000000000000..2dbebd308347
--- /dev/null
+++ b/arch/x86/lib/delay_64.c
@@ -0,0 +1,57 @@
1/*
2 * Precise Delay Loops for x86-64
3 *
4 * Copyright (C) 1993 Linus Torvalds
5 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 *
7 * The __delay function must _NOT_ be inlined as its execution time
8 * depends wildly on alignment on many x86 processors.
9 */
10
11#include <linux/module.h>
12#include <linux/sched.h>
13#include <linux/delay.h>
14#include <asm/delay.h>
15#include <asm/msr.h>
16
17#ifdef CONFIG_SMP
18#include <asm/smp.h>
19#endif
20
/* Store the current time stamp counter in *timer_value; always returns 0. */
int read_current_timer(unsigned long *timer_value)
{
	unsigned long tsc;

	rdtscll(tsc);
	*timer_value = tsc;
	return 0;
}
26
/*
 * Busy-wait until at least @loops time stamp counter ticks have elapsed.
 *
 * The counter is sampled at full 64-bit width.  With 32-bit samples the
 * difference could never reach a @loops value of 2^32 or more, and the
 * loop would never terminate.
 */
void __delay(unsigned long loops)
{
	unsigned long start, now;

	rdtscll(start);
	do {
		rep_nop();
		rdtscll(now);
	} while ((now - start) < loops);
}
39EXPORT_SYMBOL(__delay);
40
41inline void __const_udelay(unsigned long xloops)
42{
43 __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1);
44}
45EXPORT_SYMBOL(__const_udelay);
46
void __udelay(unsigned long usecs)
{
	/* 2**32 / 1000000, rounded up */
	const unsigned long xloops_per_usec = 0x000010c7;

	__const_udelay(usecs * xloops_per_usec);
}
51EXPORT_SYMBOL(__udelay);
52
void __ndelay(unsigned long nsecs)
{
	/* 2**32 / 1000000000, rounded up */
	const unsigned long xloops_per_nsec = 0x00005;

	__const_udelay(nsecs * xloops_per_nsec);
}
57EXPORT_SYMBOL(__ndelay);
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S
new file mode 100644
index 000000000000..5448876261f8
--- /dev/null
+++ b/arch/x86/lib/getuser_64.S
@@ -0,0 +1,109 @@
1/*
2 * __get_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen
6 *
7 * These functions have a non-standard call interface
8 * to make them more efficient, especially as they
9 * return an error value in addition to the "real"
10 * return value.
11 */
12
13/*
14 * __get_user_X
15 *
16 * Inputs: %rcx contains the address.
17 * The register is modified, but all changes are undone
18 * before returning because the C code doesn't know about it.
19 *
20 * Outputs: %rax is error code (0 or -EFAULT)
21 * %rdx contains zero-extended value
22 *
23 * %r8 is destroyed.
24 *
25 * These functions should not modify any other registers,
26 * as they get called from within inline assembly.
27 */
28
29#include <linux/linkage.h>
30#include <asm/dwarf2.h>
31#include <asm/page.h>
32#include <asm/errno.h>
33#include <asm/asm-offsets.h>
34#include <asm/thread_info.h>
35
36 .text
/*
 * Load one byte from the address in %rcx.
 * Returns %eax = 0 and the zero-extended byte in %edx, or branches to
 * bad_get_user when the address is at or above the thread's addr_limit.
 */
37ENTRY(__get_user_1)
38 CFI_STARTPROC
39 GET_THREAD_INFO(%r8)
/* reject addresses at or above the limit stored in thread_info */
40 cmpq threadinfo_addr_limit(%r8),%rcx
41 jae bad_get_user
/* label 1 is listed in __ex_table below: a fault here goes to bad_get_user */
421: movzb (%rcx),%edx
43 xorl %eax,%eax
44 ret
45 CFI_ENDPROC
46ENDPROC(__get_user_1)
47
/*
 * Load a 16-bit value from the address in %rcx.
 * Returns %eax = 0 and the zero-extended value in %edx on success.
 */
48ENTRY(__get_user_2)
49 CFI_STARTPROC
50 GET_THREAD_INFO(%r8)
/* %rcx temporarily points at the last byte of the access */
51 addq $1,%rcx
/* carry means the address wrapped around */
52 jc 20f
53 cmpq threadinfo_addr_limit(%r8),%rcx
54 jae 20f
/* undo the adjustment before the load */
55 decq %rcx
/* label 2 is listed in __ex_table below */
562: movzwl (%rcx),%edx
57 xorl %eax,%eax
58 ret
/* failure: restore %rcx first, the caller does not expect it to change */
5920: decq %rcx
60 jmp bad_get_user
61 CFI_ENDPROC
62ENDPROC(__get_user_2)
63
/*
 * Load a 32-bit value from the address in %rcx.
 * Returns %eax = 0 and the value in %edx on success.
 */
64ENTRY(__get_user_4)
65 CFI_STARTPROC
66 GET_THREAD_INFO(%r8)
/* check the last byte of the access (address + 3), catching wrap-around */
67 addq $3,%rcx
68 jc 30f
69 cmpq threadinfo_addr_limit(%r8),%rcx
70 jae 30f
71 subq $3,%rcx
/* label 3 is listed in __ex_table below */
723: movl (%rcx),%edx
73 xorl %eax,%eax
74 ret
/* failure: restore %rcx before reporting the error */
7530: subq $3,%rcx
76 jmp bad_get_user
77 CFI_ENDPROC
78ENDPROC(__get_user_4)
79
/*
 * Load a 64-bit value from the address in %rcx.
 * Returns %eax = 0 and the value in %rdx on success.
 */
80ENTRY(__get_user_8)
81 CFI_STARTPROC
82 GET_THREAD_INFO(%r8)
/* check the last byte of the access (address + 7), catching wrap-around */
83 addq $7,%rcx
84 jc 40f
85 cmpq threadinfo_addr_limit(%r8),%rcx
86 jae 40f
87 subq $7,%rcx
/* label 4 is listed in __ex_table below */
884: movq (%rcx),%rdx
89 xorl %eax,%eax
90 ret
/* failure: restore %rcx before reporting the error */
9140: subq $7,%rcx
92 jmp bad_get_user
93 CFI_ENDPROC
94ENDPROC(__get_user_8)
95
/*
 * Common failure exit for the __get_user_X routines: the value register
 * is zeroed and -EFAULT is returned in %rax.
 */
96bad_get_user:
97 CFI_STARTPROC
98 xorl %edx,%edx
99 movq $(-EFAULT),%rax
100 ret
101 CFI_ENDPROC
102END(bad_get_user)
103
/*
 * Pairs each user-memory load above (local labels 1-4) with bad_get_user.
 * NOTE(review): presumably consulted by the fault handler to redirect a
 * faulting load to the second address - confirm against the fault path.
 */
104.section __ex_table,"a"
105 .quad 1b,bad_get_user
106 .quad 2b,bad_get_user
107 .quad 3b,bad_get_user
108 .quad 4b,bad_get_user
109.previous
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c
new file mode 100644
index 000000000000..87b4a4e18039
--- /dev/null
+++ b/arch/x86/lib/io_64.c
@@ -0,0 +1,23 @@
1#include <linux/string.h>
2#include <asm/io.h>
3#include <linux/module.h>
4
/* Copy len bytes from src to the I/O address dst using __inline_memcpy. */
void __memcpy_toio(unsigned long dst, const void *src, unsigned len)
{
	void *io_dst = (void *)dst;

	__inline_memcpy(io_dst, src, len);
}
9EXPORT_SYMBOL(__memcpy_toio);
10
/* Copy len bytes from the I/O address src to dst using __inline_memcpy. */
void __memcpy_fromio(void *dst, unsigned long src, unsigned len)
{
	const void *io_src = (const void *)src;

	__inline_memcpy(dst, io_src, len);
}
15EXPORT_SYMBOL(__memcpy_fromio);
16
17void memset_io(volatile void __iomem *a, int b, size_t c)
18{
19 /* XXX: memset can mangle the IO patterns quite a bit.
20 perhaps it would be better to use a dumb one */
21 memset((void *)a,b,c);
22}
23EXPORT_SYMBOL(memset_io);
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
new file mode 100644
index 000000000000..05a95e713da8
--- /dev/null
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -0,0 +1,30 @@
1/*
2 * Copyright 2006 PathScale, Inc. All Rights Reserved.
3 *
4 * This file is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU General Public License
6 * as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
16 */
17
18#include <linux/linkage.h>
19#include <asm/dwarf2.h>
20
21/*
22 * override generic version in lib/iomap_copy.c
23 */
/*
 * Copy %edx 32-bit units from (%rsi) to (%rdi) with a single string move.
 * NOTE(review): the C-level arguments are presumably (to, from, count)
 * in %rdi/%rsi/%rdx as for the generic version - confirm against the
 * prototype.
 */
24ENTRY(__iowrite32_copy)
25 CFI_STARTPROC
/* the rep prefix takes its repeat count from %rcx */
26 movl %edx,%ecx
27 rep movsd
28 ret
29 CFI_ENDPROC
30ENDPROC(__iowrite32_copy)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
new file mode 100644
index 000000000000..c22981fa2f3a
--- /dev/null
+++ b/arch/x86/lib/memcpy_64.S
@@ -0,0 +1,131 @@
1/* Copyright 2002 Andi Kleen */
2
3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5#include <asm/cpufeature.h>
6
7/*
8 * memcpy - Copy a memory block.
9 *
10 * Input:
11 * rdi destination
12 * rsi source
13 * rdx count
14 *
15 * Output:
16 * rax original destination
17 */
18
/*
 * String-instruction variant of memcpy; same register interface as
 * memcpy (rdi destination, rsi source, rdx count, rax returns the
 * original destination).
 * NOTE(review): the count is taken from %edx, so only the low 32 bits
 * of the length are used.
 */
19 ALIGN
20memcpy_c:
21 CFI_STARTPROC
22 movq %rdi,%rax
/* %ecx = number of whole quadwords, %edx = leftover byte count (0-7) */
23 movl %edx,%ecx
24 shrl $3,%ecx
25 andl $7,%edx
26 rep movsq
27 movl %edx,%ecx
28 rep movsb
29 ret
30 CFI_ENDPROC
31ENDPROC(memcpy_c)
32
/*
 * Unrolled memcpy: 64-byte blocks, then 8-byte words, then single bytes.
 * Every loop decrements %ecx first and branches on the resulting zero
 * flag at the bottom; the mov and lea instructions in between leave the
 * flags untouched.
 * NOTE(review): lengths are derived from %edx, so only the low 32 bits
 * of the count are used; %rbx is pushed and popped although nothing in
 * this routine writes it.
 */
33ENTRY(__memcpy)
34ENTRY(memcpy)
35 CFI_STARTPROC
36 pushq %rbx
37 CFI_ADJUST_CFA_OFFSET 8
38 CFI_REL_OFFSET rbx, 0
/* return value: the original destination */
39 movq %rdi,%rax
40
/* %ecx = number of 64-byte blocks */
41 movl %edx,%ecx
42 shrl $6,%ecx
43 jz .Lhandle_tail
44
45 .p2align 4
46.Lloop_64:
47 decl %ecx
48
49 movq (%rsi),%r11
50 movq 8(%rsi),%r8
51
52 movq %r11,(%rdi)
53 movq %r8,1*8(%rdi)
54
55 movq 2*8(%rsi),%r9
56 movq 3*8(%rsi),%r10
57
58 movq %r9,2*8(%rdi)
59 movq %r10,3*8(%rdi)
60
61 movq 4*8(%rsi),%r11
62 movq 5*8(%rsi),%r8
63
64 movq %r11,4*8(%rdi)
65 movq %r8,5*8(%rdi)
66
67 movq 6*8(%rsi),%r9
68 movq 7*8(%rsi),%r10
69
70 movq %r9,6*8(%rdi)
71 movq %r10,7*8(%rdi)
72
73 leaq 64(%rsi),%rsi
74 leaq 64(%rdi),%rdi
75 jnz .Lloop_64
76
/* remaining whole quadwords: (count & 63) >> 3 */
77.Lhandle_tail:
78 movl %edx,%ecx
79 andl $63,%ecx
80 shrl $3,%ecx
81 jz .Lhandle_7
82 .p2align 4
83.Lloop_8:
84 decl %ecx
85 movq (%rsi),%r8
86 movq %r8,(%rdi)
87 leaq 8(%rdi),%rdi
88 leaq 8(%rsi),%rsi
89 jnz .Lloop_8
90
/* last count & 7 bytes, one at a time */
91.Lhandle_7:
92 movl %edx,%ecx
93 andl $7,%ecx
94 jz .Lende
95 .p2align 4
96.Lloop_1:
97 movb (%rsi),%r8b
98 movb %r8b,(%rdi)
99 incq %rdi
100 incq %rsi
101 decl %ecx
102 jnz .Lloop_1
103
104.Lende:
105 popq %rbx
106 CFI_ADJUST_CFA_OFFSET -8
107 CFI_RESTORE rbx
108 ret
109.Lfinal:
110 CFI_ENDPROC
111ENDPROC(memcpy)
112ENDPROC(__memcpy)
113
114 /* Some CPUs run faster using the string copy instructions.
115 It is also a lot simpler. Use this when possible */
116
/*
 * Replacement code: a two-byte short jump whose displacement is computed
 * so that, once placed at memcpy, it lands on memcpy_c.
 */
117 .section .altinstr_replacement,"ax"
1181: .byte 0xeb /* jmp <disp8> */
119 .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
1202:
121 .previous
/*
 * Alternatives record: address to patch (memcpy), address of the
 * replacement (1b) and the CPU feature that selects it.
 * NOTE(review): the last two bytes are presumably the original and the
 * replacement length - confirm against the alternatives record layout.
 */
122 .section .altinstructions,"a"
123 .align 8
124 .quad memcpy
125 .quad 1b
126 .byte X86_FEATURE_REP_GOOD
127 /* Replace only beginning, memcpy is used to apply alternatives, so it
128 * is silly to overwrite itself with nops - reboot is only outcome... */
129 .byte 2b - 1b
130 .byte 2b - 1b
131 .previous
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
new file mode 100644
index 000000000000..751ebae8ec42
--- /dev/null
+++ b/arch/x86/lib/memmove_64.c
@@ -0,0 +1,21 @@
1/* Normally compiler builtins are used, but sometimes the compiler calls out
2 of line code. Based on asm-i386/string.h.
3 */
4#define _STRING_C
5#include <linux/string.h>
6#include <linux/module.h>
7
8#undef memmove
/*
 * Copy count bytes from src to dest, allowing the regions to overlap.
 * When dest lies below src the copy is handed to memcpy; otherwise the
 * bytes are moved one at a time from the last byte down to the first.
 */
void *memmove(void *dest, const void *src, size_t count)
{
	char *to = dest;
	const char *from = src;
	size_t left;

	if (dest < src)
		return memcpy(dest, src, count);

	/* walk backwards so that overlapping source bytes are read before they are overwritten */
	for (left = count; left != 0; left--)
		to[left - 1] = from[left - 1];

	return dest;
}
21EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
new file mode 100644
index 000000000000..2c5948116bd2
--- /dev/null
+++ b/arch/x86/lib/memset_64.S
@@ -0,0 +1,133 @@
1/* Copyright 2002 Andi Kleen, SuSE Labs */
2
3#include <linux/linkage.h>
4#include <asm/dwarf2.h>
5
6/*
7 * ISO C memset - set a memory block to a byte value.
8 *
9 * rdi destination
10 * rsi value (char)
11 * rdx count (bytes)
12 *
13 * rax original destination
14 */
/*
 * String-instruction variant of memset; same register interface as
 * memset (rdi destination, rsi byte value, rdx count, rax returns the
 * original destination).
 * NOTE(review): the count is taken from %edx, so only the low 32 bits
 * of the length are used.
 */
15 ALIGN
16memset_c:
17 CFI_STARTPROC
/* %r9 = destination to return, %r8d = leftover byte count (0-7) */
18 movq %rdi,%r9
19 movl %edx,%r8d
20 andl $7,%r8d
/* %ecx = number of whole quadwords */
21 movl %edx,%ecx
22 shrl $3,%ecx
23 /* replicate the fill byte into all eight bytes of %rax */
24 movzbl %sil,%esi
25 movabs $0x0101010101010101,%rax
26 mulq %rsi /* with rax, clobbers rdx */
27 rep stosq
28 movl %r8d,%ecx
29 rep stosb
30 movq %r9,%rax
31 ret
32 CFI_ENDPROC
33ENDPROC(memset_c)
34
/*
 * Unrolled memset: aligns the destination to 8 bytes, then stores
 * 64-byte blocks, 8-byte words and finally single bytes.  Every loop
 * decrements %ecx first and branches on the resulting zero flag at the
 * bottom; the mov and lea instructions in between leave the flags
 * untouched.
 * NOTE(review): the loop counts are derived from %r11d, i.e. only the
 * low 32 bits of the count.
 */
35ENTRY(memset)
36ENTRY(__memset)
37 CFI_STARTPROC
/* %r10 = destination to return, %r11 = remaining count */
38 movq %rdi,%r10
39 movq %rdx,%r11
40
41 /* replicate the fill byte into all eight bytes of %rax */
42 movzbl %sil,%ecx
43 movabs $0x0101010101010101,%rax
44 mul %rcx /* with rax, clobbers rdx */
45
46 /* align dst: %r9d = misalignment within a quadword */
47 movl %edi,%r9d
48 andl $7,%r9d
49 jnz .Lbad_alignment
50 CFI_REMEMBER_STATE
51.Lafter_bad_alignment:
52
/* %ecx = number of 64-byte blocks */
53 movl %r11d,%ecx
54 shrl $6,%ecx
55 jz .Lhandle_tail
56
57 .p2align 4
58.Lloop_64:
59 decl %ecx
60 movq %rax,(%rdi)
61 movq %rax,8(%rdi)
62 movq %rax,16(%rdi)
63 movq %rax,24(%rdi)
64 movq %rax,32(%rdi)
65 movq %rax,40(%rdi)
66 movq %rax,48(%rdi)
67 movq %rax,56(%rdi)
68 leaq 64(%rdi),%rdi
69 jnz .Lloop_64
70
71 /* Handle tail in loops. The loops should be faster than hard
72 to predict jump tables. */
73 .p2align 4
74.Lhandle_tail:
/* remaining whole quadwords: bits 3-5 of the count */
75 movl %r11d,%ecx
76 andl $63&(~7),%ecx
77 jz .Lhandle_7
78 shrl $3,%ecx
79 .p2align 4
80.Lloop_8:
81 decl %ecx
82 movq %rax,(%rdi)
83 leaq 8(%rdi),%rdi
84 jnz .Lloop_8
85
/* last count & 7 bytes, one at a time */
86.Lhandle_7:
87 movl %r11d,%ecx
88 andl $7,%ecx
89 jz .Lende
90 .p2align 4
91.Lloop_1:
92 decl %ecx
93 movb %al,(%rdi)
94 leaq 1(%rdi),%rdi
95 jnz .Lloop_1
96
97.Lende:
98 movq %r10,%rax
99 ret
100
101 CFI_RESTORE_STATE
/*
 * Misaligned destination.  With 7 bytes or fewer, use the byte loop
 * directly.  Otherwise cover the unaligned head with one 8-byte store,
 * then advance the pointer by 8 - misalignment and reduce the count by
 * the same amount.
 */
102.Lbad_alignment:
103 cmpq $7,%r11
104 jbe .Lhandle_7
105 movq %rax,(%rdi) /* unaligned store */
106 movq $8,%r8
107 subq %r9,%r8
108 addq %r8,%rdi
109 subq %r8,%r11
110 jmp .Lafter_bad_alignment
111.Lfinal:
112 CFI_ENDPROC
113ENDPROC(memset)
114ENDPROC(__memset)
115
116 /* Some CPUs run faster using the string instructions.
117 It is also a lot simpler. Use this when possible */
118
119#include <asm/cpufeature.h>
120
/*
 * Replacement code: a two-byte short jump whose displacement is computed
 * so that, once placed at memset, it lands on memset_c.
 */
121 .section .altinstr_replacement,"ax"
1221: .byte 0xeb /* jmp <disp8> */
123 .byte (memset_c - memset) - (2f - 1b) /* offset */
1242:
125 .previous
/*
 * Alternatives record: address to patch (memset), address of the
 * replacement (1b) and the CPU feature that selects it.
 * NOTE(review): the last two bytes are presumably the original length
 * (all of memset up to .Lfinal) and the replacement length - confirm
 * against the alternatives record layout.
 */
126 .section .altinstructions,"a"
127 .align 8
128 .quad memset
129 .quad 1b
130 .byte X86_FEATURE_REP_GOOD
131 .byte .Lfinal - memset
132 .byte 2b - 1b
133 .previous
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S
new file mode 100644
index 000000000000..4989f5a8fa9b
--- /dev/null
+++ b/arch/x86/lib/putuser_64.S
@@ -0,0 +1,106 @@
1/*
2 * __put_user functions.
3 *
4 * (C) Copyright 1998 Linus Torvalds
5 * (C) Copyright 2005 Andi Kleen
6 *
7 * These functions have a non-standard call interface
8 * to make them more efficient, especially as they
9 * return an error value in addition to the "real"
10 * return value.
11 */
12
13/*
14 * __put_user_X
15 *
16 * Inputs: %rcx contains the address
17 * %rdx contains new value
18 *
19 * Outputs: %rax is error code (0 or -EFAULT)
20 *
21 * %r8 is destroyed.
22 *
23 * These functions should not modify any other registers,
24 * as they get called from within inline assembly.
25 */
26
27#include <linux/linkage.h>
28#include <asm/dwarf2.h>
29#include <asm/page.h>
30#include <asm/errno.h>
31#include <asm/asm-offsets.h>
32#include <asm/thread_info.h>
33
34 .text
/*
 * Store the byte in %dl to the address in %rcx.
 * Returns %eax = 0, or branches to bad_put_user when the address is at
 * or above the thread's addr_limit.
 */
35ENTRY(__put_user_1)
36 CFI_STARTPROC
37 GET_THREAD_INFO(%r8)
/* reject addresses at or above the limit stored in thread_info */
38 cmpq threadinfo_addr_limit(%r8),%rcx
39 jae bad_put_user
/* label 1 is listed in __ex_table below: a fault here goes to bad_put_user */
401: movb %dl,(%rcx)
41 xorl %eax,%eax
42 ret
43 CFI_ENDPROC
44ENDPROC(__put_user_1)
45
/*
 * Store the 16-bit value in %dx to the address in %rcx.
 * Returns %eax = 0 on success.
 */
46ENTRY(__put_user_2)
47 CFI_STARTPROC
48 GET_THREAD_INFO(%r8)
/* check the last byte of the access (address + 1), catching wrap-around */
49 addq $1,%rcx
50 jc 20f
51 cmpq threadinfo_addr_limit(%r8),%rcx
52 jae 20f
/* undo the adjustment before the store */
53 decq %rcx
/* label 2 is listed in __ex_table below */
542: movw %dx,(%rcx)
55 xorl %eax,%eax
56 ret
/* failure: restore %rcx before reporting the error */
5720: decq %rcx
58 jmp bad_put_user
59 CFI_ENDPROC
60ENDPROC(__put_user_2)
61
/*
 * Store the 32-bit value in %edx to the address in %rcx.
 * Returns %eax = 0 on success.
 */
62ENTRY(__put_user_4)
63 CFI_STARTPROC
64 GET_THREAD_INFO(%r8)
/* check the last byte of the access (address + 3), catching wrap-around */
65 addq $3,%rcx
66 jc 30f
67 cmpq threadinfo_addr_limit(%r8),%rcx
68 jae 30f
69 subq $3,%rcx
/* label 3 is listed in __ex_table below */
703: movl %edx,(%rcx)
71 xorl %eax,%eax
72 ret
/* failure: restore %rcx before reporting the error */
7330: subq $3,%rcx
74 jmp bad_put_user
75 CFI_ENDPROC
76ENDPROC(__put_user_4)
77
/*
 * Store the 64-bit value in %rdx to the address in %rcx.
 * Returns %eax = 0 on success.
 */
78ENTRY(__put_user_8)
79 CFI_STARTPROC
80 GET_THREAD_INFO(%r8)
/* check the last byte of the access (address + 7), catching wrap-around */
81 addq $7,%rcx
82 jc 40f
83 cmpq threadinfo_addr_limit(%r8),%rcx
84 jae 40f
85 subq $7,%rcx
/* label 4 is listed in __ex_table below */
864: movq %rdx,(%rcx)
87 xorl %eax,%eax
88 ret
/* failure: restore %rcx before reporting the error */
8940: subq $7,%rcx
90 jmp bad_put_user
91 CFI_ENDPROC
92ENDPROC(__put_user_8)
93
/* Common failure exit for the __put_user_X routines: returns -EFAULT in %rax. */
94bad_put_user:
95 CFI_STARTPROC
96 movq $(-EFAULT),%rax
97 ret
98 CFI_ENDPROC
99END(bad_put_user)
100
/*
 * Pairs each user-memory store above (local labels 1-4) with bad_put_user.
 * NOTE(review): presumably consulted by the fault handler to redirect a
 * faulting store to the second address - confirm against the fault path.
 */
101.section __ex_table,"a"
102 .quad 1b,bad_put_user
103 .quad 2b,bad_put_user
104 .quad 3b,bad_put_user
105 .quad 4b,bad_put_user
106.previous
diff --git a/arch/x86/lib/rwlock_64.S b/arch/x86/lib/rwlock_64.S
new file mode 100644
index 000000000000..0cde1f807314
--- /dev/null
+++ b/arch/x86/lib/rwlock_64.S
@@ -0,0 +1,38 @@
1/* Slow paths of read/write spinlocks. */
2
3#include <linux/linkage.h>
4#include <asm/rwlock.h>
5#include <asm/alternative-asm.i>
6#include <asm/dwarf2.h>
7
8/* rdi: pointer to rwlock_t */
/*
 * Write-lock slow path.  Adds RW_LOCK_BIAS back to the lock word, spins
 * until the word reads exactly RW_LOCK_BIAS, then subtracts the bias
 * again; a non-zero result restarts the whole sequence.
 * NOTE(review): presumably the add undoes the caller's failed subtract -
 * confirm against the fast path.
 */
9ENTRY(__write_lock_failed)
10 CFI_STARTPROC
11 LOCK_PREFIX
12 addl $RW_LOCK_BIAS,(%rdi)
/* "rep; nop" is the encoding of the pause instruction: spin politely */
131: rep
14 nop
15 cmpl $RW_LOCK_BIAS,(%rdi)
16 jne 1b
/* retry the acquisition; zero means the lock was taken */
17 LOCK_PREFIX
18 subl $RW_LOCK_BIAS,(%rdi)
19 jnz __write_lock_failed
20 ret
21 CFI_ENDPROC
22END(__write_lock_failed)
23
24/* rdi: pointer to rwlock_t */
/*
 * Read-lock slow path.  Increments the lock word, spins while the word
 * minus one is negative, then decrements again; a negative result
 * restarts the whole sequence.
 * NOTE(review): presumably the increment undoes the caller's failed
 * decrement - confirm against the fast path.
 */
25ENTRY(__read_lock_failed)
26 CFI_STARTPROC
27 LOCK_PREFIX
28 incl (%rdi)
/* "rep; nop" is the encoding of the pause instruction: spin politely */
291: rep
30 nop
31 cmpl $1,(%rdi)
32 js 1b
/* retry the acquisition; a non-negative result means the lock was taken */
33 LOCK_PREFIX
34 decl (%rdi)
35 js __read_lock_failed
36 ret
37 CFI_ENDPROC
38END(__read_lock_failed)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
new file mode 100644
index 000000000000..55e586d352d3
--- /dev/null
+++ b/arch/x86/lib/thunk_64.S
@@ -0,0 +1,67 @@
1/*
2 * Save registers before calling assembly functions. This avoids
3 * disturbance of register allocation in some inline assembly constructs.
4 * Copyright 2001,2002 by Andi Kleen, SuSE Labs.
5 * Subject to the GNU public license, v.2. No warranty of any kind.
6 */
7
8 #include <linux/linkage.h>
9 #include <asm/dwarf2.h>
10 #include <asm/calling.h>
11 #include <asm/rwlock.h>
12
13 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
/*
 * thunk name,func: emit a global entry point \name that runs SAVE_ARGS,
 * calls \func and leaves through the shared "restore" epilogue.
 */
14 .macro thunk name,func
15 .globl \name
16\name:
17 CFI_STARTPROC
18 SAVE_ARGS
19 call \func
20 jmp restore
21 CFI_ENDPROC
22 .endm
23
24 /* rdi: arg1 ... normal C conventions. rax is passed from C. */
/*
 * thunk_retrax name,func: like thunk, but leaves through
 * "restore_norax" so that the value \func left in %rax reaches the
 * caller.
 */
25 .macro thunk_retrax name,func
26 .globl \name
27\name:
28 CFI_STARTPROC
29 SAVE_ARGS
30 call \func
31 jmp restore_norax
32 CFI_ENDPROC
33 .endm
34
35
/*
 * Thunk instantiations, placed in .sched.text.  Each line creates the
 * entry point named by the first argument, which calls the C function
 * named by the second.
 */
36 .section .sched.text
37#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
38 thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
39 thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
40 thunk rwsem_wake_thunk,rwsem_wake
41 thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
42#endif
43
/* semaphore slow paths; the _retrax variants pass the callee's %rax back */
44 thunk __down_failed,__down
45 thunk_retrax __down_failed_interruptible,__down_interruptible
46 thunk_retrax __down_failed_trylock,__down_trylock
47 thunk __up_wakeup,__up
48
49#ifdef CONFIG_TRACE_IRQFLAGS
50 thunk trace_hardirqs_on_thunk,trace_hardirqs_on
51 thunk trace_hardirqs_off_thunk,trace_hardirqs_off
52#endif
53
54 /* SAVE_ARGS below is used only for the .cfi directives it contains. */
/*
 * Shared epilogues for the thunks above.  The thunks jump straight to
 * the labels, so the SAVE_ARGS in front of each label is not reached
 * from them.
 */
55 CFI_STARTPROC
56 SAVE_ARGS
57restore:
58 RESTORE_ARGS
59 ret
60 CFI_ENDPROC
61
/*
 * NOTE(review): the argument 1 presumably makes RESTORE_ARGS skip
 * reloading %rax - confirm against the macro definition.
 */
62 CFI_STARTPROC
63 SAVE_ARGS
64restore_norax:
65 RESTORE_ARGS 1
66 ret
67 CFI_ENDPROC
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
new file mode 100644
index 000000000000..893d43f838cc
--- /dev/null
+++ b/arch/x86/lib/usercopy_64.c
@@ -0,0 +1,166 @@
1/*
2 * User address space access functions.
3 *
4 * Copyright 1997 Andi Kleen <ak@muc.de>
5 * Copyright 1997 Linus Torvalds
6 * Copyright 2002 Andi Kleen <ak@suse.de>
7 */
8#include <linux/module.h>
9#include <asm/uaccess.h>
10
11/*
12 * Copy a null terminated string from userspace.
13 */
14
/*
 * __do_strncpy_from_user(dst, src, count, res)
 *
 * Copies bytes from src to dst with lodsb/stosb until a NUL byte has
 * been copied or count bytes have been copied.  On exit res holds the
 * number of bytes copied, not counting the terminating NUL.  When count
 * is exhausted first, res equals count and no NUL is stored.  A count
 * of 0 copies nothing and yields 0.
 *
 * The exception table entry sends a fault at the lodsb/stosb pair
 * (label 0) to the fixup at label 3, which sets res to -EFAULT.
 *
 * count is an output operand of the asm and is modified.
 */
15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \
17 long __d0, __d1, __d2; \
18 might_sleep(); \
19 __asm__ __volatile__( \
20 " testq %1,%1\n" \
21 " jz 2f\n" \
22 "0: lodsb\n" \
23 " stosb\n" \
24 " testb %%al,%%al\n" \
25 " jz 1f\n" \
26 " decq %1\n" \
27 " jnz 0b\n" \
28 "1: subq %1,%0\n" \
29 "2:\n" \
30 ".section .fixup,\"ax\"\n" \
31 "3: movq %5,%0\n" \
32 " jmp 2b\n" \
33 ".previous\n" \
34 ".section __ex_table,\"a\"\n" \
35 " .align 8\n" \
36 " .quad 0b,3b\n" \
37 ".previous" \
38 : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \
39 "=&D" (__d2) \
40 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
41 : "memory"); \
42} while (0)
43
44long
45__strncpy_from_user(char *dst, const char __user *src, long count)
46{
47 long res;
48 __do_strncpy_from_user(dst, src, count, res);
49 return res;
50}
51EXPORT_SYMBOL(__strncpy_from_user);
52
53long
54strncpy_from_user(char *dst, const char __user *src, long count)
55{
56 long res = -EFAULT;
57 if (access_ok(VERIFY_READ, src, 1))
58 return __strncpy_from_user(dst, src, count);
59 return res;
60}
61EXPORT_SYMBOL(strncpy_from_user);
62
63/*
64 * Zero Userspace
65 */
66
/*
 * Zero size bytes at the user address addr without checking the range.
 * Whole quadwords (size / 8) are cleared first, then the remaining
 * size & 7 bytes.  Returns the number of bytes that could not be
 * cleared, which is 0 on success.
 *
 * Fault handling: a faulting quadword store (label 0) goes to the fixup
 * at label 3, which computes size1 + 8 * quadwords-left.  A faulting
 * byte store (label 1) resumes at label 2 with the bytes left still in
 * %rcx.
 */
67unsigned long __clear_user(void __user *addr, unsigned long size)
68{
69 long __d0;
70 might_sleep();
71 /* no memory constraint because it doesn't change any memory gcc knows
72 about */
73 asm volatile(
74 " testq %[size8],%[size8]\n"
75 " jz 4f\n"
76 "0: movq %[zero],(%[dst])\n"
77 " addq %[eight],%[dst]\n"
78 " decl %%ecx ; jnz 0b\n"
79 "4: movq %[size1],%%rcx\n"
80 " testl %%ecx,%%ecx\n"
81 " jz 2f\n"
82 "1: movb %b[zero],(%[dst])\n"
83 " incq %[dst]\n"
84 " decl %%ecx ; jnz 1b\n"
85 "2:\n"
86 ".section .fixup,\"ax\"\n"
87 "3: lea 0(%[size1],%[size8],8),%[size8]\n"
88 " jmp 2b\n"
89 ".previous\n"
90 ".section __ex_table,\"a\"\n"
91 " .align 8\n"
92 " .quad 0b,3b\n"
93 " .quad 1b,2b\n"
94 ".previous"
/* size is bound to %rcx: quadword count on entry, bytes left on exit */
95 : [size8] "=c"(size), [dst] "=&D" (__d0)
96 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
97 [zero] "r" (0UL), [eight] "r" (8UL));
98 return size;
99}
100EXPORT_SYMBOL(__clear_user);
101
102unsigned long clear_user(void __user *to, unsigned long n)
103{
104 if (access_ok(VERIFY_WRITE, to, n))
105 return __clear_user(to, n);
106 return n;
107}
108EXPORT_SYMBOL(clear_user);
109
110/*
111 * Return the size of a string (including the ending 0)
112 *
113 * Return 0 on exception, a value greater than N if too long
114 */
115
116long __strnlen_user(const char __user *s, long n)
117{
118 long res = 0;
119 char c;
120
121 while (1) {
122 if (res>n)
123 return n+1;
124 if (__get_user(c, s))
125 return 0;
126 if (!c)
127 return res+1;
128 res++;
129 s++;
130 }
131}
132EXPORT_SYMBOL(__strnlen_user);
133
134long strnlen_user(const char __user *s, long n)
135{
136 if (!access_ok(VERIFY_READ, s, n))
137 return 0;
138 return __strnlen_user(s, n);
139}
140EXPORT_SYMBOL(strnlen_user);
141
142long strlen_user(const char __user *s)
143{
144 long res = 0;
145 char c;
146
147 for (;;) {
148 if (get_user(c, s))
149 return 0;
150 if (!c)
151 return res+1;
152 res++;
153 s++;
154 }
155}
156EXPORT_SYMBOL(strlen_user);
157
158unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
159{
160 if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) {
161 return copy_user_generic((__force void *)to, (__force void *)from, len);
162 }
163 return len;
164}
165EXPORT_SYMBOL(copy_in_user);
166