author     Thomas Gleixner <tglx@linutronix.de>    2007-10-11 05:17:08 -0400
committer  Thomas Gleixner <tglx@linutronix.de>    2007-10-11 05:17:08 -0400
commit     185f3d38900f750a4566f87cde6a178f3595a115 (patch)
tree       d463f6da1af452b1bbdf476828ea88427087f255 /arch/x86
parent     51b2833060f26258ea2da091c7b9c6a358ac9dd2 (diff)
x86_64: move lib
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/lib/Makefile                |    2
-rw-r--r--  arch/x86/lib/Makefile_64             |   13
-rw-r--r--  arch/x86/lib/bitops_64.c             |  175
-rw-r--r--  arch/x86/lib/bitstr_64.c             |   28
-rw-r--r--  arch/x86/lib/clear_page_64.S         |   59
-rw-r--r--  arch/x86/lib/copy_page_64.S          |  119
-rw-r--r--  arch/x86/lib/copy_user_64.S          |  354
-rw-r--r--  arch/x86/lib/copy_user_nocache_64.S  |  217
-rw-r--r--  arch/x86/lib/csum-copy_64.S          |  249
-rw-r--r--  arch/x86/lib/csum-partial_64.c       |  150
-rw-r--r--  arch/x86/lib/csum-wrappers_64.c      |  135
-rw-r--r--  arch/x86/lib/delay_64.c              |   57
-rw-r--r--  arch/x86/lib/getuser_64.S            |  109
-rw-r--r--  arch/x86/lib/io_64.c                 |   23
-rw-r--r--  arch/x86/lib/iomap_copy_64.S         |   30
-rw-r--r--  arch/x86/lib/memcpy_64.S             |  131
-rw-r--r--  arch/x86/lib/memmove_64.c            |   21
-rw-r--r--  arch/x86/lib/memset_64.S             |  133
-rw-r--r--  arch/x86/lib/putuser_64.S            |  106
-rw-r--r--  arch/x86/lib/rwlock_64.S             |   38
-rw-r--r--  arch/x86/lib/thunk_64.S              |   67
-rw-r--r--  arch/x86/lib/usercopy_64.c           |  166
22 files changed, 2381 insertions(+), 1 deletion(-)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 2d7d724a2a6a..329da276c6f1 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -1,5 +1,5 @@
 ifeq ($(CONFIG_X86_32),y)
 include ${srctree}/arch/x86/lib/Makefile_32
 else
-include ${srctree}/arch/x86_64/lib/Makefile_64
+include ${srctree}/arch/x86/lib/Makefile_64
 endif
diff --git a/arch/x86/lib/Makefile_64 b/arch/x86/lib/Makefile_64
new file mode 100644
index 000000000000..bbabad3c9335
--- /dev/null
+++ b/arch/x86/lib/Makefile_64
@@ -0,0 +1,13 @@
1 | # | ||
2 | # Makefile for x86_64-specific library files. | ||
3 | # | ||
4 | |||
5 | CFLAGS_csum-partial_64.o := -funroll-loops | ||
6 | |||
7 | obj-y := io_64.o iomap_copy_64.o | ||
8 | obj-$(CONFIG_SMP) += msr-on-cpu.o | ||
9 | |||
10 | lib-y := csum-partial_64.o csum-copy_64.o csum-wrappers_64.o delay_64.o \ | ||
11 | usercopy_64.o getuser_64.o putuser_64.o \ | ||
12 | thunk_64.o clear_page_64.o copy_page_64.o bitstr_64.o bitops_64.o | ||
13 | lib-y += memcpy_64.o memmove_64.o memset_64.o copy_user_64.o rwlock_64.o copy_user_nocache_64.o | ||
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
new file mode 100644
index 000000000000..95b6d9639fba
--- /dev/null
+++ b/arch/x86/lib/bitops_64.c
@@ -0,0 +1,175 @@
1 | #include <linux/bitops.h> | ||
2 | |||
3 | #undef find_first_zero_bit | ||
4 | #undef find_next_zero_bit | ||
5 | #undef find_first_bit | ||
6 | #undef find_next_bit | ||
7 | |||
8 | static inline long | ||
9 | __find_first_zero_bit(const unsigned long * addr, unsigned long size) | ||
10 | { | ||
11 | long d0, d1, d2; | ||
12 | long res; | ||
13 | |||
14 | /* | ||
15 | * We must test the size in words, not in bits, because | ||
16 | * otherwise incoming sizes in the range -63..-1 will not run | ||
17 | * any scasq instructions, and then the flags used by the je | ||
18 | * instruction will have whatever random value was in place | ||
19 | * before. Nobody should call us like that, but | ||
20 | * find_next_zero_bit() does when offset and size are at the | ||
21 | * same word and it fails to find a zero itself. | ||
22 | */ | ||
23 | size += 63; | ||
24 | size >>= 6; | ||
25 | if (!size) | ||
26 | return 0; | ||
27 | asm volatile( | ||
28 | " repe; scasq\n" | ||
29 | " je 1f\n" | ||
30 | " xorq -8(%%rdi),%%rax\n" | ||
31 | " subq $8,%%rdi\n" | ||
32 | " bsfq %%rax,%%rdx\n" | ||
33 | "1: subq %[addr],%%rdi\n" | ||
34 | " shlq $3,%%rdi\n" | ||
35 | " addq %%rdi,%%rdx" | ||
36 | :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) | ||
37 | :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL), | ||
38 | [addr] "S" (addr) : "memory"); | ||
39 | /* | ||
40 | * Any register would do for [addr] above, but GCC tends to | ||
41 | * prefer rbx over rsi, even though rsi is readily available | ||
42 | * and doesn't have to be saved. | ||
43 | */ | ||
44 | return res; | ||
45 | } | ||
46 | |||
47 | /** | ||
48 | * find_first_zero_bit - find the first zero bit in a memory region | ||
49 | * @addr: The address to start the search at | ||
50 | * @size: The maximum size to search | ||
51 | * | ||
52 | * Returns the bit-number of the first zero bit, not the number of the byte | ||
53 | * containing a bit. | ||
54 | */ | ||
55 | long find_first_zero_bit(const unsigned long * addr, unsigned long size) | ||
56 | { | ||
57 | return __find_first_zero_bit (addr, size); | ||
58 | } | ||
59 | |||
60 | /** | ||
61 | * find_next_zero_bit - find the first zero bit in a memory region | ||
62 | * @addr: The address to base the search on | ||
63 | * @offset: The bitnumber to start searching at | ||
64 | * @size: The maximum size to search | ||
65 | */ | ||
66 | long find_next_zero_bit (const unsigned long * addr, long size, long offset) | ||
67 | { | ||
68 | const unsigned long * p = addr + (offset >> 6); | ||
69 | unsigned long set = 0; | ||
70 | unsigned long res, bit = offset&63; | ||
71 | |||
72 | if (bit) { | ||
73 | /* | ||
74 | * Look for zero in first word | ||
75 | */ | ||
76 | asm("bsfq %1,%0\n\t" | ||
77 | "cmoveq %2,%0" | ||
78 | : "=r" (set) | ||
79 | : "r" (~(*p >> bit)), "r"(64L)); | ||
80 | if (set < (64 - bit)) | ||
81 | return set + offset; | ||
82 | set = 64 - bit; | ||
83 | p++; | ||
84 | } | ||
85 | /* | ||
86 | * No zero yet, search remaining full words for a zero | ||
87 | */ | ||
88 | res = __find_first_zero_bit (p, size - 64 * (p - addr)); | ||
89 | |||
90 | return (offset + set + res); | ||
91 | } | ||
92 | |||
93 | static inline long | ||
94 | __find_first_bit(const unsigned long * addr, unsigned long size) | ||
95 | { | ||
96 | long d0, d1; | ||
97 | long res; | ||
98 | |||
99 | /* | ||
100 | * We must test the size in words, not in bits, because | ||
101 | * otherwise incoming sizes in the range -63..-1 will not run | ||
102 | * any scasq instructions, and then the flags used by the jz | ||
103 | * instruction will have whatever random value was in place | ||
104 | * before. Nobody should call us like that, but | ||
105 | * find_next_bit() does when offset and size are at the same | ||
106 | * word and it fails to find a one itself. | ||
107 | */ | ||
108 | size += 63; | ||
109 | size >>= 6; | ||
110 | if (!size) | ||
111 | return 0; | ||
112 | asm volatile( | ||
113 | " repe; scasq\n" | ||
114 | " jz 1f\n" | ||
115 | " subq $8,%%rdi\n" | ||
116 | " bsfq (%%rdi),%%rax\n" | ||
117 | "1: subq %[addr],%%rdi\n" | ||
118 | " shlq $3,%%rdi\n" | ||
119 | " addq %%rdi,%%rax" | ||
120 | :"=a" (res), "=&c" (d0), "=&D" (d1) | ||
121 | :"0" (0ULL), "1" (size), "2" (addr), | ||
122 | [addr] "r" (addr) : "memory"); | ||
123 | return res; | ||
124 | } | ||
125 | |||
126 | /** | ||
127 | * find_first_bit - find the first set bit in a memory region | ||
128 | * @addr: The address to start the search at | ||
129 | * @size: The maximum size to search | ||
130 | * | ||
131 | * Returns the bit-number of the first set bit, not the number of the byte | ||
132 | * containing a bit. | ||
133 | */ | ||
134 | long find_first_bit(const unsigned long * addr, unsigned long size) | ||
135 | { | ||
136 | return __find_first_bit(addr,size); | ||
137 | } | ||
138 | |||
139 | /** | ||
140 | * find_next_bit - find the first set bit in a memory region | ||
141 | * @addr: The address to base the search on | ||
142 | * @offset: The bitnumber to start searching at | ||
143 | * @size: The maximum size to search | ||
144 | */ | ||
145 | long find_next_bit(const unsigned long * addr, long size, long offset) | ||
146 | { | ||
147 | const unsigned long * p = addr + (offset >> 6); | ||
148 | unsigned long set = 0, bit = offset & 63, res; | ||
149 | |||
150 | if (bit) { | ||
151 | /* | ||
152 | * Look for nonzero in the first 64 bits: | ||
153 | */ | ||
154 | asm("bsfq %1,%0\n\t" | ||
155 | "cmoveq %2,%0\n\t" | ||
156 | : "=r" (set) | ||
157 | : "r" (*p >> bit), "r" (64L)); | ||
158 | if (set < (64 - bit)) | ||
159 | return set + offset; | ||
160 | set = 64 - bit; | ||
161 | p++; | ||
162 | } | ||
163 | /* | ||
164 | * No set bit yet, search remaining full words for a bit | ||
165 | */ | ||
166 | res = __find_first_bit (p, size - 64 * (p - addr)); | ||
167 | return (offset + set + res); | ||
168 | } | ||
169 | |||
170 | #include <linux/module.h> | ||
171 | |||
172 | EXPORT_SYMBOL(find_next_bit); | ||
173 | EXPORT_SYMBOL(find_first_bit); | ||
174 | EXPORT_SYMBOL(find_first_zero_bit); | ||
175 | EXPORT_SYMBOL(find_next_zero_bit); | ||
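
The repe/scasq and bsfq sequences above are a hand-tuned version of a plain word-by-word scan. For reference, a portable C sketch with the same find_first_zero_bit semantics (an illustrative reference implementation, not the kernel's generic fallback; it returns an index >= size when no zero bit is found below size):

    #include <limits.h>

    /* Bits are numbered from bit 0 of word 0 upward, matching the
     * little-endian bit numbering the kernel helpers above use. */
    static unsigned long ref_find_first_zero_bit(const unsigned long *addr,
                                                 unsigned long size)
    {
            const unsigned long bits = sizeof(unsigned long) * CHAR_BIT;
            unsigned long i;

            for (i = 0; i < size; i++) {
                    unsigned long word = addr[i / bits];

                    if (!(word & (1UL << (i % bits))))
                            return i;
            }
            return size;
    }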
diff --git a/arch/x86/lib/bitstr_64.c b/arch/x86/lib/bitstr_64.c
new file mode 100644
index 000000000000..24676609a6ac
--- /dev/null
+++ b/arch/x86/lib/bitstr_64.c
@@ -0,0 +1,28 @@
1 | #include <linux/module.h> | ||
2 | #include <linux/bitops.h> | ||
3 | |||
4 | /* Find string of zero bits in a bitmap */ | ||
5 | unsigned long | ||
6 | find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len) | ||
7 | { | ||
8 | unsigned long n, end, i; | ||
9 | |||
10 | again: | ||
11 | n = find_next_zero_bit(bitmap, nbits, start); | ||
12 | if (n == -1) | ||
13 | return -1; | ||
14 | |||
15 | /* could test bitsliced, but it's hardly worth it */ | ||
16 | end = n+len; | ||
17 | if (end >= nbits) | ||
18 | return -1; | ||
19 | for (i = n+1; i < end; i++) { | ||
20 | if (test_bit(i, bitmap)) { | ||
21 | start = i+1; | ||
22 | goto again; | ||
23 | } | ||
24 | } | ||
25 | return n; | ||
26 | } | ||
27 | |||
28 | EXPORT_SYMBOL(find_next_zero_string); | ||
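
find_next_zero_string() returns the start of a run of len consecutive clear bits; its main in-tree user at the time was the GART IOMMU allocator. A portable sketch of the same search idea (illustrative only, and not bit-for-bit identical to the boundary handling above; len is assumed >= 1):

    #include <limits.h>

    /* Find `len` consecutive zero bits in `bitmap`, scanning bit indices
     * [start, nbits).  Returns the index of the first bit of the run,
     * or -1 if no such run exists. */
    static long ref_find_zero_run(const unsigned long *bitmap, long start,
                                  long nbits, int len)
    {
            const long bits = sizeof(unsigned long) * CHAR_BIT;
            long n, run = 0;

            for (n = start; n < nbits; n++) {
                    int set = (bitmap[n / bits] >> (n % bits)) & 1;

                    run = set ? 0 : run + 1;
                    if (run == len)
                            return n - len + 1;
            }
            return -1;
    }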
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
new file mode 100644
index 000000000000..9a10a78bb4a4
--- /dev/null
+++ b/arch/x86/lib/clear_page_64.S
@@ -0,0 +1,59 @@
1 | #include <linux/linkage.h> | ||
2 | #include <asm/dwarf2.h> | ||
3 | |||
4 | /* | ||
5 | * Zero a page. | ||
6 | * rdi page | ||
7 | */ | ||
8 | ALIGN | ||
9 | clear_page_c: | ||
10 | CFI_STARTPROC | ||
11 | movl $4096/8,%ecx | ||
12 | xorl %eax,%eax | ||
13 | rep stosq | ||
14 | ret | ||
15 | CFI_ENDPROC | ||
16 | ENDPROC(clear_page) | ||
17 | |||
18 | ENTRY(clear_page) | ||
19 | CFI_STARTPROC | ||
20 | xorl %eax,%eax | ||
21 | movl $4096/64,%ecx | ||
22 | .p2align 4 | ||
23 | .Lloop: | ||
24 | decl %ecx | ||
25 | #define PUT(x) movq %rax,x*8(%rdi) | ||
26 | movq %rax,(%rdi) | ||
27 | PUT(1) | ||
28 | PUT(2) | ||
29 | PUT(3) | ||
30 | PUT(4) | ||
31 | PUT(5) | ||
32 | PUT(6) | ||
33 | PUT(7) | ||
34 | leaq 64(%rdi),%rdi | ||
35 | jnz .Lloop | ||
36 | nop | ||
37 | ret | ||
38 | CFI_ENDPROC | ||
39 | .Lclear_page_end: | ||
40 | ENDPROC(clear_page) | ||
41 | |||
42 | /* Some CPUs run faster using the string instructions. | ||
43 | It is also a lot simpler. Use this when possible */ | ||
44 | |||
45 | #include <asm/cpufeature.h> | ||
46 | |||
47 | .section .altinstr_replacement,"ax" | ||
48 | 1: .byte 0xeb /* jmp <disp8> */ | ||
49 | .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ | ||
50 | 2: | ||
51 | .previous | ||
52 | .section .altinstructions,"a" | ||
53 | .align 8 | ||
54 | .quad clear_page | ||
55 | .quad 1b | ||
56 | .byte X86_FEATURE_REP_GOOD | ||
57 | .byte .Lclear_page_end - clear_page | ||
58 | .byte 2b - 1b | ||
59 | .previous | ||
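
Each record emitted into .altinstructions above (original address, replacement address, feature bit, lengths) corresponds to the kernel's struct alt_instr; at boot, apply_alternatives() walks the table and, if the CPU advertises the feature bit (here X86_FEATURE_REP_GOOD), patches the two-byte jmp to clear_page_c over the start of clear_page. A simplified sketch of that patching step, assuming the era's asm-x86_64/alternative.h layout (field names may differ slightly; padding and NOP optimization omitted):

    #include <string.h>

    typedef unsigned char u8;

    struct alt_instr {
            u8 *instr;          /* original instruction sequence          */
            u8 *replacement;    /* bytes to patch in                      */
            u8  cpuid;          /* feature bit that enables the patch     */
            u8  instrlen;       /* length of the original sequence        */
            u8  replacementlen; /* length of the replacement, <= instrlen */
            u8  pad[5];
    };

    /* Simplified: for every record whose feature bit is set, copy the
     * replacement over the original and pad the remainder with 0x90 NOPs
     * (the real kernel uses optimal multi-byte NOPs). */
    static void apply_alternatives_sketch(struct alt_instr *start,
                                          struct alt_instr *end,
                                          int (*cpu_has)(u8 feature))
    {
            struct alt_instr *a;

            for (a = start; a < end; a++) {
                    if (!cpu_has(a->cpuid))
                            continue;
                    memcpy(a->instr, a->replacement, a->replacementlen);
                    memset(a->instr + a->replacementlen, 0x90,
                           a->instrlen - a->replacementlen);
            }
    }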
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
new file mode 100644
index 000000000000..727a5d46d2fc
--- /dev/null
+++ b/arch/x86/lib/copy_page_64.S
@@ -0,0 +1,119 @@
1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <asm/dwarf2.h> | ||
5 | |||
6 | ALIGN | ||
7 | copy_page_c: | ||
8 | CFI_STARTPROC | ||
9 | movl $4096/8,%ecx | ||
10 | rep movsq | ||
11 | ret | ||
12 | CFI_ENDPROC | ||
13 | ENDPROC(copy_page_c) | ||
14 | |||
15 | /* Don't use streaming store because it's better when the target | ||
16 | ends up in cache. */ | ||
17 | |||
18 | /* Could vary the prefetch distance based on SMP/UP */ | ||
19 | |||
20 | ENTRY(copy_page) | ||
21 | CFI_STARTPROC | ||
22 | subq $3*8,%rsp | ||
23 | CFI_ADJUST_CFA_OFFSET 3*8 | ||
24 | movq %rbx,(%rsp) | ||
25 | CFI_REL_OFFSET rbx, 0 | ||
26 | movq %r12,1*8(%rsp) | ||
27 | CFI_REL_OFFSET r12, 1*8 | ||
28 | movq %r13,2*8(%rsp) | ||
29 | CFI_REL_OFFSET r13, 2*8 | ||
30 | |||
31 | movl $(4096/64)-5,%ecx | ||
32 | .p2align 4 | ||
33 | .Loop64: | ||
34 | dec %rcx | ||
35 | |||
36 | movq (%rsi), %rax | ||
37 | movq 8 (%rsi), %rbx | ||
38 | movq 16 (%rsi), %rdx | ||
39 | movq 24 (%rsi), %r8 | ||
40 | movq 32 (%rsi), %r9 | ||
41 | movq 40 (%rsi), %r10 | ||
42 | movq 48 (%rsi), %r11 | ||
43 | movq 56 (%rsi), %r12 | ||
44 | |||
45 | prefetcht0 5*64(%rsi) | ||
46 | |||
47 | movq %rax, (%rdi) | ||
48 | movq %rbx, 8 (%rdi) | ||
49 | movq %rdx, 16 (%rdi) | ||
50 | movq %r8, 24 (%rdi) | ||
51 | movq %r9, 32 (%rdi) | ||
52 | movq %r10, 40 (%rdi) | ||
53 | movq %r11, 48 (%rdi) | ||
54 | movq %r12, 56 (%rdi) | ||
55 | |||
56 | leaq 64 (%rsi), %rsi | ||
57 | leaq 64 (%rdi), %rdi | ||
58 | |||
59 | jnz .Loop64 | ||
60 | |||
61 | movl $5,%ecx | ||
62 | .p2align 4 | ||
63 | .Loop2: | ||
64 | decl %ecx | ||
65 | |||
66 | movq (%rsi), %rax | ||
67 | movq 8 (%rsi), %rbx | ||
68 | movq 16 (%rsi), %rdx | ||
69 | movq 24 (%rsi), %r8 | ||
70 | movq 32 (%rsi), %r9 | ||
71 | movq 40 (%rsi), %r10 | ||
72 | movq 48 (%rsi), %r11 | ||
73 | movq 56 (%rsi), %r12 | ||
74 | |||
75 | movq %rax, (%rdi) | ||
76 | movq %rbx, 8 (%rdi) | ||
77 | movq %rdx, 16 (%rdi) | ||
78 | movq %r8, 24 (%rdi) | ||
79 | movq %r9, 32 (%rdi) | ||
80 | movq %r10, 40 (%rdi) | ||
81 | movq %r11, 48 (%rdi) | ||
82 | movq %r12, 56 (%rdi) | ||
83 | |||
84 | leaq 64(%rdi),%rdi | ||
85 | leaq 64(%rsi),%rsi | ||
86 | |||
87 | jnz .Loop2 | ||
88 | |||
89 | movq (%rsp),%rbx | ||
90 | CFI_RESTORE rbx | ||
91 | movq 1*8(%rsp),%r12 | ||
92 | CFI_RESTORE r12 | ||
93 | movq 2*8(%rsp),%r13 | ||
94 | CFI_RESTORE r13 | ||
95 | addq $3*8,%rsp | ||
96 | CFI_ADJUST_CFA_OFFSET -3*8 | ||
97 | ret | ||
98 | .Lcopy_page_end: | ||
99 | CFI_ENDPROC | ||
100 | ENDPROC(copy_page) | ||
101 | |||
102 | /* Some CPUs run faster using the string copy instructions. | ||
103 | It is also a lot simpler. Use this when possible */ | ||
104 | |||
105 | #include <asm/cpufeature.h> | ||
106 | |||
107 | .section .altinstr_replacement,"ax" | ||
108 | 1: .byte 0xeb /* jmp <disp8> */ | ||
109 | .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */ | ||
110 | 2: | ||
111 | .previous | ||
112 | .section .altinstructions,"a" | ||
113 | .align 8 | ||
114 | .quad copy_page | ||
115 | .quad 1b | ||
116 | .byte X86_FEATURE_REP_GOOD | ||
117 | .byte .Lcopy_page_end - copy_page | ||
118 | .byte 2b - 1b | ||
119 | .previous | ||
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
new file mode 100644
index 000000000000..70bebd310408
--- /dev/null
+++ b/arch/x86/lib/copy_user_64.S
@@ -0,0 +1,354 @@
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | ||
3 | * | ||
4 | * Functions to copy from and to user space. | ||
5 | */ | ||
6 | |||
7 | #include <linux/linkage.h> | ||
8 | #include <asm/dwarf2.h> | ||
9 | |||
10 | #define FIX_ALIGNMENT 1 | ||
11 | |||
12 | #include <asm/current.h> | ||
13 | #include <asm/asm-offsets.h> | ||
14 | #include <asm/thread_info.h> | ||
15 | #include <asm/cpufeature.h> | ||
16 | |||
17 | .macro ALTERNATIVE_JUMP feature,orig,alt | ||
18 | 0: | ||
19 | .byte 0xe9 /* 32bit jump */ | ||
20 | .long \orig-1f /* by default jump to orig */ | ||
21 | 1: | ||
22 | .section .altinstr_replacement,"ax" | ||
23 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | ||
24 | .long \alt-1b /* offset */ /* or alternatively to alt */ | ||
25 | .previous | ||
26 | .section .altinstructions,"a" | ||
27 | .align 8 | ||
28 | .quad 0b | ||
29 | .quad 2b | ||
30 | .byte \feature /* when feature is set */ | ||
31 | .byte 5 | ||
32 | .byte 5 | ||
33 | .previous | ||
34 | .endm | ||
35 | |||
36 | /* Standard copy_to_user with segment limit checking */ | ||
37 | ENTRY(copy_to_user) | ||
38 | CFI_STARTPROC | ||
39 | GET_THREAD_INFO(%rax) | ||
40 | movq %rdi,%rcx | ||
41 | addq %rdx,%rcx | ||
42 | jc bad_to_user | ||
43 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
44 | jae bad_to_user | ||
45 | xorl %eax,%eax /* clear zero flag */ | ||
46 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
47 | CFI_ENDPROC | ||
48 | |||
49 | ENTRY(copy_user_generic) | ||
50 | CFI_STARTPROC | ||
51 | movl $1,%ecx /* set zero flag */ | ||
52 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
53 | CFI_ENDPROC | ||
54 | |||
55 | ENTRY(__copy_from_user_inatomic) | ||
56 | CFI_STARTPROC | ||
57 | xorl %ecx,%ecx /* clear zero flag */ | ||
58 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
59 | CFI_ENDPROC | ||
60 | |||
61 | /* Standard copy_from_user with segment limit checking */ | ||
62 | ENTRY(copy_from_user) | ||
63 | CFI_STARTPROC | ||
64 | GET_THREAD_INFO(%rax) | ||
65 | movq %rsi,%rcx | ||
66 | addq %rdx,%rcx | ||
67 | jc bad_from_user | ||
68 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
69 | jae bad_from_user | ||
70 | movl $1,%ecx /* set zero flag */ | ||
71 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | ||
72 | CFI_ENDPROC | ||
73 | ENDPROC(copy_from_user) | ||
74 | |||
75 | .section .fixup,"ax" | ||
76 | /* must zero dest */ | ||
77 | bad_from_user: | ||
78 | CFI_STARTPROC | ||
79 | movl %edx,%ecx | ||
80 | xorl %eax,%eax | ||
81 | rep | ||
82 | stosb | ||
83 | bad_to_user: | ||
84 | movl %edx,%eax | ||
85 | ret | ||
86 | CFI_ENDPROC | ||
87 | END(bad_from_user) | ||
88 | .previous | ||
89 | |||
90 | |||
91 | /* | ||
92 | * copy_user_generic_unrolled - memory copy with exception handling. | ||
93 | * This version is for CPUs like P4 that don't have efficient micro code for rep movsq | ||
94 | * | ||
95 | * Input: | ||
96 | * rdi destination | ||
97 | * rsi source | ||
98 | * rdx count | ||
99 | * ecx zero flag -- if true zero destination on error | ||
100 | * | ||
101 | * Output: | ||
102 | * eax uncopied bytes or 0 if successful. | ||
103 | */ | ||
104 | ENTRY(copy_user_generic_unrolled) | ||
105 | CFI_STARTPROC | ||
106 | pushq %rbx | ||
107 | CFI_ADJUST_CFA_OFFSET 8 | ||
108 | CFI_REL_OFFSET rbx, 0 | ||
109 | pushq %rcx | ||
110 | CFI_ADJUST_CFA_OFFSET 8 | ||
111 | CFI_REL_OFFSET rcx, 0 | ||
112 | xorl %eax,%eax /*zero for the exception handler */ | ||
113 | |||
114 | #ifdef FIX_ALIGNMENT | ||
115 | /* check for bad alignment of destination */ | ||
116 | movl %edi,%ecx | ||
117 | andl $7,%ecx | ||
118 | jnz .Lbad_alignment | ||
119 | .Lafter_bad_alignment: | ||
120 | #endif | ||
121 | |||
122 | movq %rdx,%rcx | ||
123 | |||
124 | movl $64,%ebx | ||
125 | shrq $6,%rdx | ||
126 | decq %rdx | ||
127 | js .Lhandle_tail | ||
128 | |||
129 | .p2align 4 | ||
130 | .Lloop: | ||
131 | .Ls1: movq (%rsi),%r11 | ||
132 | .Ls2: movq 1*8(%rsi),%r8 | ||
133 | .Ls3: movq 2*8(%rsi),%r9 | ||
134 | .Ls4: movq 3*8(%rsi),%r10 | ||
135 | .Ld1: movq %r11,(%rdi) | ||
136 | .Ld2: movq %r8,1*8(%rdi) | ||
137 | .Ld3: movq %r9,2*8(%rdi) | ||
138 | .Ld4: movq %r10,3*8(%rdi) | ||
139 | |||
140 | .Ls5: movq 4*8(%rsi),%r11 | ||
141 | .Ls6: movq 5*8(%rsi),%r8 | ||
142 | .Ls7: movq 6*8(%rsi),%r9 | ||
143 | .Ls8: movq 7*8(%rsi),%r10 | ||
144 | .Ld5: movq %r11,4*8(%rdi) | ||
145 | .Ld6: movq %r8,5*8(%rdi) | ||
146 | .Ld7: movq %r9,6*8(%rdi) | ||
147 | .Ld8: movq %r10,7*8(%rdi) | ||
148 | |||
149 | decq %rdx | ||
150 | |||
151 | leaq 64(%rsi),%rsi | ||
152 | leaq 64(%rdi),%rdi | ||
153 | |||
154 | jns .Lloop | ||
155 | |||
156 | .p2align 4 | ||
157 | .Lhandle_tail: | ||
158 | movl %ecx,%edx | ||
159 | andl $63,%ecx | ||
160 | shrl $3,%ecx | ||
161 | jz .Lhandle_7 | ||
162 | movl $8,%ebx | ||
163 | .p2align 4 | ||
164 | .Lloop_8: | ||
165 | .Ls9: movq (%rsi),%r8 | ||
166 | .Ld9: movq %r8,(%rdi) | ||
167 | decl %ecx | ||
168 | leaq 8(%rdi),%rdi | ||
169 | leaq 8(%rsi),%rsi | ||
170 | jnz .Lloop_8 | ||
171 | |||
172 | .Lhandle_7: | ||
173 | movl %edx,%ecx | ||
174 | andl $7,%ecx | ||
175 | jz .Lende | ||
176 | .p2align 4 | ||
177 | .Lloop_1: | ||
178 | .Ls10: movb (%rsi),%bl | ||
179 | .Ld10: movb %bl,(%rdi) | ||
180 | incq %rdi | ||
181 | incq %rsi | ||
182 | decl %ecx | ||
183 | jnz .Lloop_1 | ||
184 | |||
185 | CFI_REMEMBER_STATE | ||
186 | .Lende: | ||
187 | popq %rcx | ||
188 | CFI_ADJUST_CFA_OFFSET -8 | ||
189 | CFI_RESTORE rcx | ||
190 | popq %rbx | ||
191 | CFI_ADJUST_CFA_OFFSET -8 | ||
192 | CFI_RESTORE rbx | ||
193 | ret | ||
194 | CFI_RESTORE_STATE | ||
195 | |||
196 | #ifdef FIX_ALIGNMENT | ||
197 | /* align destination */ | ||
198 | .p2align 4 | ||
199 | .Lbad_alignment: | ||
200 | movl $8,%r9d | ||
201 | subl %ecx,%r9d | ||
202 | movl %r9d,%ecx | ||
203 | cmpq %r9,%rdx | ||
204 | jz .Lhandle_7 | ||
205 | js .Lhandle_7 | ||
206 | .Lalign_1: | ||
207 | .Ls11: movb (%rsi),%bl | ||
208 | .Ld11: movb %bl,(%rdi) | ||
209 | incq %rsi | ||
210 | incq %rdi | ||
211 | decl %ecx | ||
212 | jnz .Lalign_1 | ||
213 | subq %r9,%rdx | ||
214 | jmp .Lafter_bad_alignment | ||
215 | #endif | ||
216 | |||
217 | /* table sorted by exception address */ | ||
218 | .section __ex_table,"a" | ||
219 | .align 8 | ||
220 | .quad .Ls1,.Ls1e | ||
221 | .quad .Ls2,.Ls2e | ||
222 | .quad .Ls3,.Ls3e | ||
223 | .quad .Ls4,.Ls4e | ||
224 | .quad .Ld1,.Ls1e | ||
225 | .quad .Ld2,.Ls2e | ||
226 | .quad .Ld3,.Ls3e | ||
227 | .quad .Ld4,.Ls4e | ||
228 | .quad .Ls5,.Ls5e | ||
229 | .quad .Ls6,.Ls6e | ||
230 | .quad .Ls7,.Ls7e | ||
231 | .quad .Ls8,.Ls8e | ||
232 | .quad .Ld5,.Ls5e | ||
233 | .quad .Ld6,.Ls6e | ||
234 | .quad .Ld7,.Ls7e | ||
235 | .quad .Ld8,.Ls8e | ||
236 | .quad .Ls9,.Le_quad | ||
237 | .quad .Ld9,.Le_quad | ||
238 | .quad .Ls10,.Le_byte | ||
239 | .quad .Ld10,.Le_byte | ||
240 | #ifdef FIX_ALIGNMENT | ||
241 | .quad .Ls11,.Lzero_rest | ||
242 | .quad .Ld11,.Lzero_rest | ||
243 | #endif | ||
244 | .quad .Le5,.Le_zero | ||
245 | .previous | ||
246 | |||
247 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
248 | pessimistic side. this is gross. it would be better to fix the | ||
249 | interface. */ | ||
250 | /* eax: zero, ebx: 64 */ | ||
251 | .Ls1e: addl $8,%eax | ||
252 | .Ls2e: addl $8,%eax | ||
253 | .Ls3e: addl $8,%eax | ||
254 | .Ls4e: addl $8,%eax | ||
255 | .Ls5e: addl $8,%eax | ||
256 | .Ls6e: addl $8,%eax | ||
257 | .Ls7e: addl $8,%eax | ||
258 | .Ls8e: addl $8,%eax | ||
259 | addq %rbx,%rdi /* +64 */ | ||
260 | subq %rax,%rdi /* correct destination with computed offset */ | ||
261 | |||
262 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
263 | addq %rax,%rdx /* add offset to loopcnt */ | ||
264 | andl $63,%ecx /* remaining bytes */ | ||
265 | addq %rcx,%rdx /* add them */ | ||
266 | jmp .Lzero_rest | ||
267 | |||
268 | /* exception on quad word loop in tail handling */ | ||
269 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
270 | .Le_quad: | ||
271 | shll $3,%ecx | ||
272 | andl $7,%edx | ||
273 | addl %ecx,%edx | ||
274 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
275 | .Lzero_rest: | ||
276 | cmpl $0,(%rsp) | ||
277 | jz .Le_zero | ||
278 | movq %rdx,%rcx | ||
279 | .Le_byte: | ||
280 | xorl %eax,%eax | ||
281 | .Le5: rep | ||
282 | stosb | ||
283 | /* when there is another exception while zeroing the rest just return */ | ||
284 | .Le_zero: | ||
285 | movq %rdx,%rax | ||
286 | jmp .Lende | ||
287 | CFI_ENDPROC | ||
288 | ENDPROC(copy_user_generic) | ||
289 | |||
290 | |||
291 | /* Some CPUs run faster using the string copy instructions. | ||
292 | This is also a lot simpler. Use them when possible. | ||
293 | Patch in jmps to this code instead of copying it fully | ||
294 | to avoid unwanted aliasing in the exception tables. */ | ||
295 | |||
296 | /* rdi destination | ||
297 | * rsi source | ||
298 | * rdx count | ||
299 | * ecx zero flag | ||
300 | * | ||
301 | * Output: | ||
302 | * eax uncopied bytes or 0 if successfull. | ||
303 | * | ||
304 | * Only 4GB of copy is supported. This shouldn't be a problem | ||
305 | * because the kernel normally only writes from/to page sized chunks | ||
306 | * even if user space passed a longer buffer. | ||
307 | * And more would be dangerous because both Intel and AMD have | ||
308 | * errata with rep movsq > 4GB. If someone feels the need to fix | ||
309 | * this please consider this. | ||
310 | */ | ||
311 | ENTRY(copy_user_generic_string) | ||
312 | CFI_STARTPROC | ||
313 | movl %ecx,%r8d /* save zero flag */ | ||
314 | movl %edx,%ecx | ||
315 | shrl $3,%ecx | ||
316 | andl $7,%edx | ||
317 | jz 10f | ||
318 | 1: rep | ||
319 | movsq | ||
320 | movl %edx,%ecx | ||
321 | 2: rep | ||
322 | movsb | ||
323 | 9: movl %ecx,%eax | ||
324 | ret | ||
325 | |||
326 | /* multiple of 8 byte */ | ||
327 | 10: rep | ||
328 | movsq | ||
329 | xor %eax,%eax | ||
330 | ret | ||
331 | |||
332 | /* exception handling */ | ||
333 | 3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ | ||
334 | jmp 6f | ||
335 | 5: movl %ecx,%eax /* exception on byte loop */ | ||
336 | /* eax: left over bytes */ | ||
337 | 6: testl %r8d,%r8d /* zero flag set? */ | ||
338 | jz 7f | ||
339 | movl %eax,%ecx /* initialize x86 loop counter */ | ||
340 | push %rax | ||
341 | xorl %eax,%eax | ||
342 | 8: rep | ||
343 | stosb /* zero the rest */ | ||
344 | 11: pop %rax | ||
345 | 7: ret | ||
346 | CFI_ENDPROC | ||
347 | END(copy_user_generic_c) | ||
348 | |||
349 | .section __ex_table,"a" | ||
350 | .quad 1b,3b | ||
351 | .quad 2b,5b | ||
352 | .quad 8b,11b | ||
353 | .quad 10b,3b | ||
354 | .previous | ||
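
Each .quad pair in the __ex_table sections above is an (instruction address, fixup address) record: when one of the tagged mov instructions faults on a bad user address, the page-fault handler looks the faulting RIP up in this table and resumes at the fixup label instead of oopsing. A minimal sketch of that lookup (the real table is sorted and binary-searched; this linear version is illustrative only):

    struct exception_table_entry {
            unsigned long insn;     /* address of the faulting instruction */
            unsigned long fixup;    /* address to resume execution at      */
    };

    /* Returns the fixup address for `rip`, or 0 if the fault did not
     * happen at a whitelisted instruction (in which case the kernel
     * treats it as a fatal fault). */
    static unsigned long
    search_extable_sketch(const struct exception_table_entry *first,
                          const struct exception_table_entry *last,
                          unsigned long rip)
    {
            const struct exception_table_entry *e;

            for (e = first; e <= last; e++)
                    if (e->insn == rip)
                            return e->fixup;
            return 0;
    }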
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
new file mode 100644
index 000000000000..4620efb12f13
--- /dev/null
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -0,0 +1,217 @@
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | ||
3 | * | ||
4 | * Functions to copy from and to user space. | ||
5 | */ | ||
6 | |||
7 | #include <linux/linkage.h> | ||
8 | #include <asm/dwarf2.h> | ||
9 | |||
10 | #define FIX_ALIGNMENT 1 | ||
11 | |||
12 | #include <asm/current.h> | ||
13 | #include <asm/asm-offsets.h> | ||
14 | #include <asm/thread_info.h> | ||
15 | #include <asm/cpufeature.h> | ||
16 | |||
17 | /* | ||
18 | * copy_user_nocache - Uncached memory copy with exception handling | ||
19 | * This will force destination/source out of cache for more performance. | ||
20 | * | ||
21 | * Input: | ||
22 | * rdi destination | ||
23 | * rsi source | ||
24 | * rdx count | ||
25 | * rcx zero flag when 1 zero on exception | ||
26 | * | ||
27 | * Output: | ||
28 | * eax uncopied bytes or 0 if successful. | ||
29 | */ | ||
30 | ENTRY(__copy_user_nocache) | ||
31 | CFI_STARTPROC | ||
32 | pushq %rbx | ||
33 | CFI_ADJUST_CFA_OFFSET 8 | ||
34 | CFI_REL_OFFSET rbx, 0 | ||
35 | pushq %rcx /* save zero flag */ | ||
36 | CFI_ADJUST_CFA_OFFSET 8 | ||
37 | CFI_REL_OFFSET rcx, 0 | ||
38 | |||
39 | xorl %eax,%eax /* zero for the exception handler */ | ||
40 | |||
41 | #ifdef FIX_ALIGNMENT | ||
42 | /* check for bad alignment of destination */ | ||
43 | movl %edi,%ecx | ||
44 | andl $7,%ecx | ||
45 | jnz .Lbad_alignment | ||
46 | .Lafter_bad_alignment: | ||
47 | #endif | ||
48 | |||
49 | movq %rdx,%rcx | ||
50 | |||
51 | movl $64,%ebx | ||
52 | shrq $6,%rdx | ||
53 | decq %rdx | ||
54 | js .Lhandle_tail | ||
55 | |||
56 | .p2align 4 | ||
57 | .Lloop: | ||
58 | .Ls1: movq (%rsi),%r11 | ||
59 | .Ls2: movq 1*8(%rsi),%r8 | ||
60 | .Ls3: movq 2*8(%rsi),%r9 | ||
61 | .Ls4: movq 3*8(%rsi),%r10 | ||
62 | .Ld1: movnti %r11,(%rdi) | ||
63 | .Ld2: movnti %r8,1*8(%rdi) | ||
64 | .Ld3: movnti %r9,2*8(%rdi) | ||
65 | .Ld4: movnti %r10,3*8(%rdi) | ||
66 | |||
67 | .Ls5: movq 4*8(%rsi),%r11 | ||
68 | .Ls6: movq 5*8(%rsi),%r8 | ||
69 | .Ls7: movq 6*8(%rsi),%r9 | ||
70 | .Ls8: movq 7*8(%rsi),%r10 | ||
71 | .Ld5: movnti %r11,4*8(%rdi) | ||
72 | .Ld6: movnti %r8,5*8(%rdi) | ||
73 | .Ld7: movnti %r9,6*8(%rdi) | ||
74 | .Ld8: movnti %r10,7*8(%rdi) | ||
75 | |||
76 | dec %rdx | ||
77 | |||
78 | leaq 64(%rsi),%rsi | ||
79 | leaq 64(%rdi),%rdi | ||
80 | |||
81 | jns .Lloop | ||
82 | |||
83 | .p2align 4 | ||
84 | .Lhandle_tail: | ||
85 | movl %ecx,%edx | ||
86 | andl $63,%ecx | ||
87 | shrl $3,%ecx | ||
88 | jz .Lhandle_7 | ||
89 | movl $8,%ebx | ||
90 | .p2align 4 | ||
91 | .Lloop_8: | ||
92 | .Ls9: movq (%rsi),%r8 | ||
93 | .Ld9: movnti %r8,(%rdi) | ||
94 | decl %ecx | ||
95 | leaq 8(%rdi),%rdi | ||
96 | leaq 8(%rsi),%rsi | ||
97 | jnz .Lloop_8 | ||
98 | |||
99 | .Lhandle_7: | ||
100 | movl %edx,%ecx | ||
101 | andl $7,%ecx | ||
102 | jz .Lende | ||
103 | .p2align 4 | ||
104 | .Lloop_1: | ||
105 | .Ls10: movb (%rsi),%bl | ||
106 | .Ld10: movb %bl,(%rdi) | ||
107 | incq %rdi | ||
108 | incq %rsi | ||
109 | decl %ecx | ||
110 | jnz .Lloop_1 | ||
111 | |||
112 | CFI_REMEMBER_STATE | ||
113 | .Lende: | ||
114 | popq %rcx | ||
115 | CFI_ADJUST_CFA_OFFSET -8 | ||
116 | CFI_RESTORE %rcx | ||
117 | popq %rbx | ||
118 | CFI_ADJUST_CFA_OFFSET -8 | ||
119 | CFI_RESTORE rbx | ||
120 | ret | ||
121 | CFI_RESTORE_STATE | ||
122 | |||
123 | #ifdef FIX_ALIGNMENT | ||
124 | /* align destination */ | ||
125 | .p2align 4 | ||
126 | .Lbad_alignment: | ||
127 | movl $8,%r9d | ||
128 | subl %ecx,%r9d | ||
129 | movl %r9d,%ecx | ||
130 | cmpq %r9,%rdx | ||
131 | jz .Lhandle_7 | ||
132 | js .Lhandle_7 | ||
133 | .Lalign_1: | ||
134 | .Ls11: movb (%rsi),%bl | ||
135 | .Ld11: movb %bl,(%rdi) | ||
136 | incq %rsi | ||
137 | incq %rdi | ||
138 | decl %ecx | ||
139 | jnz .Lalign_1 | ||
140 | subq %r9,%rdx | ||
141 | jmp .Lafter_bad_alignment | ||
142 | #endif | ||
143 | |||
144 | /* table sorted by exception address */ | ||
145 | .section __ex_table,"a" | ||
146 | .align 8 | ||
147 | .quad .Ls1,.Ls1e | ||
148 | .quad .Ls2,.Ls2e | ||
149 | .quad .Ls3,.Ls3e | ||
150 | .quad .Ls4,.Ls4e | ||
151 | .quad .Ld1,.Ls1e | ||
152 | .quad .Ld2,.Ls2e | ||
153 | .quad .Ld3,.Ls3e | ||
154 | .quad .Ld4,.Ls4e | ||
155 | .quad .Ls5,.Ls5e | ||
156 | .quad .Ls6,.Ls6e | ||
157 | .quad .Ls7,.Ls7e | ||
158 | .quad .Ls8,.Ls8e | ||
159 | .quad .Ld5,.Ls5e | ||
160 | .quad .Ld6,.Ls6e | ||
161 | .quad .Ld7,.Ls7e | ||
162 | .quad .Ld8,.Ls8e | ||
163 | .quad .Ls9,.Le_quad | ||
164 | .quad .Ld9,.Le_quad | ||
165 | .quad .Ls10,.Le_byte | ||
166 | .quad .Ld10,.Le_byte | ||
167 | #ifdef FIX_ALIGNMENT | ||
168 | .quad .Ls11,.Lzero_rest | ||
169 | .quad .Ld11,.Lzero_rest | ||
170 | #endif | ||
171 | .quad .Le5,.Le_zero | ||
172 | .previous | ||
173 | |||
174 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
175 | pessimistic side. this is gross. it would be better to fix the | ||
176 | interface. */ | ||
177 | /* eax: zero, ebx: 64 */ | ||
178 | .Ls1e: addl $8,%eax | ||
179 | .Ls2e: addl $8,%eax | ||
180 | .Ls3e: addl $8,%eax | ||
181 | .Ls4e: addl $8,%eax | ||
182 | .Ls5e: addl $8,%eax | ||
183 | .Ls6e: addl $8,%eax | ||
184 | .Ls7e: addl $8,%eax | ||
185 | .Ls8e: addl $8,%eax | ||
186 | addq %rbx,%rdi /* +64 */ | ||
187 | subq %rax,%rdi /* correct destination with computed offset */ | ||
188 | |||
189 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
190 | addq %rax,%rdx /* add offset to loopcnt */ | ||
191 | andl $63,%ecx /* remaining bytes */ | ||
192 | addq %rcx,%rdx /* add them */ | ||
193 | jmp .Lzero_rest | ||
194 | |||
195 | /* exception on quad word loop in tail handling */ | ||
196 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
197 | .Le_quad: | ||
198 | shll $3,%ecx | ||
199 | andl $7,%edx | ||
200 | addl %ecx,%edx | ||
201 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
202 | .Lzero_rest: | ||
203 | cmpl $0,(%rsp) /* zero flag set? */ | ||
204 | jz .Le_zero | ||
205 | movq %rdx,%rcx | ||
206 | .Le_byte: | ||
207 | xorl %eax,%eax | ||
208 | .Le5: rep | ||
209 | stosb | ||
210 | /* when there is another exception while zeroing the rest just return */ | ||
211 | .Le_zero: | ||
212 | movq %rdx,%rax | ||
213 | jmp .Lende | ||
214 | CFI_ENDPROC | ||
215 | ENDPROC(__copy_user_nocache) | ||
216 | |||
217 | |||
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
new file mode 100644
index 000000000000..f0dba36578ea
--- /dev/null
+++ b/arch/x86/lib/csum-copy_64.S
@@ -0,0 +1,249 @@
1 | /* | ||
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of the GNU General Public | ||
5 | * License. See the file COPYING in the main directory of this archive | ||
6 | * for more details. No warranty for anything given at all. | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/dwarf2.h> | ||
10 | #include <asm/errno.h> | ||
11 | |||
12 | /* | ||
13 | * Checksum copy with exception handling. | ||
14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the | ||
15 | * destination is zeroed. | ||
16 | * | ||
17 | * Input | ||
18 | * rdi source | ||
19 | * rsi destination | ||
20 | * edx len (32bit) | ||
21 | * ecx sum (32bit) | ||
22 | * r8 src_err_ptr (int) | ||
23 | * r9 dst_err_ptr (int) | ||
24 | * | ||
25 | * Output | ||
26 | * eax 64bit sum. undefined in case of exception. | ||
27 | * | ||
28 | * Wrappers need to take care of valid exception sum and zeroing. | ||
29 | * They also should align source or destination to 8 bytes. | ||
30 | */ | ||
31 | |||
32 | .macro source | ||
33 | 10: | ||
34 | .section __ex_table,"a" | ||
35 | .align 8 | ||
36 | .quad 10b,.Lbad_source | ||
37 | .previous | ||
38 | .endm | ||
39 | |||
40 | .macro dest | ||
41 | 20: | ||
42 | .section __ex_table,"a" | ||
43 | .align 8 | ||
44 | .quad 20b,.Lbad_dest | ||
45 | .previous | ||
46 | .endm | ||
47 | |||
48 | .macro ignore L=.Lignore | ||
49 | 30: | ||
50 | .section __ex_table,"a" | ||
51 | .align 8 | ||
52 | .quad 30b,\L | ||
53 | .previous | ||
54 | .endm | ||
55 | |||
56 | |||
57 | ENTRY(csum_partial_copy_generic) | ||
58 | CFI_STARTPROC | ||
59 | cmpl $3*64,%edx | ||
60 | jle .Lignore | ||
61 | |||
62 | .Lignore: | ||
63 | subq $7*8,%rsp | ||
64 | CFI_ADJUST_CFA_OFFSET 7*8 | ||
65 | movq %rbx,2*8(%rsp) | ||
66 | CFI_REL_OFFSET rbx, 2*8 | ||
67 | movq %r12,3*8(%rsp) | ||
68 | CFI_REL_OFFSET r12, 3*8 | ||
69 | movq %r14,4*8(%rsp) | ||
70 | CFI_REL_OFFSET r14, 4*8 | ||
71 | movq %r13,5*8(%rsp) | ||
72 | CFI_REL_OFFSET r13, 5*8 | ||
73 | movq %rbp,6*8(%rsp) | ||
74 | CFI_REL_OFFSET rbp, 6*8 | ||
75 | |||
76 | movq %r8,(%rsp) | ||
77 | movq %r9,1*8(%rsp) | ||
78 | |||
79 | movl %ecx,%eax | ||
80 | movl %edx,%ecx | ||
81 | |||
82 | xorl %r9d,%r9d | ||
83 | movq %rcx,%r12 | ||
84 | |||
85 | shrq $6,%r12 | ||
86 | jz .Lhandle_tail /* < 64 */ | ||
87 | |||
88 | clc | ||
89 | |||
90 | /* main loop. clear in 64 byte blocks */ | ||
91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | ||
92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | ||
93 | /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ | ||
94 | .p2align 4 | ||
95 | .Lloop: | ||
96 | source | ||
97 | movq (%rdi),%rbx | ||
98 | source | ||
99 | movq 8(%rdi),%r8 | ||
100 | source | ||
101 | movq 16(%rdi),%r11 | ||
102 | source | ||
103 | movq 24(%rdi),%rdx | ||
104 | |||
105 | source | ||
106 | movq 32(%rdi),%r10 | ||
107 | source | ||
108 | movq 40(%rdi),%rbp | ||
109 | source | ||
110 | movq 48(%rdi),%r14 | ||
111 | source | ||
112 | movq 56(%rdi),%r13 | ||
113 | |||
114 | ignore 2f | ||
115 | prefetcht0 5*64(%rdi) | ||
116 | 2: | ||
117 | adcq %rbx,%rax | ||
118 | adcq %r8,%rax | ||
119 | adcq %r11,%rax | ||
120 | adcq %rdx,%rax | ||
121 | adcq %r10,%rax | ||
122 | adcq %rbp,%rax | ||
123 | adcq %r14,%rax | ||
124 | adcq %r13,%rax | ||
125 | |||
126 | decl %r12d | ||
127 | |||
128 | dest | ||
129 | movq %rbx,(%rsi) | ||
130 | dest | ||
131 | movq %r8,8(%rsi) | ||
132 | dest | ||
133 | movq %r11,16(%rsi) | ||
134 | dest | ||
135 | movq %rdx,24(%rsi) | ||
136 | |||
137 | dest | ||
138 | movq %r10,32(%rsi) | ||
139 | dest | ||
140 | movq %rbp,40(%rsi) | ||
141 | dest | ||
142 | movq %r14,48(%rsi) | ||
143 | dest | ||
144 | movq %r13,56(%rsi) | ||
145 | |||
146 | 3: | ||
147 | |||
148 | leaq 64(%rdi),%rdi | ||
149 | leaq 64(%rsi),%rsi | ||
150 | |||
151 | jnz .Lloop | ||
152 | |||
153 | adcq %r9,%rax | ||
154 | |||
155 | /* do last upto 56 bytes */ | ||
156 | .Lhandle_tail: | ||
157 | /* ecx: count */ | ||
158 | movl %ecx,%r10d | ||
159 | andl $63,%ecx | ||
160 | shrl $3,%ecx | ||
161 | jz .Lfold | ||
162 | clc | ||
163 | .p2align 4 | ||
164 | .Lloop_8: | ||
165 | source | ||
166 | movq (%rdi),%rbx | ||
167 | adcq %rbx,%rax | ||
168 | decl %ecx | ||
169 | dest | ||
170 | movq %rbx,(%rsi) | ||
171 | leaq 8(%rsi),%rsi /* preserve carry */ | ||
172 | leaq 8(%rdi),%rdi | ||
173 | jnz .Lloop_8 | ||
174 | adcq %r9,%rax /* add in carry */ | ||
175 | |||
176 | .Lfold: | ||
177 | /* reduce checksum to 32bits */ | ||
178 | movl %eax,%ebx | ||
179 | shrq $32,%rax | ||
180 | addl %ebx,%eax | ||
181 | adcl %r9d,%eax | ||
182 | |||
183 | /* do last upto 6 bytes */ | ||
184 | .Lhandle_7: | ||
185 | movl %r10d,%ecx | ||
186 | andl $7,%ecx | ||
187 | shrl $1,%ecx | ||
188 | jz .Lhandle_1 | ||
189 | movl $2,%edx | ||
190 | xorl %ebx,%ebx | ||
191 | clc | ||
192 | .p2align 4 | ||
193 | .Lloop_1: | ||
194 | source | ||
195 | movw (%rdi),%bx | ||
196 | adcl %ebx,%eax | ||
197 | decl %ecx | ||
198 | dest | ||
199 | movw %bx,(%rsi) | ||
200 | leaq 2(%rdi),%rdi | ||
201 | leaq 2(%rsi),%rsi | ||
202 | jnz .Lloop_1 | ||
203 | adcl %r9d,%eax /* add in carry */ | ||
204 | |||
205 | /* handle last odd byte */ | ||
206 | .Lhandle_1: | ||
207 | testl $1,%r10d | ||
208 | jz .Lende | ||
209 | xorl %ebx,%ebx | ||
210 | source | ||
211 | movb (%rdi),%bl | ||
212 | dest | ||
213 | movb %bl,(%rsi) | ||
214 | addl %ebx,%eax | ||
215 | adcl %r9d,%eax /* carry */ | ||
216 | |||
217 | CFI_REMEMBER_STATE | ||
218 | .Lende: | ||
219 | movq 2*8(%rsp),%rbx | ||
220 | CFI_RESTORE rbx | ||
221 | movq 3*8(%rsp),%r12 | ||
222 | CFI_RESTORE r12 | ||
223 | movq 4*8(%rsp),%r14 | ||
224 | CFI_RESTORE r14 | ||
225 | movq 5*8(%rsp),%r13 | ||
226 | CFI_RESTORE r13 | ||
227 | movq 6*8(%rsp),%rbp | ||
228 | CFI_RESTORE rbp | ||
229 | addq $7*8,%rsp | ||
230 | CFI_ADJUST_CFA_OFFSET -7*8 | ||
231 | ret | ||
232 | CFI_RESTORE_STATE | ||
233 | |||
234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | ||
235 | .Lbad_source: | ||
236 | movq (%rsp),%rax | ||
237 | testq %rax,%rax | ||
238 | jz .Lende | ||
239 | movl $-EFAULT,(%rax) | ||
240 | jmp .Lende | ||
241 | |||
242 | .Lbad_dest: | ||
243 | movq 8(%rsp),%rax | ||
244 | testq %rax,%rax | ||
245 | jz .Lende | ||
246 | movl $-EFAULT,(%rax) | ||
247 | jmp .Lende | ||
248 | CFI_ENDPROC | ||
249 | ENDPROC(csum_partial_copy_generic) | ||
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
new file mode 100644
index 000000000000..bc503f506903
--- /dev/null
+++ b/arch/x86/lib/csum-partial_64.c
@@ -0,0 +1,150 @@
1 | /* | ||
2 | * arch/x86_64/lib/csum-partial.c | ||
3 | * | ||
4 | * This file contains network checksum routines that are better done | ||
5 | * in an architecture-specific manner due to speed. | ||
6 | */ | ||
7 | |||
8 | #include <linux/compiler.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <asm/checksum.h> | ||
11 | |||
12 | static inline unsigned short from32to16(unsigned a) | ||
13 | { | ||
14 | unsigned short b = a >> 16; | ||
15 | asm("addw %w2,%w0\n\t" | ||
16 | "adcw $0,%w0\n" | ||
17 | : "=r" (b) | ||
18 | : "0" (b), "r" (a)); | ||
19 | return b; | ||
20 | } | ||
21 | |||
22 | /* | ||
23 | * Do a 64-bit checksum on an arbitrary memory area. | ||
24 | * Returns a 32bit checksum. | ||
25 | * | ||
26 | * This isn't as time critical as it used to be because many NICs | ||
27 | * do hardware checksumming these days. | ||
28 | * | ||
29 | * Things tried and found to not make it faster: | ||
30 | * Manual Prefetching | ||
31 | * Unrolling to an 128 bytes inner loop. | ||
32 | * Using interleaving with more registers to break the carry chains. | ||
33 | */ | ||
34 | static unsigned do_csum(const unsigned char *buff, unsigned len) | ||
35 | { | ||
36 | unsigned odd, count; | ||
37 | unsigned long result = 0; | ||
38 | |||
39 | if (unlikely(len == 0)) | ||
40 | return result; | ||
41 | odd = 1 & (unsigned long) buff; | ||
42 | if (unlikely(odd)) { | ||
43 | result = *buff << 8; | ||
44 | len--; | ||
45 | buff++; | ||
46 | } | ||
47 | count = len >> 1; /* nr of 16-bit words.. */ | ||
48 | if (count) { | ||
49 | if (2 & (unsigned long) buff) { | ||
50 | result += *(unsigned short *)buff; | ||
51 | count--; | ||
52 | len -= 2; | ||
53 | buff += 2; | ||
54 | } | ||
55 | count >>= 1; /* nr of 32-bit words.. */ | ||
56 | if (count) { | ||
57 | unsigned long zero; | ||
58 | unsigned count64; | ||
59 | if (4 & (unsigned long) buff) { | ||
60 | result += *(unsigned int *) buff; | ||
61 | count--; | ||
62 | len -= 4; | ||
63 | buff += 4; | ||
64 | } | ||
65 | count >>= 1; /* nr of 64-bit words.. */ | ||
66 | |||
67 | /* main loop using 64byte blocks */ | ||
68 | zero = 0; | ||
69 | count64 = count >> 3; | ||
70 | while (count64) { | ||
71 | asm("addq 0*8(%[src]),%[res]\n\t" | ||
72 | "adcq 1*8(%[src]),%[res]\n\t" | ||
73 | "adcq 2*8(%[src]),%[res]\n\t" | ||
74 | "adcq 3*8(%[src]),%[res]\n\t" | ||
75 | "adcq 4*8(%[src]),%[res]\n\t" | ||
76 | "adcq 5*8(%[src]),%[res]\n\t" | ||
77 | "adcq 6*8(%[src]),%[res]\n\t" | ||
78 | "adcq 7*8(%[src]),%[res]\n\t" | ||
79 | "adcq %[zero],%[res]" | ||
80 | : [res] "=r" (result) | ||
81 | : [src] "r" (buff), [zero] "r" (zero), | ||
82 | "[res]" (result)); | ||
83 | buff += 64; | ||
84 | count64--; | ||
85 | } | ||
86 | |||
87 | /* last upto 7 8byte blocks */ | ||
88 | count %= 8; | ||
89 | while (count) { | ||
90 | asm("addq %1,%0\n\t" | ||
91 | "adcq %2,%0\n" | ||
92 | : "=r" (result) | ||
93 | : "m" (*(unsigned long *)buff), | ||
94 | "r" (zero), "0" (result)); | ||
95 | --count; | ||
96 | buff += 8; | ||
97 | } | ||
98 | result = add32_with_carry(result>>32, | ||
99 | result&0xffffffff); | ||
100 | |||
101 | if (len & 4) { | ||
102 | result += *(unsigned int *) buff; | ||
103 | buff += 4; | ||
104 | } | ||
105 | } | ||
106 | if (len & 2) { | ||
107 | result += *(unsigned short *) buff; | ||
108 | buff += 2; | ||
109 | } | ||
110 | } | ||
111 | if (len & 1) | ||
112 | result += *buff; | ||
113 | result = add32_with_carry(result>>32, result & 0xffffffff); | ||
114 | if (unlikely(odd)) { | ||
115 | result = from32to16(result); | ||
116 | result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | ||
117 | } | ||
118 | return result; | ||
119 | } | ||
120 | |||
121 | /* | ||
122 | * computes the checksum of a memory block at buff, length len, | ||
123 | * and adds in "sum" (32-bit) | ||
124 | * | ||
125 | * returns a 32-bit number suitable for feeding into itself | ||
126 | * or csum_tcpudp_magic | ||
127 | * | ||
128 | * this function must be called with even lengths, except | ||
129 | * for the last fragment, which may be odd | ||
130 | * | ||
131 | * it's best to have buff aligned on a 64-bit boundary | ||
132 | */ | ||
133 | __wsum csum_partial(const void *buff, int len, __wsum sum) | ||
134 | { | ||
135 | return (__force __wsum)add32_with_carry(do_csum(buff, len), | ||
136 | (__force u32)sum); | ||
137 | } | ||
138 | |||
139 | EXPORT_SYMBOL(csum_partial); | ||
140 | |||
141 | /* | ||
142 | * this routine is used for miscellaneous IP-like checksums, mainly | ||
143 | * in icmp.c | ||
144 | */ | ||
145 | __sum16 ip_compute_csum(const void *buff, int len) | ||
146 | { | ||
147 | return csum_fold(csum_partial(buff,len,0)); | ||
148 | } | ||
149 | EXPORT_SYMBOL(ip_compute_csum); | ||
150 | |||
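
The add32_with_carry() calls fold the 64-bit accumulator back to 32 bits while preserving the end-around carry that the ones'-complement Internet checksum requires, which is what the adcq chains do in hardware. A portable sketch of the equivalent folding arithmetic (illustrative, not the kernel's asm helpers):

    #include <stdint.h>

    /* Fold a 64-bit ones'-complement accumulator down to 32 bits,
     * feeding the carry of the addition back in. */
    static uint32_t fold64_to_32(uint64_t sum)
    {
            uint32_t lo = (uint32_t)sum;
            uint32_t hi = (uint32_t)(sum >> 32);
            uint32_t r = lo + hi;

            if (r < lo)             /* end-around carry */
                    r++;
            return r;
    }

    /* Final fold to the 16-bit checksum, as csum_fold() does. */
    static uint16_t fold32_to_16(uint32_t sum)
    {
            sum = (sum & 0xffff) + (sum >> 16);
            sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)~sum;
    }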
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
new file mode 100644
index 000000000000..fd42a4a095fc
--- /dev/null
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -0,0 +1,135 @@
1 | /* Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v.2 | ||
3 | * | ||
4 | * Wrappers of assembly checksum functions for x86-64. | ||
5 | */ | ||
6 | |||
7 | #include <asm/checksum.h> | ||
8 | #include <linux/module.h> | ||
9 | |||
10 | /** | ||
11 | * csum_partial_copy_from_user - Copy and checksum from user space. | ||
12 | * @src: source address (user space) | ||
13 | * @dst: destination address | ||
14 | * @len: number of bytes to be copied. | ||
15 | * @isum: initial sum that is added into the result (32bit unfolded) | ||
16 | * @errp: set to -EFAULT for an bad source address. | ||
17 | * | ||
18 | * Returns an 32bit unfolded checksum of the buffer. | ||
19 | * src and dst are best aligned to 64bits. | ||
20 | */ | ||
21 | __wsum | ||
22 | csum_partial_copy_from_user(const void __user *src, void *dst, | ||
23 | int len, __wsum isum, int *errp) | ||
24 | { | ||
25 | might_sleep(); | ||
26 | *errp = 0; | ||
27 | if (likely(access_ok(VERIFY_READ,src, len))) { | ||
28 | /* Why 6, not 7? To handle odd addresses aligned we | ||
29 | would need to do considerable complications to fix the | ||
30 | checksum which is defined as an 16bit accumulator. The | ||
31 | fix alignment code is primarily for performance | ||
32 | compatibility with 32bit and that will handle odd | ||
33 | addresses slowly too. */ | ||
34 | if (unlikely((unsigned long)src & 6)) { | ||
35 | while (((unsigned long)src & 6) && len >= 2) { | ||
36 | __u16 val16; | ||
37 | *errp = __get_user(val16, (const __u16 __user *)src); | ||
38 | if (*errp) | ||
39 | return isum; | ||
40 | *(__u16 *)dst = val16; | ||
41 | isum = (__force __wsum)add32_with_carry( | ||
42 | (__force unsigned)isum, val16); | ||
43 | src += 2; | ||
44 | dst += 2; | ||
45 | len -= 2; | ||
46 | } | ||
47 | } | ||
48 | isum = csum_partial_copy_generic((__force const void *)src, | ||
49 | dst, len, isum, errp, NULL); | ||
50 | if (likely(*errp == 0)) | ||
51 | return isum; | ||
52 | } | ||
53 | *errp = -EFAULT; | ||
54 | memset(dst,0,len); | ||
55 | return isum; | ||
56 | } | ||
57 | |||
58 | EXPORT_SYMBOL(csum_partial_copy_from_user); | ||
59 | |||
60 | /** | ||
61 | * csum_partial_copy_to_user - Copy and checksum to user space. | ||
62 | * @src: source address | ||
63 | * @dst: destination address (user space) | ||
64 | * @len: number of bytes to be copied. | ||
65 | * @isum: initial sum that is added into the result (32bit unfolded) | ||
66 | * @errp: set to -EFAULT for an bad destination address. | ||
67 | * | ||
68 | * Returns an 32bit unfolded checksum of the buffer. | ||
69 | * src and dst are best aligned to 64bits. | ||
70 | */ | ||
71 | __wsum | ||
72 | csum_partial_copy_to_user(const void *src, void __user *dst, | ||
73 | int len, __wsum isum, int *errp) | ||
74 | { | ||
75 | might_sleep(); | ||
76 | if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { | ||
77 | *errp = -EFAULT; | ||
78 | return 0; | ||
79 | } | ||
80 | |||
81 | if (unlikely((unsigned long)dst & 6)) { | ||
82 | while (((unsigned long)dst & 6) && len >= 2) { | ||
83 | __u16 val16 = *(__u16 *)src; | ||
84 | isum = (__force __wsum)add32_with_carry( | ||
85 | (__force unsigned)isum, val16); | ||
86 | *errp = __put_user(val16, (__u16 __user *)dst); | ||
87 | if (*errp) | ||
88 | return isum; | ||
89 | src += 2; | ||
90 | dst += 2; | ||
91 | len -= 2; | ||
92 | } | ||
93 | } | ||
94 | |||
95 | *errp = 0; | ||
96 | return csum_partial_copy_generic(src, (void __force *)dst,len,isum,NULL,errp); | ||
97 | } | ||
98 | |||
99 | EXPORT_SYMBOL(csum_partial_copy_to_user); | ||
100 | |||
101 | /** | ||
102 | * csum_partial_copy_nocheck - Copy and checksum. | ||
103 | * @src: source address | ||
104 | * @dst: destination address | ||
105 | * @len: number of bytes to be copied. | ||
106 | * @isum: initial sum that is added into the result (32bit unfolded) | ||
107 | * | ||
108 | * Returns an 32bit unfolded checksum of the buffer. | ||
109 | */ | ||
110 | __wsum | ||
111 | csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) | ||
112 | { | ||
113 | return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL); | ||
114 | } | ||
115 | EXPORT_SYMBOL(csum_partial_copy_nocheck); | ||
116 | |||
117 | __sum16 csum_ipv6_magic(const struct in6_addr *saddr, | ||
118 | const struct in6_addr *daddr, | ||
119 | __u32 len, unsigned short proto, __wsum sum) | ||
120 | { | ||
121 | __u64 rest, sum64; | ||
122 | |||
123 | rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) + | ||
124 | (__force __u64)sum; | ||
125 | asm(" addq (%[saddr]),%[sum]\n" | ||
126 | " adcq 8(%[saddr]),%[sum]\n" | ||
127 | " adcq (%[daddr]),%[sum]\n" | ||
128 | " adcq 8(%[daddr]),%[sum]\n" | ||
129 | " adcq $0,%[sum]\n" | ||
130 | : [sum] "=r" (sum64) | ||
131 | : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr)); | ||
132 | return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32)); | ||
133 | } | ||
134 | |||
135 | EXPORT_SYMBOL(csum_ipv6_magic); | ||
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
new file mode 100644
index 000000000000..2dbebd308347
--- /dev/null
+++ b/arch/x86/lib/delay_64.c
@@ -0,0 +1,57 @@
1 | /* | ||
2 | * Precise Delay Loops for x86-64 | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
6 | * | ||
7 | * The __delay function must _NOT_ be inlined as its execution time | ||
8 | * depends wildly on alignment on many x86 processors. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/delay.h> | ||
14 | #include <asm/delay.h> | ||
15 | #include <asm/msr.h> | ||
16 | |||
17 | #ifdef CONFIG_SMP | ||
18 | #include <asm/smp.h> | ||
19 | #endif | ||
20 | |||
21 | int read_current_timer(unsigned long *timer_value) | ||
22 | { | ||
23 | rdtscll(*timer_value); | ||
24 | return 0; | ||
25 | } | ||
26 | |||
27 | void __delay(unsigned long loops) | ||
28 | { | ||
29 | unsigned bclock, now; | ||
30 | |||
31 | rdtscl(bclock); | ||
32 | do | ||
33 | { | ||
34 | rep_nop(); | ||
35 | rdtscl(now); | ||
36 | } | ||
37 | while((now-bclock) < loops); | ||
38 | } | ||
39 | EXPORT_SYMBOL(__delay); | ||
40 | |||
41 | inline void __const_udelay(unsigned long xloops) | ||
42 | { | ||
43 | __delay(((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) + 1); | ||
44 | } | ||
45 | EXPORT_SYMBOL(__const_udelay); | ||
46 | |||
47 | void __udelay(unsigned long usecs) | ||
48 | { | ||
49 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | ||
50 | } | ||
51 | EXPORT_SYMBOL(__udelay); | ||
52 | |||
53 | void __ndelay(unsigned long nsecs) | ||
54 | { | ||
55 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | ||
56 | } | ||
57 | EXPORT_SYMBOL(__ndelay); | ||
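
The magic constants are 32.32 fixed-point scale factors: 0x000010c7 is 2^32/1,000,000 rounded up and 0x00005 is 2^32/1,000,000,000 rounded up, so (usecs * 0x10c7 * HZ * loops_per_jiffy) >> 32 recovers the number of delay-loop iterations without ever delaying too little. A small host-side check of that arithmetic (illustrative only):

    #include <stdint.h>

    int main(void)
    {
            uint64_t us_scale = ((uint64_t)1 << 32) / 1000000;    /* 4294, 0x10c6 */
            uint64_t ns_scale = ((uint64_t)1 << 32) / 1000000000; /* 4            */

            /* both constants are rounded up so delays are never too short */
            return !(us_scale + 1 == 0x10c7 && ns_scale + 1 == 0x5);
    }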
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser_64.S
new file mode 100644
index 000000000000..5448876261f8
--- /dev/null
+++ b/arch/x86/lib/getuser_64.S
@@ -0,0 +1,109 @@
1 | /* | ||
2 | * __get_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * (C) Copyright 2005 Andi Kleen | ||
6 | * | ||
7 | * These functions have a non-standard call interface | ||
8 | * to make them more efficient, especially as they | ||
9 | * return an error value in addition to the "real" | ||
10 | * return value. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * __get_user_X | ||
15 | * | ||
16 | * Inputs: %rcx contains the address. | ||
17 | * The register is modified, but all changes are undone | ||
18 | * before returning because the C code doesn't know about it. | ||
19 | * | ||
20 | * Outputs: %rax is error code (0 or -EFAULT) | ||
21 | * %rdx contains zero-extended value | ||
22 | * | ||
23 | * %r8 is destroyed. | ||
24 | * | ||
25 | * These functions should not modify any other registers, | ||
26 | * as they get called from within inline assembly. | ||
27 | */ | ||
28 | |||
29 | #include <linux/linkage.h> | ||
30 | #include <asm/dwarf2.h> | ||
31 | #include <asm/page.h> | ||
32 | #include <asm/errno.h> | ||
33 | #include <asm/asm-offsets.h> | ||
34 | #include <asm/thread_info.h> | ||
35 | |||
36 | .text | ||
37 | ENTRY(__get_user_1) | ||
38 | CFI_STARTPROC | ||
39 | GET_THREAD_INFO(%r8) | ||
40 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
41 | jae bad_get_user | ||
42 | 1: movzb (%rcx),%edx | ||
43 | xorl %eax,%eax | ||
44 | ret | ||
45 | CFI_ENDPROC | ||
46 | ENDPROC(__get_user_1) | ||
47 | |||
48 | ENTRY(__get_user_2) | ||
49 | CFI_STARTPROC | ||
50 | GET_THREAD_INFO(%r8) | ||
51 | addq $1,%rcx | ||
52 | jc 20f | ||
53 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
54 | jae 20f | ||
55 | decq %rcx | ||
56 | 2: movzwl (%rcx),%edx | ||
57 | xorl %eax,%eax | ||
58 | ret | ||
59 | 20: decq %rcx | ||
60 | jmp bad_get_user | ||
61 | CFI_ENDPROC | ||
62 | ENDPROC(__get_user_2) | ||
63 | |||
64 | ENTRY(__get_user_4) | ||
65 | CFI_STARTPROC | ||
66 | GET_THREAD_INFO(%r8) | ||
67 | addq $3,%rcx | ||
68 | jc 30f | ||
69 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
70 | jae 30f | ||
71 | subq $3,%rcx | ||
72 | 3: movl (%rcx),%edx | ||
73 | xorl %eax,%eax | ||
74 | ret | ||
75 | 30: subq $3,%rcx | ||
76 | jmp bad_get_user | ||
77 | CFI_ENDPROC | ||
78 | ENDPROC(__get_user_4) | ||
79 | |||
80 | ENTRY(__get_user_8) | ||
81 | CFI_STARTPROC | ||
82 | GET_THREAD_INFO(%r8) | ||
83 | addq $7,%rcx | ||
84 | jc 40f | ||
85 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
86 | jae 40f | ||
87 | subq $7,%rcx | ||
88 | 4: movq (%rcx),%rdx | ||
89 | xorl %eax,%eax | ||
90 | ret | ||
91 | 40: subq $7,%rcx | ||
92 | jmp bad_get_user | ||
93 | CFI_ENDPROC | ||
94 | ENDPROC(__get_user_8) | ||
95 | |||
96 | bad_get_user: | ||
97 | CFI_STARTPROC | ||
98 | xorl %edx,%edx | ||
99 | movq $(-EFAULT),%rax | ||
100 | ret | ||
101 | CFI_ENDPROC | ||
102 | END(bad_get_user) | ||
103 | |||
104 | .section __ex_table,"a" | ||
105 | .quad 1b,bad_get_user | ||
106 | .quad 2b,bad_get_user | ||
107 | .quad 3b,bad_get_user | ||
108 | .quad 4b,bad_get_user | ||
109 | .previous | ||
diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c
new file mode 100644
index 000000000000..87b4a4e18039
--- /dev/null
+++ b/arch/x86/lib/io_64.c
@@ -0,0 +1,23 @@
1 | #include <linux/string.h> | ||
2 | #include <asm/io.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | void __memcpy_toio(unsigned long dst,const void*src,unsigned len) | ||
6 | { | ||
7 | __inline_memcpy((void *) dst,src,len); | ||
8 | } | ||
9 | EXPORT_SYMBOL(__memcpy_toio); | ||
10 | |||
11 | void __memcpy_fromio(void *dst,unsigned long src,unsigned len) | ||
12 | { | ||
13 | __inline_memcpy(dst,(const void *) src,len); | ||
14 | } | ||
15 | EXPORT_SYMBOL(__memcpy_fromio); | ||
16 | |||
17 | void memset_io(volatile void __iomem *a, int b, size_t c) | ||
18 | { | ||
19 | /* XXX: memset can mangle the IO patterns quite a bit. | ||
20 | perhaps it would be better to use a dumb one */ | ||
21 | memset((void *)a,b,c); | ||
22 | } | ||
23 | EXPORT_SYMBOL(memset_io); | ||
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
new file mode 100644
index 000000000000..05a95e713da8
--- /dev/null
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -0,0 +1,30 @@
1 | /* | ||
2 | * Copyright 2006 PathScale, Inc. All Rights Reserved. | ||
3 | * | ||
4 | * This file is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of version 2 of the GNU General Public License | ||
6 | * as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public License | ||
14 | * along with this program; if not, write to the Free Software Foundation, | ||
15 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | ||
16 | */ | ||
17 | |||
18 | #include <linux/linkage.h> | ||
19 | #include <asm/dwarf2.h> | ||
20 | |||
21 | /* | ||
22 | * override generic version in lib/iomap_copy.c | ||
23 | */ | ||
24 | ENTRY(__iowrite32_copy) | ||
25 | CFI_STARTPROC | ||
26 | movl %edx,%ecx | ||
27 | rep movsd | ||
28 | ret | ||
29 | CFI_ENDPROC | ||
30 | ENDPROC(__iowrite32_copy) | ||
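For comparison, the generic C version being overridden (lib/iomap_copy.c) is roughly the word-by-word loop below; the rep movsd above performs the same copy of %edx 32-bit words with a single string instruction:

    #include <linux/io.h>

    /* Roughly the generic fallback that this assembly file replaces. */
    void __attribute__((weak)) __iowrite32_copy(void __iomem *to, const void *from,
                                                size_t count)
    {
            u32 __iomem *dst = to;
            const u32 *src = from;
            const u32 *end = src + count;

            while (src < end)
                    __raw_writel(*src++, dst++);
    }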
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S new file mode 100644 index 000000000000..c22981fa2f3a --- /dev/null +++ b/arch/x86/lib/memcpy_64.S | |||
@@ -0,0 +1,131 @@ | |||
1 | /* Copyright 2002 Andi Kleen */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <asm/dwarf2.h> | ||
5 | #include <asm/cpufeature.h> | ||
6 | |||
7 | /* | ||
8 | * memcpy - Copy a memory block. | ||
9 | * | ||
10 | * Input: | ||
11 | * rdi destination | ||
12 | * rsi source | ||
13 | * rdx count | ||
14 | * | ||
15 | * Output: | ||
16 | * rax original destination | ||
17 | */ | ||
18 | |||
19 | ALIGN | ||
20 | memcpy_c: | ||
21 | CFI_STARTPROC | ||
22 | movq %rdi,%rax | ||
23 | movl %edx,%ecx | ||
24 | shrl $3,%ecx | ||
25 | andl $7,%edx | ||
26 | rep movsq | ||
27 | movl %edx,%ecx | ||
28 | rep movsb | ||
29 | ret | ||
30 | CFI_ENDPROC | ||
31 | ENDPROC(memcpy_c) | ||
32 | |||
33 | ENTRY(__memcpy) | ||
34 | ENTRY(memcpy) | ||
35 | CFI_STARTPROC | ||
36 | pushq %rbx | ||
37 | CFI_ADJUST_CFA_OFFSET 8 | ||
38 | CFI_REL_OFFSET rbx, 0 | ||
39 | movq %rdi,%rax | ||
40 | |||
41 | movl %edx,%ecx | ||
42 | shrl $6,%ecx | ||
43 | jz .Lhandle_tail | ||
44 | |||
45 | .p2align 4 | ||
46 | .Lloop_64: | ||
47 | decl %ecx | ||
48 | |||
49 | movq (%rsi),%r11 | ||
50 | movq 8(%rsi),%r8 | ||
51 | |||
52 | movq %r11,(%rdi) | ||
53 | movq %r8,1*8(%rdi) | ||
54 | |||
55 | movq 2*8(%rsi),%r9 | ||
56 | movq 3*8(%rsi),%r10 | ||
57 | |||
58 | movq %r9,2*8(%rdi) | ||
59 | movq %r10,3*8(%rdi) | ||
60 | |||
61 | movq 4*8(%rsi),%r11 | ||
62 | movq 5*8(%rsi),%r8 | ||
63 | |||
64 | movq %r11,4*8(%rdi) | ||
65 | movq %r8,5*8(%rdi) | ||
66 | |||
67 | movq 6*8(%rsi),%r9 | ||
68 | movq 7*8(%rsi),%r10 | ||
69 | |||
70 | movq %r9,6*8(%rdi) | ||
71 | movq %r10,7*8(%rdi) | ||
72 | |||
73 | leaq 64(%rsi),%rsi | ||
74 | leaq 64(%rdi),%rdi | ||
75 | jnz .Lloop_64 | ||
76 | |||
77 | .Lhandle_tail: | ||
78 | movl %edx,%ecx | ||
79 | andl $63,%ecx | ||
80 | shrl $3,%ecx | ||
81 | jz .Lhandle_7 | ||
82 | .p2align 4 | ||
83 | .Lloop_8: | ||
84 | decl %ecx | ||
85 | movq (%rsi),%r8 | ||
86 | movq %r8,(%rdi) | ||
87 | leaq 8(%rdi),%rdi | ||
88 | leaq 8(%rsi),%rsi | ||
89 | jnz .Lloop_8 | ||
90 | |||
91 | .Lhandle_7: | ||
92 | movl %edx,%ecx | ||
93 | andl $7,%ecx | ||
94 | jz .Lende | ||
95 | .p2align 4 | ||
96 | .Lloop_1: | ||
97 | movb (%rsi),%r8b | ||
98 | movb %r8b,(%rdi) | ||
99 | incq %rdi | ||
100 | incq %rsi | ||
101 | decl %ecx | ||
102 | jnz .Lloop_1 | ||
103 | |||
104 | .Lende: | ||
105 | popq %rbx | ||
106 | CFI_ADJUST_CFA_OFFSET -8 | ||
107 | CFI_RESTORE rbx | ||
108 | ret | ||
109 | .Lfinal: | ||
110 | CFI_ENDPROC | ||
111 | ENDPROC(memcpy) | ||
112 | ENDPROC(__memcpy) | ||
113 | |||
114 | /* Some CPUs run faster using the string copy instructions. | ||
115 | They are also a lot simpler. Use them when possible. */ | ||
116 | |||
117 | .section .altinstr_replacement,"ax" | ||
118 | 1: .byte 0xeb /* jmp <disp8> */ | ||
119 | .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ | ||
120 | 2: | ||
121 | .previous | ||
122 | .section .altinstructions,"a" | ||
123 | .align 8 | ||
124 | .quad memcpy | ||
125 | .quad 1b | ||
126 | .byte X86_FEATURE_REP_GOOD | ||
127 | /* Replace only the beginning: memcpy is used to apply the alternatives, | ||
128 | * so overwriting itself with nops would only end in a reboot. */ | ||
129 | .byte 2b - 1b | ||
130 | .byte 2b - 1b | ||
131 | .previous | ||
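The structure of the unrolled copy above, modeled in plain C (an illustrative sketch only; the real routine keeps everything in registers and, per the alternatives entry, is patched over to memcpy_c on X86_FEATURE_REP_GOOD CPUs):

    #include <stddef.h>
    #include <stdint.h>

    static void *memcpy_model(void *dst, const void *src, size_t len)
    {
            uint64_t *d8 = dst;
            const uint64_t *s8 = src;
            size_t n;

            for (n = len >> 6; n; n--) {            /* .Lloop_64: 64-byte blocks */
                    size_t i;

                    for (i = 0; i < 8; i++)
                            d8[i] = s8[i];
                    d8 += 8;
                    s8 += 8;
            }
            for (n = (len & 63) >> 3; n; n--)       /* .Lloop_8: remaining qwords */
                    *d8++ = *s8++;
            {
                    uint8_t *d1 = (uint8_t *)d8;
                    const uint8_t *s1 = (const uint8_t *)s8;

                    for (n = len & 7; n; n--)       /* .Lloop_1: byte tail */
                            *d1++ = *s1++;
            }
            return dst;
    }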
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c new file mode 100644 index 000000000000..751ebae8ec42 --- /dev/null +++ b/arch/x86/lib/memmove_64.c | |||
@@ -0,0 +1,21 @@ | |||
1 | /* Normally the compiler builtins are used, but sometimes the compiler calls | ||
2 | out-of-line code. Based on asm-i386/string.h. | ||
3 | */ | ||
4 | #define _STRING_C | ||
5 | #include <linux/string.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | #undef memmove | ||
9 | void *memmove(void * dest,const void *src,size_t count) | ||
10 | { | ||
11 | if (dest < src) { | ||
12 | return memcpy(dest,src,count); | ||
13 | } else { | ||
14 | char *p = (char *) dest + count; | ||
15 | char *s = (char *) src + count; | ||
16 | while (count--) | ||
17 | *--p = *--s; | ||
18 | } | ||
19 | return dest; | ||
20 | } | ||
21 | EXPORT_SYMBOL(memmove); | ||
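A small userspace-style example of the semantics this provides, i.e. the backward copy taken when dest > src (the buffer contents are illustrative):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Overlapping shift-right inside one buffer: dest > src, so the
             * routine copies backwards; plain memcpy() would not be safe here. */
            char buf[8] = "abcdef";

            memmove(buf + 1, buf, 6);
            printf("%s\n", buf);    /* prints "aabcdef" */
            return 0;
    }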
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S new file mode 100644 index 000000000000..2c5948116bd2 --- /dev/null +++ b/arch/x86/lib/memset_64.S | |||
@@ -0,0 +1,133 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <asm/dwarf2.h> | ||
5 | |||
6 | /* | ||
7 | * ISO C memset - set a memory block to a byte value. | ||
8 | * | ||
9 | * rdi destination | ||
10 | * rsi value (char) | ||
11 | * rdx count (bytes) | ||
12 | * | ||
13 | * rax original destination | ||
14 | */ | ||
15 | ALIGN | ||
16 | memset_c: | ||
17 | CFI_STARTPROC | ||
18 | movq %rdi,%r9 | ||
19 | movl %edx,%r8d | ||
20 | andl $7,%r8d | ||
21 | movl %edx,%ecx | ||
22 | shrl $3,%ecx | ||
23 | /* expand byte value */ | ||
24 | movzbl %sil,%esi | ||
25 | movabs $0x0101010101010101,%rax | ||
26 | mulq %rsi /* with rax, clobbers rdx */ | ||
27 | rep stosq | ||
28 | movl %r8d,%ecx | ||
29 | rep stosb | ||
30 | movq %r9,%rax | ||
31 | ret | ||
32 | CFI_ENDPROC | ||
33 | ENDPROC(memset_c) | ||
34 | |||
35 | ENTRY(memset) | ||
36 | ENTRY(__memset) | ||
37 | CFI_STARTPROC | ||
38 | movq %rdi,%r10 | ||
39 | movq %rdx,%r11 | ||
40 | |||
41 | /* expand byte value */ | ||
42 | movzbl %sil,%ecx | ||
43 | movabs $0x0101010101010101,%rax | ||
44 | mul %rcx /* with rax, clobbers rdx */ | ||
45 | |||
46 | /* align dst */ | ||
47 | movl %edi,%r9d | ||
48 | andl $7,%r9d | ||
49 | jnz .Lbad_alignment | ||
50 | CFI_REMEMBER_STATE | ||
51 | .Lafter_bad_alignment: | ||
52 | |||
53 | movl %r11d,%ecx | ||
54 | shrl $6,%ecx | ||
55 | jz .Lhandle_tail | ||
56 | |||
57 | .p2align 4 | ||
58 | .Lloop_64: | ||
59 | decl %ecx | ||
60 | movq %rax,(%rdi) | ||
61 | movq %rax,8(%rdi) | ||
62 | movq %rax,16(%rdi) | ||
63 | movq %rax,24(%rdi) | ||
64 | movq %rax,32(%rdi) | ||
65 | movq %rax,40(%rdi) | ||
66 | movq %rax,48(%rdi) | ||
67 | movq %rax,56(%rdi) | ||
68 | leaq 64(%rdi),%rdi | ||
69 | jnz .Lloop_64 | ||
70 | |||
71 | /* Handle the tail in loops. The loops should be faster than | ||
72 | hard-to-predict jump tables. */ | ||
73 | .p2align 4 | ||
74 | .Lhandle_tail: | ||
75 | movl %r11d,%ecx | ||
76 | andl $63&(~7),%ecx | ||
77 | jz .Lhandle_7 | ||
78 | shrl $3,%ecx | ||
79 | .p2align 4 | ||
80 | .Lloop_8: | ||
81 | decl %ecx | ||
82 | movq %rax,(%rdi) | ||
83 | leaq 8(%rdi),%rdi | ||
84 | jnz .Lloop_8 | ||
85 | |||
86 | .Lhandle_7: | ||
87 | movl %r11d,%ecx | ||
88 | andl $7,%ecx | ||
89 | jz .Lende | ||
90 | .p2align 4 | ||
91 | .Lloop_1: | ||
92 | decl %ecx | ||
93 | movb %al,(%rdi) | ||
94 | leaq 1(%rdi),%rdi | ||
95 | jnz .Lloop_1 | ||
96 | |||
97 | .Lende: | ||
98 | movq %r10,%rax | ||
99 | ret | ||
100 | |||
101 | CFI_RESTORE_STATE | ||
102 | .Lbad_alignment: | ||
103 | cmpq $7,%r11 | ||
104 | jbe .Lhandle_7 | ||
105 | movq %rax,(%rdi) /* unaligned store */ | ||
106 | movq $8,%r8 | ||
107 | subq %r9,%r8 | ||
108 | addq %r8,%rdi | ||
109 | subq %r8,%r11 | ||
110 | jmp .Lafter_bad_alignment | ||
111 | .Lfinal: | ||
112 | CFI_ENDPROC | ||
113 | ENDPROC(memset) | ||
114 | ENDPROC(__memset) | ||
115 | |||
116 | /* Some CPUs run faster using the string instructions. | ||
117 | They are also a lot simpler. Use them when possible. */ | ||
118 | |||
119 | #include <asm/cpufeature.h> | ||
120 | |||
121 | .section .altinstr_replacement,"ax" | ||
122 | 1: .byte 0xeb /* jmp <disp8> */ | ||
123 | .byte (memset_c - memset) - (2f - 1b) /* offset */ | ||
124 | 2: | ||
125 | .previous | ||
126 | .section .altinstructions,"a" | ||
127 | .align 8 | ||
128 | .quad memset | ||
129 | .quad 1b | ||
130 | .byte X86_FEATURE_REP_GOOD | ||
131 | .byte .Lfinal - memset | ||
132 | .byte 2b - 1b | ||
133 | .previous | ||
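The "expand byte value" step above relies on a multiply by 0x0101010101010101 replicating the fill byte into every byte of a 64-bit word, so the main loop can store whole qwords. In C:

    #include <stdint.h>

    /* Equivalent of "movzbl %sil,...; movabs $0x0101010101010101,%rax; mul". */
    static uint64_t expand_fill_byte(uint8_t c)
    {
            return (uint64_t)c * 0x0101010101010101ULL;
    }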
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S new file mode 100644 index 000000000000..4989f5a8fa9b --- /dev/null +++ b/arch/x86/lib/putuser_64.S | |||
@@ -0,0 +1,106 @@ | |||
1 | /* | ||
2 | * __put_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * (C) Copyright 2005 Andi Kleen | ||
6 | * | ||
7 | * These functions have a non-standard call interface | ||
8 | * to make them more efficient, especially as they | ||
9 | * return an error value in addition to the "real" | ||
10 | * return value. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * __put_user_X | ||
15 | * | ||
16 | * Inputs: %rcx contains the address | ||
17 | * %rdx contains new value | ||
18 | * | ||
19 | * Outputs: %rax is error code (0 or -EFAULT) | ||
20 | * | ||
21 | * %r8 is destroyed. | ||
22 | * | ||
23 | * These functions should not modify any other registers, | ||
24 | * as they get called from within inline assembly. | ||
25 | */ | ||
26 | |||
27 | #include <linux/linkage.h> | ||
28 | #include <asm/dwarf2.h> | ||
29 | #include <asm/page.h> | ||
30 | #include <asm/errno.h> | ||
31 | #include <asm/asm-offsets.h> | ||
32 | #include <asm/thread_info.h> | ||
33 | |||
34 | .text | ||
35 | ENTRY(__put_user_1) | ||
36 | CFI_STARTPROC | ||
37 | GET_THREAD_INFO(%r8) | ||
38 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
39 | jae bad_put_user | ||
40 | 1: movb %dl,(%rcx) | ||
41 | xorl %eax,%eax | ||
42 | ret | ||
43 | CFI_ENDPROC | ||
44 | ENDPROC(__put_user_1) | ||
45 | |||
46 | ENTRY(__put_user_2) | ||
47 | CFI_STARTPROC | ||
48 | GET_THREAD_INFO(%r8) | ||
49 | addq $1,%rcx | ||
50 | jc 20f | ||
51 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
52 | jae 20f | ||
53 | decq %rcx | ||
54 | 2: movw %dx,(%rcx) | ||
55 | xorl %eax,%eax | ||
56 | ret | ||
57 | 20: decq %rcx | ||
58 | jmp bad_put_user | ||
59 | CFI_ENDPROC | ||
60 | ENDPROC(__put_user_2) | ||
61 | |||
62 | ENTRY(__put_user_4) | ||
63 | CFI_STARTPROC | ||
64 | GET_THREAD_INFO(%r8) | ||
65 | addq $3,%rcx | ||
66 | jc 30f | ||
67 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
68 | jae 30f | ||
69 | subq $3,%rcx | ||
70 | 3: movl %edx,(%rcx) | ||
71 | xorl %eax,%eax | ||
72 | ret | ||
73 | 30: subq $3,%rcx | ||
74 | jmp bad_put_user | ||
75 | CFI_ENDPROC | ||
76 | ENDPROC(__put_user_4) | ||
77 | |||
78 | ENTRY(__put_user_8) | ||
79 | CFI_STARTPROC | ||
80 | GET_THREAD_INFO(%r8) | ||
81 | addq $7,%rcx | ||
82 | jc 40f | ||
83 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
84 | jae 40f | ||
85 | subq $7,%rcx | ||
86 | 4: movq %rdx,(%rcx) | ||
87 | xorl %eax,%eax | ||
88 | ret | ||
89 | 40: subq $7,%rcx | ||
90 | jmp bad_put_user | ||
91 | CFI_ENDPROC | ||
92 | ENDPROC(__put_user_8) | ||
93 | |||
94 | bad_put_user: | ||
95 | CFI_STARTPROC | ||
96 | movq $(-EFAULT),%rax | ||
97 | ret | ||
98 | CFI_ENDPROC | ||
99 | END(bad_put_user) | ||
100 | |||
101 | .section __ex_table,"a" | ||
102 | .quad 1b,bad_put_user | ||
103 | .quad 2b,bad_put_user | ||
104 | .quad 3b,bad_put_user | ||
105 | .quad 4b,bad_put_user | ||
106 | .previous | ||
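The __ex_table section above pairs each possibly-faulting store ("1:".."4:") with the bad_put_user error path; the page-fault handler consults this table when a fault happens in kernel mode. A rough model (the struct layout matches the 2.6-era x86-64 headers; the lookup function is illustrative, and the real code binary-searches a sorted table):

    struct exception_table_entry {
            unsigned long insn;     /* address of the faulting store */
            unsigned long fixup;    /* where to resume: bad_put_user */
    };

    /* Sketch of the fixup lookup done on a kernel-mode fault at "rip". */
    static unsigned long fixup_exception_model(const struct exception_table_entry *e,
                                               unsigned long n, unsigned long rip)
    {
            unsigned long i;

            for (i = 0; i < n; i++)
                    if (e[i].insn == rip)
                            return e[i].fixup;      /* new %rip: the error path */
            return 0;                               /* no fixup found -> oops */
    }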
diff --git a/arch/x86/lib/rwlock_64.S b/arch/x86/lib/rwlock_64.S new file mode 100644 index 000000000000..0cde1f807314 --- /dev/null +++ b/arch/x86/lib/rwlock_64.S | |||
@@ -0,0 +1,38 @@ | |||
1 | /* Slow paths of read/write spinlocks. */ | ||
2 | |||
3 | #include <linux/linkage.h> | ||
4 | #include <asm/rwlock.h> | ||
5 | #include <asm/alternative-asm.i> | ||
6 | #include <asm/dwarf2.h> | ||
7 | |||
8 | /* rdi: pointer to rwlock_t */ | ||
9 | ENTRY(__write_lock_failed) | ||
10 | CFI_STARTPROC | ||
11 | LOCK_PREFIX | ||
12 | addl $RW_LOCK_BIAS,(%rdi) | ||
13 | 1: rep | ||
14 | nop | ||
15 | cmpl $RW_LOCK_BIAS,(%rdi) | ||
16 | jne 1b | ||
17 | LOCK_PREFIX | ||
18 | subl $RW_LOCK_BIAS,(%rdi) | ||
19 | jnz __write_lock_failed | ||
20 | ret | ||
21 | CFI_ENDPROC | ||
22 | END(__write_lock_failed) | ||
23 | |||
24 | /* rdi: pointer to rwlock_t */ | ||
25 | ENTRY(__read_lock_failed) | ||
26 | CFI_STARTPROC | ||
27 | LOCK_PREFIX | ||
28 | incl (%rdi) | ||
29 | 1: rep | ||
30 | nop | ||
31 | cmpl $1,(%rdi) | ||
32 | js 1b | ||
33 | LOCK_PREFIX | ||
34 | decl (%rdi) | ||
35 | js __read_lock_failed | ||
36 | ret | ||
37 | CFI_ENDPROC | ||
38 | END(__read_lock_failed) | ||
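A rough C model of the write-lock slow path above, assuming the usual kernel atomic helpers (atomic_add(), atomic_read(), atomic_sub_return(), cpu_relax()); the fast path has already subtracted RW_LOCK_BIAS before branching here:

    #include <asm/atomic.h>
    #include <asm/processor.h>      /* cpu_relax() */
    #include <asm/rwlock.h>         /* RW_LOCK_BIAS */

    static void write_lock_failed_model(atomic_t *lock)
    {
            for (;;) {
                    atomic_add(RW_LOCK_BIAS, lock);          /* undo the failed attempt */
                    while (atomic_read(lock) != RW_LOCK_BIAS)
                            cpu_relax();                     /* the "rep; nop" spin */
                    if (atomic_sub_return(RW_LOCK_BIAS, lock) == 0)
                            return;                          /* write lock acquired */
            }
    }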
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S new file mode 100644 index 000000000000..55e586d352d3 --- /dev/null +++ b/arch/x86/lib/thunk_64.S | |||
@@ -0,0 +1,67 @@ | |||
1 | /* | ||
2 | * Save registers before calling assembly functions. This avoids | ||
3 | * disturbance of register allocation in some inline assembly constructs. | ||
4 | * Copyright 2001,2002 by Andi Kleen, SuSE Labs. | ||
5 | * Subject to the GNU public license, v.2. No warranty of any kind. | ||
6 | */ | ||
7 | |||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/dwarf2.h> | ||
10 | #include <asm/calling.h> | ||
11 | #include <asm/rwlock.h> | ||
12 | |||
13 | /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ | ||
14 | .macro thunk name,func | ||
15 | .globl \name | ||
16 | \name: | ||
17 | CFI_STARTPROC | ||
18 | SAVE_ARGS | ||
19 | call \func | ||
20 | jmp restore | ||
21 | CFI_ENDPROC | ||
22 | .endm | ||
23 | |||
24 | /* rdi: arg1 ... normal C conventions. rax is passed from C. */ | ||
25 | .macro thunk_retrax name,func | ||
26 | .globl \name | ||
27 | \name: | ||
28 | CFI_STARTPROC | ||
29 | SAVE_ARGS | ||
30 | call \func | ||
31 | jmp restore_norax | ||
32 | CFI_ENDPROC | ||
33 | .endm | ||
34 | |||
35 | |||
36 | .section .sched.text | ||
37 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | ||
38 | thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed | ||
39 | thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed | ||
40 | thunk rwsem_wake_thunk,rwsem_wake | ||
41 | thunk rwsem_downgrade_thunk,rwsem_downgrade_wake | ||
42 | #endif | ||
43 | |||
44 | thunk __down_failed,__down | ||
45 | thunk_retrax __down_failed_interruptible,__down_interruptible | ||
46 | thunk_retrax __down_failed_trylock,__down_trylock | ||
47 | thunk __up_wakeup,__up | ||
48 | |||
49 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
50 | thunk trace_hardirqs_on_thunk,trace_hardirqs_on | ||
51 | thunk trace_hardirqs_off_thunk,trace_hardirqs_off | ||
52 | #endif | ||
53 | |||
54 | /* SAVE_ARGS below is used only for the .cfi directives it contains. */ | ||
55 | CFI_STARTPROC | ||
56 | SAVE_ARGS | ||
57 | restore: | ||
58 | RESTORE_ARGS | ||
59 | ret | ||
60 | CFI_ENDPROC | ||
61 | |||
62 | CFI_STARTPROC | ||
63 | SAVE_ARGS | ||
64 | restore_norax: | ||
65 | RESTORE_ARGS 1 | ||
66 | ret | ||
67 | CFI_ENDPROC | ||
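A hypothetical call site illustrating why the thunks exist: because the thunk itself saves and restores the argument (caller-clobbered) registers, inline assembly can call it without declaring those registers as clobbered. The wrapper name below is illustrative:

    /* Sketch only: relies on trace_hardirqs_on_thunk preserving the
     * caller-saved registers via SAVE_ARGS/RESTORE_ARGS. */
    static inline void example_trace_irqs_on(void)
    {
            asm volatile("call trace_hardirqs_on_thunk" : : : "memory");
    }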
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c new file mode 100644 index 000000000000..893d43f838cc --- /dev/null +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * User address space access functions. | ||
3 | * | ||
4 | * Copyright 1997 Andi Kleen <ak@muc.de> | ||
5 | * Copyright 1997 Linus Torvalds | ||
6 | * Copyright 2002 Andi Kleen <ak@suse.de> | ||
7 | */ | ||
8 | #include <linux/module.h> | ||
9 | #include <asm/uaccess.h> | ||
10 | |||
11 | /* | ||
12 | * Copy a null-terminated string from userspace. | ||
13 | */ | ||
14 | |||
15 | #define __do_strncpy_from_user(dst,src,count,res) \ | ||
16 | do { \ | ||
17 | long __d0, __d1, __d2; \ | ||
18 | might_sleep(); \ | ||
19 | __asm__ __volatile__( \ | ||
20 | " testq %1,%1\n" \ | ||
21 | " jz 2f\n" \ | ||
22 | "0: lodsb\n" \ | ||
23 | " stosb\n" \ | ||
24 | " testb %%al,%%al\n" \ | ||
25 | " jz 1f\n" \ | ||
26 | " decq %1\n" \ | ||
27 | " jnz 0b\n" \ | ||
28 | "1: subq %1,%0\n" \ | ||
29 | "2:\n" \ | ||
30 | ".section .fixup,\"ax\"\n" \ | ||
31 | "3: movq %5,%0\n" \ | ||
32 | " jmp 2b\n" \ | ||
33 | ".previous\n" \ | ||
34 | ".section __ex_table,\"a\"\n" \ | ||
35 | " .align 8\n" \ | ||
36 | " .quad 0b,3b\n" \ | ||
37 | ".previous" \ | ||
38 | : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ | ||
39 | "=&D" (__d2) \ | ||
40 | : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ | ||
41 | : "memory"); \ | ||
42 | } while (0) | ||
43 | |||
44 | long | ||
45 | __strncpy_from_user(char *dst, const char __user *src, long count) | ||
46 | { | ||
47 | long res; | ||
48 | __do_strncpy_from_user(dst, src, count, res); | ||
49 | return res; | ||
50 | } | ||
51 | EXPORT_SYMBOL(__strncpy_from_user); | ||
52 | |||
53 | long | ||
54 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
55 | { | ||
56 | long res = -EFAULT; | ||
57 | if (access_ok(VERIFY_READ, src, 1)) | ||
58 | return __strncpy_from_user(dst, src, count); | ||
59 | return res; | ||
60 | } | ||
61 | EXPORT_SYMBOL(strncpy_from_user); | ||
62 | |||
63 | /* | ||
64 | * Zero Userspace | ||
65 | */ | ||
66 | |||
67 | unsigned long __clear_user(void __user *addr, unsigned long size) | ||
68 | { | ||
69 | long __d0; | ||
70 | might_sleep(); | ||
71 | /* no memory constraint because it doesn't change any memory gcc knows | ||
72 | about */ | ||
73 | asm volatile( | ||
74 | " testq %[size8],%[size8]\n" | ||
75 | " jz 4f\n" | ||
76 | "0: movq %[zero],(%[dst])\n" | ||
77 | " addq %[eight],%[dst]\n" | ||
78 | " decl %%ecx ; jnz 0b\n" | ||
79 | "4: movq %[size1],%%rcx\n" | ||
80 | " testl %%ecx,%%ecx\n" | ||
81 | " jz 2f\n" | ||
82 | "1: movb %b[zero],(%[dst])\n" | ||
83 | " incq %[dst]\n" | ||
84 | " decl %%ecx ; jnz 1b\n" | ||
85 | "2:\n" | ||
86 | ".section .fixup,\"ax\"\n" | ||
87 | "3: lea 0(%[size1],%[size8],8),%[size8]\n" | ||
88 | " jmp 2b\n" | ||
89 | ".previous\n" | ||
90 | ".section __ex_table,\"a\"\n" | ||
91 | " .align 8\n" | ||
92 | " .quad 0b,3b\n" | ||
93 | " .quad 1b,2b\n" | ||
94 | ".previous" | ||
95 | : [size8] "=c"(size), [dst] "=&D" (__d0) | ||
96 | : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), | ||
97 | [zero] "r" (0UL), [eight] "r" (8UL)); | ||
98 | return size; | ||
99 | } | ||
100 | EXPORT_SYMBOL(__clear_user); | ||
101 | |||
102 | unsigned long clear_user(void __user *to, unsigned long n) | ||
103 | { | ||
104 | if (access_ok(VERIFY_WRITE, to, n)) | ||
105 | return __clear_user(to, n); | ||
106 | return n; | ||
107 | } | ||
108 | EXPORT_SYMBOL(clear_user); | ||
109 | |||
110 | /* | ||
111 | * Return the size of a string (including the terminating NUL). | ||
112 | * | ||
113 | * Returns 0 on exception, or a value greater than n if the string is too long. | ||
114 | */ | ||
115 | |||
116 | long __strnlen_user(const char __user *s, long n) | ||
117 | { | ||
118 | long res = 0; | ||
119 | char c; | ||
120 | |||
121 | while (1) { | ||
122 | if (res>n) | ||
123 | return n+1; | ||
124 | if (__get_user(c, s)) | ||
125 | return 0; | ||
126 | if (!c) | ||
127 | return res+1; | ||
128 | res++; | ||
129 | s++; | ||
130 | } | ||
131 | } | ||
132 | EXPORT_SYMBOL(__strnlen_user); | ||
133 | |||
134 | long strnlen_user(const char __user *s, long n) | ||
135 | { | ||
136 | if (!access_ok(VERIFY_READ, s, n)) | ||
137 | return 0; | ||
138 | return __strnlen_user(s, n); | ||
139 | } | ||
140 | EXPORT_SYMBOL(strnlen_user); | ||
141 | |||
142 | long strlen_user(const char __user *s) | ||
143 | { | ||
144 | long res = 0; | ||
145 | char c; | ||
146 | |||
147 | for (;;) { | ||
148 | if (get_user(c, s)) | ||
149 | return 0; | ||
150 | if (!c) | ||
151 | return res+1; | ||
152 | res++; | ||
153 | s++; | ||
154 | } | ||
155 | } | ||
156 | EXPORT_SYMBOL(strlen_user); | ||
157 | |||
158 | unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) | ||
159 | { | ||
160 | if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { | ||
161 | return copy_user_generic((__force void *)to, (__force void *)from, len); | ||
162 | } | ||
163 | return len; | ||
164 | } | ||
165 | EXPORT_SYMBOL(copy_in_user); | ||
166 | |||
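A hypothetical caller showing the return-value convention of strncpy_from_user() as implemented above: the string length (excluding the NUL) on success, -EFAULT for a bad user pointer, and count when the string did not fit. The buffer name and sizes are illustrative:

    #include <linux/errno.h>
    #include <asm/uaccess.h>

    static long example_set_name(const char __user *uname)
    {
            char name[32];
            long len;

            len = strncpy_from_user(name, uname, sizeof(name));
            if (len < 0)
                    return len;             /* -EFAULT */
            if (len == sizeof(name))
                    return -EINVAL;         /* not NUL-terminated within 32 bytes */
            return 0;
    }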