author     Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-04-16 18:20:36 -0400
commit     1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree       0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/x86_64/lib

Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/x86_64/lib')
-rw-r--r--   arch/x86_64/lib/Makefile          14
-rw-r--r--   arch/x86_64/lib/bitops.c         141
-rw-r--r--   arch/x86_64/lib/bitstr.c          28
-rw-r--r--   arch/x86_64/lib/clear_page.S      50
-rw-r--r--   arch/x86_64/lib/copy_page.S      101
-rw-r--r--   arch/x86_64/lib/copy_user.S      294
-rw-r--r--   arch/x86_64/lib/csum-copy.S      233
-rw-r--r--   arch/x86_64/lib/csum-partial.c   150
-rw-r--r--   arch/x86_64/lib/csum-wrappers.c  129
-rw-r--r--   arch/x86_64/lib/dec_and_lock.c    40
-rw-r--r--   arch/x86_64/lib/delay.c           48
-rw-r--r--   arch/x86_64/lib/getuser.S        101
-rw-r--r--   arch/x86_64/lib/io.c              23
-rw-r--r--   arch/x86_64/lib/memcpy.S         121
-rw-r--r--   arch/x86_64/lib/memmove.c         19
-rw-r--r--   arch/x86_64/lib/memset.S         125
-rw-r--r--   arch/x86_64/lib/putuser.S         89
-rw-r--r--   arch/x86_64/lib/thunk.S           95
-rw-r--r--   arch/x86_64/lib/usercopy.c       153
19 files changed, 1954 insertions, 0 deletions
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
new file mode 100644
index 000000000000..6b26a1c1e9ff
--- /dev/null
+++ b/arch/x86_64/lib/Makefile
@@ -0,0 +1,14 @@
1 | # | ||
2 | # Makefile for x86_64-specific library files. | ||
3 | # | ||
4 | |||
5 | CFLAGS_csum-partial.o := -funroll-loops | ||
6 | |||
7 | obj-y := io.o | ||
8 | |||
9 | lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \ | ||
10 | usercopy.o getuser.o putuser.o \ | ||
11 | thunk.o clear_page.o copy_page.o bitstr.o bitops.o | ||
12 | lib-y += memcpy.o memmove.o memset.o copy_user.o | ||
13 | |||
14 | lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o | ||
diff --git a/arch/x86_64/lib/bitops.c b/arch/x86_64/lib/bitops.c
new file mode 100644
index 000000000000..a29fb75b33ac
--- /dev/null
+++ b/arch/x86_64/lib/bitops.c
@@ -0,0 +1,141 @@
1 | #include <linux/bitops.h> | ||
2 | |||
3 | #undef find_first_zero_bit | ||
4 | #undef find_next_zero_bit | ||
5 | #undef find_first_bit | ||
6 | #undef find_next_bit | ||
7 | |||
8 | /** | ||
9 | * find_first_zero_bit - find the first zero bit in a memory region | ||
10 | * @addr: The address to start the search at | ||
11 | * @size: The maximum size to search | ||
12 | * | ||
13 | * Returns the bit-number of the first zero bit, not the number of the byte | ||
14 | * containing a bit. | ||
15 | */ | ||
16 | inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) | ||
17 | { | ||
18 | long d0, d1, d2; | ||
19 | long res; | ||
20 | |||
21 | if (!size) | ||
22 | return 0; | ||
23 | asm volatile( | ||
24 | " repe; scasq\n" | ||
25 | " je 1f\n" | ||
26 | " xorq -8(%%rdi),%%rax\n" | ||
27 | " subq $8,%%rdi\n" | ||
28 | " bsfq %%rax,%%rdx\n" | ||
29 | "1: subq %[addr],%%rdi\n" | ||
30 | " shlq $3,%%rdi\n" | ||
31 | " addq %%rdi,%%rdx" | ||
32 | :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) | ||
33 | :"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL), | ||
34 | [addr] "r" (addr) : "memory"); | ||
35 | return res; | ||
36 | } | ||
37 | |||
38 | /** | ||
39 | * find_next_zero_bit - find the next zero bit in a memory region | ||
40 | * @addr: The address to base the search on | ||
41 | * @offset: The bitnumber to start searching at | ||
42 | * @size: The maximum size to search | ||
43 | */ | ||
44 | long find_next_zero_bit (const unsigned long * addr, long size, long offset) | ||
45 | { | ||
46 | unsigned long * p = ((unsigned long *) addr) + (offset >> 6); | ||
47 | unsigned long set = 0; | ||
48 | unsigned long res, bit = offset&63; | ||
49 | |||
50 | if (bit) { | ||
51 | /* | ||
52 | * Look for zero in first word | ||
53 | */ | ||
54 | asm("bsfq %1,%0\n\t" | ||
55 | "cmoveq %2,%0" | ||
56 | : "=r" (set) | ||
57 | : "r" (~(*p >> bit)), "r"(64L)); | ||
58 | if (set < (64 - bit)) | ||
59 | return set + offset; | ||
60 | set = 64 - bit; | ||
61 | p++; | ||
62 | } | ||
63 | /* | ||
64 | * No zero yet, search remaining full words for a zero | ||
65 | */ | ||
66 | res = find_first_zero_bit ((const unsigned long *)p, | ||
67 | size - 64 * (p - (unsigned long *) addr)); | ||
68 | return (offset + set + res); | ||
69 | } | ||
70 | |||
71 | static inline long | ||
72 | __find_first_bit(const unsigned long * addr, unsigned long size) | ||
73 | { | ||
74 | long d0, d1; | ||
75 | long res; | ||
76 | |||
77 | asm volatile( | ||
78 | " repe; scasq\n" | ||
79 | " jz 1f\n" | ||
80 | " subq $8,%%rdi\n" | ||
81 | " bsfq (%%rdi),%%rax\n" | ||
82 | "1: subq %[addr],%%rdi\n" | ||
83 | " shlq $3,%%rdi\n" | ||
84 | " addq %%rdi,%%rax" | ||
85 | :"=a" (res), "=&c" (d0), "=&D" (d1) | ||
86 | :"0" (0ULL), | ||
87 | "1" ((size + 63) >> 6), "2" (addr), | ||
88 | [addr] "r" (addr) : "memory"); | ||
89 | return res; | ||
90 | } | ||
91 | |||
92 | /** | ||
93 | * find_first_bit - find the first set bit in a memory region | ||
94 | * @addr: The address to start the search at | ||
95 | * @size: The maximum size to search | ||
96 | * | ||
97 | * Returns the bit-number of the first set bit, not the number of the byte | ||
98 | * containing a bit. | ||
99 | */ | ||
100 | long find_first_bit(const unsigned long * addr, unsigned long size) | ||
101 | { | ||
102 | return __find_first_bit(addr,size); | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * find_next_bit - find the next set bit in a memory region | ||
107 | * @addr: The address to base the search on | ||
108 | * @offset: The bitnumber to start searching at | ||
109 | * @size: The maximum size to search | ||
110 | */ | ||
111 | long find_next_bit(const unsigned long * addr, long size, long offset) | ||
112 | { | ||
113 | const unsigned long * p = addr + (offset >> 6); | ||
114 | unsigned long set = 0, bit = offset & 63, res; | ||
115 | |||
116 | if (bit) { | ||
117 | /* | ||
118 | * Look for nonzero in the first 64 bits: | ||
119 | */ | ||
120 | asm("bsfq %1,%0\n\t" | ||
121 | "cmoveq %2,%0\n\t" | ||
122 | : "=r" (set) | ||
123 | : "r" (*p >> bit), "r" (64L)); | ||
124 | if (set < (64 - bit)) | ||
125 | return set + offset; | ||
126 | set = 64 - bit; | ||
127 | p++; | ||
128 | } | ||
129 | /* | ||
130 | * No set bit yet, search remaining full words for a bit | ||
131 | */ | ||
132 | res = __find_first_bit (p, size - 64 * (p - addr)); | ||
133 | return (offset + set + res); | ||
134 | } | ||
135 | |||
136 | #include <linux/module.h> | ||
137 | |||
138 | EXPORT_SYMBOL(find_next_bit); | ||
139 | EXPORT_SYMBOL(find_first_bit); | ||
140 | EXPORT_SYMBOL(find_first_zero_bit); | ||
141 | EXPORT_SYMBOL(find_next_zero_bit); | ||
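For readers not fluent in inline assembly, here is a hedged, portable C sketch of what the repe;scasq / bsfq sequence in find_first_zero_bit() computes; the function name is made up and the code is illustrative only, not part of the patch.

```c
/* Illustrative reference only -- not part of the patch. */
static long find_first_zero_bit_ref(const unsigned long *addr, unsigned long size)
{
	unsigned long words = (size + 63) >> 6;   /* whole 64-bit words scanned */
	unsigned long i;

	if (!size)
		return 0;
	for (i = 0; i < words; i++) {
		unsigned long inverted = ~addr[i];        /* a zero bit becomes a one */
		if (inverted)
			return i * 64 + __builtin_ctzl(inverted);  /* what bsfq computes */
	}
	return words * 64;   /* no zero bit: result is >= size, like the asm */
}
```

find_next_zero_bit() above is then just: handle the partial first word, and fall through to this full-word scan for the rest.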
diff --git a/arch/x86_64/lib/bitstr.c b/arch/x86_64/lib/bitstr.c
new file mode 100644
index 000000000000..24676609a6ac
--- /dev/null
+++ b/arch/x86_64/lib/bitstr.c
@@ -0,0 +1,28 @@
1 | #include <linux/module.h> | ||
2 | #include <linux/bitops.h> | ||
3 | |||
4 | /* Find string of zero bits in a bitmap */ | ||
5 | unsigned long | ||
6 | find_next_zero_string(unsigned long *bitmap, long start, long nbits, int len) | ||
7 | { | ||
8 | unsigned long n, end, i; | ||
9 | |||
10 | again: | ||
11 | n = find_next_zero_bit(bitmap, nbits, start); | ||
12 | if (n == -1) | ||
13 | return -1; | ||
14 | |||
15 | /* could test bitsliced, but it's hardly worth it */ | ||
16 | end = n+len; | ||
17 | if (end >= nbits) | ||
18 | return -1; | ||
19 | for (i = n+1; i < end; i++) { | ||
20 | if (test_bit(i, bitmap)) { | ||
21 | start = i+1; | ||
22 | goto again; | ||
23 | } | ||
24 | } | ||
25 | return n; | ||
26 | } | ||
27 | |||
28 | EXPORT_SYMBOL(find_next_zero_string); | ||
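As a usage sketch (hypothetical caller, kernel bitmap headers assumed), this is the kind of bitmap-range allocation find_next_zero_string() is meant for:

```c
/* Hypothetical caller -- not part of the patch. */
static long alloc_bit_range(unsigned long *map, long map_bits, int pages)
{
	long i, start = find_next_zero_string(map, 0, map_bits, pages);

	if (start == -1)
		return -1;                 /* no free run of 'pages' bits */
	for (i = 0; i < pages; i++)
		__set_bit(start + i, map); /* claim the run */
	return start;
}
```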
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S
new file mode 100644
index 000000000000..30a9da458c15
--- /dev/null
+++ b/arch/x86_64/lib/clear_page.S
@@ -0,0 +1,50 @@
1 | /* | ||
2 | * Zero a page. | ||
3 | * rdi page | ||
4 | */ | ||
5 | .globl clear_page | ||
6 | .p2align 4 | ||
7 | clear_page: | ||
8 | xorl %eax,%eax | ||
9 | movl $4096/64,%ecx | ||
10 | .p2align 4 | ||
11 | .Lloop: | ||
12 | decl %ecx | ||
13 | #define PUT(x) movq %rax,x*8(%rdi) | ||
14 | movq %rax,(%rdi) | ||
15 | PUT(1) | ||
16 | PUT(2) | ||
17 | PUT(3) | ||
18 | PUT(4) | ||
19 | PUT(5) | ||
20 | PUT(6) | ||
21 | PUT(7) | ||
22 | leaq 64(%rdi),%rdi | ||
23 | jnz .Lloop | ||
24 | nop | ||
25 | ret | ||
26 | clear_page_end: | ||
27 | |||
28 | /* C stepping K8 runs faster using the string instructions. | ||
29 | It is also a lot simpler. Use this when possible */ | ||
30 | |||
31 | #include <asm/cpufeature.h> | ||
32 | |||
33 | .section .altinstructions,"a" | ||
34 | .align 8 | ||
35 | .quad clear_page | ||
36 | .quad clear_page_c | ||
37 | .byte X86_FEATURE_K8_C | ||
38 | .byte clear_page_end-clear_page | ||
39 | .byte clear_page_c_end-clear_page_c | ||
40 | .previous | ||
41 | |||
42 | .section .altinstr_replacement,"ax" | ||
43 | clear_page_c: | ||
44 | movl $4096/8,%ecx | ||
45 | xorl %eax,%eax | ||
46 | rep | ||
47 | stosq | ||
48 | ret | ||
49 | clear_page_c_end: | ||
50 | .previous | ||
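For orientation, each record emitted into .altinstructions above has roughly the shape below (field names are illustrative, not the kernel's exact struct); the alternatives code walks these at boot and patches in the rep;stosq body on CPUs advertising X86_FEATURE_K8_C.

```c
/* Illustrative layout of one .altinstructions record above. */
struct alt_record_example {
	unsigned long orig;             /* .quad clear_page                    */
	unsigned long replacement;      /* .quad clear_page_c                  */
	unsigned char cpuid_feature;    /* .byte X86_FEATURE_K8_C              */
	unsigned char orig_len;         /* .byte clear_page_end-clear_page     */
	unsigned char repl_len;         /* .byte clear_page_c_end-clear_page_c */
};
```

The same pattern recurs below in copy_page.S, copy_user.S, memcpy.S and memset.S.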
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S
new file mode 100644
index 000000000000..dd3aa47b6bf5
--- /dev/null
+++ b/arch/x86_64/lib/copy_page.S
@@ -0,0 +1,101 @@
1 | /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */ | ||
2 | |||
3 | /* Don't use streaming store because it's better when the target | ||
4 | ends up in cache. */ | ||
5 | |||
6 | /* Could vary the prefetch distance based on SMP/UP */ | ||
7 | |||
8 | .globl copy_page | ||
9 | .p2align 4 | ||
10 | copy_page: | ||
11 | subq $3*8,%rsp | ||
12 | movq %rbx,(%rsp) | ||
13 | movq %r12,1*8(%rsp) | ||
14 | movq %r13,2*8(%rsp) | ||
15 | |||
16 | movl $(4096/64)-5,%ecx | ||
17 | .p2align 4 | ||
18 | .Loop64: | ||
19 | dec %rcx | ||
20 | |||
21 | movq (%rsi), %rax | ||
22 | movq 8 (%rsi), %rbx | ||
23 | movq 16 (%rsi), %rdx | ||
24 | movq 24 (%rsi), %r8 | ||
25 | movq 32 (%rsi), %r9 | ||
26 | movq 40 (%rsi), %r10 | ||
27 | movq 48 (%rsi), %r11 | ||
28 | movq 56 (%rsi), %r12 | ||
29 | |||
30 | prefetcht0 5*64(%rsi) | ||
31 | |||
32 | movq %rax, (%rdi) | ||
33 | movq %rbx, 8 (%rdi) | ||
34 | movq %rdx, 16 (%rdi) | ||
35 | movq %r8, 24 (%rdi) | ||
36 | movq %r9, 32 (%rdi) | ||
37 | movq %r10, 40 (%rdi) | ||
38 | movq %r11, 48 (%rdi) | ||
39 | movq %r12, 56 (%rdi) | ||
40 | |||
41 | leaq 64 (%rsi), %rsi | ||
42 | leaq 64 (%rdi), %rdi | ||
43 | |||
44 | jnz .Loop64 | ||
45 | |||
46 | movl $5,%ecx | ||
47 | .p2align 4 | ||
48 | .Loop2: | ||
49 | decl %ecx | ||
50 | |||
51 | movq (%rsi), %rax | ||
52 | movq 8 (%rsi), %rbx | ||
53 | movq 16 (%rsi), %rdx | ||
54 | movq 24 (%rsi), %r8 | ||
55 | movq 32 (%rsi), %r9 | ||
56 | movq 40 (%rsi), %r10 | ||
57 | movq 48 (%rsi), %r11 | ||
58 | movq 56 (%rsi), %r12 | ||
59 | |||
60 | movq %rax, (%rdi) | ||
61 | movq %rbx, 8 (%rdi) | ||
62 | movq %rdx, 16 (%rdi) | ||
63 | movq %r8, 24 (%rdi) | ||
64 | movq %r9, 32 (%rdi) | ||
65 | movq %r10, 40 (%rdi) | ||
66 | movq %r11, 48 (%rdi) | ||
67 | movq %r12, 56 (%rdi) | ||
68 | |||
69 | leaq 64(%rdi),%rdi | ||
70 | leaq 64(%rsi),%rsi | ||
71 | |||
72 | jnz .Loop2 | ||
73 | |||
74 | movq (%rsp),%rbx | ||
75 | movq 1*8(%rsp),%r12 | ||
76 | movq 2*8(%rsp),%r13 | ||
77 | addq $3*8,%rsp | ||
78 | ret | ||
79 | |||
80 | /* C stepping K8 runs faster using the string copy instructions. | ||
81 | It is also a lot simpler. Use this when possible */ | ||
82 | |||
83 | #include <asm/cpufeature.h> | ||
84 | |||
85 | .section .altinstructions,"a" | ||
86 | .align 8 | ||
87 | .quad copy_page | ||
88 | .quad copy_page_c | ||
89 | .byte X86_FEATURE_K8_C | ||
90 | .byte copy_page_c_end-copy_page_c | ||
91 | .byte copy_page_c_end-copy_page_c | ||
92 | .previous | ||
93 | |||
94 | .section .altinstr_replacement,"ax" | ||
95 | copy_page_c: | ||
96 | movl $4096/8,%ecx | ||
97 | rep | ||
98 | movsq | ||
99 | ret | ||
100 | copy_page_c_end: | ||
101 | .previous | ||
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S
new file mode 100644
index 000000000000..bd556c804248
--- /dev/null
+++ b/arch/x86_64/lib/copy_user.S
@@ -0,0 +1,294 @@
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | ||
3 | * | ||
4 | * Functions to copy from and to user space. | ||
5 | */ | ||
6 | |||
7 | #define FIX_ALIGNMENT 1 | ||
8 | |||
9 | #include <asm/current.h> | ||
10 | #include <asm/offset.h> | ||
11 | #include <asm/thread_info.h> | ||
12 | #include <asm/cpufeature.h> | ||
13 | |||
14 | /* Standard copy_to_user with segment limit checking */ | ||
15 | .globl copy_to_user | ||
16 | .p2align 4 | ||
17 | copy_to_user: | ||
18 | GET_THREAD_INFO(%rax) | ||
19 | movq %rdi,%rcx | ||
20 | addq %rdx,%rcx | ||
21 | jc bad_to_user | ||
22 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
23 | jae bad_to_user | ||
24 | 2: | ||
25 | .byte 0xe9 /* 32bit jump */ | ||
26 | .long .Lcug-1f | ||
27 | 1: | ||
28 | |||
29 | .section .altinstr_replacement,"ax" | ||
30 | 3: .byte 0xe9 /* replacement jmp with 32bit immediate */ | ||
31 | .long copy_user_generic_c-1b /* offset */ | ||
32 | .previous | ||
33 | .section .altinstructions,"a" | ||
34 | .align 8 | ||
35 | .quad 2b | ||
36 | .quad 3b | ||
37 | .byte X86_FEATURE_K8_C | ||
38 | .byte 5 | ||
39 | .byte 5 | ||
40 | .previous | ||
41 | |||
42 | /* Standard copy_from_user with segment limit checking */ | ||
43 | .globl copy_from_user | ||
44 | .p2align 4 | ||
45 | copy_from_user: | ||
46 | GET_THREAD_INFO(%rax) | ||
47 | movq %rsi,%rcx | ||
48 | addq %rdx,%rcx | ||
49 | jc bad_from_user | ||
50 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
51 | jae bad_from_user | ||
52 | /* FALL THROUGH to copy_user_generic */ | ||
53 | |||
54 | .section .fixup,"ax" | ||
55 | /* must zero dest */ | ||
56 | bad_from_user: | ||
57 | movl %edx,%ecx | ||
58 | xorl %eax,%eax | ||
59 | rep | ||
60 | stosb | ||
61 | bad_to_user: | ||
62 | movl %edx,%eax | ||
63 | ret | ||
64 | .previous | ||
65 | |||
66 | |||
67 | /* | ||
68 | * copy_user_generic - memory copy with exception handling. | ||
69 | * | ||
70 | * Input: | ||
71 | * rdi destination | ||
72 | * rsi source | ||
73 | * rdx count | ||
74 | * | ||
75 | * Output: | ||
76 | * eax uncopied bytes or 0 if successful. | ||
77 | */ | ||
78 | .globl copy_user_generic | ||
79 | .p2align 4 | ||
80 | copy_user_generic: | ||
81 | .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ | ||
82 | .byte 0x66,0x90 | ||
83 | 1: | ||
84 | .section .altinstr_replacement,"ax" | ||
85 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | ||
86 | .long copy_user_generic_c-1b /* offset */ | ||
87 | .previous | ||
88 | .section .altinstructions,"a" | ||
89 | .align 8 | ||
90 | .quad copy_user_generic | ||
91 | .quad 2b | ||
92 | .byte X86_FEATURE_K8_C | ||
93 | .byte 5 | ||
94 | .byte 5 | ||
95 | .previous | ||
96 | .Lcug: | ||
97 | pushq %rbx | ||
98 | xorl %eax,%eax /*zero for the exception handler */ | ||
99 | |||
100 | #ifdef FIX_ALIGNMENT | ||
101 | /* check for bad alignment of destination */ | ||
102 | movl %edi,%ecx | ||
103 | andl $7,%ecx | ||
104 | jnz .Lbad_alignment | ||
105 | .Lafter_bad_alignment: | ||
106 | #endif | ||
107 | |||
108 | movq %rdx,%rcx | ||
109 | |||
110 | movl $64,%ebx | ||
111 | shrq $6,%rdx | ||
112 | decq %rdx | ||
113 | js .Lhandle_tail | ||
114 | |||
115 | .p2align 4 | ||
116 | .Lloop: | ||
117 | .Ls1: movq (%rsi),%r11 | ||
118 | .Ls2: movq 1*8(%rsi),%r8 | ||
119 | .Ls3: movq 2*8(%rsi),%r9 | ||
120 | .Ls4: movq 3*8(%rsi),%r10 | ||
121 | .Ld1: movq %r11,(%rdi) | ||
122 | .Ld2: movq %r8,1*8(%rdi) | ||
123 | .Ld3: movq %r9,2*8(%rdi) | ||
124 | .Ld4: movq %r10,3*8(%rdi) | ||
125 | |||
126 | .Ls5: movq 4*8(%rsi),%r11 | ||
127 | .Ls6: movq 5*8(%rsi),%r8 | ||
128 | .Ls7: movq 6*8(%rsi),%r9 | ||
129 | .Ls8: movq 7*8(%rsi),%r10 | ||
130 | .Ld5: movq %r11,4*8(%rdi) | ||
131 | .Ld6: movq %r8,5*8(%rdi) | ||
132 | .Ld7: movq %r9,6*8(%rdi) | ||
133 | .Ld8: movq %r10,7*8(%rdi) | ||
134 | |||
135 | decq %rdx | ||
136 | |||
137 | leaq 64(%rsi),%rsi | ||
138 | leaq 64(%rdi),%rdi | ||
139 | |||
140 | jns .Lloop | ||
141 | |||
142 | .p2align 4 | ||
143 | .Lhandle_tail: | ||
144 | movl %ecx,%edx | ||
145 | andl $63,%ecx | ||
146 | shrl $3,%ecx | ||
147 | jz .Lhandle_7 | ||
148 | movl $8,%ebx | ||
149 | .p2align 4 | ||
150 | .Lloop_8: | ||
151 | .Ls9: movq (%rsi),%r8 | ||
152 | .Ld9: movq %r8,(%rdi) | ||
153 | decl %ecx | ||
154 | leaq 8(%rdi),%rdi | ||
155 | leaq 8(%rsi),%rsi | ||
156 | jnz .Lloop_8 | ||
157 | |||
158 | .Lhandle_7: | ||
159 | movl %edx,%ecx | ||
160 | andl $7,%ecx | ||
161 | jz .Lende | ||
162 | .p2align 4 | ||
163 | .Lloop_1: | ||
164 | .Ls10: movb (%rsi),%bl | ||
165 | .Ld10: movb %bl,(%rdi) | ||
166 | incq %rdi | ||
167 | incq %rsi | ||
168 | decl %ecx | ||
169 | jnz .Lloop_1 | ||
170 | |||
171 | .Lende: | ||
172 | popq %rbx | ||
173 | ret | ||
174 | |||
175 | #ifdef FIX_ALIGNMENT | ||
176 | /* align destination */ | ||
177 | .p2align 4 | ||
178 | .Lbad_alignment: | ||
179 | movl $8,%r9d | ||
180 | subl %ecx,%r9d | ||
181 | movl %r9d,%ecx | ||
182 | cmpq %r9,%rdx | ||
183 | jz .Lhandle_7 | ||
184 | js .Lhandle_7 | ||
185 | .Lalign_1: | ||
186 | .Ls11: movb (%rsi),%bl | ||
187 | .Ld11: movb %bl,(%rdi) | ||
188 | incq %rsi | ||
189 | incq %rdi | ||
190 | decl %ecx | ||
191 | jnz .Lalign_1 | ||
192 | subq %r9,%rdx | ||
193 | jmp .Lafter_bad_alignment | ||
194 | #endif | ||
195 | |||
196 | /* table sorted by exception address */ | ||
197 | .section __ex_table,"a" | ||
198 | .align 8 | ||
199 | .quad .Ls1,.Ls1e | ||
200 | .quad .Ls2,.Ls2e | ||
201 | .quad .Ls3,.Ls3e | ||
202 | .quad .Ls4,.Ls4e | ||
203 | .quad .Ld1,.Ls1e | ||
204 | .quad .Ld2,.Ls2e | ||
205 | .quad .Ld3,.Ls3e | ||
206 | .quad .Ld4,.Ls4e | ||
207 | .quad .Ls5,.Ls5e | ||
208 | .quad .Ls6,.Ls6e | ||
209 | .quad .Ls7,.Ls7e | ||
210 | .quad .Ls8,.Ls8e | ||
211 | .quad .Ld5,.Ls5e | ||
212 | .quad .Ld6,.Ls6e | ||
213 | .quad .Ld7,.Ls7e | ||
214 | .quad .Ld8,.Ls8e | ||
215 | .quad .Ls9,.Le_quad | ||
216 | .quad .Ld9,.Le_quad | ||
217 | .quad .Ls10,.Le_byte | ||
218 | .quad .Ld10,.Le_byte | ||
219 | #ifdef FIX_ALIGNMENT | ||
220 | .quad .Ls11,.Lzero_rest | ||
221 | .quad .Ld11,.Lzero_rest | ||
222 | #endif | ||
223 | .quad .Le5,.Le_zero | ||
224 | .previous | ||
225 | |||
226 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
227 | pessimistic side. this is gross. it would be better to fix the | ||
228 | interface. */ | ||
229 | /* eax: zero, ebx: 64 */ | ||
230 | .Ls1e: addl $8,%eax | ||
231 | .Ls2e: addl $8,%eax | ||
232 | .Ls3e: addl $8,%eax | ||
233 | .Ls4e: addl $8,%eax | ||
234 | .Ls5e: addl $8,%eax | ||
235 | .Ls6e: addl $8,%eax | ||
236 | .Ls7e: addl $8,%eax | ||
237 | .Ls8e: addl $8,%eax | ||
238 | addq %rbx,%rdi /* +64 */ | ||
239 | subq %rax,%rdi /* correct destination with computed offset */ | ||
240 | |||
241 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
242 | addq %rax,%rdx /* add offset to loopcnt */ | ||
243 | andl $63,%ecx /* remaining bytes */ | ||
244 | addq %rcx,%rdx /* add them */ | ||
245 | jmp .Lzero_rest | ||
246 | |||
247 | /* exception on quad word loop in tail handling */ | ||
248 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
249 | .Le_quad: | ||
250 | shll $3,%ecx | ||
251 | andl $7,%edx | ||
252 | addl %ecx,%edx | ||
253 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
254 | .Lzero_rest: | ||
255 | movq %rdx,%rcx | ||
256 | .Le_byte: | ||
257 | xorl %eax,%eax | ||
258 | .Le5: rep | ||
259 | stosb | ||
260 | /* when there is another exception while zeroing the rest just return */ | ||
261 | .Le_zero: | ||
262 | movq %rdx,%rax | ||
263 | jmp .Lende | ||
264 | |||
265 | /* C stepping K8 runs faster using the string copy instructions. | ||
266 | This is also a lot simpler. Use them when possible. | ||
267 | Patch in jmps to this code instead of copying it fully | ||
268 | to avoid unwanted aliasing in the exception tables. */ | ||
269 | |||
270 | /* rdi destination | ||
271 | * rsi source | ||
272 | * rdx count | ||
273 | * | ||
274 | * Output: | ||
275 | * eax uncopied bytes or 0 if successful. | ||
276 | */ | ||
277 | copy_user_generic_c: | ||
278 | movl %edx,%ecx | ||
279 | shrl $3,%ecx | ||
280 | andl $7,%edx | ||
281 | 1: rep | ||
282 | movsq | ||
283 | movl %edx,%ecx | ||
284 | 2: rep | ||
285 | movsb | ||
286 | 4: movl %ecx,%eax | ||
287 | ret | ||
288 | 3: lea (%rdx,%rcx,8),%rax | ||
289 | ret | ||
290 | |||
291 | .section __ex_table,"a" | ||
292 | .quad 1b,3b | ||
293 | .quad 2b,4b | ||
294 | .previous | ||
diff --git a/arch/x86_64/lib/csum-copy.S b/arch/x86_64/lib/csum-copy.S
new file mode 100644
index 000000000000..01808ec37836
--- /dev/null
+++ b/arch/x86_64/lib/csum-copy.S
@@ -0,0 +1,233 @@
1 | /* | ||
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of the GNU General Public | ||
5 | * License. See the file COPYING in the main directory of this archive | ||
6 | * for more details. No warranty for anything given at all. | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/errno.h> | ||
10 | |||
11 | /* | ||
12 | * Checksum copy with exception handling. | ||
13 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the | ||
14 | * destination is zeroed. | ||
15 | * | ||
16 | * Input | ||
17 | * rdi source | ||
18 | * rsi destination | ||
19 | * edx len (32bit) | ||
20 | * ecx sum (32bit) | ||
21 | * r8 src_err_ptr (int) | ||
22 | * r9 dst_err_ptr (int) | ||
23 | * | ||
24 | * Output | ||
25 | * eax 64bit sum. undefined in case of exception. | ||
26 | * | ||
27 | * Wrappers need to take care of valid exception sum and zeroing. | ||
28 | * They also should align source or destination to 8 bytes. | ||
29 | */ | ||
30 | |||
31 | .macro source | ||
32 | 10: | ||
33 | .section __ex_table,"a" | ||
34 | .align 8 | ||
35 | .quad 10b,.Lbad_source | ||
36 | .previous | ||
37 | .endm | ||
38 | |||
39 | .macro dest | ||
40 | 20: | ||
41 | .section __ex_table,"a" | ||
42 | .align 8 | ||
43 | .quad 20b,.Lbad_dest | ||
44 | .previous | ||
45 | .endm | ||
46 | |||
47 | .macro ignore L=.Lignore | ||
48 | 30: | ||
49 | .section __ex_table,"a" | ||
50 | .align 8 | ||
51 | .quad 30b,\L | ||
52 | .previous | ||
53 | .endm | ||
54 | |||
55 | |||
56 | .globl csum_partial_copy_generic | ||
57 | .p2align 4 | ||
58 | csum_partial_copy_generic: | ||
59 | cmpl $3*64,%edx | ||
60 | jle .Lignore | ||
61 | |||
62 | .Lignore: | ||
63 | subq $7*8,%rsp | ||
64 | movq %rbx,2*8(%rsp) | ||
65 | movq %r12,3*8(%rsp) | ||
66 | movq %r14,4*8(%rsp) | ||
67 | movq %r13,5*8(%rsp) | ||
68 | movq %rbp,6*8(%rsp) | ||
69 | |||
70 | movq %r8,(%rsp) | ||
71 | movq %r9,1*8(%rsp) | ||
72 | |||
73 | movl %ecx,%eax | ||
74 | movl %edx,%ecx | ||
75 | |||
76 | xorl %r9d,%r9d | ||
77 | movq %rcx,%r12 | ||
78 | |||
79 | shrq $6,%r12 | ||
80 | jz .Lhandle_tail /* < 64 */ | ||
81 | |||
82 | clc | ||
83 | |||
84 | /* main loop. clear in 64 byte blocks */ | ||
85 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | ||
86 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | ||
87 | /* r10: temp5, rbp: temp6, r14 temp7, r13 temp8 */ | ||
88 | .p2align 4 | ||
89 | .Lloop: | ||
90 | source | ||
91 | movq (%rdi),%rbx | ||
92 | source | ||
93 | movq 8(%rdi),%r8 | ||
94 | source | ||
95 | movq 16(%rdi),%r11 | ||
96 | source | ||
97 | movq 24(%rdi),%rdx | ||
98 | |||
99 | source | ||
100 | movq 32(%rdi),%r10 | ||
101 | source | ||
102 | movq 40(%rdi),%rbp | ||
103 | source | ||
104 | movq 48(%rdi),%r14 | ||
105 | source | ||
106 | movq 56(%rdi),%r13 | ||
107 | |||
108 | ignore 2f | ||
109 | prefetcht0 5*64(%rdi) | ||
110 | 2: | ||
111 | adcq %rbx,%rax | ||
112 | adcq %r8,%rax | ||
113 | adcq %r11,%rax | ||
114 | adcq %rdx,%rax | ||
115 | adcq %r10,%rax | ||
116 | adcq %rbp,%rax | ||
117 | adcq %r14,%rax | ||
118 | adcq %r13,%rax | ||
119 | |||
120 | decl %r12d | ||
121 | |||
122 | dest | ||
123 | movq %rbx,(%rsi) | ||
124 | dest | ||
125 | movq %r8,8(%rsi) | ||
126 | dest | ||
127 | movq %r11,16(%rsi) | ||
128 | dest | ||
129 | movq %rdx,24(%rsi) | ||
130 | |||
131 | dest | ||
132 | movq %r10,32(%rsi) | ||
133 | dest | ||
134 | movq %rbp,40(%rsi) | ||
135 | dest | ||
136 | movq %r14,48(%rsi) | ||
137 | dest | ||
138 | movq %r13,56(%rsi) | ||
139 | |||
140 | 3: | ||
141 | |||
142 | leaq 64(%rdi),%rdi | ||
143 | leaq 64(%rsi),%rsi | ||
144 | |||
145 | jnz .Lloop | ||
146 | |||
147 | adcq %r9,%rax | ||
148 | |||
149 | /* do last up to 56 bytes */ | ||
150 | .Lhandle_tail: | ||
151 | /* ecx: count */ | ||
152 | movl %ecx,%r10d | ||
153 | andl $63,%ecx | ||
154 | shrl $3,%ecx | ||
155 | jz .Lfold | ||
156 | clc | ||
157 | .p2align 4 | ||
158 | .Lloop_8: | ||
159 | source | ||
160 | movq (%rdi),%rbx | ||
161 | adcq %rbx,%rax | ||
162 | decl %ecx | ||
163 | dest | ||
164 | movq %rbx,(%rsi) | ||
165 | leaq 8(%rsi),%rsi /* preserve carry */ | ||
166 | leaq 8(%rdi),%rdi | ||
167 | jnz .Lloop_8 | ||
168 | adcq %r9,%rax /* add in carry */ | ||
169 | |||
170 | .Lfold: | ||
171 | /* reduce checksum to 32bits */ | ||
172 | movl %eax,%ebx | ||
173 | shrq $32,%rax | ||
174 | addl %ebx,%eax | ||
175 | adcl %r9d,%eax | ||
176 | |||
177 | /* do last up to 6 bytes */ | ||
178 | .Lhandle_7: | ||
179 | movl %r10d,%ecx | ||
180 | andl $7,%ecx | ||
181 | shrl $1,%ecx | ||
182 | jz .Lhandle_1 | ||
183 | movl $2,%edx | ||
184 | xorl %ebx,%ebx | ||
185 | clc | ||
186 | .p2align 4 | ||
187 | .Lloop_1: | ||
188 | source | ||
189 | movw (%rdi),%bx | ||
190 | adcl %ebx,%eax | ||
191 | dest | ||
192 | decl %ecx | ||
193 | movw %bx,(%rsi) | ||
194 | leaq 2(%rdi),%rdi | ||
195 | leaq 2(%rsi),%rsi | ||
196 | jnz .Lloop_1 | ||
197 | adcl %r9d,%eax /* add in carry */ | ||
198 | |||
199 | /* handle last odd byte */ | ||
200 | .Lhandle_1: | ||
201 | testl $1,%r10d | ||
202 | jz .Lende | ||
203 | xorl %ebx,%ebx | ||
204 | source | ||
205 | movb (%rdi),%bl | ||
206 | dest | ||
207 | movb %bl,(%rsi) | ||
208 | addl %ebx,%eax | ||
209 | adcl %r9d,%eax /* carry */ | ||
210 | |||
211 | .Lende: | ||
212 | movq 2*8(%rsp),%rbx | ||
213 | movq 3*8(%rsp),%r12 | ||
214 | movq 4*8(%rsp),%r14 | ||
215 | movq 5*8(%rsp),%r13 | ||
216 | movq 6*8(%rsp),%rbp | ||
217 | addq $7*8,%rsp | ||
218 | ret | ||
219 | |||
220 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | ||
221 | .Lbad_source: | ||
222 | movq (%rsp),%rax | ||
223 | testq %rax,%rax | ||
224 | jz .Lende | ||
225 | movl $-EFAULT,(%rax) | ||
226 | jmp .Lende | ||
227 | |||
228 | .Lbad_dest: | ||
229 | movq 8(%rsp),%rax | ||
230 | testq %rax,%rax | ||
231 | jz .Lende | ||
232 | movl $-EFAULT,(%rax) | ||
233 | jmp .Lende | ||
diff --git a/arch/x86_64/lib/csum-partial.c b/arch/x86_64/lib/csum-partial.c
new file mode 100644
index 000000000000..5384e227cdf6
--- /dev/null
+++ b/arch/x86_64/lib/csum-partial.c
@@ -0,0 +1,150 @@
1 | /* | ||
2 | * arch/x86_64/lib/csum-partial.c | ||
3 | * | ||
4 | * This file contains network checksum routines that are better done | ||
5 | * in an architecture-specific manner due to speed. | ||
6 | */ | ||
7 | |||
8 | #include <linux/compiler.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <asm/checksum.h> | ||
11 | |||
12 | #define __force_inline inline __attribute__((always_inline)) | ||
13 | |||
14 | static inline unsigned short from32to16(unsigned a) | ||
15 | { | ||
16 | unsigned short b = a >> 16; | ||
17 | asm("addw %w2,%w0\n\t" | ||
18 | "adcw $0,%w0\n" | ||
19 | : "=r" (b) | ||
20 | : "0" (b), "r" (a)); | ||
21 | return b; | ||
22 | } | ||
23 | |||
24 | /* | ||
25 | * Do a 64-bit checksum on an arbitrary memory area. | ||
26 | * Returns a 32bit checksum. | ||
27 | * | ||
28 | * This isn't as time critical as it used to be because many NICs | ||
29 | * do hardware checksumming these days. | ||
30 | * | ||
31 | * Things tried and found to not make it faster: | ||
32 | * Manual Prefetching | ||
33 | * Unrolling to an 128 bytes inner loop. | ||
34 | * Using interleaving with more registers to break the carry chains. | ||
35 | */ | ||
36 | static __force_inline unsigned do_csum(const unsigned char *buff, unsigned len) | ||
37 | { | ||
38 | unsigned odd, count; | ||
39 | unsigned long result = 0; | ||
40 | |||
41 | if (unlikely(len == 0)) | ||
42 | return result; | ||
43 | odd = 1 & (unsigned long) buff; | ||
44 | if (unlikely(odd)) { | ||
45 | result = *buff << 8; | ||
46 | len--; | ||
47 | buff++; | ||
48 | } | ||
49 | count = len >> 1; /* nr of 16-bit words.. */ | ||
50 | if (count) { | ||
51 | if (2 & (unsigned long) buff) { | ||
52 | result += *(unsigned short *)buff; | ||
53 | count--; | ||
54 | len -= 2; | ||
55 | buff += 2; | ||
56 | } | ||
57 | count >>= 1; /* nr of 32-bit words.. */ | ||
58 | if (count) { | ||
59 | unsigned long zero; | ||
60 | unsigned count64; | ||
61 | if (4 & (unsigned long) buff) { | ||
62 | result += *(unsigned int *) buff; | ||
63 | count--; | ||
64 | len -= 4; | ||
65 | buff += 4; | ||
66 | } | ||
67 | count >>= 1; /* nr of 64-bit words.. */ | ||
68 | |||
69 | /* main loop using 64byte blocks */ | ||
70 | zero = 0; | ||
71 | count64 = count >> 3; | ||
72 | while (count64) { | ||
73 | asm("addq 0*8(%[src]),%[res]\n\t" | ||
74 | "adcq 1*8(%[src]),%[res]\n\t" | ||
75 | "adcq 2*8(%[src]),%[res]\n\t" | ||
76 | "adcq 3*8(%[src]),%[res]\n\t" | ||
77 | "adcq 4*8(%[src]),%[res]\n\t" | ||
78 | "adcq 5*8(%[src]),%[res]\n\t" | ||
79 | "adcq 6*8(%[src]),%[res]\n\t" | ||
80 | "adcq 7*8(%[src]),%[res]\n\t" | ||
81 | "adcq %[zero],%[res]" | ||
82 | : [res] "=r" (result) | ||
83 | : [src] "r" (buff), [zero] "r" (zero), | ||
84 | "[res]" (result)); | ||
85 | buff += 64; | ||
86 | count64--; | ||
87 | } | ||
88 | |||
89 | /* last up to 7 8-byte blocks */ | ||
90 | count %= 8; | ||
91 | while (count) { | ||
92 | asm("addq %1,%0\n\t" | ||
93 | "adcq %2,%0\n" | ||
94 | : "=r" (result) | ||
95 | : "m" (*(unsigned long *)buff), | ||
96 | "r" (zero), "0" (result)); | ||
97 | --count; | ||
98 | buff += 8; | ||
99 | } | ||
100 | result = add32_with_carry(result>>32, | ||
101 | result&0xffffffff); | ||
102 | |||
103 | if (len & 4) { | ||
104 | result += *(unsigned int *) buff; | ||
105 | buff += 4; | ||
106 | } | ||
107 | } | ||
108 | if (len & 2) { | ||
109 | result += *(unsigned short *) buff; | ||
110 | buff += 2; | ||
111 | } | ||
112 | } | ||
113 | if (len & 1) | ||
114 | result += *buff; | ||
115 | result = add32_with_carry(result>>32, result & 0xffffffff); | ||
116 | if (unlikely(odd)) { | ||
117 | result = from32to16(result); | ||
118 | result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | ||
119 | } | ||
120 | return result; | ||
121 | } | ||
122 | |||
123 | /* | ||
124 | * computes the checksum of a memory block at buff, length len, | ||
125 | * and adds in "sum" (32-bit) | ||
126 | * | ||
127 | * returns a 32-bit number suitable for feeding into itself | ||
128 | * or csum_tcpudp_magic | ||
129 | * | ||
130 | * this function must be called with even lengths, except | ||
131 | * for the last fragment, which may be odd | ||
132 | * | ||
133 | * it's best to have buff aligned on a 64-bit boundary | ||
134 | */ | ||
135 | unsigned csum_partial(const unsigned char *buff, unsigned len, unsigned sum) | ||
136 | { | ||
137 | return add32_with_carry(do_csum(buff, len), sum); | ||
138 | } | ||
139 | |||
140 | EXPORT_SYMBOL(csum_partial); | ||
141 | |||
142 | /* | ||
143 | * this routine is used for miscellaneous IP-like checksums, mainly | ||
144 | * in icmp.c | ||
145 | */ | ||
146 | unsigned short ip_compute_csum(unsigned char * buff, int len) | ||
147 | { | ||
148 | return csum_fold(csum_partial(buff,len,0)); | ||
149 | } | ||
150 | |||
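The routines above deliberately return a 32-bit unfolded sum. As an illustrative sketch (hypothetical helper, not part of the patch), folding that down to the final 16-bit ones'-complement checksum that goes on the wire works like this, equivalent to what csum_fold() does for the callers:

```c
/* Illustrative only: fold a 32-bit partial sum to the final checksum. */
static unsigned short fold_csum_example(unsigned int sum32)
{
	sum32 = (sum32 & 0xffff) + (sum32 >> 16);  /* fold upper half in */
	sum32 = (sum32 & 0xffff) + (sum32 >> 16);  /* absorb the carry   */
	return (unsigned short)~sum32;             /* ones' complement   */
}
```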
diff --git a/arch/x86_64/lib/csum-wrappers.c b/arch/x86_64/lib/csum-wrappers.c
new file mode 100644
index 000000000000..94323f20816e
--- /dev/null
+++ b/arch/x86_64/lib/csum-wrappers.c
@@ -0,0 +1,129 @@
1 | /* Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v.2 | ||
3 | * | ||
4 | * Wrappers of assembly checksum functions for x86-64. | ||
5 | */ | ||
6 | |||
7 | #include <asm/checksum.h> | ||
8 | #include <linux/module.h> | ||
9 | |||
10 | /** | ||
11 | * csum_partial_copy_from_user - Copy and checksum from user space. | ||
12 | * @src: source address (user space) | ||
13 | * @dst: destination address | ||
14 | * @len: number of bytes to be copied. | ||
15 | * @isum: initial sum that is added into the result (32bit unfolded) | ||
16 | * @errp: set to -EFAULT for a bad source address. | ||
17 | * | ||
18 | * Returns a 32bit unfolded checksum of the buffer. | ||
19 | * src and dst are best aligned to 64bits. | ||
20 | */ | ||
21 | unsigned int | ||
22 | csum_partial_copy_from_user(const unsigned char __user *src, unsigned char *dst, | ||
23 | int len, unsigned int isum, int *errp) | ||
24 | { | ||
25 | might_sleep(); | ||
26 | *errp = 0; | ||
27 | if (likely(access_ok(VERIFY_READ,src, len))) { | ||
28 | /* Why 6, not 7? To handle odd addresses aligned we | ||
29 | would need to do considerable complications to fix the | ||
30 | checksum which is defined as an 16bit accumulator. The | ||
31 | fix alignment code is primarily for performance | ||
32 | compatibility with 32bit and that will handle odd | ||
33 | addresses slowly too. */ | ||
34 | if (unlikely((unsigned long)src & 6)) { | ||
35 | while (((unsigned long)src & 6) && len >= 2) { | ||
36 | __u16 val16; | ||
37 | *errp = __get_user(val16, (__u16 __user *)src); | ||
38 | if (*errp) | ||
39 | return isum; | ||
40 | *(__u16 *)dst = val16; | ||
41 | isum = add32_with_carry(isum, val16); | ||
42 | src += 2; | ||
43 | dst += 2; | ||
44 | len -= 2; | ||
45 | } | ||
46 | } | ||
47 | isum = csum_partial_copy_generic((__force void *)src,dst,len,isum,errp,NULL); | ||
48 | if (likely(*errp == 0)) | ||
49 | return isum; | ||
50 | } | ||
51 | *errp = -EFAULT; | ||
52 | memset(dst,0,len); | ||
53 | return isum; | ||
54 | } | ||
55 | |||
56 | EXPORT_SYMBOL(csum_partial_copy_from_user); | ||
57 | |||
58 | /** | ||
59 | * csum_partial_copy_to_user - Copy and checksum to user space. | ||
60 | * @src: source address | ||
61 | * @dst: destination address (user space) | ||
62 | * @len: number of bytes to be copied. | ||
63 | * @isum: initial sum that is added into the result (32bit unfolded) | ||
64 | * @errp: set to -EFAULT for a bad destination address. | ||
65 | * | ||
66 | * Returns a 32bit unfolded checksum of the buffer. | ||
67 | * src and dst are best aligned to 64bits. | ||
68 | */ | ||
69 | unsigned int | ||
70 | csum_partial_copy_to_user(unsigned const char *src, unsigned char __user *dst, | ||
71 | int len, unsigned int isum, int *errp) | ||
72 | { | ||
73 | might_sleep(); | ||
74 | if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) { | ||
75 | *errp = -EFAULT; | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | if (unlikely((unsigned long)dst & 6)) { | ||
80 | while (((unsigned long)dst & 6) && len >= 2) { | ||
81 | __u16 val16 = *(__u16 *)src; | ||
82 | isum = add32_with_carry(isum, val16); | ||
83 | *errp = __put_user(val16, (__u16 __user *)dst); | ||
84 | if (*errp) | ||
85 | return isum; | ||
86 | src += 2; | ||
87 | dst += 2; | ||
88 | len -= 2; | ||
89 | } | ||
90 | } | ||
91 | |||
92 | *errp = 0; | ||
93 | return csum_partial_copy_generic(src, (void __force *)dst,len,isum,NULL,errp); | ||
94 | } | ||
95 | |||
96 | EXPORT_SYMBOL(csum_partial_copy_to_user); | ||
97 | |||
98 | /** | ||
99 | * csum_partial_copy_nocheck - Copy and checksum. | ||
100 | * @src: source address | ||
101 | * @dst: destination address | ||
102 | * @len: number of bytes to be copied. | ||
103 | * @isum: initial sum that is added into the result (32bit unfolded) | ||
104 | * | ||
105 | * Returns a 32bit unfolded checksum of the buffer. | ||
106 | */ | ||
107 | unsigned int | ||
108 | csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len, unsigned int sum) | ||
109 | { | ||
110 | return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL); | ||
111 | } | ||
112 | |||
113 | unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr, | ||
114 | __u32 len, unsigned short proto, unsigned int sum) | ||
115 | { | ||
116 | __u64 rest, sum64; | ||
117 | |||
118 | rest = (__u64)htonl(len) + (__u64)htons(proto) + (__u64)sum; | ||
119 | asm(" addq (%[saddr]),%[sum]\n" | ||
120 | " adcq 8(%[saddr]),%[sum]\n" | ||
121 | " adcq (%[daddr]),%[sum]\n" | ||
122 | " adcq 8(%[daddr]),%[sum]\n" | ||
123 | " adcq $0,%[sum]\n" | ||
124 | : [sum] "=r" (sum64) | ||
125 | : "[sum]" (rest),[saddr] "r" (saddr), [daddr] "r" (daddr)); | ||
126 | return csum_fold(add32_with_carry(sum64 & 0xffffffff, sum64>>32)); | ||
127 | } | ||
128 | |||
129 | EXPORT_SYMBOL(csum_ipv6_magic); | ||
diff --git a/arch/x86_64/lib/dec_and_lock.c b/arch/x86_64/lib/dec_and_lock.c
new file mode 100644
index 000000000000..ab43394dc775
--- /dev/null
+++ b/arch/x86_64/lib/dec_and_lock.c
@@ -0,0 +1,40 @@
1 | /* | ||
2 | * x86 version of "atomic_dec_and_lock()" using | ||
3 | * the atomic "cmpxchg" instruction. | ||
4 | * | ||
5 | * (For CPU's lacking cmpxchg, we use the slow | ||
6 | * generic version, and this one never even gets | ||
7 | * compiled). | ||
8 | */ | ||
9 | |||
10 | #include <linux/spinlock.h> | ||
11 | #include <asm/atomic.h> | ||
12 | |||
13 | int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) | ||
14 | { | ||
15 | int counter; | ||
16 | int newcount; | ||
17 | |||
18 | repeat: | ||
19 | counter = atomic_read(atomic); | ||
20 | newcount = counter-1; | ||
21 | |||
22 | if (!newcount) | ||
23 | goto slow_path; | ||
24 | |||
25 | asm volatile("lock; cmpxchgl %1,%2" | ||
26 | :"=a" (newcount) | ||
27 | :"r" (newcount), "m" (atomic->counter), "0" (counter)); | ||
28 | |||
29 | /* If the above failed, "eax" will have changed */ | ||
30 | if (newcount != counter) | ||
31 | goto repeat; | ||
32 | return 0; | ||
33 | |||
34 | slow_path: | ||
35 | spin_lock(lock); | ||
36 | if (atomic_dec_and_test(atomic)) | ||
37 | return 1; | ||
38 | spin_unlock(lock); | ||
39 | return 0; | ||
40 | } | ||
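A usage sketch (types and names hypothetical, kernel list/spinlock/slab headers assumed) of the refcount-release pattern atomic_dec_and_lock() exists for: the lock is only taken when the count actually drops to zero.

```c
/* Hypothetical caller -- not part of the patch. */
struct my_obj {
	atomic_t refcnt;
	struct list_head node;
};

static void my_obj_put(struct my_obj *obj, spinlock_t *list_lock)
{
	if (atomic_dec_and_lock(&obj->refcnt, list_lock)) {
		/* count reached zero; we return here with list_lock held */
		list_del(&obj->node);
		spin_unlock(list_lock);
		kfree(obj);
	}
}
```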
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
new file mode 100644
index 000000000000..6e2d66472eb1
--- /dev/null
+++ b/arch/x86_64/lib/delay.c
@@ -0,0 +1,48 @@
1 | /* | ||
2 | * Precise Delay Loops for x86-64 | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
6 | * | ||
7 | * The __delay function must _NOT_ be inlined as its execution time | ||
8 | * depends wildly on alignment on many x86 processors. | ||
9 | */ | ||
10 | |||
11 | #include <linux/config.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/delay.h> | ||
14 | #include <asm/delay.h> | ||
15 | |||
16 | #ifdef CONFIG_SMP | ||
17 | #include <asm/smp.h> | ||
18 | #endif | ||
19 | |||
20 | int x86_udelay_tsc = 0; /* Delay via TSC */ | ||
21 | |||
22 | void __delay(unsigned long loops) | ||
23 | { | ||
24 | unsigned bclock, now; | ||
25 | |||
26 | rdtscl(bclock); | ||
27 | do | ||
28 | { | ||
29 | rep_nop(); | ||
30 | rdtscl(now); | ||
31 | } | ||
32 | while((now-bclock) < loops); | ||
33 | } | ||
34 | |||
35 | inline void __const_udelay(unsigned long xloops) | ||
36 | { | ||
37 | __delay(((xloops * cpu_data[_smp_processor_id()].loops_per_jiffy) >> 32) * HZ); | ||
38 | } | ||
39 | |||
40 | void __udelay(unsigned long usecs) | ||
41 | { | ||
42 | __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ | ||
43 | } | ||
44 | |||
45 | void __ndelay(unsigned long nsecs) | ||
46 | { | ||
47 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | ||
48 | } | ||
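For reference, the magic constants above work out as follows: 0x10c6 is 4294, roughly 2^32/1000000, so __udelay(usecs) hands __const_udelay() about usecs*2^32/10^6 "xloops"; after the multiply by loops_per_jiffy, the >>32 and the *HZ, the loop count is approximately usecs * loops_per_jiffy * HZ / 10^6, that is, the calibrated loops-per-second rate scaled to the requested number of microseconds. The 0x00005 in __ndelay() is the same trick with 2^32/10^9 rounded up.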
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
new file mode 100644
index 000000000000..f94ea8a44051
--- /dev/null
+++ b/arch/x86_64/lib/getuser.S
@@ -0,0 +1,101 @@
1 | /* | ||
2 | * __get_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * (C) Copyright 2005 Andi Kleen | ||
6 | * | ||
7 | * These functions have a non-standard call interface | ||
8 | * to make them more efficient, especially as they | ||
9 | * return an error value in addition to the "real" | ||
10 | * return value. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * __get_user_X | ||
15 | * | ||
16 | * Inputs: %rcx contains the address. | ||
17 | * The register is modified, but all changes are undone | ||
18 | * before returning because the C code doesn't know about it. | ||
19 | * | ||
20 | * Outputs: %rax is error code (0 or -EFAULT) | ||
21 | * %rdx contains zero-extended value | ||
22 | * | ||
23 | * %r8 is destroyed. | ||
24 | * | ||
25 | * These functions should not modify any other registers, | ||
26 | * as they get called from within inline assembly. | ||
27 | */ | ||
28 | |||
29 | #include <linux/linkage.h> | ||
30 | #include <asm/page.h> | ||
31 | #include <asm/errno.h> | ||
32 | #include <asm/offset.h> | ||
33 | #include <asm/thread_info.h> | ||
34 | |||
35 | .text | ||
36 | .p2align 4 | ||
37 | .globl __get_user_1 | ||
38 | __get_user_1: | ||
39 | GET_THREAD_INFO(%r8) | ||
40 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
41 | jae bad_get_user | ||
42 | 1: movzb (%rcx),%edx | ||
43 | xorl %eax,%eax | ||
44 | ret | ||
45 | |||
46 | .p2align 4 | ||
47 | .globl __get_user_2 | ||
48 | __get_user_2: | ||
49 | GET_THREAD_INFO(%r8) | ||
50 | addq $1,%rcx | ||
51 | jc 20f | ||
52 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
53 | jae 20f | ||
54 | decq %rcx | ||
55 | 2: movzwl (%rcx),%edx | ||
56 | xorl %eax,%eax | ||
57 | ret | ||
58 | 20: decq %rcx | ||
59 | jmp bad_get_user | ||
60 | |||
61 | .p2align 4 | ||
62 | .globl __get_user_4 | ||
63 | __get_user_4: | ||
64 | GET_THREAD_INFO(%r8) | ||
65 | addq $3,%rcx | ||
66 | jc 30f | ||
67 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
68 | jae 30f | ||
69 | subq $3,%rcx | ||
70 | 3: movl (%rcx),%edx | ||
71 | xorl %eax,%eax | ||
72 | ret | ||
73 | 30: subq $3,%rcx | ||
74 | jmp bad_get_user | ||
75 | |||
76 | .p2align 4 | ||
77 | .globl __get_user_8 | ||
78 | __get_user_8: | ||
79 | GET_THREAD_INFO(%r8) | ||
80 | addq $7,%rcx | ||
81 | jc bad_get_user | ||
82 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
83 | jae bad_get_user | ||
84 | subq $7,%rcx | ||
85 | 4: movq (%rcx),%rdx | ||
86 | xorl %eax,%eax | ||
87 | ret | ||
88 | 40: subq $7,%rcx | ||
89 | jmp bad_get_user | ||
90 | |||
91 | bad_get_user: | ||
92 | xorl %edx,%edx | ||
93 | movq $(-EFAULT),%rax | ||
94 | ret | ||
95 | |||
96 | .section __ex_table,"a" | ||
97 | .quad 1b,bad_get_user | ||
98 | .quad 2b,bad_get_user | ||
99 | .quad 3b,bad_get_user | ||
100 | .quad 4b,bad_get_user | ||
101 | .previous | ||
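A hedged sketch of how C code is expected to call into these helpers given the register contract documented above; the macro name is made up and the real <asm/uaccess.h> wrappers differ in detail.

```c
/* Hypothetical wrapper -- not the actual <asm/uaccess.h> text. */
#define example_get_user_4(err, val, ptr)		\
	asm volatile("call __get_user_4"		\
		     : "=a" (err), "=d" (val)		\
		     : "c" (ptr)			\
		     : "r8")
```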
diff --git a/arch/x86_64/lib/io.c b/arch/x86_64/lib/io.c
new file mode 100644
index 000000000000..87b4a4e18039
--- /dev/null
+++ b/arch/x86_64/lib/io.c
@@ -0,0 +1,23 @@
1 | #include <linux/string.h> | ||
2 | #include <asm/io.h> | ||
3 | #include <linux/module.h> | ||
4 | |||
5 | void __memcpy_toio(unsigned long dst,const void*src,unsigned len) | ||
6 | { | ||
7 | __inline_memcpy((void *) dst,src,len); | ||
8 | } | ||
9 | EXPORT_SYMBOL(__memcpy_toio); | ||
10 | |||
11 | void __memcpy_fromio(void *dst,unsigned long src,unsigned len) | ||
12 | { | ||
13 | __inline_memcpy(dst,(const void *) src,len); | ||
14 | } | ||
15 | EXPORT_SYMBOL(__memcpy_fromio); | ||
16 | |||
17 | void memset_io(volatile void __iomem *a, int b, size_t c) | ||
18 | { | ||
19 | /* XXX: memset can mangle the IO patterns quite a bit. | ||
20 | perhaps it would be better to use a dumb one */ | ||
21 | memset((void *)a,b,c); | ||
22 | } | ||
23 | EXPORT_SYMBOL(memset_io); | ||
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S
new file mode 100644
index 000000000000..c6c46494fef5
--- /dev/null
+++ b/arch/x86_64/lib/memcpy.S
@@ -0,0 +1,121 @@
1 | /* Copyright 2002 Andi Kleen */ | ||
2 | |||
3 | #include <asm/cpufeature.h> | ||
4 | /* | ||
5 | * memcpy - Copy a memory block. | ||
6 | * | ||
7 | * Input: | ||
8 | * rdi destination | ||
9 | * rsi source | ||
10 | * rdx count | ||
11 | * | ||
12 | * Output: | ||
13 | * rax original destination | ||
14 | */ | ||
15 | |||
16 | .globl __memcpy | ||
17 | .globl memcpy | ||
18 | .p2align 4 | ||
19 | __memcpy: | ||
20 | memcpy: | ||
21 | pushq %rbx | ||
22 | movq %rdi,%rax | ||
23 | |||
24 | movl %edx,%ecx | ||
25 | shrl $6,%ecx | ||
26 | jz .Lhandle_tail | ||
27 | |||
28 | .p2align 4 | ||
29 | .Lloop_64: | ||
30 | decl %ecx | ||
31 | |||
32 | movq (%rsi),%r11 | ||
33 | movq 8(%rsi),%r8 | ||
34 | |||
35 | movq %r11,(%rdi) | ||
36 | movq %r8,1*8(%rdi) | ||
37 | |||
38 | movq 2*8(%rsi),%r9 | ||
39 | movq 3*8(%rsi),%r10 | ||
40 | |||
41 | movq %r9,2*8(%rdi) | ||
42 | movq %r10,3*8(%rdi) | ||
43 | |||
44 | movq 4*8(%rsi),%r11 | ||
45 | movq 5*8(%rsi),%r8 | ||
46 | |||
47 | movq %r11,4*8(%rdi) | ||
48 | movq %r8,5*8(%rdi) | ||
49 | |||
50 | movq 6*8(%rsi),%r9 | ||
51 | movq 7*8(%rsi),%r10 | ||
52 | |||
53 | movq %r9,6*8(%rdi) | ||
54 | movq %r10,7*8(%rdi) | ||
55 | |||
56 | leaq 64(%rsi),%rsi | ||
57 | leaq 64(%rdi),%rdi | ||
58 | jnz .Lloop_64 | ||
59 | |||
60 | .Lhandle_tail: | ||
61 | movl %edx,%ecx | ||
62 | andl $63,%ecx | ||
63 | shrl $3,%ecx | ||
64 | jz .Lhandle_7 | ||
65 | .p2align 4 | ||
66 | .Lloop_8: | ||
67 | decl %ecx | ||
68 | movq (%rsi),%r8 | ||
69 | movq %r8,(%rdi) | ||
70 | leaq 8(%rdi),%rdi | ||
71 | leaq 8(%rsi),%rsi | ||
72 | jnz .Lloop_8 | ||
73 | |||
74 | .Lhandle_7: | ||
75 | movl %edx,%ecx | ||
76 | andl $7,%ecx | ||
77 | jz .Lende | ||
78 | .p2align 4 | ||
79 | .Lloop_1: | ||
80 | movb (%rsi),%r8b | ||
81 | movb %r8b,(%rdi) | ||
82 | incq %rdi | ||
83 | incq %rsi | ||
84 | decl %ecx | ||
85 | jnz .Lloop_1 | ||
86 | |||
87 | .Lende: | ||
88 | popq %rbx | ||
89 | ret | ||
90 | .Lfinal: | ||
91 | |||
92 | /* C stepping K8 runs faster using the string copy instructions. | ||
93 | It is also a lot simpler. Use this when possible */ | ||
94 | |||
95 | .section .altinstructions,"a" | ||
96 | .align 8 | ||
97 | .quad memcpy | ||
98 | .quad memcpy_c | ||
99 | .byte X86_FEATURE_K8_C | ||
100 | .byte .Lfinal-memcpy | ||
101 | .byte memcpy_c_end-memcpy_c | ||
102 | .previous | ||
103 | |||
104 | .section .altinstr_replacement,"ax" | ||
105 | /* rdi destination | ||
106 | * rsi source | ||
107 | * rdx count | ||
108 | */ | ||
109 | memcpy_c: | ||
110 | movq %rdi,%rax | ||
111 | movl %edx,%ecx | ||
112 | shrl $3,%ecx | ||
113 | andl $7,%edx | ||
114 | rep | ||
115 | movsq | ||
116 | movl %edx,%ecx | ||
117 | rep | ||
118 | movsb | ||
119 | ret | ||
120 | memcpy_c_end: | ||
121 | .previous | ||
diff --git a/arch/x86_64/lib/memmove.c b/arch/x86_64/lib/memmove.c
new file mode 100644
index 000000000000..e93d5255fdc9
--- /dev/null
+++ b/arch/x86_64/lib/memmove.c
@@ -0,0 +1,19 @@
1 | /* Normally compiler builtins are used, but sometimes the compiler calls out | ||
2 | of line code. Based on asm-i386/string.h. | ||
3 | */ | ||
4 | #define _STRING_C | ||
5 | #include <linux/string.h> | ||
6 | |||
7 | #undef memmove | ||
8 | void *memmove(void * dest,const void *src,size_t count) | ||
9 | { | ||
10 | if (dest < src) { | ||
11 | __inline_memcpy(dest,src,count); | ||
12 | } else { | ||
13 | char *p = (char *) dest + count; | ||
14 | char *s = (char *) src + count; | ||
15 | while (count--) | ||
16 | *--p = *--s; | ||
17 | } | ||
18 | return dest; | ||
19 | } | ||
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S
new file mode 100644
index 000000000000..4b4c40638640
--- /dev/null
+++ b/arch/x86_64/lib/memset.S
@@ -0,0 +1,125 @@
1 | /* Copyright 2002 Andi Kleen, SuSE Labs */ | ||
2 | /* | ||
3 | * ISO C memset - set a memory block to a byte value. | ||
4 | * | ||
5 | * rdi destination | ||
6 | * rsi value (char) | ||
7 | * rdx count (bytes) | ||
8 | * | ||
9 | * rax original destination | ||
10 | */ | ||
11 | .globl __memset | ||
12 | .globl memset | ||
13 | .p2align 4 | ||
14 | memset: | ||
15 | __memset: | ||
16 | movq %rdi,%r10 | ||
17 | movq %rdx,%r11 | ||
18 | |||
19 | /* expand byte value */ | ||
20 | movzbl %sil,%ecx | ||
21 | movabs $0x0101010101010101,%rax | ||
22 | mul %rcx /* with rax, clobbers rdx */ | ||
23 | |||
24 | /* align dst */ | ||
25 | movl %edi,%r9d | ||
26 | andl $7,%r9d | ||
27 | jnz .Lbad_alignment | ||
28 | .Lafter_bad_alignment: | ||
29 | |||
30 | movl %r11d,%ecx | ||
31 | shrl $6,%ecx | ||
32 | jz .Lhandle_tail | ||
33 | |||
34 | .p2align 4 | ||
35 | .Lloop_64: | ||
36 | decl %ecx | ||
37 | movq %rax,(%rdi) | ||
38 | movq %rax,8(%rdi) | ||
39 | movq %rax,16(%rdi) | ||
40 | movq %rax,24(%rdi) | ||
41 | movq %rax,32(%rdi) | ||
42 | movq %rax,40(%rdi) | ||
43 | movq %rax,48(%rdi) | ||
44 | movq %rax,56(%rdi) | ||
45 | leaq 64(%rdi),%rdi | ||
46 | jnz .Lloop_64 | ||
47 | |||
48 | /* Handle tail in loops. The loops should be faster than hard | ||
49 | to predict jump tables. */ | ||
50 | .p2align 4 | ||
51 | .Lhandle_tail: | ||
52 | movl %r11d,%ecx | ||
53 | andl $63&(~7),%ecx | ||
54 | jz .Lhandle_7 | ||
55 | shrl $3,%ecx | ||
56 | .p2align 4 | ||
57 | .Lloop_8: | ||
58 | decl %ecx | ||
59 | movq %rax,(%rdi) | ||
60 | leaq 8(%rdi),%rdi | ||
61 | jnz .Lloop_8 | ||
62 | |||
63 | .Lhandle_7: | ||
64 | movl %r11d,%ecx | ||
65 | andl $7,%ecx | ||
66 | jz .Lende | ||
67 | .p2align 4 | ||
68 | .Lloop_1: | ||
69 | decl %ecx | ||
70 | movb %al,(%rdi) | ||
71 | leaq 1(%rdi),%rdi | ||
72 | jnz .Lloop_1 | ||
73 | |||
74 | .Lende: | ||
75 | movq %r10,%rax | ||
76 | ret | ||
77 | |||
78 | .Lbad_alignment: | ||
79 | cmpq $7,%r11 | ||
80 | jbe .Lhandle_7 | ||
81 | movq %rax,(%rdi) /* unaligned store */ | ||
82 | movq $8,%r8 | ||
83 | subq %r9,%r8 | ||
84 | addq %r8,%rdi | ||
85 | subq %r8,%r11 | ||
86 | jmp .Lafter_bad_alignment | ||
87 | |||
88 | /* C stepping K8 runs faster using the string instructions. | ||
89 | It is also a lot simpler. Use this when possible */ | ||
90 | |||
91 | #include <asm/cpufeature.h> | ||
92 | |||
93 | .section .altinstructions,"a" | ||
94 | .align 8 | ||
95 | .quad memset | ||
96 | .quad memset_c | ||
97 | .byte X86_FEATURE_K8_C | ||
98 | .byte memset_c_end-memset_c | ||
99 | .byte memset_c_end-memset_c | ||
100 | .previous | ||
101 | |||
102 | .section .altinstr_replacement,"ax" | ||
103 | /* rdi destination | ||
104 | * rsi value | ||
105 | * rdx count | ||
106 | */ | ||
107 | memset_c: | ||
108 | movq %rdi,%r9 | ||
109 | movl %edx,%r8d | ||
110 | andl $7,%r8d | ||
111 | movl %edx,%ecx | ||
112 | shrl $3,%ecx | ||
113 | /* expand byte value */ | ||
114 | movzbl %sil,%esi | ||
115 | movabs $0x0101010101010101,%rax | ||
116 | mulq %rsi /* with rax, clobbers rdx */ | ||
117 | rep | ||
118 | stosq | ||
119 | movl %r8d,%ecx | ||
120 | rep | ||
121 | stosb | ||
122 | movq %r9,%rax | ||
123 | ret | ||
124 | memset_c_end: | ||
125 | .previous | ||
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
new file mode 100644
index 000000000000..0dee1fdcb162
--- /dev/null
+++ b/arch/x86_64/lib/putuser.S
@@ -0,0 +1,89 @@
1 | /* | ||
2 | * __put_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * (C) Copyright 2005 Andi Kleen | ||
6 | * | ||
7 | * These functions have a non-standard call interface | ||
8 | * to make them more efficient, especially as they | ||
9 | * return an error value in addition to the "real" | ||
10 | * return value. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * __put_user_X | ||
15 | * | ||
16 | * Inputs: %rcx contains the address | ||
17 | * %rdx contains new value | ||
18 | * | ||
19 | * Outputs: %rax is error code (0 or -EFAULT) | ||
20 | * | ||
21 | * %r8 is destroyed. | ||
22 | * | ||
23 | * These functions should not modify any other registers, | ||
24 | * as they get called from within inline assembly. | ||
25 | */ | ||
26 | |||
27 | #include <linux/linkage.h> | ||
28 | #include <asm/page.h> | ||
29 | #include <asm/errno.h> | ||
30 | #include <asm/offset.h> | ||
31 | #include <asm/thread_info.h> | ||
32 | |||
33 | .text | ||
34 | .p2align 4 | ||
35 | .globl __put_user_1 | ||
36 | __put_user_1: | ||
37 | GET_THREAD_INFO(%r8) | ||
38 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
39 | jae bad_put_user | ||
40 | 1: movb %dl,(%rcx) | ||
41 | xorl %eax,%eax | ||
42 | ret | ||
43 | |||
44 | .p2align 4 | ||
45 | .globl __put_user_2 | ||
46 | __put_user_2: | ||
47 | GET_THREAD_INFO(%r8) | ||
48 | addq $1,%rcx | ||
49 | jc bad_put_user | ||
50 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
51 | jae bad_put_user | ||
52 | 2: movw %dx,-1(%rcx) | ||
53 | xorl %eax,%eax | ||
54 | ret | ||
55 | |||
56 | .p2align 4 | ||
57 | .globl __put_user_4 | ||
58 | __put_user_4: | ||
59 | GET_THREAD_INFO(%r8) | ||
60 | addq $3,%rcx | ||
61 | jc bad_put_user | ||
62 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
63 | jae bad_put_user | ||
64 | 3: movl %edx,-3(%rcx) | ||
65 | xorl %eax,%eax | ||
66 | ret | ||
67 | |||
68 | .p2align 4 | ||
69 | .globl __put_user_8 | ||
70 | __put_user_8: | ||
71 | GET_THREAD_INFO(%r8) | ||
72 | addq $7,%rcx | ||
73 | jc bad_put_user | ||
74 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
75 | jae bad_put_user | ||
76 | 4: movq %rdx,-7(%rcx) | ||
77 | xorl %eax,%eax | ||
78 | ret | ||
79 | |||
80 | bad_put_user: | ||
81 | movq $(-EFAULT),%rax | ||
82 | ret | ||
83 | |||
84 | .section __ex_table,"a" | ||
85 | .quad 1b,bad_put_user | ||
86 | .quad 2b,bad_put_user | ||
87 | .quad 3b,bad_put_user | ||
88 | .quad 4b,bad_put_user | ||
89 | .previous | ||
diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
new file mode 100644
index 000000000000..acc1e2ca7ed7
--- /dev/null
+++ b/arch/x86_64/lib/thunk.S
@@ -0,0 +1,95 @@
1 | /* | ||
2 | * Save registers before calling assembly functions. This avoids | ||
3 | * disturbance of register allocation in some inline assembly constructs. | ||
4 | * Copyright 2001,2002 by Andi Kleen, SuSE Labs. | ||
5 | * Subject to the GNU public license, v.2. No warranty of any kind. | ||
6 | * $Id: thunk.S,v 1.2 2002/03/13 20:06:58 ak Exp $ | ||
7 | */ | ||
8 | |||
9 | #include <linux/config.h> | ||
10 | #include <linux/linkage.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | #include <asm/calling.h> | ||
13 | #include <asm/rwlock.h> | ||
14 | |||
15 | /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ | ||
16 | .macro thunk name,func | ||
17 | .globl \name | ||
18 | \name: | ||
19 | CFI_STARTPROC | ||
20 | SAVE_ARGS | ||
21 | call \func | ||
22 | jmp restore | ||
23 | CFI_ENDPROC | ||
24 | .endm | ||
25 | |||
26 | /* rdi: arg1 ... normal C conventions. rax is passed from C. */ | ||
27 | .macro thunk_retrax name,func | ||
28 | .globl \name | ||
29 | \name: | ||
30 | CFI_STARTPROC | ||
31 | SAVE_ARGS | ||
32 | call \func | ||
33 | jmp restore_norax | ||
34 | CFI_ENDPROC | ||
35 | .endm | ||
36 | |||
37 | |||
38 | .section .sched.text | ||
39 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | ||
40 | thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed | ||
41 | thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed | ||
42 | thunk rwsem_wake_thunk,rwsem_wake | ||
43 | thunk rwsem_downgrade_thunk,rwsem_downgrade_wake | ||
44 | #endif | ||
45 | thunk do_softirq_thunk,do_softirq | ||
46 | |||
47 | thunk __down_failed,__down | ||
48 | thunk_retrax __down_failed_interruptible,__down_interruptible | ||
49 | thunk_retrax __down_failed_trylock,__down_trylock | ||
50 | thunk __up_wakeup,__up | ||
51 | |||
52 | /* SAVE_ARGS below is used only for the .cfi directives it contains. */ | ||
53 | CFI_STARTPROC | ||
54 | SAVE_ARGS | ||
55 | restore: | ||
56 | RESTORE_ARGS | ||
57 | ret | ||
58 | CFI_ENDPROC | ||
59 | |||
60 | CFI_STARTPROC | ||
61 | SAVE_ARGS | ||
62 | restore_norax: | ||
63 | RESTORE_ARGS 1 | ||
64 | ret | ||
65 | CFI_ENDPROC | ||
66 | |||
67 | #ifdef CONFIG_SMP | ||
68 | /* Support for read/write spinlocks. */ | ||
69 | .text | ||
70 | /* rax: pointer to rwlock_t */ | ||
71 | ENTRY(__write_lock_failed) | ||
72 | lock | ||
73 | addl $RW_LOCK_BIAS,(%rax) | ||
74 | 1: rep | ||
75 | nop | ||
76 | cmpl $RW_LOCK_BIAS,(%rax) | ||
77 | jne 1b | ||
78 | lock | ||
79 | subl $RW_LOCK_BIAS,(%rax) | ||
80 | jnz __write_lock_failed | ||
81 | ret | ||
82 | |||
83 | /* rax: pointer to rwlock_t */ | ||
84 | ENTRY(__read_lock_failed) | ||
85 | lock | ||
86 | incl (%rax) | ||
87 | 1: rep | ||
88 | nop | ||
89 | cmpl $1,(%rax) | ||
90 | js 1b | ||
91 | lock | ||
92 | decl (%rax) | ||
93 | js __read_lock_failed | ||
94 | ret | ||
95 | #endif | ||
diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c
new file mode 100644
index 000000000000..db8abba1ad81
--- /dev/null
+++ b/arch/x86_64/lib/usercopy.c
@@ -0,0 +1,153 @@
1 | /* | ||
2 | * User address space access functions. | ||
3 | * | ||
4 | * Copyright 1997 Andi Kleen <ak@muc.de> | ||
5 | * Copyright 1997 Linus Torvalds | ||
6 | * Copyright 2002 Andi Kleen <ak@suse.de> | ||
7 | */ | ||
8 | #include <asm/uaccess.h> | ||
9 | |||
10 | /* | ||
11 | * Copy a null terminated string from userspace. | ||
12 | */ | ||
13 | |||
14 | #define __do_strncpy_from_user(dst,src,count,res) \ | ||
15 | do { \ | ||
16 | long __d0, __d1, __d2; \ | ||
17 | might_sleep(); \ | ||
18 | __asm__ __volatile__( \ | ||
19 | " testq %1,%1\n" \ | ||
20 | " jz 2f\n" \ | ||
21 | "0: lodsb\n" \ | ||
22 | " stosb\n" \ | ||
23 | " testb %%al,%%al\n" \ | ||
24 | " jz 1f\n" \ | ||
25 | " decq %1\n" \ | ||
26 | " jnz 0b\n" \ | ||
27 | "1: subq %1,%0\n" \ | ||
28 | "2:\n" \ | ||
29 | ".section .fixup,\"ax\"\n" \ | ||
30 | "3: movq %5,%0\n" \ | ||
31 | " jmp 2b\n" \ | ||
32 | ".previous\n" \ | ||
33 | ".section __ex_table,\"a\"\n" \ | ||
34 | " .align 8\n" \ | ||
35 | " .quad 0b,3b\n" \ | ||
36 | ".previous" \ | ||
37 | : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ | ||
38 | "=&D" (__d2) \ | ||
39 | : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ | ||
40 | : "memory"); \ | ||
41 | } while (0) | ||
42 | |||
43 | long | ||
44 | __strncpy_from_user(char *dst, const char __user *src, long count) | ||
45 | { | ||
46 | long res; | ||
47 | __do_strncpy_from_user(dst, src, count, res); | ||
48 | return res; | ||
49 | } | ||
50 | |||
51 | long | ||
52 | strncpy_from_user(char *dst, const char __user *src, long count) | ||
53 | { | ||
54 | long res = -EFAULT; | ||
55 | if (access_ok(VERIFY_READ, src, 1)) | ||
56 | __do_strncpy_from_user(dst, src, count, res); | ||
57 | return res; | ||
58 | } | ||
59 | |||
60 | /* | ||
61 | * Zero Userspace | ||
62 | */ | ||
63 | |||
64 | unsigned long __clear_user(void __user *addr, unsigned long size) | ||
65 | { | ||
66 | long __d0; | ||
67 | might_sleep(); | ||
68 | /* no memory constraint because it doesn't change any memory gcc knows | ||
69 | about */ | ||
70 | asm volatile( | ||
71 | " testq %[size8],%[size8]\n" | ||
72 | " jz 4f\n" | ||
73 | "0: movq %[zero],(%[dst])\n" | ||
74 | " addq %[eight],%[dst]\n" | ||
75 | " decl %%ecx ; jnz 0b\n" | ||
76 | "4: movq %[size1],%%rcx\n" | ||
77 | " testl %%ecx,%%ecx\n" | ||
78 | " jz 2f\n" | ||
79 | "1: movb %b[zero],(%[dst])\n" | ||
80 | " incq %[dst]\n" | ||
81 | " decl %%ecx ; jnz 1b\n" | ||
82 | "2:\n" | ||
83 | ".section .fixup,\"ax\"\n" | ||
84 | "3: lea 0(%[size1],%[size8],8),%[size8]\n" | ||
85 | " jmp 2b\n" | ||
86 | ".previous\n" | ||
87 | ".section __ex_table,\"a\"\n" | ||
88 | " .align 8\n" | ||
89 | " .quad 0b,3b\n" | ||
90 | " .quad 1b,2b\n" | ||
91 | ".previous" | ||
92 | : [size8] "=c"(size), [dst] "=&D" (__d0) | ||
93 | : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), | ||
94 | [zero] "r" (0UL), [eight] "r" (8UL)); | ||
95 | return size; | ||
96 | } | ||
97 | |||
98 | |||
99 | unsigned long clear_user(void __user *to, unsigned long n) | ||
100 | { | ||
101 | if (access_ok(VERIFY_WRITE, to, n)) | ||
102 | return __clear_user(to, n); | ||
103 | return n; | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * Return the size of a string (including the ending 0) | ||
108 | * | ||
109 | * Return 0 on exception, a value greater than N if too long | ||
110 | */ | ||
111 | |||
112 | long strnlen_user(const char __user *s, long n) | ||
113 | { | ||
114 | long res = 0; | ||
115 | char c; | ||
116 | |||
117 | if (!access_ok(VERIFY_READ, s, n)) | ||
118 | return 0; | ||
119 | |||
120 | while (1) { | ||
121 | if (res>n) | ||
122 | return n+1; | ||
123 | if (__get_user(c, s)) | ||
124 | return 0; | ||
125 | if (!c) | ||
126 | return res+1; | ||
127 | res++; | ||
128 | s++; | ||
129 | } | ||
130 | } | ||
131 | |||
132 | long strlen_user(const char __user *s) | ||
133 | { | ||
134 | long res = 0; | ||
135 | char c; | ||
136 | |||
137 | for (;;) { | ||
138 | if (get_user(c, s)) | ||
139 | return 0; | ||
140 | if (!c) | ||
141 | return res+1; | ||
142 | res++; | ||
143 | s++; | ||
144 | } | ||
145 | } | ||
146 | |||
147 | unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) | ||
148 | { | ||
149 | if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { | ||
150 | return copy_user_generic((__force void *)to, (__force void *)from, len); | ||
151 | } | ||
152 | return len; | ||
153 | } | ||
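Finally, since the __clear_user() asm above is fairly dense, here is a plain C sketch of its shape (illustrative only, with no fault handling): zero size/8 quadwords, then the remaining size&7 bytes, and report how many bytes were left unzeroed.

```c
/* Illustrative reference only -- the real function handles faults. */
static unsigned long clear_user_ref(void *addr, unsigned long size)
{
	unsigned long *quad = addr;
	unsigned long i, quads = size / 8;
	unsigned char *byte;

	for (i = 0; i < quads; i++)
		*quad++ = 0;
	byte = (unsigned char *)quad;
	for (i = 0; i < (size & 7); i++)
		*byte++ = 0;
	return 0;   /* on a fault the asm instead returns the untouched byte count */
}
```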