Diffstat (limited to 'arch')
-rw-r--r--  arch/x86/Kconfig                          |   1
-rw-r--r--  arch/x86/include/asm/linkage.h            |  18
-rw-r--r--  arch/x86/include/asm/pgtable.h            |  12
-rw-r--r--  arch/x86/include/asm/pgtable_32.h         |   7
-rw-r--r--  arch/x86/include/asm/pgtable_64.h         |   3
-rw-r--r--  arch/x86/include/asm/required-features.h  |   8
-rw-r--r--  arch/x86/include/asm/xor.h                | 491
-rw-r--r--  arch/x86/include/asm/xor_32.h             | 309
-rw-r--r--  arch/x86/include/asm/xor_64.h             | 305
-rw-r--r--  arch/x86/kernel/head_32.S                 |  93
-rw-r--r--  arch/x86/kernel/sys_x86_64.c              |   2
-rw-r--r--  arch/x86/mm/init_64.c                     |   4
12 files changed, 574 insertions, 679 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f8130a770653..a9e50ac90838 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -116,6 +116,7 @@ config X86
116 | select MODULES_USE_ELF_RELA if X86_64 | 116 | select MODULES_USE_ELF_RELA if X86_64 |
117 | select CLONE_BACKWARDS if X86_32 | 117 | select CLONE_BACKWARDS if X86_32 |
118 | select GENERIC_SIGALTSTACK | 118 | select GENERIC_SIGALTSTACK |
119 | select ARCH_USE_BUILTIN_BSWAP | ||
119 | 120 | ||
120 | config INSTRUCTION_DECODER | 121 | config INSTRUCTION_DECODER |
121 | def_bool y | 122 | def_bool y |
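Note on the Kconfig hunk above: selecting ARCH_USE_BUILTIN_BSWAP lets the generic byte-swapping headers use the compiler's __builtin_bswap*() intrinsics instead of hand-written asm, so the optimizer can fold the swap into adjacent loads and stores (for instance into MOVBE on Atom builds, which is what the NEED_MOVBE change further down enables). The following is a minimal sketch of the effect, assuming the usual __HAVE_BUILTIN_BSWAP32__ plumbing in the compiler headers; it is not code from this patch.

	/* Sketch only: simplified from the generic swab machinery. */
	static inline __u32 swab32_sketch(__u32 x)
	{
	#ifdef __HAVE_BUILTIN_BSWAP32__
		return __builtin_bswap32(x);	/* compiler may fuse load+swap into movbe */
	#else
		return ((x & 0x000000ffU) << 24) |
		       ((x & 0x0000ff00U) <<  8) |
		       ((x & 0x00ff0000U) >>  8) |
		       ((x & 0xff000000U) >> 24);
	#endif
	}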
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25d..79327e9483a3 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
27 | #define __asmlinkage_protect0(ret) \ | 27 | #define __asmlinkage_protect0(ret) \ |
28 | __asmlinkage_protect_n(ret) | 28 | __asmlinkage_protect_n(ret) |
29 | #define __asmlinkage_protect1(ret, arg1) \ | 29 | #define __asmlinkage_protect1(ret, arg1) \ |
30 | __asmlinkage_protect_n(ret, "g" (arg1)) | 30 | __asmlinkage_protect_n(ret, "m" (arg1)) |
31 | #define __asmlinkage_protect2(ret, arg1, arg2) \ | 31 | #define __asmlinkage_protect2(ret, arg1, arg2) \ |
32 | __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) | 32 | __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) |
33 | #define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ | 33 | #define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ |
34 | __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) | 34 | __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) |
35 | #define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ | 35 | #define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ |
36 | __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ | 36 | __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ |
37 | "g" (arg4)) | 37 | "m" (arg4)) |
38 | #define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ | 38 | #define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ |
39 | __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ | 39 | __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ |
40 | "g" (arg4), "g" (arg5)) | 40 | "m" (arg4), "m" (arg5)) |
41 | #define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ | 41 | #define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ |
42 | __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ | 42 | __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ |
43 | "g" (arg4), "g" (arg5), "g" (arg6)) | 43 | "m" (arg4), "m" (arg5), "m" (arg6)) |
44 | 44 | ||
45 | #endif /* CONFIG_X86_32 */ | 45 | #endif /* CONFIG_X86_32 */ |
46 | 46 | ||
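The switch from "g" to "m" constraints matters because, on 32-bit, asmlinkage functions read their arguments from the caller's stack; these protect macros exist to keep those stack slots alive. A "g" constraint may be satisfied by a register or an immediate, which does not pin the stack slot, while "m" forces the compiler to treat the slot itself as used. A simplified sketch of the idiom (not the exact kernel macro):

	/* Simplified sketch of the protect idiom. */
	#define protect1_sketch(ret, arg1) \
		asm volatile("" : "=r" (ret) : "0" (ret), "m" (arg1))
	/* "m" (arg1): arg1 must be addressable in memory, so its stack slot
	 * stays live across the call being protected.
	 * "g" (arg1): could be met with a register copy, leaving the slot
	 * free to be reused before the callee reads it. */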
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1c1a955e67c0..fc304279b559 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -786,6 +786,18 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
786 | memcpy(dst, src, count * sizeof(pgd_t)); | 786 | memcpy(dst, src, count * sizeof(pgd_t)); |
787 | } | 787 | } |
788 | 788 | ||
789 | /* | ||
790 | * The x86 doesn't have any external MMU info: the kernel page | ||
791 | * tables contain all the necessary information. | ||
792 | */ | ||
793 | static inline void update_mmu_cache(struct vm_area_struct *vma, | ||
794 | unsigned long addr, pte_t *ptep) | ||
795 | { | ||
796 | } | ||
797 | static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, | ||
798 | unsigned long addr, pmd_t *pmd) | ||
799 | { | ||
800 | } | ||
789 | 801 | ||
790 | #include <asm-generic/pgtable.h> | 802 | #include <asm-generic/pgtable.h> |
791 | #endif /* __ASSEMBLY__ */ | 803 | #endif /* __ASSEMBLY__ */ |
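update_mmu_cache()/update_mmu_cache_pmd() are hooks the generic MM code invokes after installing a PTE or PMD so that architectures with software-managed TLBs can preload the translation; x86 walks page tables in hardware, so the hooks stay empty. Hoisting them out of the 32/64-bit headers as inline functions keeps argument type checking without generating code. For orientation only, a rough sketch of the shape of a generic-MM call site (not a verbatim copy):

	/* Illustrative caller shape; assumes the usual set_pte_at() helper. */
	static void install_pte_sketch(struct vm_area_struct *vma, unsigned long addr,
				       pte_t *ptep, pte_t entry)
	{
		set_pte_at(vma->vm_mm, addr, ptep, entry);
		update_mmu_cache(vma, addr, ptep);	/* compiles to nothing on x86 */
	}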
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503e..9ee322103c6d 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,13 +66,6 @@ do { \
66 | __flush_tlb_one((vaddr)); \ | 66 | __flush_tlb_one((vaddr)); \ |
67 | } while (0) | 67 | } while (0) |
68 | 68 | ||
69 | /* | ||
70 | * The i386 doesn't have any external MMU info: the kernel page | ||
71 | * tables contain all the necessary information. | ||
72 | */ | ||
73 | #define update_mmu_cache(vma, address, ptep) do { } while (0) | ||
74 | #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) | ||
75 | |||
76 | #endif /* !__ASSEMBLY__ */ | 69 | #endif /* !__ASSEMBLY__ */ |
77 | 70 | ||
78 | /* | 71 | /* |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82e..615b0c78449f 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
142 | #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) | 142 | #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) |
143 | #define pte_unmap(pte) ((void)(pte))/* NOP */ | 143 | #define pte_unmap(pte) ((void)(pte))/* NOP */ |
144 | 144 | ||
145 | #define update_mmu_cache(vma, address, ptep) do { } while (0) | ||
146 | #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) | ||
147 | |||
148 | /* Encode and de-code a swap entry */ | 145 | /* Encode and de-code a swap entry */ |
149 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | 146 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
150 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) | 147 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c34..5c6e4fb370f5 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -47,6 +47,12 @@
47 | # define NEED_NOPL 0 | 47 | # define NEED_NOPL 0 |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | #ifdef CONFIG_MATOM | ||
51 | # define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) | ||
52 | #else | ||
53 | # define NEED_MOVBE 0 | ||
54 | #endif | ||
55 | |||
50 | #ifdef CONFIG_X86_64 | 56 | #ifdef CONFIG_X86_64 |
51 | #ifdef CONFIG_PARAVIRT | 57 | #ifdef CONFIG_PARAVIRT |
52 | /* Paravirtualized systems may not have PSE or PGE available */ | 58 | /* Paravirtualized systems may not have PSE or PGE available */ |
@@ -80,7 +86,7 @@
80 | 86 | ||
81 | #define REQUIRED_MASK2 0 | 87 | #define REQUIRED_MASK2 0 |
82 | #define REQUIRED_MASK3 (NEED_NOPL) | 88 | #define REQUIRED_MASK3 (NEED_NOPL) |
83 | #define REQUIRED_MASK4 0 | 89 | #define REQUIRED_MASK4 (NEED_MOVBE) |
84 | #define REQUIRED_MASK5 0 | 90 | #define REQUIRED_MASK5 0 |
85 | #define REQUIRED_MASK6 0 | 91 | #define REQUIRED_MASK6 0 |
86 | #define REQUIRED_MASK7 0 | 92 | #define REQUIRED_MASK7 0 |
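REQUIRED_MASK4 corresponds to CPUID capability word 4 (leaf 1, ECX), which is where X86_FEATURE_MOVBE lives, so a CONFIG_MATOM kernel now treats MOVBE as mandatory: the early CPU check can refuse to boot on processors without it, and feature tests for it fold to compile-time constants. A hedged sketch of how a required-feature word is consumed (names simplified, not the exact verify-CPU path):

	/* Sketch only: caps[] holds the per-word CPUID feature bits. */
	static int required_word4_ok_sketch(const u32 *caps)
	{
		/* Every bit set in REQUIRED_MASK4 must be present in word 4,
		 * e.g. MOVBE when the kernel was built with CONFIG_MATOM. */
		return (caps[4] & REQUIRED_MASK4) == REQUIRED_MASK4;
	}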
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45e..d8829751b3f8 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,10 +1,499 @@
1 | #ifdef CONFIG_KMEMCHECK | 1 | #ifdef CONFIG_KMEMCHECK |
2 | /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ | 2 | /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ |
3 | # include <asm-generic/xor.h> | 3 | # include <asm-generic/xor.h> |
4 | #elif !defined(_ASM_X86_XOR_H) | ||
5 | #define _ASM_X86_XOR_H | ||
6 | |||
7 | /* | ||
8 | * Optimized RAID-5 checksumming functions for SSE. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2, or (at your option) | ||
13 | * any later version. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * (for example /usr/src/linux/COPYING); if not, write to the Free | ||
17 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
18 | */ | ||
19 | |||
20 | /* | ||
21 | * Cache avoiding checksumming functions utilizing KNI instructions | ||
22 | * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Based on | ||
27 | * High-speed RAID5 checksumming functions utilizing SSE instructions. | ||
28 | * Copyright (C) 1998 Ingo Molnar. | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * x86-64 changes / gcc fixes from Andi Kleen. | ||
33 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
34 | * | ||
35 | * This hasn't been optimized for the hammer yet, but there are likely | ||
36 | * no advantages to be gotten from x86-64 here anyways. | ||
37 | */ | ||
38 | |||
39 | #include <asm/i387.h> | ||
40 | |||
41 | #ifdef CONFIG_X86_32 | ||
42 | /* reduce register pressure */ | ||
43 | # define XOR_CONSTANT_CONSTRAINT "i" | ||
4 | #else | 44 | #else |
45 | # define XOR_CONSTANT_CONSTRAINT "re" | ||
46 | #endif | ||
47 | |||
48 | #define OFFS(x) "16*("#x")" | ||
49 | #define PF_OFFS(x) "256+16*("#x")" | ||
50 | #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" | ||
51 | #define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" | ||
52 | #define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" | ||
53 | #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" | ||
54 | #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" | ||
55 | #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" | ||
56 | #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" | ||
57 | #define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" | ||
58 | #define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" | ||
59 | #define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" | ||
60 | #define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" | ||
61 | #define NOP(x) | ||
62 | |||
63 | #define BLK64(pf, op, i) \ | ||
64 | pf(i) \ | ||
65 | op(i, 0) \ | ||
66 | op(i + 1, 1) \ | ||
67 | op(i + 2, 2) \ | ||
68 | op(i + 3, 3) | ||
69 | |||
70 | static void | ||
71 | xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | ||
72 | { | ||
73 | unsigned long lines = bytes >> 8; | ||
74 | |||
75 | kernel_fpu_begin(); | ||
76 | |||
77 | asm volatile( | ||
78 | #undef BLOCK | ||
79 | #define BLOCK(i) \ | ||
80 | LD(i, 0) \ | ||
81 | LD(i + 1, 1) \ | ||
82 | PF1(i) \ | ||
83 | PF1(i + 2) \ | ||
84 | LD(i + 2, 2) \ | ||
85 | LD(i + 3, 3) \ | ||
86 | PF0(i + 4) \ | ||
87 | PF0(i + 6) \ | ||
88 | XO1(i, 0) \ | ||
89 | XO1(i + 1, 1) \ | ||
90 | XO1(i + 2, 2) \ | ||
91 | XO1(i + 3, 3) \ | ||
92 | ST(i, 0) \ | ||
93 | ST(i + 1, 1) \ | ||
94 | ST(i + 2, 2) \ | ||
95 | ST(i + 3, 3) \ | ||
96 | |||
97 | |||
98 | PF0(0) | ||
99 | PF0(2) | ||
100 | |||
101 | " .align 32 ;\n" | ||
102 | " 1: ;\n" | ||
103 | |||
104 | BLOCK(0) | ||
105 | BLOCK(4) | ||
106 | BLOCK(8) | ||
107 | BLOCK(12) | ||
108 | |||
109 | " add %[inc], %[p1] ;\n" | ||
110 | " add %[inc], %[p2] ;\n" | ||
111 | " dec %[cnt] ;\n" | ||
112 | " jnz 1b ;\n" | ||
113 | : [cnt] "+r" (lines), | ||
114 | [p1] "+r" (p1), [p2] "+r" (p2) | ||
115 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
116 | : "memory"); | ||
117 | |||
118 | kernel_fpu_end(); | ||
119 | } | ||
120 | |||
121 | static void | ||
122 | xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) | ||
123 | { | ||
124 | unsigned long lines = bytes >> 8; | ||
125 | |||
126 | kernel_fpu_begin(); | ||
127 | |||
128 | asm volatile( | ||
129 | #undef BLOCK | ||
130 | #define BLOCK(i) \ | ||
131 | BLK64(PF0, LD, i) \ | ||
132 | BLK64(PF1, XO1, i) \ | ||
133 | BLK64(NOP, ST, i) \ | ||
134 | |||
135 | " .align 32 ;\n" | ||
136 | " 1: ;\n" | ||
137 | |||
138 | BLOCK(0) | ||
139 | BLOCK(4) | ||
140 | BLOCK(8) | ||
141 | BLOCK(12) | ||
142 | |||
143 | " add %[inc], %[p1] ;\n" | ||
144 | " add %[inc], %[p2] ;\n" | ||
145 | " dec %[cnt] ;\n" | ||
146 | " jnz 1b ;\n" | ||
147 | : [cnt] "+r" (lines), | ||
148 | [p1] "+r" (p1), [p2] "+r" (p2) | ||
149 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
150 | : "memory"); | ||
151 | |||
152 | kernel_fpu_end(); | ||
153 | } | ||
154 | |||
155 | static void | ||
156 | xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
157 | unsigned long *p3) | ||
158 | { | ||
159 | unsigned long lines = bytes >> 8; | ||
160 | |||
161 | kernel_fpu_begin(); | ||
162 | |||
163 | asm volatile( | ||
164 | #undef BLOCK | ||
165 | #define BLOCK(i) \ | ||
166 | PF1(i) \ | ||
167 | PF1(i + 2) \ | ||
168 | LD(i, 0) \ | ||
169 | LD(i + 1, 1) \ | ||
170 | LD(i + 2, 2) \ | ||
171 | LD(i + 3, 3) \ | ||
172 | PF2(i) \ | ||
173 | PF2(i + 2) \ | ||
174 | PF0(i + 4) \ | ||
175 | PF0(i + 6) \ | ||
176 | XO1(i, 0) \ | ||
177 | XO1(i + 1, 1) \ | ||
178 | XO1(i + 2, 2) \ | ||
179 | XO1(i + 3, 3) \ | ||
180 | XO2(i, 0) \ | ||
181 | XO2(i + 1, 1) \ | ||
182 | XO2(i + 2, 2) \ | ||
183 | XO2(i + 3, 3) \ | ||
184 | ST(i, 0) \ | ||
185 | ST(i + 1, 1) \ | ||
186 | ST(i + 2, 2) \ | ||
187 | ST(i + 3, 3) \ | ||
188 | |||
189 | |||
190 | PF0(0) | ||
191 | PF0(2) | ||
192 | |||
193 | " .align 32 ;\n" | ||
194 | " 1: ;\n" | ||
195 | |||
196 | BLOCK(0) | ||
197 | BLOCK(4) | ||
198 | BLOCK(8) | ||
199 | BLOCK(12) | ||
200 | |||
201 | " add %[inc], %[p1] ;\n" | ||
202 | " add %[inc], %[p2] ;\n" | ||
203 | " add %[inc], %[p3] ;\n" | ||
204 | " dec %[cnt] ;\n" | ||
205 | " jnz 1b ;\n" | ||
206 | : [cnt] "+r" (lines), | ||
207 | [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) | ||
208 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
209 | : "memory"); | ||
210 | |||
211 | kernel_fpu_end(); | ||
212 | } | ||
213 | |||
214 | static void | ||
215 | xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
216 | unsigned long *p3) | ||
217 | { | ||
218 | unsigned long lines = bytes >> 8; | ||
219 | |||
220 | kernel_fpu_begin(); | ||
221 | |||
222 | asm volatile( | ||
223 | #undef BLOCK | ||
224 | #define BLOCK(i) \ | ||
225 | BLK64(PF0, LD, i) \ | ||
226 | BLK64(PF1, XO1, i) \ | ||
227 | BLK64(PF2, XO2, i) \ | ||
228 | BLK64(NOP, ST, i) \ | ||
229 | |||
230 | " .align 32 ;\n" | ||
231 | " 1: ;\n" | ||
232 | |||
233 | BLOCK(0) | ||
234 | BLOCK(4) | ||
235 | BLOCK(8) | ||
236 | BLOCK(12) | ||
237 | |||
238 | " add %[inc], %[p1] ;\n" | ||
239 | " add %[inc], %[p2] ;\n" | ||
240 | " add %[inc], %[p3] ;\n" | ||
241 | " dec %[cnt] ;\n" | ||
242 | " jnz 1b ;\n" | ||
243 | : [cnt] "+r" (lines), | ||
244 | [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) | ||
245 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
246 | : "memory"); | ||
247 | |||
248 | kernel_fpu_end(); | ||
249 | } | ||
250 | |||
251 | static void | ||
252 | xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
253 | unsigned long *p3, unsigned long *p4) | ||
254 | { | ||
255 | unsigned long lines = bytes >> 8; | ||
256 | |||
257 | kernel_fpu_begin(); | ||
258 | |||
259 | asm volatile( | ||
260 | #undef BLOCK | ||
261 | #define BLOCK(i) \ | ||
262 | PF1(i) \ | ||
263 | PF1(i + 2) \ | ||
264 | LD(i, 0) \ | ||
265 | LD(i + 1, 1) \ | ||
266 | LD(i + 2, 2) \ | ||
267 | LD(i + 3, 3) \ | ||
268 | PF2(i) \ | ||
269 | PF2(i + 2) \ | ||
270 | XO1(i, 0) \ | ||
271 | XO1(i + 1, 1) \ | ||
272 | XO1(i + 2, 2) \ | ||
273 | XO1(i + 3, 3) \ | ||
274 | PF3(i) \ | ||
275 | PF3(i + 2) \ | ||
276 | PF0(i + 4) \ | ||
277 | PF0(i + 6) \ | ||
278 | XO2(i, 0) \ | ||
279 | XO2(i + 1, 1) \ | ||
280 | XO2(i + 2, 2) \ | ||
281 | XO2(i + 3, 3) \ | ||
282 | XO3(i, 0) \ | ||
283 | XO3(i + 1, 1) \ | ||
284 | XO3(i + 2, 2) \ | ||
285 | XO3(i + 3, 3) \ | ||
286 | ST(i, 0) \ | ||
287 | ST(i + 1, 1) \ | ||
288 | ST(i + 2, 2) \ | ||
289 | ST(i + 3, 3) \ | ||
290 | |||
291 | |||
292 | PF0(0) | ||
293 | PF0(2) | ||
294 | |||
295 | " .align 32 ;\n" | ||
296 | " 1: ;\n" | ||
297 | |||
298 | BLOCK(0) | ||
299 | BLOCK(4) | ||
300 | BLOCK(8) | ||
301 | BLOCK(12) | ||
302 | |||
303 | " add %[inc], %[p1] ;\n" | ||
304 | " add %[inc], %[p2] ;\n" | ||
305 | " add %[inc], %[p3] ;\n" | ||
306 | " add %[inc], %[p4] ;\n" | ||
307 | " dec %[cnt] ;\n" | ||
308 | " jnz 1b ;\n" | ||
309 | : [cnt] "+r" (lines), [p1] "+r" (p1), | ||
310 | [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) | ||
311 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
312 | : "memory"); | ||
313 | |||
314 | kernel_fpu_end(); | ||
315 | } | ||
316 | |||
317 | static void | ||
318 | xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
319 | unsigned long *p3, unsigned long *p4) | ||
320 | { | ||
321 | unsigned long lines = bytes >> 8; | ||
322 | |||
323 | kernel_fpu_begin(); | ||
324 | |||
325 | asm volatile( | ||
326 | #undef BLOCK | ||
327 | #define BLOCK(i) \ | ||
328 | BLK64(PF0, LD, i) \ | ||
329 | BLK64(PF1, XO1, i) \ | ||
330 | BLK64(PF2, XO2, i) \ | ||
331 | BLK64(PF3, XO3, i) \ | ||
332 | BLK64(NOP, ST, i) \ | ||
333 | |||
334 | " .align 32 ;\n" | ||
335 | " 1: ;\n" | ||
336 | |||
337 | BLOCK(0) | ||
338 | BLOCK(4) | ||
339 | BLOCK(8) | ||
340 | BLOCK(12) | ||
341 | |||
342 | " add %[inc], %[p1] ;\n" | ||
343 | " add %[inc], %[p2] ;\n" | ||
344 | " add %[inc], %[p3] ;\n" | ||
345 | " add %[inc], %[p4] ;\n" | ||
346 | " dec %[cnt] ;\n" | ||
347 | " jnz 1b ;\n" | ||
348 | : [cnt] "+r" (lines), [p1] "+r" (p1), | ||
349 | [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) | ||
350 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
351 | : "memory"); | ||
352 | |||
353 | kernel_fpu_end(); | ||
354 | } | ||
355 | |||
356 | static void | ||
357 | xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
358 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | ||
359 | { | ||
360 | unsigned long lines = bytes >> 8; | ||
361 | |||
362 | kernel_fpu_begin(); | ||
363 | |||
364 | asm volatile( | ||
365 | #undef BLOCK | ||
366 | #define BLOCK(i) \ | ||
367 | PF1(i) \ | ||
368 | PF1(i + 2) \ | ||
369 | LD(i, 0) \ | ||
370 | LD(i + 1, 1) \ | ||
371 | LD(i + 2, 2) \ | ||
372 | LD(i + 3, 3) \ | ||
373 | PF2(i) \ | ||
374 | PF2(i + 2) \ | ||
375 | XO1(i, 0) \ | ||
376 | XO1(i + 1, 1) \ | ||
377 | XO1(i + 2, 2) \ | ||
378 | XO1(i + 3, 3) \ | ||
379 | PF3(i) \ | ||
380 | PF3(i + 2) \ | ||
381 | XO2(i, 0) \ | ||
382 | XO2(i + 1, 1) \ | ||
383 | XO2(i + 2, 2) \ | ||
384 | XO2(i + 3, 3) \ | ||
385 | PF4(i) \ | ||
386 | PF4(i + 2) \ | ||
387 | PF0(i + 4) \ | ||
388 | PF0(i + 6) \ | ||
389 | XO3(i, 0) \ | ||
390 | XO3(i + 1, 1) \ | ||
391 | XO3(i + 2, 2) \ | ||
392 | XO3(i + 3, 3) \ | ||
393 | XO4(i, 0) \ | ||
394 | XO4(i + 1, 1) \ | ||
395 | XO4(i + 2, 2) \ | ||
396 | XO4(i + 3, 3) \ | ||
397 | ST(i, 0) \ | ||
398 | ST(i + 1, 1) \ | ||
399 | ST(i + 2, 2) \ | ||
400 | ST(i + 3, 3) \ | ||
401 | |||
402 | |||
403 | PF0(0) | ||
404 | PF0(2) | ||
405 | |||
406 | " .align 32 ;\n" | ||
407 | " 1: ;\n" | ||
408 | |||
409 | BLOCK(0) | ||
410 | BLOCK(4) | ||
411 | BLOCK(8) | ||
412 | BLOCK(12) | ||
413 | |||
414 | " add %[inc], %[p1] ;\n" | ||
415 | " add %[inc], %[p2] ;\n" | ||
416 | " add %[inc], %[p3] ;\n" | ||
417 | " add %[inc], %[p4] ;\n" | ||
418 | " add %[inc], %[p5] ;\n" | ||
419 | " dec %[cnt] ;\n" | ||
420 | " jnz 1b ;\n" | ||
421 | : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), | ||
422 | [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) | ||
423 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
424 | : "memory"); | ||
425 | |||
426 | kernel_fpu_end(); | ||
427 | } | ||
428 | |||
429 | static void | ||
430 | xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
431 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | ||
432 | { | ||
433 | unsigned long lines = bytes >> 8; | ||
434 | |||
435 | kernel_fpu_begin(); | ||
436 | |||
437 | asm volatile( | ||
438 | #undef BLOCK | ||
439 | #define BLOCK(i) \ | ||
440 | BLK64(PF0, LD, i) \ | ||
441 | BLK64(PF1, XO1, i) \ | ||
442 | BLK64(PF2, XO2, i) \ | ||
443 | BLK64(PF3, XO3, i) \ | ||
444 | BLK64(PF4, XO4, i) \ | ||
445 | BLK64(NOP, ST, i) \ | ||
446 | |||
447 | " .align 32 ;\n" | ||
448 | " 1: ;\n" | ||
449 | |||
450 | BLOCK(0) | ||
451 | BLOCK(4) | ||
452 | BLOCK(8) | ||
453 | BLOCK(12) | ||
454 | |||
455 | " add %[inc], %[p1] ;\n" | ||
456 | " add %[inc], %[p2] ;\n" | ||
457 | " add %[inc], %[p3] ;\n" | ||
458 | " add %[inc], %[p4] ;\n" | ||
459 | " add %[inc], %[p5] ;\n" | ||
460 | " dec %[cnt] ;\n" | ||
461 | " jnz 1b ;\n" | ||
462 | : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), | ||
463 | [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) | ||
464 | : [inc] XOR_CONSTANT_CONSTRAINT (256UL) | ||
465 | : "memory"); | ||
466 | |||
467 | kernel_fpu_end(); | ||
468 | } | ||
469 | |||
470 | static struct xor_block_template xor_block_sse_pf64 = { | ||
471 | .name = "prefetch64-sse", | ||
472 | .do_2 = xor_sse_2_pf64, | ||
473 | .do_3 = xor_sse_3_pf64, | ||
474 | .do_4 = xor_sse_4_pf64, | ||
475 | .do_5 = xor_sse_5_pf64, | ||
476 | }; | ||
477 | |||
478 | #undef LD | ||
479 | #undef XO1 | ||
480 | #undef XO2 | ||
481 | #undef XO3 | ||
482 | #undef XO4 | ||
483 | #undef ST | ||
484 | #undef NOP | ||
485 | #undef BLK64 | ||
486 | #undef BLOCK | ||
487 | |||
488 | #undef XOR_CONSTANT_CONSTRAINT | ||
489 | |||
5 | #ifdef CONFIG_X86_32 | 490 | #ifdef CONFIG_X86_32 |
6 | # include <asm/xor_32.h> | 491 | # include <asm/xor_32.h> |
7 | #else | 492 | #else |
8 | # include <asm/xor_64.h> | 493 | # include <asm/xor_64.h> |
9 | #endif | 494 | #endif |
10 | #endif | 495 | |
496 | #define XOR_SELECT_TEMPLATE(FASTEST) \ | ||
497 | AVX_SELECT(FASTEST) | ||
498 | |||
499 | #endif /* _ASM_X86_XOR_H */ | ||
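With the SSE routines unified in xor.h, both 32- and 64-bit builds now benchmark the new "prefetch64-sse" template (which issues one prefetchnta per 64-byte chunk via BLK64) against the existing ones through XOR_TRY_TEMPLATES at boot, and XOR_SELECT_TEMPLATE/AVX_SELECT keeps the winner. Callers never name a template directly; they go through xor_blocks(). A hedged usage sketch, assuming the crypto/xor.c entry point:

	/* Sketch: XOR one source buffer into a destination buffer. */
	#include <linux/raid/xor.h>

	static void xor_into_sketch(void *dst, void *src, unsigned int bytes)
	{
		void *srcs[1] = { src };

		/* Dispatches to whichever template (e.g. "prefetch64-sse")
		 * won the boot-time xor_speed() benchmark. */
		xor_blocks(1, bytes, dst, srcs);
	}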
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e06..ce05722e3c68 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
2 | #define _ASM_X86_XOR_32_H | 2 | #define _ASM_X86_XOR_32_H |
3 | 3 | ||
4 | /* | 4 | /* |
5 | * Optimized RAID-5 checksumming functions for MMX and SSE. | 5 | * Optimized RAID-5 checksumming functions for MMX. |
6 | * | 6 | * |
7 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
8 | * it under the terms of the GNU General Public License as published by | 8 | * it under the terms of the GNU General Public License as published by |
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
529 | .do_5 = xor_p5_mmx_5, | 529 | .do_5 = xor_p5_mmx_5, |
530 | }; | 530 | }; |
531 | 531 | ||
532 | /* | ||
533 | * Cache avoiding checksumming functions utilizing KNI instructions | ||
534 | * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) | ||
535 | */ | ||
536 | |||
537 | #define OFFS(x) "16*("#x")" | ||
538 | #define PF_OFFS(x) "256+16*("#x")" | ||
539 | #define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" | ||
540 | #define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" | ||
541 | #define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" | ||
542 | #define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" | ||
543 | #define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" | ||
544 | #define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" | ||
545 | #define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" | ||
546 | #define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" | ||
547 | #define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" | ||
548 | #define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" | ||
549 | #define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" | ||
550 | #define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" | ||
551 | #define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" | ||
552 | |||
553 | |||
554 | static void | ||
555 | xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | ||
556 | { | ||
557 | unsigned long lines = bytes >> 8; | ||
558 | |||
559 | kernel_fpu_begin(); | ||
560 | |||
561 | asm volatile( | ||
562 | #undef BLOCK | ||
563 | #define BLOCK(i) \ | ||
564 | LD(i, 0) \ | ||
565 | LD(i + 1, 1) \ | ||
566 | PF1(i) \ | ||
567 | PF1(i + 2) \ | ||
568 | LD(i + 2, 2) \ | ||
569 | LD(i + 3, 3) \ | ||
570 | PF0(i + 4) \ | ||
571 | PF0(i + 6) \ | ||
572 | XO1(i, 0) \ | ||
573 | XO1(i + 1, 1) \ | ||
574 | XO1(i + 2, 2) \ | ||
575 | XO1(i + 3, 3) \ | ||
576 | ST(i, 0) \ | ||
577 | ST(i + 1, 1) \ | ||
578 | ST(i + 2, 2) \ | ||
579 | ST(i + 3, 3) \ | ||
580 | |||
581 | |||
582 | PF0(0) | ||
583 | PF0(2) | ||
584 | |||
585 | " .align 32 ;\n" | ||
586 | " 1: ;\n" | ||
587 | |||
588 | BLOCK(0) | ||
589 | BLOCK(4) | ||
590 | BLOCK(8) | ||
591 | BLOCK(12) | ||
592 | |||
593 | " addl $256, %1 ;\n" | ||
594 | " addl $256, %2 ;\n" | ||
595 | " decl %0 ;\n" | ||
596 | " jnz 1b ;\n" | ||
597 | : "+r" (lines), | ||
598 | "+r" (p1), "+r" (p2) | ||
599 | : | ||
600 | : "memory"); | ||
601 | |||
602 | kernel_fpu_end(); | ||
603 | } | ||
604 | |||
605 | static void | ||
606 | xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
607 | unsigned long *p3) | ||
608 | { | ||
609 | unsigned long lines = bytes >> 8; | ||
610 | |||
611 | kernel_fpu_begin(); | ||
612 | |||
613 | asm volatile( | ||
614 | #undef BLOCK | ||
615 | #define BLOCK(i) \ | ||
616 | PF1(i) \ | ||
617 | PF1(i + 2) \ | ||
618 | LD(i,0) \ | ||
619 | LD(i + 1, 1) \ | ||
620 | LD(i + 2, 2) \ | ||
621 | LD(i + 3, 3) \ | ||
622 | PF2(i) \ | ||
623 | PF2(i + 2) \ | ||
624 | PF0(i + 4) \ | ||
625 | PF0(i + 6) \ | ||
626 | XO1(i,0) \ | ||
627 | XO1(i + 1, 1) \ | ||
628 | XO1(i + 2, 2) \ | ||
629 | XO1(i + 3, 3) \ | ||
630 | XO2(i,0) \ | ||
631 | XO2(i + 1, 1) \ | ||
632 | XO2(i + 2, 2) \ | ||
633 | XO2(i + 3, 3) \ | ||
634 | ST(i,0) \ | ||
635 | ST(i + 1, 1) \ | ||
636 | ST(i + 2, 2) \ | ||
637 | ST(i + 3, 3) \ | ||
638 | |||
639 | |||
640 | PF0(0) | ||
641 | PF0(2) | ||
642 | |||
643 | " .align 32 ;\n" | ||
644 | " 1: ;\n" | ||
645 | |||
646 | BLOCK(0) | ||
647 | BLOCK(4) | ||
648 | BLOCK(8) | ||
649 | BLOCK(12) | ||
650 | |||
651 | " addl $256, %1 ;\n" | ||
652 | " addl $256, %2 ;\n" | ||
653 | " addl $256, %3 ;\n" | ||
654 | " decl %0 ;\n" | ||
655 | " jnz 1b ;\n" | ||
656 | : "+r" (lines), | ||
657 | "+r" (p1), "+r"(p2), "+r"(p3) | ||
658 | : | ||
659 | : "memory" ); | ||
660 | |||
661 | kernel_fpu_end(); | ||
662 | } | ||
663 | |||
664 | static void | ||
665 | xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
666 | unsigned long *p3, unsigned long *p4) | ||
667 | { | ||
668 | unsigned long lines = bytes >> 8; | ||
669 | |||
670 | kernel_fpu_begin(); | ||
671 | |||
672 | asm volatile( | ||
673 | #undef BLOCK | ||
674 | #define BLOCK(i) \ | ||
675 | PF1(i) \ | ||
676 | PF1(i + 2) \ | ||
677 | LD(i,0) \ | ||
678 | LD(i + 1, 1) \ | ||
679 | LD(i + 2, 2) \ | ||
680 | LD(i + 3, 3) \ | ||
681 | PF2(i) \ | ||
682 | PF2(i + 2) \ | ||
683 | XO1(i,0) \ | ||
684 | XO1(i + 1, 1) \ | ||
685 | XO1(i + 2, 2) \ | ||
686 | XO1(i + 3, 3) \ | ||
687 | PF3(i) \ | ||
688 | PF3(i + 2) \ | ||
689 | PF0(i + 4) \ | ||
690 | PF0(i + 6) \ | ||
691 | XO2(i,0) \ | ||
692 | XO2(i + 1, 1) \ | ||
693 | XO2(i + 2, 2) \ | ||
694 | XO2(i + 3, 3) \ | ||
695 | XO3(i,0) \ | ||
696 | XO3(i + 1, 1) \ | ||
697 | XO3(i + 2, 2) \ | ||
698 | XO3(i + 3, 3) \ | ||
699 | ST(i,0) \ | ||
700 | ST(i + 1, 1) \ | ||
701 | ST(i + 2, 2) \ | ||
702 | ST(i + 3, 3) \ | ||
703 | |||
704 | |||
705 | PF0(0) | ||
706 | PF0(2) | ||
707 | |||
708 | " .align 32 ;\n" | ||
709 | " 1: ;\n" | ||
710 | |||
711 | BLOCK(0) | ||
712 | BLOCK(4) | ||
713 | BLOCK(8) | ||
714 | BLOCK(12) | ||
715 | |||
716 | " addl $256, %1 ;\n" | ||
717 | " addl $256, %2 ;\n" | ||
718 | " addl $256, %3 ;\n" | ||
719 | " addl $256, %4 ;\n" | ||
720 | " decl %0 ;\n" | ||
721 | " jnz 1b ;\n" | ||
722 | : "+r" (lines), | ||
723 | "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) | ||
724 | : | ||
725 | : "memory" ); | ||
726 | |||
727 | kernel_fpu_end(); | ||
728 | } | ||
729 | |||
730 | static void | ||
731 | xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
732 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | ||
733 | { | ||
734 | unsigned long lines = bytes >> 8; | ||
735 | |||
736 | kernel_fpu_begin(); | ||
737 | |||
738 | /* Make sure GCC forgets anything it knows about p4 or p5, | ||
739 | such that it won't pass to the asm volatile below a | ||
740 | register that is shared with any other variable. That's | ||
741 | because we modify p4 and p5 there, but we can't mark them | ||
742 | as read/write, otherwise we'd overflow the 10-asm-operands | ||
743 | limit of GCC < 3.1. */ | ||
744 | asm("" : "+r" (p4), "+r" (p5)); | ||
745 | |||
746 | asm volatile( | ||
747 | #undef BLOCK | ||
748 | #define BLOCK(i) \ | ||
749 | PF1(i) \ | ||
750 | PF1(i + 2) \ | ||
751 | LD(i,0) \ | ||
752 | LD(i + 1, 1) \ | ||
753 | LD(i + 2, 2) \ | ||
754 | LD(i + 3, 3) \ | ||
755 | PF2(i) \ | ||
756 | PF2(i + 2) \ | ||
757 | XO1(i,0) \ | ||
758 | XO1(i + 1, 1) \ | ||
759 | XO1(i + 2, 2) \ | ||
760 | XO1(i + 3, 3) \ | ||
761 | PF3(i) \ | ||
762 | PF3(i + 2) \ | ||
763 | XO2(i,0) \ | ||
764 | XO2(i + 1, 1) \ | ||
765 | XO2(i + 2, 2) \ | ||
766 | XO2(i + 3, 3) \ | ||
767 | PF4(i) \ | ||
768 | PF4(i + 2) \ | ||
769 | PF0(i + 4) \ | ||
770 | PF0(i + 6) \ | ||
771 | XO3(i,0) \ | ||
772 | XO3(i + 1, 1) \ | ||
773 | XO3(i + 2, 2) \ | ||
774 | XO3(i + 3, 3) \ | ||
775 | XO4(i,0) \ | ||
776 | XO4(i + 1, 1) \ | ||
777 | XO4(i + 2, 2) \ | ||
778 | XO4(i + 3, 3) \ | ||
779 | ST(i,0) \ | ||
780 | ST(i + 1, 1) \ | ||
781 | ST(i + 2, 2) \ | ||
782 | ST(i + 3, 3) \ | ||
783 | |||
784 | |||
785 | PF0(0) | ||
786 | PF0(2) | ||
787 | |||
788 | " .align 32 ;\n" | ||
789 | " 1: ;\n" | ||
790 | |||
791 | BLOCK(0) | ||
792 | BLOCK(4) | ||
793 | BLOCK(8) | ||
794 | BLOCK(12) | ||
795 | |||
796 | " addl $256, %1 ;\n" | ||
797 | " addl $256, %2 ;\n" | ||
798 | " addl $256, %3 ;\n" | ||
799 | " addl $256, %4 ;\n" | ||
800 | " addl $256, %5 ;\n" | ||
801 | " decl %0 ;\n" | ||
802 | " jnz 1b ;\n" | ||
803 | : "+r" (lines), | ||
804 | "+r" (p1), "+r" (p2), "+r" (p3) | ||
805 | : "r" (p4), "r" (p5) | ||
806 | : "memory"); | ||
807 | |||
808 | /* p4 and p5 were modified, and now the variables are dead. | ||
809 | Clobber them just to be sure nobody does something stupid | ||
810 | like assuming they have some legal value. */ | ||
811 | asm("" : "=r" (p4), "=r" (p5)); | ||
812 | |||
813 | kernel_fpu_end(); | ||
814 | } | ||
815 | |||
816 | static struct xor_block_template xor_block_pIII_sse = { | 532 | static struct xor_block_template xor_block_pIII_sse = { |
817 | .name = "pIII_sse", | 533 | .name = "pIII_sse", |
818 | .do_2 = xor_sse_2, | 534 | .do_2 = xor_sse_2, |
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
827 | /* Also try the generic routines. */ | 543 | /* Also try the generic routines. */ |
828 | #include <asm-generic/xor.h> | 544 | #include <asm-generic/xor.h> |
829 | 545 | ||
546 | /* We force the use of the SSE xor block because it can write around L2. | ||
547 | We may also be able to load into the L1 only depending on how the cpu | ||
548 | deals with a load to a line that is being prefetched. */ | ||
830 | #undef XOR_TRY_TEMPLATES | 549 | #undef XOR_TRY_TEMPLATES |
831 | #define XOR_TRY_TEMPLATES \ | 550 | #define XOR_TRY_TEMPLATES \ |
832 | do { \ | 551 | do { \ |
833 | xor_speed(&xor_block_8regs); \ | ||
834 | xor_speed(&xor_block_8regs_p); \ | ||
835 | xor_speed(&xor_block_32regs); \ | ||
836 | xor_speed(&xor_block_32regs_p); \ | ||
837 | AVX_XOR_SPEED; \ | 552 | AVX_XOR_SPEED; \ |
838 | if (cpu_has_xmm) \ | 553 | if (cpu_has_xmm) { \ |
839 | xor_speed(&xor_block_pIII_sse); \ | 554 | xor_speed(&xor_block_pIII_sse); \ |
840 | if (cpu_has_mmx) { \ | 555 | xor_speed(&xor_block_sse_pf64); \ |
556 | } else if (cpu_has_mmx) { \ | ||
841 | xor_speed(&xor_block_pII_mmx); \ | 557 | xor_speed(&xor_block_pII_mmx); \ |
842 | xor_speed(&xor_block_p5_mmx); \ | 558 | xor_speed(&xor_block_p5_mmx); \ |
559 | } else { \ | ||
560 | xor_speed(&xor_block_8regs); \ | ||
561 | xor_speed(&xor_block_8regs_p); \ | ||
562 | xor_speed(&xor_block_32regs); \ | ||
563 | xor_speed(&xor_block_32regs_p); \ | ||
843 | } \ | 564 | } \ |
844 | } while (0) | 565 | } while (0) |
845 | 566 | ||
846 | /* We force the use of the SSE xor block because it can write around L2. | ||
847 | We may also be able to load into the L1 only depending on how the cpu | ||
848 | deals with a load to a line that is being prefetched. */ | ||
849 | #define XOR_SELECT_TEMPLATE(FASTEST) \ | ||
850 | AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) | ||
851 | |||
852 | #endif /* _ASM_X86_XOR_32_H */ | 567 | #endif /* _ASM_X86_XOR_32_H */ |
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af5..546f1e3b87cc 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -1,301 +1,6 @@
1 | #ifndef _ASM_X86_XOR_64_H | 1 | #ifndef _ASM_X86_XOR_64_H |
2 | #define _ASM_X86_XOR_64_H | 2 | #define _ASM_X86_XOR_64_H |
3 | 3 | ||
4 | /* | ||
5 | * Optimized RAID-5 checksumming functions for MMX and SSE. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2, or (at your option) | ||
10 | * any later version. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * (for example /usr/src/linux/COPYING); if not, write to the Free | ||
14 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
15 | */ | ||
16 | |||
17 | |||
18 | /* | ||
19 | * Cache avoiding checksumming functions utilizing KNI instructions | ||
20 | * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * Based on | ||
25 | * High-speed RAID5 checksumming functions utilizing SSE instructions. | ||
26 | * Copyright (C) 1998 Ingo Molnar. | ||
27 | */ | ||
28 | |||
29 | /* | ||
30 | * x86-64 changes / gcc fixes from Andi Kleen. | ||
31 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
32 | * | ||
33 | * This hasn't been optimized for the hammer yet, but there are likely | ||
34 | * no advantages to be gotten from x86-64 here anyways. | ||
35 | */ | ||
36 | |||
37 | #include <asm/i387.h> | ||
38 | |||
39 | #define OFFS(x) "16*("#x")" | ||
40 | #define PF_OFFS(x) "256+16*("#x")" | ||
41 | #define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" | ||
42 | #define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" | ||
43 | #define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" | ||
44 | #define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" | ||
45 | #define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" | ||
46 | #define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" | ||
47 | #define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" | ||
48 | #define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n" | ||
49 | #define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" | ||
50 | #define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" | ||
51 | #define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" | ||
52 | #define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" | ||
53 | #define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n" | ||
54 | |||
55 | |||
56 | static void | ||
57 | xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | ||
58 | { | ||
59 | unsigned int lines = bytes >> 8; | ||
60 | |||
61 | kernel_fpu_begin(); | ||
62 | |||
63 | asm volatile( | ||
64 | #undef BLOCK | ||
65 | #define BLOCK(i) \ | ||
66 | LD(i, 0) \ | ||
67 | LD(i + 1, 1) \ | ||
68 | PF1(i) \ | ||
69 | PF1(i + 2) \ | ||
70 | LD(i + 2, 2) \ | ||
71 | LD(i + 3, 3) \ | ||
72 | PF0(i + 4) \ | ||
73 | PF0(i + 6) \ | ||
74 | XO1(i, 0) \ | ||
75 | XO1(i + 1, 1) \ | ||
76 | XO1(i + 2, 2) \ | ||
77 | XO1(i + 3, 3) \ | ||
78 | ST(i, 0) \ | ||
79 | ST(i + 1, 1) \ | ||
80 | ST(i + 2, 2) \ | ||
81 | ST(i + 3, 3) \ | ||
82 | |||
83 | |||
84 | PF0(0) | ||
85 | PF0(2) | ||
86 | |||
87 | " .align 32 ;\n" | ||
88 | " 1: ;\n" | ||
89 | |||
90 | BLOCK(0) | ||
91 | BLOCK(4) | ||
92 | BLOCK(8) | ||
93 | BLOCK(12) | ||
94 | |||
95 | " addq %[inc], %[p1] ;\n" | ||
96 | " addq %[inc], %[p2] ;\n" | ||
97 | " decl %[cnt] ; jnz 1b" | ||
98 | : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines) | ||
99 | : [inc] "r" (256UL) | ||
100 | : "memory"); | ||
101 | |||
102 | kernel_fpu_end(); | ||
103 | } | ||
104 | |||
105 | static void | ||
106 | xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
107 | unsigned long *p3) | ||
108 | { | ||
109 | unsigned int lines = bytes >> 8; | ||
110 | |||
111 | kernel_fpu_begin(); | ||
112 | asm volatile( | ||
113 | #undef BLOCK | ||
114 | #define BLOCK(i) \ | ||
115 | PF1(i) \ | ||
116 | PF1(i + 2) \ | ||
117 | LD(i, 0) \ | ||
118 | LD(i + 1, 1) \ | ||
119 | LD(i + 2, 2) \ | ||
120 | LD(i + 3, 3) \ | ||
121 | PF2(i) \ | ||
122 | PF2(i + 2) \ | ||
123 | PF0(i + 4) \ | ||
124 | PF0(i + 6) \ | ||
125 | XO1(i, 0) \ | ||
126 | XO1(i + 1, 1) \ | ||
127 | XO1(i + 2, 2) \ | ||
128 | XO1(i + 3, 3) \ | ||
129 | XO2(i, 0) \ | ||
130 | XO2(i + 1, 1) \ | ||
131 | XO2(i + 2, 2) \ | ||
132 | XO2(i + 3, 3) \ | ||
133 | ST(i, 0) \ | ||
134 | ST(i + 1, 1) \ | ||
135 | ST(i + 2, 2) \ | ||
136 | ST(i + 3, 3) \ | ||
137 | |||
138 | |||
139 | PF0(0) | ||
140 | PF0(2) | ||
141 | |||
142 | " .align 32 ;\n" | ||
143 | " 1: ;\n" | ||
144 | |||
145 | BLOCK(0) | ||
146 | BLOCK(4) | ||
147 | BLOCK(8) | ||
148 | BLOCK(12) | ||
149 | |||
150 | " addq %[inc], %[p1] ;\n" | ||
151 | " addq %[inc], %[p2] ;\n" | ||
152 | " addq %[inc], %[p3] ;\n" | ||
153 | " decl %[cnt] ; jnz 1b" | ||
154 | : [cnt] "+r" (lines), | ||
155 | [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) | ||
156 | : [inc] "r" (256UL) | ||
157 | : "memory"); | ||
158 | kernel_fpu_end(); | ||
159 | } | ||
160 | |||
161 | static void | ||
162 | xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
163 | unsigned long *p3, unsigned long *p4) | ||
164 | { | ||
165 | unsigned int lines = bytes >> 8; | ||
166 | |||
167 | kernel_fpu_begin(); | ||
168 | |||
169 | asm volatile( | ||
170 | #undef BLOCK | ||
171 | #define BLOCK(i) \ | ||
172 | PF1(i) \ | ||
173 | PF1(i + 2) \ | ||
174 | LD(i, 0) \ | ||
175 | LD(i + 1, 1) \ | ||
176 | LD(i + 2, 2) \ | ||
177 | LD(i + 3, 3) \ | ||
178 | PF2(i) \ | ||
179 | PF2(i + 2) \ | ||
180 | XO1(i, 0) \ | ||
181 | XO1(i + 1, 1) \ | ||
182 | XO1(i + 2, 2) \ | ||
183 | XO1(i + 3, 3) \ | ||
184 | PF3(i) \ | ||
185 | PF3(i + 2) \ | ||
186 | PF0(i + 4) \ | ||
187 | PF0(i + 6) \ | ||
188 | XO2(i, 0) \ | ||
189 | XO2(i + 1, 1) \ | ||
190 | XO2(i + 2, 2) \ | ||
191 | XO2(i + 3, 3) \ | ||
192 | XO3(i, 0) \ | ||
193 | XO3(i + 1, 1) \ | ||
194 | XO3(i + 2, 2) \ | ||
195 | XO3(i + 3, 3) \ | ||
196 | ST(i, 0) \ | ||
197 | ST(i + 1, 1) \ | ||
198 | ST(i + 2, 2) \ | ||
199 | ST(i + 3, 3) \ | ||
200 | |||
201 | |||
202 | PF0(0) | ||
203 | PF0(2) | ||
204 | |||
205 | " .align 32 ;\n" | ||
206 | " 1: ;\n" | ||
207 | |||
208 | BLOCK(0) | ||
209 | BLOCK(4) | ||
210 | BLOCK(8) | ||
211 | BLOCK(12) | ||
212 | |||
213 | " addq %[inc], %[p1] ;\n" | ||
214 | " addq %[inc], %[p2] ;\n" | ||
215 | " addq %[inc], %[p3] ;\n" | ||
216 | " addq %[inc], %[p4] ;\n" | ||
217 | " decl %[cnt] ; jnz 1b" | ||
218 | : [cnt] "+c" (lines), | ||
219 | [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) | ||
220 | : [inc] "r" (256UL) | ||
221 | : "memory" ); | ||
222 | |||
223 | kernel_fpu_end(); | ||
224 | } | ||
225 | |||
226 | static void | ||
227 | xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | ||
228 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | ||
229 | { | ||
230 | unsigned int lines = bytes >> 8; | ||
231 | |||
232 | kernel_fpu_begin(); | ||
233 | |||
234 | asm volatile( | ||
235 | #undef BLOCK | ||
236 | #define BLOCK(i) \ | ||
237 | PF1(i) \ | ||
238 | PF1(i + 2) \ | ||
239 | LD(i, 0) \ | ||
240 | LD(i + 1, 1) \ | ||
241 | LD(i + 2, 2) \ | ||
242 | LD(i + 3, 3) \ | ||
243 | PF2(i) \ | ||
244 | PF2(i + 2) \ | ||
245 | XO1(i, 0) \ | ||
246 | XO1(i + 1, 1) \ | ||
247 | XO1(i + 2, 2) \ | ||
248 | XO1(i + 3, 3) \ | ||
249 | PF3(i) \ | ||
250 | PF3(i + 2) \ | ||
251 | XO2(i, 0) \ | ||
252 | XO2(i + 1, 1) \ | ||
253 | XO2(i + 2, 2) \ | ||
254 | XO2(i + 3, 3) \ | ||
255 | PF4(i) \ | ||
256 | PF4(i + 2) \ | ||
257 | PF0(i + 4) \ | ||
258 | PF0(i + 6) \ | ||
259 | XO3(i, 0) \ | ||
260 | XO3(i + 1, 1) \ | ||
261 | XO3(i + 2, 2) \ | ||
262 | XO3(i + 3, 3) \ | ||
263 | XO4(i, 0) \ | ||
264 | XO4(i + 1, 1) \ | ||
265 | XO4(i + 2, 2) \ | ||
266 | XO4(i + 3, 3) \ | ||
267 | ST(i, 0) \ | ||
268 | ST(i + 1, 1) \ | ||
269 | ST(i + 2, 2) \ | ||
270 | ST(i + 3, 3) \ | ||
271 | |||
272 | |||
273 | PF0(0) | ||
274 | PF0(2) | ||
275 | |||
276 | " .align 32 ;\n" | ||
277 | " 1: ;\n" | ||
278 | |||
279 | BLOCK(0) | ||
280 | BLOCK(4) | ||
281 | BLOCK(8) | ||
282 | BLOCK(12) | ||
283 | |||
284 | " addq %[inc], %[p1] ;\n" | ||
285 | " addq %[inc], %[p2] ;\n" | ||
286 | " addq %[inc], %[p3] ;\n" | ||
287 | " addq %[inc], %[p4] ;\n" | ||
288 | " addq %[inc], %[p5] ;\n" | ||
289 | " decl %[cnt] ; jnz 1b" | ||
290 | : [cnt] "+c" (lines), | ||
291 | [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4), | ||
292 | [p5] "+r" (p5) | ||
293 | : [inc] "r" (256UL) | ||
294 | : "memory"); | ||
295 | |||
296 | kernel_fpu_end(); | ||
297 | } | ||
298 | |||
299 | static struct xor_block_template xor_block_sse = { | 4 | static struct xor_block_template xor_block_sse = { |
300 | .name = "generic_sse", | 5 | .name = "generic_sse", |
301 | .do_2 = xor_sse_2, | 6 | .do_2 = xor_sse_2, |
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
308 | /* Also try the AVX routines */ | 13 | /* Also try the AVX routines */ |
309 | #include <asm/xor_avx.h> | 14 | #include <asm/xor_avx.h> |
310 | 15 | ||
16 | /* We force the use of the SSE xor block because it can write around L2. | ||
17 | We may also be able to load into the L1 only depending on how the cpu | ||
18 | deals with a load to a line that is being prefetched. */ | ||
311 | #undef XOR_TRY_TEMPLATES | 19 | #undef XOR_TRY_TEMPLATES |
312 | #define XOR_TRY_TEMPLATES \ | 20 | #define XOR_TRY_TEMPLATES \ |
313 | do { \ | 21 | do { \ |
314 | AVX_XOR_SPEED; \ | 22 | AVX_XOR_SPEED; \ |
23 | xor_speed(&xor_block_sse_pf64); \ | ||
315 | xor_speed(&xor_block_sse); \ | 24 | xor_speed(&xor_block_sse); \ |
316 | } while (0) | 25 | } while (0) |
317 | 26 | ||
318 | /* We force the use of the SSE xor block because it can write around L2. | ||
319 | We may also be able to load into the L1 only depending on how the cpu | ||
320 | deals with a load to a line that is being prefetched. */ | ||
321 | #define XOR_SELECT_TEMPLATE(FASTEST) \ | ||
322 | AVX_SELECT(&xor_block_sse) | ||
323 | |||
324 | #endif /* _ASM_X86_XOR_64_H */ | 27 | #endif /* _ASM_X86_XOR_64_H */ |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c8932c79e78b..3c3f58a0808f 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -307,36 +307,45 @@ default_entry:
307 | movl %eax,%cr0 | 307 | movl %eax,%cr0 |
308 | 308 | ||
309 | /* | 309 | /* |
310 | * New page tables may be in 4Mbyte page mode and may | 310 | * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave |
311 | * be using the global pages. | 311 | * bits like NT set. This would confuse the debugger if this code is traced. So |
312 | * initialize them properly now before switching to protected mode. That means | ||
313 | * DF in particular (even though we have cleared it earlier after copying the | ||
314 | * command line) because GCC expects it. | ||
315 | */ | ||
316 | pushl $0 | ||
317 | popfl | ||
318 | |||
319 | /* | ||
320 | * New page tables may be in 4Mbyte page mode and may be using the global pages. | ||
312 | * | 321 | * |
313 | * NOTE! If we are on a 486 we may have no cr4 at all! | 322 | * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists |
314 | * Specifically, cr4 exists if and only if CPUID exists | 323 | * if and only if CPUID exists and has flags other than the FPU flag set. |
315 | * and has flags other than the FPU flag set. | ||
316 | */ | 324 | */ |
325 | movl $-1,pa(X86_CPUID) # preset CPUID level | ||
317 | movl $X86_EFLAGS_ID,%ecx | 326 | movl $X86_EFLAGS_ID,%ecx |
318 | pushl %ecx | 327 | pushl %ecx |
319 | popfl | 328 | popfl # set EFLAGS=ID |
320 | pushfl | 329 | pushfl |
321 | popl %eax | 330 | popl %eax # get EFLAGS |
322 | pushl $0 | 331 | testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set? |
323 | popfl | 332 | jz enable_paging # hw disallowed setting of ID bit |
324 | pushfl | 333 | # which means no CPUID and no CR4 |
325 | popl %edx | 334 | |
326 | xorl %edx,%eax | 335 | xorl %eax,%eax |
327 | testl %ecx,%eax | 336 | cpuid |
328 | jz 6f # No ID flag = no CPUID = no CR4 | 337 | movl %eax,pa(X86_CPUID) # save largest std CPUID function |
329 | 338 | ||
330 | movl $1,%eax | 339 | movl $1,%eax |
331 | cpuid | 340 | cpuid |
332 | andl $~1,%edx # Ignore CPUID.FPU | 341 | andl $~1,%edx # Ignore CPUID.FPU |
333 | jz 6f # No flags or only CPUID.FPU = no CR4 | 342 | jz enable_paging # No flags or only CPUID.FPU = no CR4 |
334 | 343 | ||
335 | movl pa(mmu_cr4_features),%eax | 344 | movl pa(mmu_cr4_features),%eax |
336 | movl %eax,%cr4 | 345 | movl %eax,%cr4 |
337 | 346 | ||
338 | testb $X86_CR4_PAE, %al # check if PAE is enabled | 347 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
339 | jz 6f | 348 | jz enable_paging |
340 | 349 | ||
341 | /* Check if extended functions are implemented */ | 350 | /* Check if extended functions are implemented */ |
342 | movl $0x80000000, %eax | 351 | movl $0x80000000, %eax |
@@ -344,7 +353,7 @@ default_entry:
344 | /* Value must be in the range 0x80000001 to 0x8000ffff */ | 353 | /* Value must be in the range 0x80000001 to 0x8000ffff */ |
345 | subl $0x80000001, %eax | 354 | subl $0x80000001, %eax |
346 | cmpl $(0x8000ffff-0x80000001), %eax | 355 | cmpl $(0x8000ffff-0x80000001), %eax |
347 | ja 6f | 356 | ja enable_paging |
348 | 357 | ||
349 | /* Clear bogus XD_DISABLE bits */ | 358 | /* Clear bogus XD_DISABLE bits */ |
350 | call verify_cpu | 359 | call verify_cpu |
@@ -353,7 +362,7 @@ default_entry:
353 | cpuid | 362 | cpuid |
354 | /* Execute Disable bit supported? */ | 363 | /* Execute Disable bit supported? */ |
355 | btl $(X86_FEATURE_NX & 31), %edx | 364 | btl $(X86_FEATURE_NX & 31), %edx |
356 | jnc 6f | 365 | jnc enable_paging |
357 | 366 | ||
358 | /* Setup EFER (Extended Feature Enable Register) */ | 367 | /* Setup EFER (Extended Feature Enable Register) */ |
359 | movl $MSR_EFER, %ecx | 368 | movl $MSR_EFER, %ecx |
@@ -363,7 +372,7 @@ default_entry:
363 | /* Make changes effective */ | 372 | /* Make changes effective */ |
364 | wrmsr | 373 | wrmsr |
365 | 374 | ||
366 | 6: | 375 | enable_paging: |
367 | 376 | ||
368 | /* | 377 | /* |
369 | * Enable paging | 378 | * Enable paging |
@@ -378,14 +387,6 @@ default_entry:
378 | addl $__PAGE_OFFSET, %esp | 387 | addl $__PAGE_OFFSET, %esp |
379 | 388 | ||
380 | /* | 389 | /* |
381 | * Initialize eflags. Some BIOS's leave bits like NT set. This would | ||
382 | * confuse the debugger if this code is traced. | ||
383 | * XXX - best to initialize before switching to protected mode. | ||
384 | */ | ||
385 | pushl $0 | ||
386 | popfl | ||
387 | |||
388 | /* | ||
389 | * start system 32-bit setup. We need to re-do some of the things done | 390 | * start system 32-bit setup. We need to re-do some of the things done |
390 | * in 16-bit mode for the "real" operations. | 391 | * in 16-bit mode for the "real" operations. |
391 | */ | 392 | */ |
@@ -394,31 +395,11 @@ default_entry:
394 | jz 1f # Did we do this already? | 395 | jz 1f # Did we do this already? |
395 | call *%eax | 396 | call *%eax |
396 | 1: | 397 | 1: |
397 | 398 | ||
398 | /* check if it is 486 or 386. */ | ||
399 | /* | 399 | /* |
400 | * XXX - this does a lot of unnecessary setup. Alignment checks don't | 400 | * Check if it is 486 |
401 | * apply at our cpl of 0 and the stack ought to be aligned already, and | ||
402 | * we don't need to preserve eflags. | ||
403 | */ | 401 | */ |
404 | movl $-1,X86_CPUID # -1 for no CPUID initially | 402 | cmpl $-1,X86_CPUID |
405 | movb $3,X86 # at least 386 | ||
406 | pushfl # push EFLAGS | ||
407 | popl %eax # get EFLAGS | ||
408 | movl %eax,%ecx # save original EFLAGS | ||
409 | xorl $0x240000,%eax # flip AC and ID bits in EFLAGS | ||
410 | pushl %eax # copy to EFLAGS | ||
411 | popfl # set EFLAGS | ||
412 | pushfl # get new EFLAGS | ||
413 | popl %eax # put it in eax | ||
414 | xorl %ecx,%eax # change in flags | ||
415 | pushl %ecx # restore original EFLAGS | ||
416 | popfl | ||
417 | testl $0x40000,%eax # check if AC bit changed | ||
418 | je is386 | ||
419 | |||
420 | movb $4,X86 # at least 486 | ||
421 | testl $0x200000,%eax # check if ID bit changed | ||
422 | je is486 | 403 | je is486 |
423 | 404 | ||
424 | /* get vendor info */ | 405 | /* get vendor info */ |
@@ -444,11 +425,10 @@ default_entry:
444 | movb %cl,X86_MASK | 425 | movb %cl,X86_MASK |
445 | movl %edx,X86_CAPABILITY | 426 | movl %edx,X86_CAPABILITY |
446 | 427 | ||
447 | is486: movl $0x50022,%ecx # set AM, WP, NE and MP | 428 | is486: |
448 | jmp 2f | 429 | movb $4,X86 |
449 | 430 | movl $0x50022,%ecx # set AM, WP, NE and MP | |
450 | is386: movl $2,%ecx # set MP | 431 | movl %cr0,%eax |
451 | 2: movl %cr0,%eax | ||
452 | andl $0x80000011,%eax # Save PG,PE,ET | 432 | andl $0x80000011,%eax # Save PG,PE,ET |
453 | orl %ecx,%eax | 433 | orl %ecx,%eax |
454 | movl %eax,%cr0 | 434 | movl %eax,%cr0 |
@@ -473,7 +453,6 @@ is386: movl $2,%ecx # set MP
473 | xorl %eax,%eax # Clear LDT | 453 | xorl %eax,%eax # Clear LDT |
474 | lldt %ax | 454 | lldt %ax |
475 | 455 | ||
476 | cld # gcc2 wants the direction flag cleared at all times | ||
477 | pushl $0 # fake return address for unwinder | 456 | pushl $0 # fake return address for unwinder |
478 | jmp *(initial_code) | 457 | jmp *(initial_code) |
479 | 458 | ||
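The rewritten head_32.S startup path replaces the old AC/ID flag-flipping detection with a single probe done before paging is enabled: try to set EFLAGS.ID, and if the bit does not stick there is no CPUID (and therefore no CR4 either). For orientation, here is the classic C rendition of that probe; it is an illustrative sketch, not code from this file:

	/* 32-bit only; restores EFLAGS before returning. Illustrative sketch. */
	static int cpu_has_cpuid_sketch(void)
	{
		unsigned long saved, probed;

		asm volatile("pushfl; popl %0\n\t"	/* saved  = EFLAGS        */
			     "movl %0, %1\n\t"
			     "xorl %2, %1\n\t"		/* flip the ID bit        */
			     "pushl %1; popfl\n\t"	/* try to write it back   */
			     "pushfl; popl %1\n\t"	/* re-read EFLAGS         */
			     "pushl %0; popfl"		/* restore original flags */
			     : "=&r" (saved), "=&r" (probed)
			     : "ri" (0x00200000UL));	/* X86_EFLAGS_ID          */
		return (saved ^ probed) & 0x00200000UL;	/* toggled => CPUID exists */
	}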
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 97ef74b88e0f..dbded5aedb81 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
157 | if (flags & MAP_FIXED) | 157 | if (flags & MAP_FIXED) |
158 | return addr; | 158 | return addr; |
159 | 159 | ||
160 | /* for MAP_32BIT mappings we force the legact mmap base */ | 160 | /* for MAP_32BIT mappings we force the legacy mmap base */ |
161 | if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) | 161 | if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) |
162 | goto bottomup; | 162 | goto bottomup; |
163 | 163 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 75c9a6a59697..d6eeead43758 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -605,7 +605,7 @@ kernel_physical_mapping_init(unsigned long start,
605 | } | 605 | } |
606 | 606 | ||
607 | if (pgd_changed) | 607 | if (pgd_changed) |
608 | sync_global_pgds(addr, end); | 608 | sync_global_pgds(addr, end - 1); |
609 | 609 | ||
610 | __flush_tlb_all(); | 610 | __flush_tlb_all(); |
611 | 611 | ||
@@ -984,7 +984,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
984 | } | 984 | } |
985 | 985 | ||
986 | } | 986 | } |
987 | sync_global_pgds((unsigned long)start_page, end); | 987 | sync_global_pgds((unsigned long)start_page, end - 1); |
988 | return 0; | 988 | return 0; |
989 | } | 989 | } |
990 | 990 | ||