diff options
author | Harvey Harrison <harvey.harrison@gmail.com> | 2008-01-30 07:32:35 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-30 07:32:35 -0500 |
commit | 1dc85be087d6645575847dc23c37147a2352312b (patch) | |
tree | f5bd8543789decca03c7d8a152fc2427af71c975 /arch/x86 | |
parent | b6795e65f158d12d3124379fc50ec156ae60f888 (diff) |
x86: begin fault_{32|64}.c unification
Move X86_32 only get_segment_eip to X86_64
Move X86_64 only is_errata93 to X86_32
Change X86_32 loop in is_prefetch to highlight the differences
between them. Fold the logic from __is_prefetch in as well on
X86_32.
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/mm/fault_32.c | 97 | ||||
-rw-r--r-- | arch/x86/mm/fault_64.c | 124 |
2 files changed, 196 insertions, 25 deletions
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c index 870b5610555c..300c9d8b684a 100644 --- a/arch/x86/mm/fault_32.c +++ b/arch/x86/mm/fault_32.c | |||
@@ -61,6 +61,7 @@ static inline int notify_page_fault(struct pt_regs *regs) | |||
61 | #endif | 61 | #endif |
62 | } | 62 | } |
63 | 63 | ||
64 | #ifdef CONFIG_X86_32 | ||
64 | /* | 65 | /* |
65 | * Return EIP plus the CS segment base. The segment limit is also | 66 | * Return EIP plus the CS segment base. The segment limit is also |
66 | * adjusted, clamped to the kernel/user address space (whichever is | 67 | * adjusted, clamped to the kernel/user address space (whichever is |
@@ -135,26 +136,61 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, | |||
135 | *eip_limit = seg_limit; | 136 | *eip_limit = seg_limit; |
136 | return ip + base; | 137 | return ip + base; |
137 | } | 138 | } |
139 | #endif | ||
138 | 140 | ||
139 | /* | 141 | /* |
142 | * X86_32 | ||
140 | * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. | 143 | * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. |
141 | * Check that here and ignore it. | 144 | * Check that here and ignore it. |
145 | * | ||
146 | * X86_64 | ||
147 | * Sometimes the CPU reports invalid exceptions on prefetch. | ||
148 | * Check that here and ignore it. | ||
149 | * | ||
150 | * Opcode checker based on code by Richard Brunner | ||
142 | */ | 151 | */ |
143 | static int __is_prefetch(struct pt_regs *regs, unsigned long addr) | 152 | static int is_prefetch(struct pt_regs *regs, unsigned long addr, |
153 | unsigned long error_code) | ||
144 | { | 154 | { |
145 | unsigned long limit; | 155 | unsigned char *instr; |
146 | unsigned char *instr = (unsigned char *)get_segment_eip(regs, &limit); | ||
147 | int scan_more = 1; | 156 | int scan_more = 1; |
148 | int prefetch = 0; | 157 | int prefetch = 0; |
149 | int i; | 158 | unsigned char *max_instr; |
159 | |||
160 | #ifdef CONFIG_X86_32 | ||
161 | unsigned long limit; | ||
162 | if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
163 | boot_cpu_data.x86 >= 6)) { | ||
164 | /* Catch an obscure case of prefetch inside an NX page. */ | ||
165 | if (nx_enabled && (error_code & PF_INSTR)) | ||
166 | return 0; | ||
167 | } else { | ||
168 | return 0; | ||
169 | } | ||
170 | instr = (unsigned char *)get_segment_eip(regs, &limit); | ||
171 | #else | ||
172 | /* If it was a exec fault ignore */ | ||
173 | if (error_code & PF_INSTR) | ||
174 | return 0; | ||
175 | instr = (unsigned char __user *)convert_rip_to_linear(current, regs); | ||
176 | #endif | ||
177 | |||
178 | max_instr = instr + 15; | ||
179 | |||
180 | #ifdef CONFIG_X86_64 | ||
181 | if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) | ||
182 | return 0; | ||
183 | #endif | ||
150 | 184 | ||
151 | for (i = 0; scan_more && i < 15; i++) { | 185 | while (scan_more && instr < max_instr) { |
152 | unsigned char opcode; | 186 | unsigned char opcode; |
153 | unsigned char instr_hi; | 187 | unsigned char instr_hi; |
154 | unsigned char instr_lo; | 188 | unsigned char instr_lo; |
155 | 189 | ||
190 | #ifdef CONFIG_X86_32 | ||
156 | if (instr > (unsigned char *)limit) | 191 | if (instr > (unsigned char *)limit) |
157 | break; | 192 | break; |
193 | #endif | ||
158 | if (probe_kernel_address(instr, opcode)) | 194 | if (probe_kernel_address(instr, opcode)) |
159 | break; | 195 | break; |
160 | 196 | ||
@@ -196,8 +232,10 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) | |||
196 | case 0x00: | 232 | case 0x00: |
197 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ | 233 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ |
198 | scan_more = 0; | 234 | scan_more = 0; |
235 | #ifdef CONFIG_X86_32 | ||
199 | if (instr > (unsigned char *)limit) | 236 | if (instr > (unsigned char *)limit) |
200 | break; | 237 | break; |
238 | #endif | ||
201 | if (probe_kernel_address(instr, opcode)) | 239 | if (probe_kernel_address(instr, opcode)) |
202 | break; | 240 | break; |
203 | prefetch = (instr_lo == 0xF) && | 241 | prefetch = (instr_lo == 0xF) && |
@@ -211,19 +249,6 @@ static int __is_prefetch(struct pt_regs *regs, unsigned long addr) | |||
211 | return prefetch; | 249 | return prefetch; |
212 | } | 250 | } |
213 | 251 | ||
214 | static inline int is_prefetch(struct pt_regs *regs, unsigned long addr, | ||
215 | unsigned long error_code) | ||
216 | { | ||
217 | if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
218 | boot_cpu_data.x86 >= 6)) { | ||
219 | /* Catch an obscure case of prefetch inside an NX page. */ | ||
220 | if (nx_enabled && (error_code & 16)) | ||
221 | return 0; | ||
222 | return __is_prefetch(regs, addr); | ||
223 | } | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | static noinline void force_sig_info_fault(int si_signo, int si_code, | 252 | static noinline void force_sig_info_fault(int si_signo, int si_code, |
228 | unsigned long address, struct task_struct *tsk) | 253 | unsigned long address, struct task_struct *tsk) |
229 | { | 254 | { |
@@ -274,6 +299,42 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |||
274 | return pmd_k; | 299 | return pmd_k; |
275 | } | 300 | } |
276 | 301 | ||
302 | #ifdef CONFIG_X86_64 | ||
303 | static const char errata93_warning[] = | ||
304 | KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" | ||
305 | KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" | ||
306 | KERN_ERR "******* Please consider a BIOS update.\n" | ||
307 | KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; | ||
308 | |||
309 | /* Workaround for K8 erratum #93 & buggy BIOS. | ||
310 | BIOS SMM functions are required to use a specific workaround | ||
311 | to avoid corruption of the 64bit RIP register on C stepping K8. | ||
312 | A lot of BIOS that didn't get tested properly miss this. | ||
313 | The OS sees this as a page fault with the upper 32bits of RIP cleared. | ||
314 | Try to work around it here. | ||
315 | Note we only handle faults in kernel here. */ | ||
316 | |||
317 | static int is_errata93(struct pt_regs *regs, unsigned long address) | ||
318 | { | ||
319 | static int warned; | ||
320 | if (address != regs->ip) | ||
321 | return 0; | ||
322 | if ((address >> 32) != 0) | ||
323 | return 0; | ||
324 | address |= 0xffffffffUL << 32; | ||
325 | if ((address >= (u64)_stext && address <= (u64)_etext) || | ||
326 | (address >= MODULES_VADDR && address <= MODULES_END)) { | ||
327 | if (!warned) { | ||
328 | printk(errata93_warning); | ||
329 | warned = 1; | ||
330 | } | ||
331 | regs->ip = address; | ||
332 | return 1; | ||
333 | } | ||
334 | return 0; | ||
335 | } | ||
336 | #endif | ||
337 | |||
277 | /* | 338 | /* |
278 | * Handle a fault on the vmalloc or module mapping area | 339 | * Handle a fault on the vmalloc or module mapping area |
279 | * | 340 | * |
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c index 7e98a7691283..0d3d5979ce2c 100644 --- a/arch/x86/mm/fault_64.c +++ b/arch/x86/mm/fault_64.c | |||
@@ -64,32 +64,136 @@ static inline int notify_page_fault(struct pt_regs *regs) | |||
64 | #endif | 64 | #endif |
65 | } | 65 | } |
66 | 66 | ||
67 | /* Sometimes the CPU reports invalid exceptions on prefetch. | 67 | #ifdef CONFIG_X86_32 |
68 | Check that here and ignore. | 68 | /* |
69 | Opcode checker based on code by Richard Brunner */ | 69 | * Return EIP plus the CS segment base. The segment limit is also |
70 | static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, | 70 | * adjusted, clamped to the kernel/user address space (whichever is |
71 | unsigned long error_code) | 71 | * appropriate), and returned in *eip_limit. |
72 | * | ||
73 | * The segment is checked, because it might have been changed by another | ||
74 | * task between the original faulting instruction and here. | ||
75 | * | ||
76 | * If CS is no longer a valid code segment, or if EIP is beyond the | ||
77 | * limit, or if it is a kernel address when CS is not a kernel segment, | ||
78 | * then the returned value will be greater than *eip_limit. | ||
79 | * | ||
80 | * This is slow, but is very rarely executed. | ||
81 | */ | ||
82 | static inline unsigned long get_segment_eip(struct pt_regs *regs, | ||
83 | unsigned long *eip_limit) | ||
84 | { | ||
85 | unsigned long ip = regs->ip; | ||
86 | unsigned seg = regs->cs & 0xffff; | ||
87 | u32 seg_ar, seg_limit, base, *desc; | ||
88 | |||
89 | /* Unlikely, but must come before segment checks. */ | ||
90 | if (unlikely(regs->flags & VM_MASK)) { | ||
91 | base = seg << 4; | ||
92 | *eip_limit = base + 0xffff; | ||
93 | return base + (ip & 0xffff); | ||
94 | } | ||
95 | |||
96 | /* The standard kernel/user address space limit. */ | ||
97 | *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; | ||
98 | |||
99 | /* By far the most common cases. */ | ||
100 | if (likely(SEGMENT_IS_FLAT_CODE(seg))) | ||
101 | return ip; | ||
102 | |||
103 | /* Check the segment exists, is within the current LDT/GDT size, | ||
104 | that kernel/user (ring 0..3) has the appropriate privilege, | ||
105 | that it's a code segment, and get the limit. */ | ||
106 | __asm__("larl %3,%0; lsll %3,%1" | ||
107 | : "=&r" (seg_ar), "=r" (seg_limit) : "0" (0), "rm" (seg)); | ||
108 | if ((~seg_ar & 0x9800) || ip > seg_limit) { | ||
109 | *eip_limit = 0; | ||
110 | return 1; /* So that returned ip > *eip_limit. */ | ||
111 | } | ||
112 | |||
113 | /* Get the GDT/LDT descriptor base. | ||
114 | When you look for races in this code remember that | ||
115 | LDT and other horrors are only used in user space. */ | ||
116 | if (seg & (1<<2)) { | ||
117 | /* Must lock the LDT while reading it. */ | ||
118 | mutex_lock(&current->mm->context.lock); | ||
119 | desc = current->mm->context.ldt; | ||
120 | desc = (void *)desc + (seg & ~7); | ||
121 | } else { | ||
122 | /* Must disable preemption while reading the GDT. */ | ||
123 | desc = (u32 *)get_cpu_gdt_table(get_cpu()); | ||
124 | desc = (void *)desc + (seg & ~7); | ||
125 | } | ||
126 | |||
127 | /* Decode the code segment base from the descriptor */ | ||
128 | base = get_desc_base((struct desc_struct *)desc); | ||
129 | |||
130 | if (seg & (1<<2)) | ||
131 | mutex_unlock(&current->mm->context.lock); | ||
132 | else | ||
133 | put_cpu(); | ||
134 | |||
135 | /* Adjust EIP and segment limit, and clamp at the kernel limit. | ||
136 | It's legitimate for segments to wrap at 0xffffffff. */ | ||
137 | seg_limit += base; | ||
138 | if (seg_limit < *eip_limit && seg_limit >= base) | ||
139 | *eip_limit = seg_limit; | ||
140 | return ip + base; | ||
141 | } | ||
142 | #endif | ||
143 | |||
144 | /* | ||
145 | * X86_32 | ||
146 | * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. | ||
147 | * Check that here and ignore it. | ||
148 | * | ||
149 | * X86_64 | ||
150 | * Sometimes the CPU reports invalid exceptions on prefetch. | ||
151 | * Check that here and ignore it. | ||
152 | * | ||
153 | * Opcode checker based on code by Richard Brunner | ||
154 | */ | ||
155 | static int is_prefetch(struct pt_regs *regs, unsigned long addr, | ||
156 | unsigned long error_code) | ||
72 | { | 157 | { |
73 | unsigned char *instr; | 158 | unsigned char *instr; |
74 | int scan_more = 1; | 159 | int scan_more = 1; |
75 | int prefetch = 0; | 160 | int prefetch = 0; |
76 | unsigned char *max_instr; | 161 | unsigned char *max_instr; |
77 | 162 | ||
163 | #ifdef CONFIG_X86_32 | ||
164 | unsigned long limit; | ||
165 | if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
166 | boot_cpu_data.x86 >= 6)) { | ||
167 | /* Catch an obscure case of prefetch inside an NX page. */ | ||
168 | if (nx_enabled && (error_code & PF_INSTR)) | ||
169 | return 0; | ||
170 | } else { | ||
171 | return 0; | ||
172 | } | ||
173 | instr = (unsigned char *)get_segment_eip(regs, &limit); | ||
174 | #else | ||
78 | /* If it was a exec fault ignore */ | 175 | /* If it was a exec fault ignore */ |
79 | if (error_code & PF_INSTR) | 176 | if (error_code & PF_INSTR) |
80 | return 0; | 177 | return 0; |
81 | |||
82 | instr = (unsigned char __user *)convert_rip_to_linear(current, regs); | 178 | instr = (unsigned char __user *)convert_rip_to_linear(current, regs); |
179 | #endif | ||
180 | |||
83 | max_instr = instr + 15; | 181 | max_instr = instr + 15; |
84 | 182 | ||
183 | #ifdef CONFIG_X86_64 | ||
85 | if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) | 184 | if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) |
86 | return 0; | 185 | return 0; |
186 | #endif | ||
87 | 187 | ||
88 | while (scan_more && instr < max_instr) { | 188 | while (scan_more && instr < max_instr) { |
89 | unsigned char opcode; | 189 | unsigned char opcode; |
90 | unsigned char instr_hi; | 190 | unsigned char instr_hi; |
91 | unsigned char instr_lo; | 191 | unsigned char instr_lo; |
92 | 192 | ||
193 | #ifdef CONFIG_X86_32 | ||
194 | if (instr > (unsigned char *)limit) | ||
195 | break; | ||
196 | #endif | ||
93 | if (probe_kernel_address(instr, opcode)) | 197 | if (probe_kernel_address(instr, opcode)) |
94 | break; | 198 | break; |
95 | 199 | ||
@@ -125,12 +229,16 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, | |||
125 | scan_more = (instr_lo & 0xC) == 0x4; | 229 | scan_more = (instr_lo & 0xC) == 0x4; |
126 | break; | 230 | break; |
127 | case 0xF0: | 231 | case 0xF0: |
128 | /* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */ | 232 | /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ |
129 | scan_more = !instr_lo || (instr_lo>>1) == 1; | 233 | scan_more = !instr_lo || (instr_lo>>1) == 1; |
130 | break; | 234 | break; |
131 | case 0x00: | 235 | case 0x00: |
132 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ | 236 | /* Prefetch instruction is 0x0F0D or 0x0F18 */ |
133 | scan_more = 0; | 237 | scan_more = 0; |
238 | #ifdef CONFIG_X86_32 | ||
239 | if (instr > (unsigned char *)limit) | ||
240 | break; | ||
241 | #endif | ||
134 | if (probe_kernel_address(instr, opcode)) | 242 | if (probe_kernel_address(instr, opcode)) |
135 | break; | 243 | break; |
136 | prefetch = (instr_lo == 0xF) && | 244 | prefetch = (instr_lo == 0xF) && |
@@ -185,6 +293,7 @@ bad: | |||
185 | printk("BAD\n"); | 293 | printk("BAD\n"); |
186 | } | 294 | } |
187 | 295 | ||
296 | #ifdef CONFIG_X86_64 | ||
188 | static const char errata93_warning[] = | 297 | static const char errata93_warning[] = |
189 | KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" | 298 | KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" |
190 | KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" | 299 | KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" |
@@ -218,6 +327,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) | |||
218 | } | 327 | } |
219 | return 0; | 328 | return 0; |
220 | } | 329 | } |
330 | #endif | ||
221 | 331 | ||
222 | static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | 332 | static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, |
223 | unsigned long error_code) | 333 | unsigned long error_code) |