diff options
Diffstat (limited to 'arch/x86/kernel/traps.c')
-rw-r--r-- | arch/x86/kernel/traps.c | 1034 |
1 files changed, 1034 insertions, 0 deletions
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c new file mode 100644 index 000000000000..e062974cce34 --- /dev/null +++ b/arch/x86/kernel/traps.c | |||
@@ -0,0 +1,1034 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | * | ||
5 | * Pentium III FXSR, SSE support | ||
6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
7 | */ | ||
8 | |||
9 | /* | ||
10 | * Handle hardware traps and faults. | ||
11 | */ | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/kprobes.h> | ||
16 | #include <linux/uaccess.h> | ||
17 | #include <linux/utsname.h> | ||
18 | #include <linux/kdebug.h> | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/ptrace.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/unwind.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/kexec.h> | ||
27 | #include <linux/sched.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/bug.h> | ||
31 | #include <linux/nmi.h> | ||
32 | #include <linux/mm.h> | ||
33 | #include <linux/smp.h> | ||
34 | #include <linux/io.h> | ||
35 | |||
36 | #ifdef CONFIG_EISA | ||
37 | #include <linux/ioport.h> | ||
38 | #include <linux/eisa.h> | ||
39 | #endif | ||
40 | |||
41 | #ifdef CONFIG_MCA | ||
42 | #include <linux/mca.h> | ||
43 | #endif | ||
44 | |||
45 | #if defined(CONFIG_EDAC) | ||
46 | #include <linux/edac.h> | ||
47 | #endif | ||
48 | |||
49 | #include <asm/stacktrace.h> | ||
50 | #include <asm/processor.h> | ||
51 | #include <asm/debugreg.h> | ||
52 | #include <asm/atomic.h> | ||
53 | #include <asm/system.h> | ||
54 | #include <asm/unwind.h> | ||
55 | #include <asm/traps.h> | ||
56 | #include <asm/desc.h> | ||
57 | #include <asm/i387.h> | ||
58 | |||
59 | #include <mach_traps.h> | ||
60 | |||
61 | #ifdef CONFIG_X86_64 | ||
62 | #include <asm/pgalloc.h> | ||
63 | #include <asm/proto.h> | ||
64 | #include <asm/pda.h> | ||
65 | #else | ||
66 | #include <asm/processor-flags.h> | ||
67 | #include <asm/arch_hooks.h> | ||
68 | #include <asm/nmi.h> | ||
69 | #include <asm/smp.h> | ||
70 | #include <asm/io.h> | ||
71 | #include <asm/traps.h> | ||
72 | |||
73 | #include "cpu/mcheck/mce.h" | ||
74 | |||
75 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
76 | EXPORT_SYMBOL_GPL(used_vectors); | ||
77 | |||
78 | asmlinkage int system_call(void); | ||
79 | |||
80 | /* Do we ignore FPU interrupts ? */ | ||
81 | char ignore_fpu_irq; | ||
82 | |||
83 | /* | ||
84 | * The IDT has to be page-aligned to simplify the Pentium | ||
85 | * F0 0F bug workaround.. We have a special link segment | ||
86 | * for this. | ||
87 | */ | ||
88 | gate_desc idt_table[256] | ||
89 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | ||
90 | #endif | ||
91 | |||
/*
 * While non-zero, do_nmi() only accounts incoming NMIs and does not
 * dispatch them.  Raised by stop_nmi(), lowered by restart_nmi().
 */
static int ignore_nmis;
93 | |||
94 | static inline void conditional_sti(struct pt_regs *regs) | ||
95 | { | ||
96 | if (regs->flags & X86_EFLAGS_IF) | ||
97 | local_irq_enable(); | ||
98 | } | ||
99 | |||
100 | static inline void preempt_conditional_sti(struct pt_regs *regs) | ||
101 | { | ||
102 | inc_preempt_count(); | ||
103 | if (regs->flags & X86_EFLAGS_IF) | ||
104 | local_irq_enable(); | ||
105 | } | ||
106 | |||
107 | static inline void preempt_conditional_cli(struct pt_regs *regs) | ||
108 | { | ||
109 | if (regs->flags & X86_EFLAGS_IF) | ||
110 | local_irq_disable(); | ||
111 | dec_preempt_count(); | ||
112 | } | ||
113 | |||
114 | #ifdef CONFIG_X86_32 | ||
/*
 * Call die() with the given message, registers and error code unless
 * user_mode_vm() reports that @regs describes a user-mode context.
 */
static inline void
die_if_kernel(const char *str, struct pt_regs *regs, long err)
{
	if (user_mode_vm(regs))
		return;

	die(str, regs, err);
}
121 | |||
122 | /* | ||
123 | * Perform the lazy TSS's I/O bitmap copy. If the TSS has an | ||
124 | * invalid offset set (the LAZY one) and the faulting thread has | ||
125 | * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, | ||
126 | * we set the offset field correctly and return 1. | ||
127 | */ | ||
128 | static int lazy_iobitmap_copy(void) | ||
129 | { | ||
130 | struct thread_struct *thread; | ||
131 | struct tss_struct *tss; | ||
132 | int cpu; | ||
133 | |||
134 | cpu = get_cpu(); | ||
135 | tss = &per_cpu(init_tss, cpu); | ||
136 | thread = ¤t->thread; | ||
137 | |||
138 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | ||
139 | thread->io_bitmap_ptr) { | ||
140 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | ||
141 | thread->io_bitmap_max); | ||
142 | /* | ||
143 | * If the previously set map was extending to higher ports | ||
144 | * than the current one, pad extra space with 0xff (no access). | ||
145 | */ | ||
146 | if (thread->io_bitmap_max < tss->io_bitmap_max) { | ||
147 | memset((char *) tss->io_bitmap + | ||
148 | thread->io_bitmap_max, 0xff, | ||
149 | tss->io_bitmap_max - thread->io_bitmap_max); | ||
150 | } | ||
151 | tss->io_bitmap_max = thread->io_bitmap_max; | ||
152 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
153 | tss->io_bitmap_owner = thread; | ||
154 | put_cpu(); | ||
155 | |||
156 | return 1; | ||
157 | } | ||
158 | put_cpu(); | ||
159 | |||
160 | return 0; | ||
161 | } | ||
162 | #endif | ||
163 | |||
164 | static void __kprobes | ||
165 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | ||
166 | long error_code, siginfo_t *info) | ||
167 | { | ||
168 | struct task_struct *tsk = current; | ||
169 | |||
170 | #ifdef CONFIG_X86_32 | ||
171 | if (regs->flags & X86_VM_MASK) { | ||
172 | /* | ||
173 | * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. | ||
174 | * On nmi (interrupt 2), do_trap should not be called. | ||
175 | */ | ||
176 | if (trapnr < 6) | ||
177 | goto vm86_trap; | ||
178 | goto trap_signal; | ||
179 | } | ||
180 | #endif | ||
181 | |||
182 | if (!user_mode(regs)) | ||
183 | goto kernel_trap; | ||
184 | |||
185 | #ifdef CONFIG_X86_32 | ||
186 | trap_signal: | ||
187 | #endif | ||
188 | /* | ||
189 | * We want error_code and trap_no set for userspace faults and | ||
190 | * kernelspace faults which result in die(), but not | ||
191 | * kernelspace faults which are fixed up. die() gives the | ||
192 | * process no chance to handle the signal and notice the | ||
193 | * kernel fault information, so that won't result in polluting | ||
194 | * the information about previously queued, but not yet | ||
195 | * delivered, faults. See also do_general_protection below. | ||
196 | */ | ||
197 | tsk->thread.error_code = error_code; | ||
198 | tsk->thread.trap_no = trapnr; | ||
199 | |||
200 | #ifdef CONFIG_X86_64 | ||
201 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
202 | printk_ratelimit()) { | ||
203 | printk(KERN_INFO | ||
204 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
205 | tsk->comm, tsk->pid, str, | ||
206 | regs->ip, regs->sp, error_code); | ||
207 | print_vma_addr(" in ", regs->ip); | ||
208 | printk("\n"); | ||
209 | } | ||
210 | #endif | ||
211 | |||
212 | if (info) | ||
213 | force_sig_info(signr, info, tsk); | ||
214 | else | ||
215 | force_sig(signr, tsk); | ||
216 | return; | ||
217 | |||
218 | kernel_trap: | ||
219 | if (!fixup_exception(regs)) { | ||
220 | tsk->thread.error_code = error_code; | ||
221 | tsk->thread.trap_no = trapnr; | ||
222 | die(str, regs, error_code); | ||
223 | } | ||
224 | return; | ||
225 | |||
226 | #ifdef CONFIG_X86_32 | ||
227 | vm86_trap: | ||
228 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, | ||
229 | error_code, trapnr)) | ||
230 | goto trap_signal; | ||
231 | return; | ||
232 | #endif | ||
233 | } | ||
234 | |||
/*
 * Generate a trap handler do_<name>() that needs no siginfo: it notifies
 * the die chain, gives up if a notifier answered NOTIFY_STOP, conditionally
 * re-enables interrupts and hands the trap to do_trap() with a NULL info.
 */
#define DO_ERROR(trapnr, signr, str, name)				\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
	int verdict = notify_die(DIE_TRAP, str, regs, error_code,	\
				 trapnr, signr);			\
									\
	if (verdict == NOTIFY_STOP)					\
		return;							\
									\
	conditional_sti(regs);						\
	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
}
244 | |||
/*
 * Generate a trap handler do_<name>() that delivers a filled-in siginfo.
 *
 * @sicode becomes si_code and @siaddr becomes si_addr.  @siaddr is
 * parenthesised before the cast so that any expression can be passed
 * safely.  The siginfo is filled in before the die chain is notified, so
 * @siaddr is evaluated against the registers as they were on entry.
 * If a notifier answers NOTIFY_STOP the handler returns without signalling.
 */
#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
{									\
	siginfo_t info;							\
									\
	info.si_signo = signr;						\
	info.si_errno = 0;						\
	info.si_code = sicode;						\
	info.si_addr = (void __user *)(siaddr);				\
	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
							== NOTIFY_STOP)	\
		return;							\
	conditional_sti(regs);						\
	do_trap(trapnr, signr, str, regs, error_code, &info);		\
}
259 | |||
260 | DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | ||
261 | DO_ERROR(4, SIGSEGV, "overflow", overflow) | ||
262 | DO_ERROR(5, SIGSEGV, "bounds", bounds) | ||
263 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | ||
264 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | ||
265 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | ||
266 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | ||
267 | #ifdef CONFIG_X86_32 | ||
268 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | ||
269 | #endif | ||
270 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | ||
271 | |||
272 | #ifdef CONFIG_X86_64 | ||
273 | /* Runs on IST stack */ | ||
274 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
275 | { | ||
276 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
277 | 12, SIGBUS) == NOTIFY_STOP) | ||
278 | return; | ||
279 | preempt_conditional_sti(regs); | ||
280 | do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); | ||
281 | preempt_conditional_cli(regs); | ||
282 | } | ||
283 | |||
284 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
285 | { | ||
286 | static const char str[] = "double fault"; | ||
287 | struct task_struct *tsk = current; | ||
288 | |||
289 | /* Return not checked because double check cannot be ignored */ | ||
290 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | ||
291 | |||
292 | tsk->thread.error_code = error_code; | ||
293 | tsk->thread.trap_no = 8; | ||
294 | |||
295 | /* This is always a kernel trap and never fixable (and thus must | ||
296 | never return). */ | ||
297 | for (;;) | ||
298 | die(str, regs, error_code); | ||
299 | } | ||
300 | #endif | ||
301 | |||
302 | dotraplinkage void __kprobes | ||
303 | do_general_protection(struct pt_regs *regs, long error_code) | ||
304 | { | ||
305 | struct task_struct *tsk; | ||
306 | |||
307 | conditional_sti(regs); | ||
308 | |||
309 | #ifdef CONFIG_X86_32 | ||
310 | if (lazy_iobitmap_copy()) { | ||
311 | /* restart the faulting instruction */ | ||
312 | return; | ||
313 | } | ||
314 | |||
315 | if (regs->flags & X86_VM_MASK) | ||
316 | goto gp_in_vm86; | ||
317 | #endif | ||
318 | |||
319 | tsk = current; | ||
320 | if (!user_mode(regs)) | ||
321 | goto gp_in_kernel; | ||
322 | |||
323 | tsk->thread.error_code = error_code; | ||
324 | tsk->thread.trap_no = 13; | ||
325 | |||
326 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | ||
327 | printk_ratelimit()) { | ||
328 | printk(KERN_INFO | ||
329 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
330 | tsk->comm, task_pid_nr(tsk), | ||
331 | regs->ip, regs->sp, error_code); | ||
332 | print_vma_addr(" in ", regs->ip); | ||
333 | printk("\n"); | ||
334 | } | ||
335 | |||
336 | force_sig(SIGSEGV, tsk); | ||
337 | return; | ||
338 | |||
339 | #ifdef CONFIG_X86_32 | ||
340 | gp_in_vm86: | ||
341 | local_irq_enable(); | ||
342 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | ||
343 | return; | ||
344 | #endif | ||
345 | |||
346 | gp_in_kernel: | ||
347 | if (fixup_exception(regs)) | ||
348 | return; | ||
349 | |||
350 | tsk->thread.error_code = error_code; | ||
351 | tsk->thread.trap_no = 13; | ||
352 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
353 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | ||
354 | return; | ||
355 | die("general protection fault", regs, error_code); | ||
356 | } | ||
357 | |||
358 | static notrace __kprobes void | ||
359 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | ||
360 | { | ||
361 | printk(KERN_EMERG | ||
362 | "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | ||
363 | reason, smp_processor_id()); | ||
364 | |||
365 | printk(KERN_EMERG | ||
366 | "You have some hardware problem, likely on the PCI bus.\n"); | ||
367 | |||
368 | #if defined(CONFIG_EDAC) | ||
369 | if (edac_handler_set()) { | ||
370 | edac_atomic_assert_error(); | ||
371 | return; | ||
372 | } | ||
373 | #endif | ||
374 | |||
375 | if (panic_on_unrecovered_nmi) | ||
376 | panic("NMI: Not continuing"); | ||
377 | |||
378 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
379 | |||
380 | /* Clear and disable the memory parity error line. */ | ||
381 | reason = (reason & 0xf) | 4; | ||
382 | outb(reason, 0x61); | ||
383 | } | ||
384 | |||
385 | static notrace __kprobes void | ||
386 | io_check_error(unsigned char reason, struct pt_regs *regs) | ||
387 | { | ||
388 | unsigned long i; | ||
389 | |||
390 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); | ||
391 | show_registers(regs); | ||
392 | |||
393 | /* Re-enable the IOCK line, wait for a few seconds */ | ||
394 | reason = (reason & 0xf) | 8; | ||
395 | outb(reason, 0x61); | ||
396 | |||
397 | i = 2000; | ||
398 | while (--i) | ||
399 | udelay(1000); | ||
400 | |||
401 | reason &= ~8; | ||
402 | outb(reason, 0x61); | ||
403 | } | ||
404 | |||
405 | static notrace __kprobes void | ||
406 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | ||
407 | { | ||
408 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == | ||
409 | NOTIFY_STOP) | ||
410 | return; | ||
411 | #ifdef CONFIG_MCA | ||
412 | /* | ||
413 | * Might actually be able to figure out what the guilty party | ||
414 | * is: | ||
415 | */ | ||
416 | if (MCA_bus) { | ||
417 | mca_handle_nmi(); | ||
418 | return; | ||
419 | } | ||
420 | #endif | ||
421 | printk(KERN_EMERG | ||
422 | "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | ||
423 | reason, smp_processor_id()); | ||
424 | |||
425 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | ||
426 | if (panic_on_unrecovered_nmi) | ||
427 | panic("NMI: Not continuing"); | ||
428 | |||
429 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
430 | } | ||
431 | |||
432 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | ||
433 | { | ||
434 | unsigned char reason = 0; | ||
435 | int cpu; | ||
436 | |||
437 | cpu = smp_processor_id(); | ||
438 | |||
439 | /* Only the BSP gets external NMIs from the system. */ | ||
440 | if (!cpu) | ||
441 | reason = get_nmi_reason(); | ||
442 | |||
443 | if (!(reason & 0xc0)) { | ||
444 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | ||
445 | == NOTIFY_STOP) | ||
446 | return; | ||
447 | #ifdef CONFIG_X86_LOCAL_APIC | ||
448 | /* | ||
449 | * Ok, so this is none of the documented NMI sources, | ||
450 | * so it must be the NMI watchdog. | ||
451 | */ | ||
452 | if (nmi_watchdog_tick(regs, reason)) | ||
453 | return; | ||
454 | if (!do_nmi_callback(regs, cpu)) | ||
455 | unknown_nmi_error(reason, regs); | ||
456 | #else | ||
457 | unknown_nmi_error(reason, regs); | ||
458 | #endif | ||
459 | |||
460 | return; | ||
461 | } | ||
462 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | ||
463 | return; | ||
464 | |||
465 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | ||
466 | if (reason & 0x80) | ||
467 | mem_parity_error(reason, regs); | ||
468 | if (reason & 0x40) | ||
469 | io_check_error(reason, regs); | ||
470 | #ifdef CONFIG_X86_32 | ||
471 | /* | ||
472 | * Reassert NMI in case it became active meanwhile | ||
473 | * as it's edge-triggered: | ||
474 | */ | ||
475 | reassert_nmi(); | ||
476 | #endif | ||
477 | } | ||
478 | |||
479 | dotraplinkage notrace __kprobes void | ||
480 | do_nmi(struct pt_regs *regs, long error_code) | ||
481 | { | ||
482 | nmi_enter(); | ||
483 | |||
484 | #ifdef CONFIG_X86_32 | ||
485 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | ||
486 | #else | ||
487 | add_pda(__nmi_count, 1); | ||
488 | #endif | ||
489 | |||
490 | if (!ignore_nmis) | ||
491 | default_do_nmi(regs); | ||
492 | |||
493 | nmi_exit(); | ||
494 | } | ||
495 | |||
496 | void stop_nmi(void) | ||
497 | { | ||
498 | acpi_nmi_disable(); | ||
499 | ignore_nmis++; | ||
500 | } | ||
501 | |||
502 | void restart_nmi(void) | ||
503 | { | ||
504 | ignore_nmis--; | ||
505 | acpi_nmi_enable(); | ||
506 | } | ||
507 | |||
508 | /* May run on IST stack. */ | ||
509 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | ||
510 | { | ||
511 | #ifdef CONFIG_KPROBES | ||
512 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | ||
513 | == NOTIFY_STOP) | ||
514 | return; | ||
515 | #else | ||
516 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) | ||
517 | == NOTIFY_STOP) | ||
518 | return; | ||
519 | #endif | ||
520 | |||
521 | preempt_conditional_sti(regs); | ||
522 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | ||
523 | preempt_conditional_cli(regs); | ||
524 | } | ||
525 | |||
526 | #ifdef CONFIG_X86_64 | ||
527 | /* Help handler running on IST stack to switch back to user stack | ||
528 | for scheduling or signal handling. The actual stack switch is done in | ||
529 | entry.S */ | ||
530 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | ||
531 | { | ||
532 | struct pt_regs *regs = eregs; | ||
533 | /* Did already sync */ | ||
534 | if (eregs == (struct pt_regs *)eregs->sp) | ||
535 | ; | ||
536 | /* Exception from user space */ | ||
537 | else if (user_mode(eregs)) | ||
538 | regs = task_pt_regs(current); | ||
539 | /* Exception from kernel and interrupts are enabled. Move to | ||
540 | kernel process stack. */ | ||
541 | else if (eregs->flags & X86_EFLAGS_IF) | ||
542 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
543 | if (eregs != regs) | ||
544 | *regs = *eregs; | ||
545 | return regs; | ||
546 | } | ||
547 | #endif | ||
548 | |||
549 | /* | ||
550 | * Our handling of the processor debug registers is non-trivial. | ||
551 | * We do not clear them on entry and exit from the kernel. Therefore | ||
552 | * it is possible to get a watchpoint trap here from inside the kernel. | ||
553 | * However, the code in ./ptrace.c has ensured that the user can | ||
554 | * only set watchpoints on userspace addresses. Therefore the in-kernel | ||
555 | * watchpoint trap can only occur in code which is reading/writing | ||
556 | * from user space. Such code must not hold kernel locks (since it | ||
557 | * can equally take a page fault), therefore it is safe to call | ||
558 | * force_sig_info even though that claims and releases locks. | ||
559 | * | ||
560 | * Code in ./signal.c ensures that the debug control register | ||
561 | * is restored before we deliver any signal, and therefore that | ||
562 | * user code runs with the correct debug control register even though | ||
563 | * we clear it here. | ||
564 | * | ||
565 | * Being careful here means that we don't have to be as careful in a | ||
566 | * lot of more complicated places (task switching can be a bit lazy | ||
567 | * about restoring all the debug state, and ptrace doesn't have to | ||
568 | * find every occurrence of the TF bit that could be saved away even | ||
569 | * by user code) | ||
570 | * | ||
571 | * May run on IST stack. | ||
572 | */ | ||
573 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | ||
574 | { | ||
575 | struct task_struct *tsk = current; | ||
576 | unsigned long condition; | ||
577 | int si_code; | ||
578 | |||
579 | get_debugreg(condition, 6); | ||
580 | |||
581 | /* | ||
582 | * The processor cleared BTF, so don't mark that we need it set. | ||
583 | */ | ||
584 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | ||
585 | tsk->thread.debugctlmsr = 0; | ||
586 | |||
587 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | ||
588 | SIGTRAP) == NOTIFY_STOP) | ||
589 | return; | ||
590 | |||
591 | /* It's safe to allow irq's after DR6 has been saved */ | ||
592 | preempt_conditional_sti(regs); | ||
593 | |||
594 | /* Mask out spurious debug traps due to lazy DR7 setting */ | ||
595 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | ||
596 | if (!tsk->thread.debugreg7) | ||
597 | goto clear_dr7; | ||
598 | } | ||
599 | |||
600 | #ifdef CONFIG_X86_32 | ||
601 | if (regs->flags & X86_VM_MASK) | ||
602 | goto debug_vm86; | ||
603 | #endif | ||
604 | |||
605 | /* Save debug status register where ptrace can see it */ | ||
606 | tsk->thread.debugreg6 = condition; | ||
607 | |||
608 | /* | ||
609 | * Single-stepping through TF: make sure we ignore any events in | ||
610 | * kernel space (but re-enable TF when returning to user mode). | ||
611 | */ | ||
612 | if (condition & DR_STEP) { | ||
613 | if (!user_mode(regs)) | ||
614 | goto clear_TF_reenable; | ||
615 | } | ||
616 | |||
617 | si_code = get_si_code(condition); | ||
618 | /* Ok, finally something we can handle */ | ||
619 | send_sigtrap(tsk, regs, error_code, si_code); | ||
620 | |||
621 | /* | ||
622 | * Disable additional traps. They'll be re-enabled when | ||
623 | * the signal is delivered. | ||
624 | */ | ||
625 | clear_dr7: | ||
626 | set_debugreg(0, 7); | ||
627 | preempt_conditional_cli(regs); | ||
628 | return; | ||
629 | |||
630 | #ifdef CONFIG_X86_32 | ||
631 | debug_vm86: | ||
632 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
633 | preempt_conditional_cli(regs); | ||
634 | return; | ||
635 | #endif | ||
636 | |||
637 | clear_TF_reenable: | ||
638 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
639 | regs->flags &= ~X86_EFLAGS_TF; | ||
640 | preempt_conditional_cli(regs); | ||
641 | return; | ||
642 | } | ||
643 | |||
644 | #ifdef CONFIG_X86_64 | ||
645 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
646 | { | ||
647 | if (fixup_exception(regs)) | ||
648 | return 1; | ||
649 | |||
650 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
651 | /* Illegal floating point operation in the kernel */ | ||
652 | current->thread.trap_no = trapnr; | ||
653 | die(str, regs, 0); | ||
654 | return 0; | ||
655 | } | ||
656 | #endif | ||
657 | |||
658 | /* | ||
659 | * Note that we play around with the 'TS' bit in an attempt to get | ||
660 | * the correct behaviour even in the presence of the asynchronous | ||
661 | * IRQ13 behaviour | ||
662 | */ | ||
663 | void math_error(void __user *ip) | ||
664 | { | ||
665 | struct task_struct *task; | ||
666 | siginfo_t info; | ||
667 | unsigned short cwd, swd; | ||
668 | |||
669 | /* | ||
670 | * Save the info for the exception handler and clear the error. | ||
671 | */ | ||
672 | task = current; | ||
673 | save_init_fpu(task); | ||
674 | task->thread.trap_no = 16; | ||
675 | task->thread.error_code = 0; | ||
676 | info.si_signo = SIGFPE; | ||
677 | info.si_errno = 0; | ||
678 | info.si_code = __SI_FAULT; | ||
679 | info.si_addr = ip; | ||
680 | /* | ||
681 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | ||
682 | * status. 0x3f is the exception bits in these regs, 0x200 is the | ||
683 | * C1 reg you need in case of a stack fault, 0x040 is the stack | ||
684 | * fault bit. We should only be taking one exception at a time, | ||
685 | * so if this combination doesn't produce any single exception, | ||
686 | * then we have a bad program that isn't synchronizing its FPU usage | ||
687 | * and it will suffer the consequences since we won't be able to | ||
688 | * fully reproduce the context of the exception | ||
689 | */ | ||
690 | cwd = get_fpu_cwd(task); | ||
691 | swd = get_fpu_swd(task); | ||
692 | switch (swd & ~cwd & 0x3f) { | ||
693 | case 0x000: /* No unmasked exception */ | ||
694 | #ifdef CONFIG_X86_32 | ||
695 | return; | ||
696 | #endif | ||
697 | default: /* Multiple exceptions */ | ||
698 | break; | ||
699 | case 0x001: /* Invalid Op */ | ||
700 | /* | ||
701 | * swd & 0x240 == 0x040: Stack Underflow | ||
702 | * swd & 0x240 == 0x240: Stack Overflow | ||
703 | * User must clear the SF bit (0x40) if set | ||
704 | */ | ||
705 | info.si_code = FPE_FLTINV; | ||
706 | break; | ||
707 | case 0x002: /* Denormalize */ | ||
708 | case 0x010: /* Underflow */ | ||
709 | info.si_code = FPE_FLTUND; | ||
710 | break; | ||
711 | case 0x004: /* Zero Divide */ | ||
712 | info.si_code = FPE_FLTDIV; | ||
713 | break; | ||
714 | case 0x008: /* Overflow */ | ||
715 | info.si_code = FPE_FLTOVF; | ||
716 | break; | ||
717 | case 0x020: /* Precision */ | ||
718 | info.si_code = FPE_FLTRES; | ||
719 | break; | ||
720 | } | ||
721 | force_sig_info(SIGFPE, &info, task); | ||
722 | } | ||
723 | |||
724 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | ||
725 | { | ||
726 | conditional_sti(regs); | ||
727 | |||
728 | #ifdef CONFIG_X86_32 | ||
729 | ignore_fpu_irq = 1; | ||
730 | #else | ||
731 | if (!user_mode(regs) && | ||
732 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
733 | return; | ||
734 | #endif | ||
735 | |||
736 | math_error((void __user *)regs->ip); | ||
737 | } | ||
738 | |||
739 | static void simd_math_error(void __user *ip) | ||
740 | { | ||
741 | struct task_struct *task; | ||
742 | siginfo_t info; | ||
743 | unsigned short mxcsr; | ||
744 | |||
745 | /* | ||
746 | * Save the info for the exception handler and clear the error. | ||
747 | */ | ||
748 | task = current; | ||
749 | save_init_fpu(task); | ||
750 | task->thread.trap_no = 19; | ||
751 | task->thread.error_code = 0; | ||
752 | info.si_signo = SIGFPE; | ||
753 | info.si_errno = 0; | ||
754 | info.si_code = __SI_FAULT; | ||
755 | info.si_addr = ip; | ||
756 | /* | ||
757 | * The SIMD FPU exceptions are handled a little differently, as there | ||
758 | * is only a single status/control register. Thus, to determine which | ||
759 | * unmasked exception was caught we must mask the exception mask bits | ||
760 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
761 | */ | ||
762 | mxcsr = get_fpu_mxcsr(task); | ||
763 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
764 | case 0x000: | ||
765 | default: | ||
766 | break; | ||
767 | case 0x001: /* Invalid Op */ | ||
768 | info.si_code = FPE_FLTINV; | ||
769 | break; | ||
770 | case 0x002: /* Denormalize */ | ||
771 | case 0x010: /* Underflow */ | ||
772 | info.si_code = FPE_FLTUND; | ||
773 | break; | ||
774 | case 0x004: /* Zero Divide */ | ||
775 | info.si_code = FPE_FLTDIV; | ||
776 | break; | ||
777 | case 0x008: /* Overflow */ | ||
778 | info.si_code = FPE_FLTOVF; | ||
779 | break; | ||
780 | case 0x020: /* Precision */ | ||
781 | info.si_code = FPE_FLTRES; | ||
782 | break; | ||
783 | } | ||
784 | force_sig_info(SIGFPE, &info, task); | ||
785 | } | ||
786 | |||
787 | dotraplinkage void | ||
788 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | ||
789 | { | ||
790 | conditional_sti(regs); | ||
791 | |||
792 | #ifdef CONFIG_X86_32 | ||
793 | if (cpu_has_xmm) { | ||
794 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | ||
795 | ignore_fpu_irq = 1; | ||
796 | simd_math_error((void __user *)regs->ip); | ||
797 | return; | ||
798 | } | ||
799 | /* | ||
800 | * Handle strange cache flush from user space exception | ||
801 | * in all other cases. This is undocumented behaviour. | ||
802 | */ | ||
803 | if (regs->flags & X86_VM_MASK) { | ||
804 | handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); | ||
805 | return; | ||
806 | } | ||
807 | current->thread.trap_no = 19; | ||
808 | current->thread.error_code = error_code; | ||
809 | die_if_kernel("cache flush denied", regs, error_code); | ||
810 | force_sig(SIGSEGV, current); | ||
811 | #else | ||
812 | if (!user_mode(regs) && | ||
813 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
814 | return; | ||
815 | simd_math_error((void __user *)regs->ip); | ||
816 | #endif | ||
817 | } | ||
818 | |||
819 | dotraplinkage void | ||
820 | do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | ||
821 | { | ||
822 | conditional_sti(regs); | ||
823 | #if 0 | ||
824 | /* No need to warn about this any longer. */ | ||
825 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); | ||
826 | #endif | ||
827 | } | ||
828 | |||
829 | #ifdef CONFIG_X86_32 | ||
830 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | ||
831 | { | ||
832 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); | ||
833 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | ||
834 | unsigned long new_kesp = kesp - base; | ||
835 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | ||
836 | __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; | ||
837 | |||
838 | /* Set up base for espfix segment */ | ||
839 | desc &= 0x00f0ff0000000000ULL; | ||
840 | desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | | ||
841 | ((((__u64)base) << 32) & 0xff00000000000000ULL) | | ||
842 | ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | | ||
843 | (lim_pages & 0xffff); | ||
844 | *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; | ||
845 | |||
846 | return new_kesp; | ||
847 | } | ||
848 | #else | ||
849 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | ||
850 | { | ||
851 | } | ||
852 | |||
853 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
854 | { | ||
855 | } | ||
856 | #endif | ||
857 | |||
858 | /* | ||
859 | * 'math_state_restore()' saves the current math information in the | ||
860 | * old math state array, and gets the new ones from the current task | ||
861 | * | ||
862 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | ||
863 | * Don't touch unless you *really* know how it works. | ||
864 | * | ||
865 | * Must be called with kernel preemption disabled (in this case, | ||
866 | * local interrupts are disabled at the call-site in entry.S). | ||
867 | */ | ||
868 | asmlinkage void math_state_restore(void) | ||
869 | { | ||
870 | struct thread_info *thread = current_thread_info(); | ||
871 | struct task_struct *tsk = thread->task; | ||
872 | |||
873 | if (!tsk_used_math(tsk)) { | ||
874 | local_irq_enable(); | ||
875 | /* | ||
876 | * does a slab alloc which can sleep | ||
877 | */ | ||
878 | if (init_fpu(tsk)) { | ||
879 | /* | ||
880 | * ran out of memory! | ||
881 | */ | ||
882 | do_group_exit(SIGKILL); | ||
883 | return; | ||
884 | } | ||
885 | local_irq_disable(); | ||
886 | } | ||
887 | |||
888 | clts(); /* Allow maths ops (or we recurse) */ | ||
889 | #ifdef CONFIG_X86_32 | ||
890 | restore_fpu(tsk); | ||
891 | #else | ||
892 | /* | ||
893 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
894 | */ | ||
895 | if (unlikely(restore_fpu_checking(tsk))) { | ||
896 | stts(); | ||
897 | force_sig(SIGSEGV, tsk); | ||
898 | return; | ||
899 | } | ||
900 | #endif | ||
901 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | ||
902 | tsk->fpu_counter++; | ||
903 | } | ||
904 | EXPORT_SYMBOL_GPL(math_state_restore); | ||
905 | |||
906 | #ifndef CONFIG_MATH_EMULATION | ||
907 | asmlinkage void math_emulate(long arg) | ||
908 | { | ||
909 | printk(KERN_EMERG | ||
910 | "math-emulation not enabled and no coprocessor found.\n"); | ||
911 | printk(KERN_EMERG "killing %s.\n", current->comm); | ||
912 | force_sig(SIGFPE, current); | ||
913 | schedule(); | ||
914 | } | ||
915 | #endif /* CONFIG_MATH_EMULATION */ | ||
916 | |||
917 | dotraplinkage void __kprobes | ||
918 | do_device_not_available(struct pt_regs *regs, long error) | ||
919 | { | ||
920 | #ifdef CONFIG_X86_32 | ||
921 | if (read_cr0() & X86_CR0_EM) { | ||
922 | conditional_sti(regs); | ||
923 | math_emulate(0); | ||
924 | } else { | ||
925 | math_state_restore(); /* interrupts still off */ | ||
926 | conditional_sti(regs); | ||
927 | } | ||
928 | #else | ||
929 | math_state_restore(); | ||
930 | #endif | ||
931 | } | ||
932 | |||
933 | #ifdef CONFIG_X86_32 | ||
934 | #ifdef CONFIG_X86_MCE | ||
935 | dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) | ||
936 | { | ||
937 | conditional_sti(regs); | ||
938 | machine_check_vector(regs, error); | ||
939 | } | ||
940 | #endif | ||
941 | |||
942 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | ||
943 | { | ||
944 | siginfo_t info; | ||
945 | local_irq_enable(); | ||
946 | |||
947 | info.si_signo = SIGILL; | ||
948 | info.si_errno = 0; | ||
949 | info.si_code = ILL_BADSTK; | ||
950 | info.si_addr = 0; | ||
951 | if (notify_die(DIE_TRAP, "iret exception", | ||
952 | regs, error_code, 32, SIGILL) == NOTIFY_STOP) | ||
953 | return; | ||
954 | do_trap(32, SIGILL, "iret exception", regs, error_code, &info); | ||
955 | } | ||
956 | #endif | ||
957 | |||
958 | void __init trap_init(void) | ||
959 | { | ||
960 | #ifdef CONFIG_X86_32 | ||
961 | int i; | ||
962 | #endif | ||
963 | |||
964 | #ifdef CONFIG_EISA | ||
965 | void __iomem *p = early_ioremap(0x0FFFD9, 4); | ||
966 | |||
967 | if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) | ||
968 | EISA_bus = 1; | ||
969 | early_iounmap(p, 4); | ||
970 | #endif | ||
971 | |||
972 | set_intr_gate(0, ÷_error); | ||
973 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
974 | set_intr_gate_ist(2, &nmi, NMI_STACK); | ||
975 | /* int3 can be called from all */ | ||
976 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); | ||
977 | /* int4 can be called from all */ | ||
978 | set_system_intr_gate(4, &overflow); | ||
979 | set_intr_gate(5, &bounds); | ||
980 | set_intr_gate(6, &invalid_op); | ||
981 | set_intr_gate(7, &device_not_available); | ||
982 | #ifdef CONFIG_X86_32 | ||
983 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); | ||
984 | #else | ||
985 | set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); | ||
986 | #endif | ||
987 | set_intr_gate(9, &coprocessor_segment_overrun); | ||
988 | set_intr_gate(10, &invalid_TSS); | ||
989 | set_intr_gate(11, &segment_not_present); | ||
990 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | ||
991 | set_intr_gate(13, &general_protection); | ||
992 | set_intr_gate(14, &page_fault); | ||
993 | set_intr_gate(15, &spurious_interrupt_bug); | ||
994 | set_intr_gate(16, &coprocessor_error); | ||
995 | set_intr_gate(17, &alignment_check); | ||
996 | #ifdef CONFIG_X86_MCE | ||
997 | set_intr_gate_ist(18, &machine_check, MCE_STACK); | ||
998 | #endif | ||
999 | set_intr_gate(19, &simd_coprocessor_error); | ||
1000 | |||
1001 | #ifdef CONFIG_IA32_EMULATION | ||
1002 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | ||
1003 | #endif | ||
1004 | |||
1005 | #ifdef CONFIG_X86_32 | ||
1006 | if (cpu_has_fxsr) { | ||
1007 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | ||
1008 | set_in_cr4(X86_CR4_OSFXSR); | ||
1009 | printk("done.\n"); | ||
1010 | } | ||
1011 | if (cpu_has_xmm) { | ||
1012 | printk(KERN_INFO | ||
1013 | "Enabling unmasked SIMD FPU exception support... "); | ||
1014 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
1015 | printk("done.\n"); | ||
1016 | } | ||
1017 | |||
1018 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | ||
1019 | |||
1020 | /* Reserve all the builtin and the syscall vector: */ | ||
1021 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
1022 | set_bit(i, used_vectors); | ||
1023 | |||
1024 | set_bit(SYSCALL_VECTOR, used_vectors); | ||
1025 | #endif | ||
1026 | /* | ||
1027 | * Should be a barrier for any external CPU state: | ||
1028 | */ | ||
1029 | cpu_init(); | ||
1030 | |||
1031 | #ifdef CONFIG_X86_32 | ||
1032 | trap_init_hook(); | ||
1033 | #endif | ||
1034 | } | ||