author     Alexander van Heukelum <heukelum@fastmail.fm>    2008-10-03 16:00:40 -0400
committer  Ingo Molnar <mingo@elte.hu>                      2008-10-13 04:33:29 -0400
commit     8728861b4fead8119a1b7bb856a387320859cd98 (patch)
tree       03acdb0dccde800696409b6f59f8d5248c4da0f6 /arch/x86/kernel/traps.c
parent     081f75bbdc86de53537e1b5aca01de72bd2fea6b (diff)
traps: x86: finalize unification of traps.c
traps_32.c and traps_64.c are now identical. Move one to traps.c, delete
the other, and update the Makefile accordingly.
Signed-off-by: Alexander van Heukelum <heukelum@fastmail.fm>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/traps.c')
-rw-r--r--   arch/x86/kernel/traps.c   1071
1 file changed, 1071 insertions, 0 deletions
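
The Makefile change mentioned in the commit message is not shown here, since the diffstat above is limited to traps.c. A minimal sketch of what it amounts to, assuming the old objects were selected per word size via traps_$(BITS).o in arch/x86/kernel/Makefile (the exact line is an assumption, not part of this commit's diffstat):

    # arch/x86/kernel/Makefile (hypothetical hunk, not shown in this diffstat)
    -obj-y += traps_$(BITS).o
    +obj-y += traps.o

With traps_32.c and traps_64.c unified, a single traps.o is built for both 32-bit and 64-bit kernels.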
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
new file mode 100644
index 000000000000..ffb131f74f78
--- /dev/null
+++ b/arch/x86/kernel/traps.c
@@ -0,0 +1,1071 @@
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | * | ||
5 | * Pentium III FXSR, SSE support | ||
6 | * Gareth Hughes <gareth@valinux.com>, May 2000 | ||
7 | */ | ||
8 | |||
9 | /* | ||
10 | * Handle hardware traps and faults. | ||
11 | */ | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/kprobes.h> | ||
16 | #include <linux/uaccess.h> | ||
17 | #include <linux/utsname.h> | ||
18 | #include <linux/kdebug.h> | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/ptrace.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/unwind.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/errno.h> | ||
26 | #include <linux/kexec.h> | ||
27 | #include <linux/sched.h> | ||
28 | #include <linux/timer.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/bug.h> | ||
31 | #include <linux/nmi.h> | ||
32 | #include <linux/mm.h> | ||
33 | #include <linux/smp.h> | ||
34 | #include <linux/io.h> | ||
35 | |||
36 | #ifdef CONFIG_EISA | ||
37 | #include <linux/ioport.h> | ||
38 | #include <linux/eisa.h> | ||
39 | #endif | ||
40 | |||
41 | #ifdef CONFIG_MCA | ||
42 | #include <linux/mca.h> | ||
43 | #endif | ||
44 | |||
45 | #if defined(CONFIG_EDAC) | ||
46 | #include <linux/edac.h> | ||
47 | #endif | ||
48 | |||
49 | #include <asm/stacktrace.h> | ||
50 | #include <asm/processor.h> | ||
51 | #include <asm/debugreg.h> | ||
52 | #include <asm/atomic.h> | ||
53 | #include <asm/system.h> | ||
54 | #include <asm/unwind.h> | ||
55 | #include <asm/traps.h> | ||
56 | #include <asm/desc.h> | ||
57 | #include <asm/i387.h> | ||
58 | |||
59 | #include <mach_traps.h> | ||
60 | |||
61 | #ifdef CONFIG_X86_64 | ||
62 | #include <asm/pgalloc.h> | ||
63 | #include <asm/proto.h> | ||
64 | #include <asm/pda.h> | ||
65 | #else | ||
66 | #include <asm/processor-flags.h> | ||
67 | #include <asm/arch_hooks.h> | ||
68 | #include <asm/nmi.h> | ||
69 | #include <asm/smp.h> | ||
70 | #include <asm/io.h> | ||
71 | #include <asm/traps.h> | ||
72 | |||
73 | #include "cpu/mcheck/mce.h" | ||
74 | |||
75 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
76 | EXPORT_SYMBOL_GPL(used_vectors); | ||
77 | |||
78 | asmlinkage int system_call(void); | ||
79 | |||
80 | /* Do we ignore FPU interrupts ? */ | ||
81 | char ignore_fpu_irq; | ||
82 | |||
83 | /* | ||
84 | * The IDT has to be page-aligned to simplify the Pentium | ||
85 | * F0 0F bug workaround.. We have a special link segment | ||
86 | * for this. | ||
87 | */ | ||
88 | gate_desc idt_table[256] | ||
89 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | ||
90 | #endif | ||
91 | |||
92 | static int ignore_nmis; | ||
93 | |||
94 | static inline void conditional_sti(struct pt_regs *regs) | ||
95 | { | ||
96 | if (regs->flags & X86_EFLAGS_IF) | ||
97 | local_irq_enable(); | ||
98 | } | ||
99 | |||
100 | static inline void preempt_conditional_sti(struct pt_regs *regs) | ||
101 | { | ||
102 | inc_preempt_count(); | ||
103 | if (regs->flags & X86_EFLAGS_IF) | ||
104 | local_irq_enable(); | ||
105 | } | ||
106 | |||
107 | static inline void preempt_conditional_cli(struct pt_regs *regs) | ||
108 | { | ||
109 | if (regs->flags & X86_EFLAGS_IF) | ||
110 | local_irq_disable(); | ||
111 | dec_preempt_count(); | ||
112 | } | ||
113 | |||
114 | #ifdef CONFIG_X86_32 | ||
115 | static inline void | ||
116 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
117 | { | ||
118 | if (!user_mode_vm(regs)) | ||
119 | die(str, regs, err); | ||
120 | } | ||
121 | |||
122 | /* | ||
123 | * Perform the lazy TSS's I/O bitmap copy. If the TSS has an | ||
124 | * invalid offset set (the LAZY one) and the faulting thread has | ||
125 | * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, | ||
126 | * we set the offset field correctly and return 1. | ||
127 | */ | ||
128 | static int lazy_iobitmap_copy(void) | ||
129 | { | ||
130 | struct thread_struct *thread; | ||
131 | struct tss_struct *tss; | ||
132 | int cpu; | ||
133 | |||
134 | cpu = get_cpu(); | ||
135 | tss = &per_cpu(init_tss, cpu); | ||
136 | thread = &current->thread; | ||
137 | |||
138 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | ||
139 | thread->io_bitmap_ptr) { | ||
140 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | ||
141 | thread->io_bitmap_max); | ||
142 | /* | ||
143 | * If the previously set map was extending to higher ports | ||
144 | * than the current one, pad extra space with 0xff (no access). | ||
145 | */ | ||
146 | if (thread->io_bitmap_max < tss->io_bitmap_max) { | ||
147 | memset((char *) tss->io_bitmap + | ||
148 | thread->io_bitmap_max, 0xff, | ||
149 | tss->io_bitmap_max - thread->io_bitmap_max); | ||
150 | } | ||
151 | tss->io_bitmap_max = thread->io_bitmap_max; | ||
152 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; | ||
153 | tss->io_bitmap_owner = thread; | ||
154 | put_cpu(); | ||
155 | |||
156 | return 1; | ||
157 | } | ||
158 | put_cpu(); | ||
159 | |||
160 | return 0; | ||
161 | } | ||
162 | #endif | ||
163 | |||
164 | static void __kprobes | ||
165 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | ||
166 | long error_code, siginfo_t *info) | ||
167 | { | ||
168 | struct task_struct *tsk = current; | ||
169 | |||
170 | #ifdef CONFIG_X86_32 | ||
171 | if (regs->flags & X86_VM_MASK) { | ||
172 | /* | ||
173 | * traps 0, 1, 3, 4, and 5 should be forwarded to vm86. | ||
174 | * On nmi (interrupt 2), do_trap should not be called. | ||
175 | */ | ||
176 | if (trapnr < 6) | ||
177 | goto vm86_trap; | ||
178 | goto trap_signal; | ||
179 | } | ||
180 | #endif | ||
181 | |||
182 | if (!user_mode(regs)) | ||
183 | goto kernel_trap; | ||
184 | |||
185 | #ifdef CONFIG_X86_32 | ||
186 | trap_signal: | ||
187 | #endif | ||
188 | /* | ||
189 | * We want error_code and trap_no set for userspace faults and | ||
190 | * kernelspace faults which result in die(), but not | ||
191 | * kernelspace faults which are fixed up. die() gives the | ||
192 | * process no chance to handle the signal and notice the | ||
193 | * kernel fault information, so that won't result in polluting | ||
194 | * the information about previously queued, but not yet | ||
195 | * delivered, faults. See also do_general_protection below. | ||
196 | */ | ||
197 | tsk->thread.error_code = error_code; | ||
198 | tsk->thread.trap_no = trapnr; | ||
199 | |||
200 | #ifdef CONFIG_X86_64 | ||
201 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
202 | printk_ratelimit()) { | ||
203 | printk(KERN_INFO | ||
204 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
205 | tsk->comm, tsk->pid, str, | ||
206 | regs->ip, regs->sp, error_code); | ||
207 | print_vma_addr(" in ", regs->ip); | ||
208 | printk("\n"); | ||
209 | } | ||
210 | #endif | ||
211 | |||
212 | if (info) | ||
213 | force_sig_info(signr, info, tsk); | ||
214 | else | ||
215 | force_sig(signr, tsk); | ||
216 | return; | ||
217 | |||
218 | kernel_trap: | ||
219 | if (!fixup_exception(regs)) { | ||
220 | tsk->thread.error_code = error_code; | ||
221 | tsk->thread.trap_no = trapnr; | ||
222 | die(str, regs, error_code); | ||
223 | } | ||
224 | return; | ||
225 | |||
226 | #ifdef CONFIG_X86_32 | ||
227 | vm86_trap: | ||
228 | if (handle_vm86_trap((struct kernel_vm86_regs *) regs, | ||
229 | error_code, trapnr)) | ||
230 | goto trap_signal; | ||
231 | return; | ||
232 | #endif | ||
233 | } | ||
234 | |||
235 | #define DO_ERROR(trapnr, signr, str, name) \ | ||
236 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | ||
237 | { \ | ||
238 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
239 | == NOTIFY_STOP) \ | ||
240 | return; \ | ||
241 | conditional_sti(regs); \ | ||
242 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | ||
243 | } | ||
244 | |||
245 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | ||
246 | dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | ||
247 | { \ | ||
248 | siginfo_t info; \ | ||
249 | info.si_signo = signr; \ | ||
250 | info.si_errno = 0; \ | ||
251 | info.si_code = sicode; \ | ||
252 | info.si_addr = (void __user *)siaddr; \ | ||
253 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | ||
254 | == NOTIFY_STOP) \ | ||
255 | return; \ | ||
256 | conditional_sti(regs); \ | ||
257 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | ||
258 | } | ||
259 | |||
260 | DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | ||
261 | DO_ERROR(4, SIGSEGV, "overflow", overflow) | ||
262 | DO_ERROR(5, SIGSEGV, "bounds", bounds) | ||
263 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | ||
264 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | ||
265 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | ||
266 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | ||
267 | #ifdef CONFIG_X86_32 | ||
268 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | ||
269 | #endif | ||
270 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | ||
271 | |||
272 | #ifdef CONFIG_X86_64 | ||
273 | /* Runs on IST stack */ | ||
274 | dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) | ||
275 | { | ||
276 | if (notify_die(DIE_TRAP, "stack segment", regs, error_code, | ||
277 | 12, SIGBUS) == NOTIFY_STOP) | ||
278 | return; | ||
279 | preempt_conditional_sti(regs); | ||
280 | do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); | ||
281 | preempt_conditional_cli(regs); | ||
282 | } | ||
283 | |||
284 | dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | ||
285 | { | ||
286 | static const char str[] = "double fault"; | ||
287 | struct task_struct *tsk = current; | ||
288 | |||
289 | /* Return not checked because a double fault cannot be ignored */ | ||
290 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | ||
291 | |||
292 | tsk->thread.error_code = error_code; | ||
293 | tsk->thread.trap_no = 8; | ||
294 | |||
295 | /* This is always a kernel trap and never fixable (and thus must | ||
296 | never return). */ | ||
297 | for (;;) | ||
298 | die(str, regs, error_code); | ||
299 | } | ||
300 | #endif | ||
301 | |||
302 | dotraplinkage void __kprobes | ||
303 | do_general_protection(struct pt_regs *regs, long error_code) | ||
304 | { | ||
305 | struct task_struct *tsk; | ||
306 | |||
307 | conditional_sti(regs); | ||
308 | |||
309 | #ifdef CONFIG_X86_32 | ||
310 | if (lazy_iobitmap_copy()) { | ||
311 | /* restart the faulting instruction */ | ||
312 | return; | ||
313 | } | ||
314 | |||
315 | if (regs->flags & X86_VM_MASK) | ||
316 | goto gp_in_vm86; | ||
317 | #endif | ||
318 | |||
319 | tsk = current; | ||
320 | if (!user_mode(regs)) | ||
321 | goto gp_in_kernel; | ||
322 | |||
323 | tsk->thread.error_code = error_code; | ||
324 | tsk->thread.trap_no = 13; | ||
325 | |||
326 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | ||
327 | printk_ratelimit()) { | ||
328 | printk(KERN_INFO | ||
329 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
330 | tsk->comm, task_pid_nr(tsk), | ||
331 | regs->ip, regs->sp, error_code); | ||
332 | print_vma_addr(" in ", regs->ip); | ||
333 | printk("\n"); | ||
334 | } | ||
335 | |||
336 | force_sig(SIGSEGV, tsk); | ||
337 | return; | ||
338 | |||
339 | #ifdef CONFIG_X86_32 | ||
340 | gp_in_vm86: | ||
341 | local_irq_enable(); | ||
342 | handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); | ||
343 | return; | ||
344 | #endif | ||
345 | |||
346 | gp_in_kernel: | ||
347 | if (fixup_exception(regs)) | ||
348 | return; | ||
349 | |||
350 | tsk->thread.error_code = error_code; | ||
351 | tsk->thread.trap_no = 13; | ||
352 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
353 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | ||
354 | return; | ||
355 | die("general protection fault", regs, error_code); | ||
356 | } | ||
357 | |||
358 | static notrace __kprobes void | ||
359 | mem_parity_error(unsigned char reason, struct pt_regs *regs) | ||
360 | { | ||
361 | printk(KERN_EMERG | ||
362 | "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | ||
363 | reason, smp_processor_id()); | ||
364 | |||
365 | printk(KERN_EMERG | ||
366 | "You have some hardware problem, likely on the PCI bus.\n"); | ||
367 | |||
368 | #if defined(CONFIG_EDAC) | ||
369 | if (edac_handler_set()) { | ||
370 | edac_atomic_assert_error(); | ||
371 | return; | ||
372 | } | ||
373 | #endif | ||
374 | |||
375 | if (panic_on_unrecovered_nmi) | ||
376 | panic("NMI: Not continuing"); | ||
377 | |||
378 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
379 | |||
380 | /* Clear and disable the memory parity error line. */ | ||
381 | reason = (reason & 0xf) | 4; | ||
382 | outb(reason, 0x61); | ||
383 | } | ||
384 | |||
385 | static notrace __kprobes void | ||
386 | io_check_error(unsigned char reason, struct pt_regs *regs) | ||
387 | { | ||
388 | unsigned long i; | ||
389 | |||
390 | printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); | ||
391 | show_registers(regs); | ||
392 | |||
393 | /* Re-enable the IOCK line, wait for a few seconds */ | ||
394 | reason = (reason & 0xf) | 8; | ||
395 | outb(reason, 0x61); | ||
396 | |||
397 | i = 2000; | ||
398 | while (--i) | ||
399 | udelay(1000); | ||
400 | |||
401 | reason &= ~8; | ||
402 | outb(reason, 0x61); | ||
403 | } | ||
404 | |||
405 | static notrace __kprobes void | ||
406 | unknown_nmi_error(unsigned char reason, struct pt_regs *regs) | ||
407 | { | ||
408 | if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == | ||
409 | NOTIFY_STOP) | ||
410 | return; | ||
411 | #ifdef CONFIG_MCA | ||
412 | /* | ||
413 | * Might actually be able to figure out what the guilty party | ||
414 | * is: | ||
415 | */ | ||
416 | if (MCA_bus) { | ||
417 | mca_handle_nmi(); | ||
418 | return; | ||
419 | } | ||
420 | #endif | ||
421 | printk(KERN_EMERG | ||
422 | "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | ||
423 | reason, smp_processor_id()); | ||
424 | |||
425 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); | ||
426 | if (panic_on_unrecovered_nmi) | ||
427 | panic("NMI: Not continuing"); | ||
428 | |||
429 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
430 | } | ||
431 | |||
432 | #ifdef CONFIG_X86_32 | ||
433 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
434 | |||
435 | void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
436 | { | ||
437 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
438 | return; | ||
439 | |||
440 | spin_lock(&nmi_print_lock); | ||
441 | /* | ||
442 | * We are in trouble anyway, let's at least try | ||
443 | * to get a message out: | ||
444 | */ | ||
445 | bust_spinlocks(1); | ||
446 | printk(KERN_EMERG "%s", str); | ||
447 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
448 | smp_processor_id(), regs->ip); | ||
449 | show_registers(regs); | ||
450 | if (do_panic) | ||
451 | panic("Non maskable interrupt"); | ||
452 | console_silent(); | ||
453 | spin_unlock(&nmi_print_lock); | ||
454 | bust_spinlocks(0); | ||
455 | |||
456 | /* | ||
457 | * If we are in the kernel we are probably nested up pretty badly | ||
458 | * and might as well get out now while we still can: | ||
459 | */ | ||
460 | if (!user_mode_vm(regs)) { | ||
461 | current->thread.trap_no = 2; | ||
462 | crash_kexec(regs); | ||
463 | } | ||
464 | |||
465 | do_exit(SIGSEGV); | ||
466 | } | ||
467 | #endif | ||
468 | |||
469 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | ||
470 | { | ||
471 | unsigned char reason = 0; | ||
472 | int cpu; | ||
473 | |||
474 | cpu = smp_processor_id(); | ||
475 | |||
476 | /* Only the BSP gets external NMIs from the system. */ | ||
477 | if (!cpu) | ||
478 | reason = get_nmi_reason(); | ||
479 | |||
480 | if (!(reason & 0xc0)) { | ||
481 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | ||
482 | == NOTIFY_STOP) | ||
483 | return; | ||
484 | #ifdef CONFIG_X86_LOCAL_APIC | ||
485 | /* | ||
486 | * Ok, so this is none of the documented NMI sources, | ||
487 | * so it must be the NMI watchdog. | ||
488 | */ | ||
489 | if (nmi_watchdog_tick(regs, reason)) | ||
490 | return; | ||
491 | if (!do_nmi_callback(regs, cpu)) | ||
492 | unknown_nmi_error(reason, regs); | ||
493 | #else | ||
494 | unknown_nmi_error(reason, regs); | ||
495 | #endif | ||
496 | |||
497 | return; | ||
498 | } | ||
499 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | ||
500 | return; | ||
501 | |||
502 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | ||
503 | if (reason & 0x80) | ||
504 | mem_parity_error(reason, regs); | ||
505 | if (reason & 0x40) | ||
506 | io_check_error(reason, regs); | ||
507 | #ifdef CONFIG_X86_32 | ||
508 | /* | ||
509 | * Reassert NMI in case it became active meanwhile | ||
510 | * as it's edge-triggered: | ||
511 | */ | ||
512 | reassert_nmi(); | ||
513 | #endif | ||
514 | } | ||
515 | |||
516 | dotraplinkage notrace __kprobes void | ||
517 | do_nmi(struct pt_regs *regs, long error_code) | ||
518 | { | ||
519 | nmi_enter(); | ||
520 | |||
521 | #ifdef CONFIG_X86_32 | ||
522 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | ||
523 | #else | ||
524 | add_pda(__nmi_count, 1); | ||
525 | #endif | ||
526 | |||
527 | if (!ignore_nmis) | ||
528 | default_do_nmi(regs); | ||
529 | |||
530 | nmi_exit(); | ||
531 | } | ||
532 | |||
533 | void stop_nmi(void) | ||
534 | { | ||
535 | acpi_nmi_disable(); | ||
536 | ignore_nmis++; | ||
537 | } | ||
538 | |||
539 | void restart_nmi(void) | ||
540 | { | ||
541 | ignore_nmis--; | ||
542 | acpi_nmi_enable(); | ||
543 | } | ||
544 | |||
545 | /* May run on IST stack. */ | ||
546 | dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | ||
547 | { | ||
548 | #ifdef CONFIG_KPROBES | ||
549 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) | ||
550 | == NOTIFY_STOP) | ||
551 | return; | ||
552 | #else | ||
553 | if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP) | ||
554 | == NOTIFY_STOP) | ||
555 | return; | ||
556 | #endif | ||
557 | |||
558 | preempt_conditional_sti(regs); | ||
559 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | ||
560 | preempt_conditional_cli(regs); | ||
561 | } | ||
562 | |||
563 | #ifdef CONFIG_X86_64 | ||
564 | /* Help handler running on IST stack to switch back to user stack | ||
565 | for scheduling or signal handling. The actual stack switch is done in | ||
566 | entry.S */ | ||
567 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | ||
568 | { | ||
569 | struct pt_regs *regs = eregs; | ||
570 | /* Did already sync */ | ||
571 | if (eregs == (struct pt_regs *)eregs->sp) | ||
572 | ; | ||
573 | /* Exception from user space */ | ||
574 | else if (user_mode(eregs)) | ||
575 | regs = task_pt_regs(current); | ||
576 | /* Exception from kernel and interrupts are enabled. Move to | ||
577 | kernel process stack. */ | ||
578 | else if (eregs->flags & X86_EFLAGS_IF) | ||
579 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | ||
580 | if (eregs != regs) | ||
581 | *regs = *eregs; | ||
582 | return regs; | ||
583 | } | ||
584 | #endif | ||
585 | |||
586 | /* | ||
587 | * Our handling of the processor debug registers is non-trivial. | ||
588 | * We do not clear them on entry and exit from the kernel. Therefore | ||
589 | * it is possible to get a watchpoint trap here from inside the kernel. | ||
590 | * However, the code in ./ptrace.c has ensured that the user can | ||
591 | * only set watchpoints on userspace addresses. Therefore the in-kernel | ||
592 | * watchpoint trap can only occur in code which is reading/writing | ||
593 | * from user space. Such code must not hold kernel locks (since it | ||
594 | * can equally take a page fault), therefore it is safe to call | ||
595 | * force_sig_info even though that claims and releases locks. | ||
596 | * | ||
597 | * Code in ./signal.c ensures that the debug control register | ||
598 | * is restored before we deliver any signal, and therefore that | ||
599 | * user code runs with the correct debug control register even though | ||
600 | * we clear it here. | ||
601 | * | ||
602 | * Being careful here means that we don't have to be as careful in a | ||
603 | * lot of more complicated places (task switching can be a bit lazy | ||
604 | * about restoring all the debug state, and ptrace doesn't have to | ||
605 | * find every occurrence of the TF bit that could be saved away even | ||
606 | * by user code) | ||
607 | * | ||
608 | * May run on IST stack. | ||
609 | */ | ||
610 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | ||
611 | { | ||
612 | struct task_struct *tsk = current; | ||
613 | unsigned long condition; | ||
614 | int si_code; | ||
615 | |||
616 | get_debugreg(condition, 6); | ||
617 | |||
618 | /* | ||
619 | * The processor cleared BTF, so don't mark that we need it set. | ||
620 | */ | ||
621 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | ||
622 | tsk->thread.debugctlmsr = 0; | ||
623 | |||
624 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | ||
625 | SIGTRAP) == NOTIFY_STOP) | ||
626 | return; | ||
627 | |||
628 | /* It's safe to allow irq's after DR6 has been saved */ | ||
629 | preempt_conditional_sti(regs); | ||
630 | |||
631 | /* Mask out spurious debug traps due to lazy DR7 setting */ | ||
632 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | ||
633 | if (!tsk->thread.debugreg7) | ||
634 | goto clear_dr7; | ||
635 | } | ||
636 | |||
637 | #ifdef CONFIG_X86_32 | ||
638 | if (regs->flags & X86_VM_MASK) | ||
639 | goto debug_vm86; | ||
640 | #endif | ||
641 | |||
642 | /* Save debug status register where ptrace can see it */ | ||
643 | tsk->thread.debugreg6 = condition; | ||
644 | |||
645 | /* | ||
646 | * Single-stepping through TF: make sure we ignore any events in | ||
647 | * kernel space (but re-enable TF when returning to user mode). | ||
648 | */ | ||
649 | if (condition & DR_STEP) { | ||
650 | if (!user_mode(regs)) | ||
651 | goto clear_TF_reenable; | ||
652 | } | ||
653 | |||
654 | si_code = get_si_code(condition); | ||
655 | /* Ok, finally something we can handle */ | ||
656 | send_sigtrap(tsk, regs, error_code, si_code); | ||
657 | |||
658 | /* | ||
659 | * Disable additional traps. They'll be re-enabled when | ||
660 | * the signal is delivered. | ||
661 | */ | ||
662 | clear_dr7: | ||
663 | set_debugreg(0, 7); | ||
664 | preempt_conditional_cli(regs); | ||
665 | return; | ||
666 | |||
667 | #ifdef CONFIG_X86_32 | ||
668 | debug_vm86: | ||
669 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
670 | preempt_conditional_cli(regs); | ||
671 | return; | ||
672 | #endif | ||
673 | |||
674 | clear_TF_reenable: | ||
675 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
676 | regs->flags &= ~X86_EFLAGS_TF; | ||
677 | preempt_conditional_cli(regs); | ||
678 | return; | ||
679 | } | ||
680 | |||
681 | #ifdef CONFIG_X86_64 | ||
682 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
683 | { | ||
684 | if (fixup_exception(regs)) | ||
685 | return 1; | ||
686 | |||
687 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
688 | /* Illegal floating point operation in the kernel */ | ||
689 | current->thread.trap_no = trapnr; | ||
690 | die(str, regs, 0); | ||
691 | return 0; | ||
692 | } | ||
693 | #endif | ||
694 | |||
695 | /* | ||
696 | * Note that we play around with the 'TS' bit in an attempt to get | ||
697 | * the correct behaviour even in the presence of the asynchronous | ||
698 | * IRQ13 behaviour | ||
699 | */ | ||
700 | void math_error(void __user *ip) | ||
701 | { | ||
702 | struct task_struct *task; | ||
703 | siginfo_t info; | ||
704 | unsigned short cwd, swd; | ||
705 | |||
706 | /* | ||
707 | * Save the info for the exception handler and clear the error. | ||
708 | */ | ||
709 | task = current; | ||
710 | save_init_fpu(task); | ||
711 | task->thread.trap_no = 16; | ||
712 | task->thread.error_code = 0; | ||
713 | info.si_signo = SIGFPE; | ||
714 | info.si_errno = 0; | ||
715 | info.si_code = __SI_FAULT; | ||
716 | info.si_addr = ip; | ||
717 | /* | ||
718 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | ||
719 | * status. 0x3f is the exception bits in these regs, 0x200 is the | ||
720 | * C1 reg you need in case of a stack fault, 0x040 is the stack | ||
721 | * fault bit. We should only be taking one exception at a time, | ||
722 | * so if this combination doesn't produce any single exception, | ||
723 | * then we have a bad program that isn't synchronizing its FPU usage | ||
724 | * and it will suffer the consequences since we won't be able to | ||
725 | * fully reproduce the context of the exception | ||
726 | */ | ||
727 | cwd = get_fpu_cwd(task); | ||
728 | swd = get_fpu_swd(task); | ||
729 | switch (swd & ~cwd & 0x3f) { | ||
730 | case 0x000: /* No unmasked exception */ | ||
731 | #ifdef CONFIG_X86_32 | ||
732 | return; | ||
733 | #endif | ||
734 | default: /* Multiple exceptions */ | ||
735 | break; | ||
736 | case 0x001: /* Invalid Op */ | ||
737 | /* | ||
738 | * swd & 0x240 == 0x040: Stack Underflow | ||
739 | * swd & 0x240 == 0x240: Stack Overflow | ||
740 | * User must clear the SF bit (0x40) if set | ||
741 | */ | ||
742 | info.si_code = FPE_FLTINV; | ||
743 | break; | ||
744 | case 0x002: /* Denormalize */ | ||
745 | case 0x010: /* Underflow */ | ||
746 | info.si_code = FPE_FLTUND; | ||
747 | break; | ||
748 | case 0x004: /* Zero Divide */ | ||
749 | info.si_code = FPE_FLTDIV; | ||
750 | break; | ||
751 | case 0x008: /* Overflow */ | ||
752 | info.si_code = FPE_FLTOVF; | ||
753 | break; | ||
754 | case 0x020: /* Precision */ | ||
755 | info.si_code = FPE_FLTRES; | ||
756 | break; | ||
757 | } | ||
758 | force_sig_info(SIGFPE, &info, task); | ||
759 | } | ||
760 | |||
761 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | ||
762 | { | ||
763 | conditional_sti(regs); | ||
764 | |||
765 | #ifdef CONFIG_X86_32 | ||
766 | ignore_fpu_irq = 1; | ||
767 | #else | ||
768 | if (!user_mode(regs) && | ||
769 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
770 | return; | ||
771 | #endif | ||
772 | |||
773 | math_error((void __user *)regs->ip); | ||
774 | } | ||
775 | |||
776 | static void simd_math_error(void __user *ip) | ||
777 | { | ||
778 | struct task_struct *task; | ||
779 | siginfo_t info; | ||
780 | unsigned short mxcsr; | ||
781 | |||
782 | /* | ||
783 | * Save the info for the exception handler and clear the error. | ||
784 | */ | ||
785 | task = current; | ||
786 | save_init_fpu(task); | ||
787 | task->thread.trap_no = 19; | ||
788 | task->thread.error_code = 0; | ||
789 | info.si_signo = SIGFPE; | ||
790 | info.si_errno = 0; | ||
791 | info.si_code = __SI_FAULT; | ||
792 | info.si_addr = ip; | ||
793 | /* | ||
794 | * The SIMD FPU exceptions are handled a little differently, as there | ||
795 | * is only a single status/control register. Thus, to determine which | ||
796 | * unmasked exception was caught we must mask the exception mask bits | ||
797 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
798 | */ | ||
799 | mxcsr = get_fpu_mxcsr(task); | ||
800 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
801 | case 0x000: | ||
802 | default: | ||
803 | break; | ||
804 | case 0x001: /* Invalid Op */ | ||
805 | info.si_code = FPE_FLTINV; | ||
806 | break; | ||
807 | case 0x002: /* Denormalize */ | ||
808 | case 0x010: /* Underflow */ | ||
809 | info.si_code = FPE_FLTUND; | ||
810 | break; | ||
811 | case 0x004: /* Zero Divide */ | ||
812 | info.si_code = FPE_FLTDIV; | ||
813 | break; | ||
814 | case 0x008: /* Overflow */ | ||
815 | info.si_code = FPE_FLTOVF; | ||
816 | break; | ||
817 | case 0x020: /* Precision */ | ||
818 | info.si_code = FPE_FLTRES; | ||
819 | break; | ||
820 | } | ||
821 | force_sig_info(SIGFPE, &info, task); | ||
822 | } | ||
823 | |||
824 | dotraplinkage void | ||
825 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | ||
826 | { | ||
827 | conditional_sti(regs); | ||
828 | |||
829 | #ifdef CONFIG_X86_32 | ||
830 | if (cpu_has_xmm) { | ||
831 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | ||
832 | ignore_fpu_irq = 1; | ||
833 | simd_math_error((void __user *)regs->ip); | ||
834 | return; | ||
835 | } | ||
836 | /* | ||
837 | * Handle strange cache flush from user space exception | ||
838 | * in all other cases. This is undocumented behaviour. | ||
839 | */ | ||
840 | if (regs->flags & X86_VM_MASK) { | ||
841 | handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); | ||
842 | return; | ||
843 | } | ||
844 | current->thread.trap_no = 19; | ||
845 | current->thread.error_code = error_code; | ||
846 | die_if_kernel("cache flush denied", regs, error_code); | ||
847 | force_sig(SIGSEGV, current); | ||
848 | #else | ||
849 | if (!user_mode(regs) && | ||
850 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
851 | return; | ||
852 | simd_math_error((void __user *)regs->ip); | ||
853 | #endif | ||
854 | } | ||
855 | |||
856 | dotraplinkage void | ||
857 | do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | ||
858 | { | ||
859 | conditional_sti(regs); | ||
860 | #if 0 | ||
861 | /* No need to warn about this any longer. */ | ||
862 | printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); | ||
863 | #endif | ||
864 | } | ||
865 | |||
866 | #ifdef CONFIG_X86_32 | ||
867 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | ||
868 | { | ||
869 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); | ||
870 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | ||
871 | unsigned long new_kesp = kesp - base; | ||
872 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | ||
873 | __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; | ||
874 | |||
875 | /* Set up base for espfix segment */ | ||
876 | desc &= 0x00f0ff0000000000ULL; | ||
877 | desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | | ||
878 | ((((__u64)base) << 32) & 0xff00000000000000ULL) | | ||
879 | ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | | ||
880 | (lim_pages & 0xffff); | ||
881 | *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; | ||
882 | |||
883 | return new_kesp; | ||
884 | } | ||
885 | #else | ||
886 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | ||
887 | { | ||
888 | } | ||
889 | |||
890 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
891 | { | ||
892 | } | ||
893 | #endif | ||
894 | |||
895 | /* | ||
896 | * 'math_state_restore()' saves the current math information in the | ||
897 | * old math state array, and gets the new ones from the current task | ||
898 | * | ||
899 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | ||
900 | * Don't touch unless you *really* know how it works. | ||
901 | * | ||
902 | * Must be called with kernel preemption disabled (in this case, | ||
903 | * local interrupts are disabled at the call-site in entry.S). | ||
904 | */ | ||
905 | asmlinkage void math_state_restore(void) | ||
906 | { | ||
907 | struct thread_info *thread = current_thread_info(); | ||
908 | struct task_struct *tsk = thread->task; | ||
909 | |||
910 | if (!tsk_used_math(tsk)) { | ||
911 | local_irq_enable(); | ||
912 | /* | ||
913 | * does a slab alloc which can sleep | ||
914 | */ | ||
915 | if (init_fpu(tsk)) { | ||
916 | /* | ||
917 | * ran out of memory! | ||
918 | */ | ||
919 | do_group_exit(SIGKILL); | ||
920 | return; | ||
921 | } | ||
922 | local_irq_disable(); | ||
923 | } | ||
924 | |||
925 | clts(); /* Allow maths ops (or we recurse) */ | ||
926 | #ifdef CONFIG_X86_32 | ||
927 | restore_fpu(tsk); | ||
928 | #else | ||
929 | /* | ||
930 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
931 | */ | ||
932 | if (unlikely(restore_fpu_checking(tsk))) { | ||
933 | stts(); | ||
934 | force_sig(SIGSEGV, tsk); | ||
935 | return; | ||
936 | } | ||
937 | #endif | ||
938 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | ||
939 | tsk->fpu_counter++; | ||
940 | } | ||
941 | EXPORT_SYMBOL_GPL(math_state_restore); | ||
942 | |||
943 | #ifndef CONFIG_MATH_EMULATION | ||
944 | asmlinkage void math_emulate(long arg) | ||
945 | { | ||
946 | printk(KERN_EMERG | ||
947 | "math-emulation not enabled and no coprocessor found.\n"); | ||
948 | printk(KERN_EMERG "killing %s.\n", current->comm); | ||
949 | force_sig(SIGFPE, current); | ||
950 | schedule(); | ||
951 | } | ||
952 | #endif /* CONFIG_MATH_EMULATION */ | ||
953 | |||
954 | dotraplinkage void __kprobes | ||
955 | do_device_not_available(struct pt_regs *regs, long error) | ||
956 | { | ||
957 | #ifdef CONFIG_X86_32 | ||
958 | if (read_cr0() & X86_CR0_EM) { | ||
959 | conditional_sti(regs); | ||
960 | math_emulate(0); | ||
961 | } else { | ||
962 | math_state_restore(); /* interrupts still off */ | ||
963 | conditional_sti(regs); | ||
964 | } | ||
965 | #else | ||
966 | math_state_restore(); | ||
967 | #endif | ||
968 | } | ||
969 | |||
970 | #ifdef CONFIG_X86_32 | ||
971 | #ifdef CONFIG_X86_MCE | ||
972 | dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error) | ||
973 | { | ||
974 | conditional_sti(regs); | ||
975 | machine_check_vector(regs, error); | ||
976 | } | ||
977 | #endif | ||
978 | |||
979 | dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | ||
980 | { | ||
981 | siginfo_t info; | ||
982 | local_irq_enable(); | ||
983 | |||
984 | info.si_signo = SIGILL; | ||
985 | info.si_errno = 0; | ||
986 | info.si_code = ILL_BADSTK; | ||
987 | info.si_addr = 0; | ||
988 | if (notify_die(DIE_TRAP, "iret exception", | ||
989 | regs, error_code, 32, SIGILL) == NOTIFY_STOP) | ||
990 | return; | ||
991 | do_trap(32, SIGILL, "iret exception", regs, error_code, &info); | ||
992 | } | ||
993 | #endif | ||
994 | |||
995 | void __init trap_init(void) | ||
996 | { | ||
997 | #ifdef CONFIG_X86_32 | ||
998 | int i; | ||
999 | #endif | ||
1000 | |||
1001 | #ifdef CONFIG_EISA | ||
1002 | void __iomem *p = early_ioremap(0x0FFFD9, 4); | ||
1003 | |||
1004 | if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) | ||
1005 | EISA_bus = 1; | ||
1006 | early_iounmap(p, 4); | ||
1007 | #endif | ||
1008 | |||
1009 | set_intr_gate(0, &divide_error); | ||
1010 | set_intr_gate_ist(1, &debug, DEBUG_STACK); | ||
1011 | set_intr_gate_ist(2, &nmi, NMI_STACK); | ||
1012 | /* int3 can be called from all */ | ||
1013 | set_system_intr_gate_ist(3, &int3, DEBUG_STACK); | ||
1014 | /* int4 can be called from all */ | ||
1015 | set_system_intr_gate(4, &overflow); | ||
1016 | set_intr_gate(5, &bounds); | ||
1017 | set_intr_gate(6, &invalid_op); | ||
1018 | set_intr_gate(7, &device_not_available); | ||
1019 | #ifdef CONFIG_X86_32 | ||
1020 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); | ||
1021 | #else | ||
1022 | set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); | ||
1023 | #endif | ||
1024 | set_intr_gate(9, &coprocessor_segment_overrun); | ||
1025 | set_intr_gate(10, &invalid_TSS); | ||
1026 | set_intr_gate(11, &segment_not_present); | ||
1027 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); | ||
1028 | set_intr_gate(13, &general_protection); | ||
1029 | set_intr_gate(14, &page_fault); | ||
1030 | set_intr_gate(15, &spurious_interrupt_bug); | ||
1031 | set_intr_gate(16, &coprocessor_error); | ||
1032 | set_intr_gate(17, &alignment_check); | ||
1033 | #ifdef CONFIG_X86_MCE | ||
1034 | set_intr_gate_ist(18, &machine_check, MCE_STACK); | ||
1035 | #endif | ||
1036 | set_intr_gate(19, &simd_coprocessor_error); | ||
1037 | |||
1038 | #ifdef CONFIG_IA32_EMULATION | ||
1039 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | ||
1040 | #endif | ||
1041 | |||
1042 | #ifdef CONFIG_X86_32 | ||
1043 | if (cpu_has_fxsr) { | ||
1044 | printk(KERN_INFO "Enabling fast FPU save and restore... "); | ||
1045 | set_in_cr4(X86_CR4_OSFXSR); | ||
1046 | printk("done.\n"); | ||
1047 | } | ||
1048 | if (cpu_has_xmm) { | ||
1049 | printk(KERN_INFO | ||
1050 | "Enabling unmasked SIMD FPU exception support... "); | ||
1051 | set_in_cr4(X86_CR4_OSXMMEXCPT); | ||
1052 | printk("done.\n"); | ||
1053 | } | ||
1054 | |||
1055 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | ||
1056 | |||
1057 | /* Reserve all the builtin and the syscall vector: */ | ||
1058 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
1059 | set_bit(i, used_vectors); | ||
1060 | |||
1061 | set_bit(SYSCALL_VECTOR, used_vectors); | ||
1062 | #endif | ||
1063 | /* | ||
1064 | * Should be a barrier for any external CPU state: | ||
1065 | */ | ||
1066 | cpu_init(); | ||
1067 | |||
1068 | #ifdef CONFIG_X86_32 | ||
1069 | trap_init_hook(); | ||
1070 | #endif | ||
1071 | } | ||
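
For readers skimming the unified file: the DO_ERROR() and DO_ERROR_INFO() macros near the top generate one handler per simple trap. Expanded by hand for the vector-10 entry, DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) produces a handler equivalent to the following sketch (all names taken from the file above):

    dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code)
    {
    	/* Let registered notifiers (kprobes, debuggers) swallow the trap first. */
    	if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code, 10, SIGSEGV)
    			== NOTIFY_STOP)
    		return;
    	/* Re-enable interrupts if the interrupted context had them enabled. */
    	conditional_sti(regs);
    	/* Common path: record trap_no/error_code and deliver SIGSEGV or die(). */
    	do_trap(10, SIGSEGV, "invalid TSS", regs, error_code, NULL);
    }

The DO_ERROR_INFO() variant additionally fills in a siginfo_t (si_code, si_addr) before calling do_trap().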