aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/alternative.c2
-rw-r--r--arch/x86/kernel/apic_32.c4
-rw-r--r--arch/x86/kernel/apic_64.c4
-rw-r--r--arch/x86/kernel/cpu/common.c45
-rw-r--r--arch/x86/kernel/doublefault_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_32.c447
-rw-r--r--arch/x86/kernel/dumpstack_64.c573
-rw-r--r--arch/x86/kernel/entry_32.S22
-rw-r--r--arch/x86/kernel/entry_64.S15
-rw-r--r--arch/x86/kernel/es7000_32.c28
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c23
-rw-r--r--arch/x86/kernel/head.c1
-rw-r--r--arch/x86/kernel/hpet.c6
-rw-r--r--arch/x86/kernel/irqinit_64.c43
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c7
-rw-r--r--arch/x86/kernel/quirks.c41
-rw-r--r--arch/x86/kernel/setup.c10
-rw-r--r--arch/x86/kernel/smpboot.c87
-rw-r--r--arch/x86/kernel/time_32.c7
-rw-r--r--arch/x86/kernel/time_64.c23
-rw-r--r--arch/x86/kernel/traps.c (renamed from arch/x86/kernel/traps_32.c)893
-rw-r--r--arch/x86/kernel/traps_64.c1214
24 files changed, 1596 insertions, 1907 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5098585f87ce..0d41f0343dc0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp)
23CFLAGS_tsc.o := $(nostackp) 23CFLAGS_tsc.o := $(nostackp)
24 24
25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 25obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
26obj-y += traps_$(BITS).o irq_$(BITS).o 26obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o
27obj-y += time_$(BITS).o ioport.o ldt.o 27obj-y += time_$(BITS).o ioport.o ldt.o
28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 28obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
29obj-$(CONFIG_X86_VISWS) += visws_quirks.o 29obj-$(CONFIG_X86_VISWS) += visws_quirks.o
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index fb04e49776ba..a84ac7b570e6 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -444,7 +444,7 @@ void __init alternative_instructions(void)
444 _text, _etext); 444 _text, _etext);
445 445
446 /* Only switch to UP mode if we don't immediately boot others */ 446 /* Only switch to UP mode if we don't immediately boot others */
447 if (num_possible_cpus() == 1 || setup_max_cpus <= 1) 447 if (num_present_cpus() == 1 || setup_max_cpus <= 1)
448 alternatives_smp_switch(0); 448 alternatives_smp_switch(0);
449 } 449 }
450#endif 450#endif
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index a91c57cb666a..21c831d96af3 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -295,6 +295,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
295 * 295 *
296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and 296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
297 * MCE interrupts are supported. Thus MCE offset must be set to 0. 297 * MCE interrupts are supported. Thus MCE offset must be set to 0.
298 *
299 * If mask=1, the LVT entry does not generate interrupts while mask=0
300 * enables the vector. See also the BKDGs.
298 */ 301 */
299 302
300#define APIC_EILVT_LVTOFF_MCE 0 303#define APIC_EILVT_LVTOFF_MCE 0
@@ -319,6 +322,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
319 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); 322 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
320 return APIC_EILVT_LVTOFF_IBS; 323 return APIC_EILVT_LVTOFF_IBS;
321} 324}
325EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
322 326
323/* 327/*
324 * Program the next event, relative to now 328 * Program the next event, relative to now
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 53898b65a6ae..94ddb69ae15e 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -307,6 +307,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
307 * 307 *
308 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and 308 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
309 * MCE interrupts are supported. Thus MCE offset must be set to 0. 309 * MCE interrupts are supported. Thus MCE offset must be set to 0.
310 *
311 * If mask=1, the LVT entry does not generate interrupts while mask=0
312 * enables the vector. See also the BKDGs.
310 */ 313 */
311 314
312#define APIC_EILVT_LVTOFF_MCE 0 315#define APIC_EILVT_LVTOFF_MCE 0
@@ -331,6 +334,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
331 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); 334 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
332 return APIC_EILVT_LVTOFF_IBS; 335 return APIC_EILVT_LVTOFF_IBS;
333} 336}
337EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
334 338
335/* 339/*
336 * Program the next event, relative to now 340 * Program the next event, relative to now
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fb789dd9e691..25581dcb280e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -124,18 +124,25 @@ static inline int flag_is_changeable_p(u32 flag)
124{ 124{
125 u32 f1, f2; 125 u32 f1, f2;
126 126
127 asm("pushfl\n\t" 127 /*
128 "pushfl\n\t" 128 * Cyrix and IDT cpus allow disabling of CPUID
129 "popl %0\n\t" 129 * so the code below may return different results
130 "movl %0,%1\n\t" 130 * when it is executed before and after enabling
131 "xorl %2,%0\n\t" 131 * the CPUID. Add "volatile" to not allow gcc to
132 "pushl %0\n\t" 132 * optimize the subsequent calls to this function.
133 "popfl\n\t" 133 */
134 "pushfl\n\t" 134 asm volatile ("pushfl\n\t"
135 "popl %0\n\t" 135 "pushfl\n\t"
136 "popfl\n\t" 136 "popl %0\n\t"
137 : "=&r" (f1), "=&r" (f2) 137 "movl %0,%1\n\t"
138 : "ir" (flag)); 138 "xorl %2,%0\n\t"
139 "pushl %0\n\t"
140 "popfl\n\t"
141 "pushfl\n\t"
142 "popl %0\n\t"
143 "popfl\n\t"
144 : "=&r" (f1), "=&r" (f2)
145 : "ir" (flag));
139 146
140 return ((f1^f2) & flag) != 0; 147 return ((f1^f2) & flag) != 0;
141} 148}
@@ -719,12 +726,24 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
719#endif 726#endif
720} 727}
721 728
729#ifdef CONFIG_X86_64
730static void vgetcpu_set_mode(void)
731{
732 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
733 vgetcpu_mode = VGETCPU_RDTSCP;
734 else
735 vgetcpu_mode = VGETCPU_LSL;
736}
737#endif
738
722void __init identify_boot_cpu(void) 739void __init identify_boot_cpu(void)
723{ 740{
724 identify_cpu(&boot_cpu_data); 741 identify_cpu(&boot_cpu_data);
725#ifdef CONFIG_X86_32 742#ifdef CONFIG_X86_32
726 sysenter_setup(); 743 sysenter_setup();
727 enable_sep_cpu(); 744 enable_sep_cpu();
745#else
746 vgetcpu_set_mode();
728#endif 747#endif
729} 748}
730 749
@@ -797,7 +816,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
797 else if (c->cpuid_level >= 0) 816 else if (c->cpuid_level >= 0)
798 vendor = c->x86_vendor_id; 817 vendor = c->x86_vendor_id;
799 818
800 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) 819 if (vendor && !strstr(c->x86_model_id, vendor))
801 printk(KERN_CONT "%s ", vendor); 820 printk(KERN_CONT "%s ", vendor);
802 821
803 if (c->x86_model_id[0]) 822 if (c->x86_model_id[0])
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index 395acb12b0d1..b4f14c6c09d9 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
66 .ds = __USER_DS, 66 .ds = __USER_DS,
67 .fs = __KERNEL_PERCPU, 67 .fs = __KERNEL_PERCPU,
68 68
69 .__cr3 = __phys_addr_const((unsigned long)swapper_pg_dir) 69 .__cr3 = __pa_nodebug(swapper_pg_dir),
70 } 70 }
71}; 71};
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
new file mode 100644
index 000000000000..201ee359a1a9
--- /dev/null
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -0,0 +1,447 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16
17#include <asm/stacktrace.h>
18
19#define STACKSLOTS_PER_LINE 8
20#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
21
22int panic_on_unrecovered_nmi;
23int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
24static unsigned int code_bytes = 64;
25static int die_counter;
26
/*
 * Emit a single trace entry: the raw address in brackets followed by its
 * symbolic form (%pS).  Entries not flagged as reliable get a "? " marker.
 */
void printk_address(unsigned long address, int reliable)
{
	void *ptr = (void *) address;
	const char *marker = reliable ? "" : "? ";

	printk(" [<%p>] %s%pS\n", ptr, marker, ptr);
}
32
33static inline int valid_stack_ptr(struct thread_info *tinfo,
34 void *p, unsigned int size, void *end)
35{
36 void *t = tinfo;
37 if (end) {
38 if (p < end && p >= (end-THREAD_SIZE))
39 return 1;
40 else
41 return 0;
42 }
43 return p > t && p < t + THREAD_SIZE - size;
44}
45
/*
 * Layout of the top of a frame on the stack, as walked by
 * print_context_stack() below.
 */
struct stack_frame {
	struct stack_frame *next_frame;	/* link to the next frame in the chain */
	unsigned long return_address;	/* word stored right after the link */
};
51
52static inline unsigned long
53print_context_stack(struct thread_info *tinfo,
54 unsigned long *stack, unsigned long bp,
55 const struct stacktrace_ops *ops, void *data,
56 unsigned long *end)
57{
58 struct stack_frame *frame = (struct stack_frame *)bp;
59
60 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
61 unsigned long addr;
62
63 addr = *stack;
64 if (__kernel_text_address(addr)) {
65 if ((unsigned long) stack == bp + sizeof(long)) {
66 ops->address(data, addr, 1);
67 frame = frame->next_frame;
68 bp = (unsigned long) frame;
69 } else {
70 ops->address(data, addr, bp == 0);
71 }
72 }
73 stack++;
74 }
75 return bp;
76}
77
78void dump_trace(struct task_struct *task, struct pt_regs *regs,
79 unsigned long *stack, unsigned long bp,
80 const struct stacktrace_ops *ops, void *data)
81{
82 if (!task)
83 task = current;
84
85 if (!stack) {
86 unsigned long dummy;
87 stack = &dummy;
88 if (task && task != current)
89 stack = (unsigned long *)task->thread.sp;
90 }
91
92#ifdef CONFIG_FRAME_POINTER
93 if (!bp) {
94 if (task == current) {
95 /* Grab bp right from our regs */
96 get_bp(bp);
97 } else {
98 /* bp is the last reg pushed by switch_to */
99 bp = *(unsigned long *) task->thread.sp;
100 }
101 }
102#endif
103
104 for (;;) {
105 struct thread_info *context;
106
107 context = (struct thread_info *)
108 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
109 bp = print_context_stack(context, stack, bp, ops, data, NULL);
110
111 stack = (unsigned long *)context->previous_esp;
112 if (!stack)
113 break;
114 if (ops->stack(data, "IRQ") < 0)
115 break;
116 touch_nmi_watchdog();
117 }
118}
119EXPORT_SYMBOL(dump_trace);
120
/*
 * stacktrace_ops->warning_symbol callback: print the log-level prefix held
 * in @data, then @msg expanded by print_symbol() for @symbol, then a newline.
 */
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* @data is text, not a format: never hand it to printk() as one */
	printk("%s", (char *)data);
	print_symbol(msg, symbol);
	printk("\n");
}
128
/*
 * stacktrace_ops->warning callback: print @msg on its own line, prefixed
 * with the log-level string carried in @data.
 */
static void print_trace_warning(void *data, char *msg)
{
	const char *log_lvl = data;

	printk("%s%s\n", log_lvl, msg);
}
133
/*
 * stacktrace_ops->stack callback: announce a switch to the stack called
 * @name.  Always returns 0, so the walk is never aborted from here.
 */
static int print_trace_stack(void *data, char *name)
{
	const char *log_lvl = data;

	printk("%s <%s> ", log_lvl, name);
	return 0;
}
139
/*
 * stacktrace_ops->address callback: print one address/symbol entry per
 * line, prefixed with the log-level string carried in @data.
 */
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
	touch_nmi_watchdog();
	/* @data is text, not a format: never hand it to printk() as one */
	printk("%s", (char *)data);
	printk_address(addr, reliable);
}
149
150static const struct stacktrace_ops print_trace_ops = {
151 .warning = print_trace_warning,
152 .warning_symbol = print_trace_warning_symbol,
153 .stack = print_trace_stack,
154 .address = print_trace_address,
155};
156
157static void
158show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
159 unsigned long *stack, unsigned long bp, char *log_lvl)
160{
161 printk("%sCall Trace:\n", log_lvl);
162 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
163}
164
/* Print a call trace with an empty log-level prefix */
void show_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp)
{
	char *no_prefix = "";

	show_trace_log_lvl(task, regs, stack, bp, no_prefix);
}
170
171static void
172show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
173 unsigned long *sp, unsigned long bp, char *log_lvl)
174{
175 unsigned long *stack;
176 int i;
177
178 if (sp == NULL) {
179 if (task)
180 sp = (unsigned long *)task->thread.sp;
181 else
182 sp = (unsigned long *)&sp;
183 }
184
185 stack = sp;
186 for (i = 0; i < kstack_depth_to_print; i++) {
187 if (kstack_end(stack))
188 break;
189 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
190 printk("\n%s", log_lvl);
191 printk(" %08lx", *stack++);
192 touch_nmi_watchdog();
193 }
194 printk("\n");
195 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
196}
197
198void show_stack(struct task_struct *task, unsigned long *sp)
199{
200 show_stack_log_lvl(task, NULL, sp, 0, "");
201}
202
203/*
204 * The architecture-independent dump_stack generator
205 */
206void dump_stack(void)
207{
208 unsigned long bp = 0;
209 unsigned long stack;
210
211#ifdef CONFIG_FRAME_POINTER
212 if (!bp)
213 get_bp(bp);
214#endif
215
216 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
217 current->pid, current->comm, print_tainted(),
218 init_utsname()->release,
219 (int)strcspn(init_utsname()->version, " "),
220 init_utsname()->version);
221 show_trace(NULL, NULL, &stack, bp);
222}
223
224EXPORT_SYMBOL(dump_stack);
225
226void show_registers(struct pt_regs *regs)
227{
228 int i;
229
230 print_modules();
231 __show_regs(regs, 0);
232
233 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
234 TASK_COMM_LEN, current->comm, task_pid_nr(current),
235 current_thread_info(), current, task_thread_info(current));
236 /*
237 * When in-kernel, we also print out the stack and code at the
238 * time of the fault..
239 */
240 if (!user_mode_vm(regs)) {
241 unsigned int code_prologue = code_bytes * 43 / 64;
242 unsigned int code_len = code_bytes;
243 unsigned char c;
244 u8 *ip;
245
246 printk(KERN_EMERG "Stack:\n");
247 show_stack_log_lvl(NULL, regs, &regs->sp,
248 0, KERN_EMERG);
249
250 printk(KERN_EMERG "Code: ");
251
252 ip = (u8 *)regs->ip - code_prologue;
253 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
254 /* try starting at IP */
255 ip = (u8 *)regs->ip;
256 code_len = code_len - code_prologue + 1;
257 }
258 for (i = 0; i < code_len; i++, ip++) {
259 if (ip < (u8 *)PAGE_OFFSET ||
260 probe_kernel_address(ip, c)) {
261 printk(" Bad EIP value.");
262 break;
263 }
264 if (ip == (u8 *)regs->ip)
265 printk("<%02x> ", c);
266 else
267 printk("%02x ", c);
268 }
269 }
270 printk("\n");
271}
272
273int is_valid_bugaddr(unsigned long ip)
274{
275 unsigned short ud2;
276
277 if (ip < PAGE_OFFSET)
278 return 0;
279 if (probe_kernel_address((unsigned short *)ip, ud2))
280 return 0;
281
282 return ud2 == 0x0b0f;
283}
284
285static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
286static int die_owner = -1;
287static unsigned int die_nest_count;
288
289unsigned __kprobes long oops_begin(void)
290{
291 unsigned long flags;
292
293 oops_enter();
294
295 if (die_owner != raw_smp_processor_id()) {
296 console_verbose();
297 raw_local_irq_save(flags);
298 __raw_spin_lock(&die_lock);
299 die_owner = smp_processor_id();
300 die_nest_count = 0;
301 bust_spinlocks(1);
302 } else {
303 raw_local_irq_save(flags);
304 }
305 die_nest_count++;
306 return flags;
307}
308
309void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
310{
311 bust_spinlocks(0);
312 die_owner = -1;
313 add_taint(TAINT_DIE);
314 __raw_spin_unlock(&die_lock);
315 raw_local_irq_restore(flags);
316
317 if (!regs)
318 return;
319
320 if (kexec_should_crash(current))
321 crash_kexec(regs);
322 if (in_interrupt())
323 panic("Fatal exception in interrupt");
324 if (panic_on_oops)
325 panic("Fatal exception");
326 oops_exit();
327 do_exit(signr);
328}
329
330int __kprobes __die(const char *str, struct pt_regs *regs, long err)
331{
332 unsigned short ss;
333 unsigned long sp;
334
335 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
336#ifdef CONFIG_PREEMPT
337 printk("PREEMPT ");
338#endif
339#ifdef CONFIG_SMP
340 printk("SMP ");
341#endif
342#ifdef CONFIG_DEBUG_PAGEALLOC
343 printk("DEBUG_PAGEALLOC");
344#endif
345 printk("\n");
346 if (notify_die(DIE_OOPS, str, regs, err,
347 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
348 return 1;
349
350 show_registers(regs);
351 /* Executive summary in case the oops scrolled away */
352 sp = (unsigned long) (&regs->sp);
353 savesegment(ss, ss);
354 if (user_mode(regs)) {
355 sp = regs->sp;
356 ss = regs->ss & 0xffff;
357 }
358 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
359 print_symbol("%s", regs->ip);
360 printk(" SS:ESP %04x:%08lx\n", ss, sp);
361 return 0;
362}
363
364/*
365 * This is gone through when something in the kernel has done something bad
366 * and is about to be terminated:
367 */
368void die(const char *str, struct pt_regs *regs, long err)
369{
370 unsigned long flags = oops_begin();
371
372 if (die_nest_count < 3) {
373 report_bug(regs->ip, regs);
374
375 if (__die(str, regs, err))
376 regs = NULL;
377 } else {
378 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
379 }
380
381 oops_end(flags, regs, SIGSEGV);
382}
383
384static DEFINE_SPINLOCK(nmi_print_lock);
385
386void notrace __kprobes
387die_nmi(char *str, struct pt_regs *regs, int do_panic)
388{
389 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
390 return;
391
392 spin_lock(&nmi_print_lock);
393 /*
394 * We are in trouble anyway, lets at least try
395 * to get a message out:
396 */
397 bust_spinlocks(1);
398 printk(KERN_EMERG "%s", str);
399 printk(" on CPU%d, ip %08lx, registers:\n",
400 smp_processor_id(), regs->ip);
401 show_registers(regs);
402 if (do_panic)
403 panic("Non maskable interrupt");
404 console_silent();
405 spin_unlock(&nmi_print_lock);
406 bust_spinlocks(0);
407
408 /*
409 * If we are in kernel we are probably nested up pretty bad
410 * and might aswell get out now while we still can:
411 */
412 if (!user_mode_vm(regs)) {
413 current->thread.trap_no = 2;
414 crash_kexec(regs);
415 }
416
417 do_exit(SIGSEGV);
418}
419
420static int __init oops_setup(char *s)
421{
422 if (!s)
423 return -EINVAL;
424 if (!strcmp(s, "panic"))
425 panic_on_oops = 1;
426 return 0;
427}
428early_param("oops", oops_setup);
429
430static int __init kstack_setup(char *s)
431{
432 if (!s)
433 return -EINVAL;
434 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
435 return 0;
436}
437early_param("kstack", kstack_setup);
438
439static int __init code_bytes_setup(char *s)
440{
441 code_bytes = simple_strtoul(s, NULL, 0);
442 if (code_bytes > 8192)
443 code_bytes = 8192;
444
445 return 1;
446}
447__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
new file mode 100644
index 000000000000..086cc8118e39
--- /dev/null
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -0,0 +1,573 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 */
5#include <linux/kallsyms.h>
6#include <linux/kprobes.h>
7#include <linux/uaccess.h>
8#include <linux/utsname.h>
9#include <linux/hardirq.h>
10#include <linux/kdebug.h>
11#include <linux/module.h>
12#include <linux/ptrace.h>
13#include <linux/kexec.h>
14#include <linux/bug.h>
15#include <linux/nmi.h>
16
17#include <asm/stacktrace.h>
18
19#define STACKSLOTS_PER_LINE 4
20#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
21
22int panic_on_unrecovered_nmi;
23int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
24static unsigned int code_bytes = 64;
25static int die_counter;
26
/*
 * Emit a single trace entry: the raw address in brackets followed by its
 * symbolic form (%pS).  Entries not flagged as reliable get a "? " marker.
 */
void printk_address(unsigned long address, int reliable)
{
	void *ptr = (void *) address;
	const char *marker = reliable ? "" : "? ";

	printk(" [<%p>] %s%pS\n", ptr, marker, ptr);
}
32
33static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
34 unsigned *usedp, char **idp)
35{
36 static char ids[][8] = {
37 [DEBUG_STACK - 1] = "#DB",
38 [NMI_STACK - 1] = "NMI",
39 [DOUBLEFAULT_STACK - 1] = "#DF",
40 [STACKFAULT_STACK - 1] = "#SS",
41 [MCE_STACK - 1] = "#MC",
42#if DEBUG_STKSZ > EXCEPTION_STKSZ
43 [N_EXCEPTION_STACKS ...
44 N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
45#endif
46 };
47 unsigned k;
48
49 /*
50 * Iterate over all exception stacks, and figure out whether
51 * 'stack' is in one of them:
52 */
53 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
54 unsigned long end = per_cpu(orig_ist, cpu).ist[k];
55 /*
56 * Is 'stack' above this exception frame's end?
57 * If yes then skip to the next frame.
58 */
59 if (stack >= end)
60 continue;
61 /*
62 * Is 'stack' above this exception frame's start address?
63 * If yes then we found the right frame.
64 */
65 if (stack >= end - EXCEPTION_STKSZ) {
66 /*
67 * Make sure we only iterate through an exception
68 * stack once. If it comes up for the second time
69 * then there's something wrong going on - just
70 * break out and return NULL:
71 */
72 if (*usedp & (1U << k))
73 break;
74 *usedp |= 1U << k;
75 *idp = ids[k];
76 return (unsigned long *)end;
77 }
78 /*
79 * If this is a debug stack, and if it has a larger size than
80 * the usual exception stacks, then 'stack' might still
81 * be within the lower portion of the debug stack:
82 */
83#if DEBUG_STKSZ > EXCEPTION_STKSZ
84 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
85 unsigned j = N_EXCEPTION_STACKS - 1;
86
87 /*
88 * Black magic. A large debug stack is composed of
89 * multiple exception stack entries, which we
90 * iterate through now. Dont look:
91 */
92 do {
93 ++j;
94 end -= EXCEPTION_STKSZ;
95 ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
96 } while (stack < end - EXCEPTION_STKSZ);
97 if (*usedp & (1U << j))
98 break;
99 *usedp |= 1U << j;
100 *idp = ids[j];
101 return (unsigned long *)end;
102 }
103#endif
104 }
105 return NULL;
106}
107
108/*
109 * x86-64 can have up to three kernel stacks:
110 * process stack
111 * interrupt stack
112 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
113 */
114
115static inline int valid_stack_ptr(struct thread_info *tinfo,
116 void *p, unsigned int size, void *end)
117{
118 void *t = tinfo;
119 if (end) {
120 if (p < end && p >= (end-THREAD_SIZE))
121 return 1;
122 else
123 return 0;
124 }
125 return p > t && p < t + THREAD_SIZE - size;
126}
127
/*
 * Layout of the top of a frame on the stack, as walked by
 * print_context_stack() below.
 */
struct stack_frame {
	struct stack_frame *next_frame;	/* link to the next frame in the chain */
	unsigned long return_address;	/* word stored right after the link */
};
133
134static inline unsigned long
135print_context_stack(struct thread_info *tinfo,
136 unsigned long *stack, unsigned long bp,
137 const struct stacktrace_ops *ops, void *data,
138 unsigned long *end)
139{
140 struct stack_frame *frame = (struct stack_frame *)bp;
141
142 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
143 unsigned long addr;
144
145 addr = *stack;
146 if (__kernel_text_address(addr)) {
147 if ((unsigned long) stack == bp + sizeof(long)) {
148 ops->address(data, addr, 1);
149 frame = frame->next_frame;
150 bp = (unsigned long) frame;
151 } else {
152 ops->address(data, addr, bp == 0);
153 }
154 }
155 stack++;
156 }
157 return bp;
158}
159
160void dump_trace(struct task_struct *task, struct pt_regs *regs,
161 unsigned long *stack, unsigned long bp,
162 const struct stacktrace_ops *ops, void *data)
163{
164 const unsigned cpu = get_cpu();
165 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
166 unsigned used = 0;
167 struct thread_info *tinfo;
168
169 if (!task)
170 task = current;
171
172 if (!stack) {
173 unsigned long dummy;
174 stack = &dummy;
175 if (task && task != current)
176 stack = (unsigned long *)task->thread.sp;
177 }
178
179#ifdef CONFIG_FRAME_POINTER
180 if (!bp) {
181 if (task == current) {
182 /* Grab bp right from our regs */
183 get_bp(bp);
184 } else {
185 /* bp is the last reg pushed by switch_to */
186 bp = *(unsigned long *) task->thread.sp;
187 }
188 }
189#endif
190
191 /*
192 * Print function call entries in all stacks, starting at the
193 * current stack address. If the stacks consist of nested
194 * exceptions
195 */
196 tinfo = task_thread_info(task);
197 for (;;) {
198 char *id;
199 unsigned long *estack_end;
200 estack_end = in_exception_stack(cpu, (unsigned long)stack,
201 &used, &id);
202
203 if (estack_end) {
204 if (ops->stack(data, id) < 0)
205 break;
206
207 bp = print_context_stack(tinfo, stack, bp, ops,
208 data, estack_end);
209 ops->stack(data, "<EOE>");
210 /*
211 * We link to the next stack via the
212 * second-to-last pointer (index -2 to end) in the
213 * exception stack:
214 */
215 stack = (unsigned long *) estack_end[-2];
216 continue;
217 }
218 if (irqstack_end) {
219 unsigned long *irqstack;
220 irqstack = irqstack_end -
221 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
222
223 if (stack >= irqstack && stack < irqstack_end) {
224 if (ops->stack(data, "IRQ") < 0)
225 break;
226 bp = print_context_stack(tinfo, stack, bp,
227 ops, data, irqstack_end);
228 /*
229 * We link to the next stack (which would be
230 * the process stack normally) the last
231 * pointer (index -1 to end) in the IRQ stack:
232 */
233 stack = (unsigned long *) (irqstack_end[-1]);
234 irqstack_end = NULL;
235 ops->stack(data, "EOI");
236 continue;
237 }
238 }
239 break;
240 }
241
242 /*
243 * This handles the process stack:
244 */
245 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
246 put_cpu();
247}
248EXPORT_SYMBOL(dump_trace);
249
/*
 * stacktrace_ops->warning_symbol callback: print the log-level prefix held
 * in @data, then @msg expanded by print_symbol() for @symbol, then a newline.
 */
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	/* @data is text, not a format: never hand it to printk() as one */
	printk("%s", (char *)data);
	print_symbol(msg, symbol);
	printk("\n");
}
257
/*
 * stacktrace_ops->warning callback: print @msg on its own line, prefixed
 * with the log-level string carried in @data.
 */
static void print_trace_warning(void *data, char *msg)
{
	const char *log_lvl = data;

	printk("%s%s\n", log_lvl, msg);
}
262
/*
 * stacktrace_ops->stack callback: announce a switch to the stack called
 * @name.  Always returns 0, so the walk is never aborted from here.
 */
static int print_trace_stack(void *data, char *name)
{
	const char *log_lvl = data;

	printk("%s <%s> ", log_lvl, name);
	return 0;
}
268
/*
 * stacktrace_ops->address callback: print one address/symbol entry per
 * line, prefixed with the log-level string carried in @data.
 */
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
	touch_nmi_watchdog();
	/* @data is text, not a format: never hand it to printk() as one */
	printk("%s", (char *)data);
	printk_address(addr, reliable);
}
278
279static const struct stacktrace_ops print_trace_ops = {
280 .warning = print_trace_warning,
281 .warning_symbol = print_trace_warning_symbol,
282 .stack = print_trace_stack,
283 .address = print_trace_address,
284};
285
286static void
287show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
288 unsigned long *stack, unsigned long bp, char *log_lvl)
289{
290 printk("%sCall Trace:\n", log_lvl);
291 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
292}
293
/* Print a call trace with an empty log-level prefix */
void show_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp)
{
	char *no_prefix = "";

	show_trace_log_lvl(task, regs, stack, bp, no_prefix);
}
299
300static void
301show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
302 unsigned long *sp, unsigned long bp, char *log_lvl)
303{
304 unsigned long *stack;
305 int i;
306 const int cpu = smp_processor_id();
307 unsigned long *irqstack_end =
308 (unsigned long *) (cpu_pda(cpu)->irqstackptr);
309 unsigned long *irqstack =
310 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
311
312 /*
313 * debugging aid: "show_stack(NULL, NULL);" prints the
314 * back trace for this cpu.
315 */
316
317 if (sp == NULL) {
318 if (task)
319 sp = (unsigned long *)task->thread.sp;
320 else
321 sp = (unsigned long *)&sp;
322 }
323
324 stack = sp;
325 for (i = 0; i < kstack_depth_to_print; i++) {
326 if (stack >= irqstack && stack <= irqstack_end) {
327 if (stack == irqstack_end) {
328 stack = (unsigned long *) (irqstack_end[-1]);
329 printk(" <EOI> ");
330 }
331 } else {
332 if (((long) stack & (THREAD_SIZE-1)) == 0)
333 break;
334 }
335 if (i && ((i % STACKSLOTS_PER_LINE) == 0))
336 printk("\n%s", log_lvl);
337 printk(" %016lx", *stack++);
338 touch_nmi_watchdog();
339 }
340 printk("\n");
341 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
342}
343
344void show_stack(struct task_struct *task, unsigned long *sp)
345{
346 show_stack_log_lvl(task, NULL, sp, 0, "");
347}
348
349/*
350 * The architecture-independent dump_stack generator
351 */
352void dump_stack(void)
353{
354 unsigned long bp = 0;
355 unsigned long stack;
356
357#ifdef CONFIG_FRAME_POINTER
358 if (!bp)
359 get_bp(bp);
360#endif
361
362 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
363 current->pid, current->comm, print_tainted(),
364 init_utsname()->release,
365 (int)strcspn(init_utsname()->version, " "),
366 init_utsname()->version);
367 show_trace(NULL, NULL, &stack, bp);
368}
369EXPORT_SYMBOL(dump_stack);
370
371void show_registers(struct pt_regs *regs)
372{
373 int i;
374 unsigned long sp;
375 const int cpu = smp_processor_id();
376 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
377
378 sp = regs->sp;
379 printk("CPU %d ", cpu);
380 __show_regs(regs, 1);
381 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
382 cur->comm, cur->pid, task_thread_info(cur), cur);
383
384 /*
385 * When in-kernel, we also print out the stack and code at the
386 * time of the fault..
387 */
388 if (!user_mode(regs)) {
389 unsigned int code_prologue = code_bytes * 43 / 64;
390 unsigned int code_len = code_bytes;
391 unsigned char c;
392 u8 *ip;
393
394 printk(KERN_EMERG "Stack:\n");
395 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
396 regs->bp, KERN_EMERG);
397
398 printk(KERN_EMERG "Code: ");
399
400 ip = (u8 *)regs->ip - code_prologue;
401 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
402 /* try starting at IP */
403 ip = (u8 *)regs->ip;
404 code_len = code_len - code_prologue + 1;
405 }
406 for (i = 0; i < code_len; i++, ip++) {
407 if (ip < (u8 *)PAGE_OFFSET ||
408 probe_kernel_address(ip, c)) {
409 printk(" Bad RIP value.");
410 break;
411 }
412 if (ip == (u8 *)regs->ip)
413 printk("<%02x> ", c);
414 else
415 printk("%02x ", c);
416 }
417 }
418 printk("\n");
419}
420
421int is_valid_bugaddr(unsigned long ip)
422{
423 unsigned short ud2;
424
425 if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
426 return 0;
427
428 return ud2 == 0x0b0f;
429}
430
431static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
432static int die_owner = -1;
433static unsigned int die_nest_count;
434
435unsigned __kprobes long oops_begin(void)
436{
437 int cpu;
438 unsigned long flags;
439
440 oops_enter();
441
442 /* racy, but better than risking deadlock. */
443 raw_local_irq_save(flags);
444 cpu = smp_processor_id();
445 if (!__raw_spin_trylock(&die_lock)) {
446 if (cpu == die_owner)
447 /* nested oops. should stop eventually */;
448 else
449 __raw_spin_lock(&die_lock);
450 }
451 die_nest_count++;
452 die_owner = cpu;
453 console_verbose();
454 bust_spinlocks(1);
455 return flags;
456}
457
458void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
459{
460 die_owner = -1;
461 bust_spinlocks(0);
462 die_nest_count--;
463 if (!die_nest_count)
464 /* Nest count reaches zero, release the lock. */
465 __raw_spin_unlock(&die_lock);
466 raw_local_irq_restore(flags);
467 if (!regs) {
468 oops_exit();
469 return;
470 }
471 if (in_interrupt())
472 panic("Fatal exception in interrupt");
473 if (panic_on_oops)
474 panic("Fatal exception");
475 oops_exit();
476 do_exit(signr);
477}
478
479int __kprobes __die(const char *str, struct pt_regs *regs, long err)
480{
481 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
482#ifdef CONFIG_PREEMPT
483 printk("PREEMPT ");
484#endif
485#ifdef CONFIG_SMP
486 printk("SMP ");
487#endif
488#ifdef CONFIG_DEBUG_PAGEALLOC
489 printk("DEBUG_PAGEALLOC");
490#endif
491 printk("\n");
492 if (notify_die(DIE_OOPS, str, regs, err,
493 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
494 return 1;
495
496 show_registers(regs);
497 add_taint(TAINT_DIE);
498 /* Executive summary in case the oops scrolled away */
499 printk(KERN_ALERT "RIP ");
500 printk_address(regs->ip, 1);
501 printk(" RSP <%016lx>\n", regs->sp);
502 if (kexec_should_crash(current))
503 crash_kexec(regs);
504 return 0;
505}
506
507void die(const char *str, struct pt_regs *regs, long err)
508{
509 unsigned long flags = oops_begin();
510
511 if (!user_mode(regs))
512 report_bug(regs->ip, regs);
513
514 if (__die(str, regs, err))
515 regs = NULL;
516 oops_end(flags, regs, SIGSEGV);
517}
518
519notrace __kprobes void
520die_nmi(char *str, struct pt_regs *regs, int do_panic)
521{
522 unsigned long flags;
523
524 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
525 return;
526
527 flags = oops_begin();
528 /*
529 * We are in trouble anyway, lets at least try
530 * to get a message out.
531 */
532 printk(KERN_EMERG "%s", str);
533 printk(" on CPU%d, ip %08lx, registers:\n",
534 smp_processor_id(), regs->ip);
535 show_registers(regs);
536 if (kexec_should_crash(current))
537 crash_kexec(regs);
538 if (do_panic || panic_on_oops)
539 panic("Non maskable interrupt");
540 oops_end(flags, NULL, SIGBUS);
541 nmi_exit();
542 local_irq_enable();
543 do_exit(SIGBUS);
544}
545
546static int __init oops_setup(char *s)
547{
548 if (!s)
549 return -EINVAL;
550 if (!strcmp(s, "panic"))
551 panic_on_oops = 1;
552 return 0;
553}
554early_param("oops", oops_setup);
555
556static int __init kstack_setup(char *s)
557{
558 if (!s)
559 return -EINVAL;
560 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
561 return 0;
562}
563early_param("kstack", kstack_setup);
564
565static int __init code_bytes_setup(char *s)
566{
567 code_bytes = simple_strtoul(s, NULL, 0);
568 if (code_bytes > 8192)
569 code_bytes = 8192;
570
571 return 1;
572}
573__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 109792bc7cfa..b21fbfaffe39 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -730,6 +730,7 @@ error_code:
730 movl $(__USER_DS), %ecx 730 movl $(__USER_DS), %ecx
731 movl %ecx, %ds 731 movl %ecx, %ds
732 movl %ecx, %es 732 movl %ecx, %es
733 TRACE_IRQS_OFF
733 movl %esp,%eax # pt_regs pointer 734 movl %esp,%eax # pt_regs pointer
734 call *%edi 735 call *%edi
735 jmp ret_from_exception 736 jmp ret_from_exception
@@ -760,20 +761,9 @@ ENTRY(device_not_available)
760 RING0_INT_FRAME 761 RING0_INT_FRAME
761 pushl $-1 # mark this as an int 762 pushl $-1 # mark this as an int
762 CFI_ADJUST_CFA_OFFSET 4 763 CFI_ADJUST_CFA_OFFSET 4
763 SAVE_ALL 764 pushl $do_device_not_available
764 GET_CR0_INTO_EAX
765 testl $0x4, %eax # EM (math emulation bit)
766 jne device_not_available_emulate
767 preempt_stop(CLBR_ANY)
768 call math_state_restore
769 jmp ret_from_exception
770device_not_available_emulate:
771 pushl $0 # temporary storage for ORIG_EIP
772 CFI_ADJUST_CFA_OFFSET 4 765 CFI_ADJUST_CFA_OFFSET 4
773 call math_emulate 766 jmp error_code
774 addl $4, %esp
775 CFI_ADJUST_CFA_OFFSET -4
776 jmp ret_from_exception
777 CFI_ENDPROC 767 CFI_ENDPROC
778END(device_not_available) 768END(device_not_available)
779 769
@@ -814,6 +804,7 @@ debug_stack_correct:
814 pushl $-1 # mark this as an int 804 pushl $-1 # mark this as an int
815 CFI_ADJUST_CFA_OFFSET 4 805 CFI_ADJUST_CFA_OFFSET 4
816 SAVE_ALL 806 SAVE_ALL
807 TRACE_IRQS_OFF
817 xorl %edx,%edx # error code 0 808 xorl %edx,%edx # error code 0
818 movl %esp,%eax # pt_regs pointer 809 movl %esp,%eax # pt_regs pointer
819 call do_debug 810 call do_debug
@@ -858,6 +849,7 @@ nmi_stack_correct:
858 pushl %eax 849 pushl %eax
859 CFI_ADJUST_CFA_OFFSET 4 850 CFI_ADJUST_CFA_OFFSET 4
860 SAVE_ALL 851 SAVE_ALL
852 TRACE_IRQS_OFF
861 xorl %edx,%edx # zero error code 853 xorl %edx,%edx # zero error code
862 movl %esp,%eax # pt_regs pointer 854 movl %esp,%eax # pt_regs pointer
863 call do_nmi 855 call do_nmi
@@ -898,6 +890,7 @@ nmi_espfix_stack:
898 pushl %eax 890 pushl %eax
899 CFI_ADJUST_CFA_OFFSET 4 891 CFI_ADJUST_CFA_OFFSET 4
900 SAVE_ALL 892 SAVE_ALL
893 TRACE_IRQS_OFF
901 FIXUP_ESPFIX_STACK # %eax == %esp 894 FIXUP_ESPFIX_STACK # %eax == %esp
902 xorl %edx,%edx # zero error code 895 xorl %edx,%edx # zero error code
903 call do_nmi 896 call do_nmi
@@ -928,6 +921,7 @@ KPROBE_ENTRY(int3)
928 pushl $-1 # mark this as an int 921 pushl $-1 # mark this as an int
929 CFI_ADJUST_CFA_OFFSET 4 922 CFI_ADJUST_CFA_OFFSET 4
930 SAVE_ALL 923 SAVE_ALL
924 TRACE_IRQS_OFF
931 xorl %edx,%edx # zero error code 925 xorl %edx,%edx # zero error code
932 movl %esp,%eax # pt_regs pointer 926 movl %esp,%eax # pt_regs pointer
933 call do_int3 927 call do_int3
@@ -1030,7 +1024,7 @@ ENTRY(machine_check)
1030 RING0_INT_FRAME 1024 RING0_INT_FRAME
1031 pushl $0 1025 pushl $0
1032 CFI_ADJUST_CFA_OFFSET 4 1026 CFI_ADJUST_CFA_OFFSET 4
1033 pushl machine_check_vector 1027 pushl $do_machine_check
1034 CFI_ADJUST_CFA_OFFSET 4 1028 CFI_ADJUST_CFA_OFFSET 4
1035 jmp error_code 1029 jmp error_code
1036 CFI_ENDPROC 1030 CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index cf3a0b2d0059..1db6ce4314e1 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -667,6 +667,13 @@ END(stub_rt_sigreturn)
667 SAVE_ARGS 667 SAVE_ARGS
668 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler 668 leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
669 pushq %rbp 669 pushq %rbp
670 /*
671 * Save rbp twice: One is for marking the stack frame, as usual, and the
672 * other, to fill pt_regs properly. This is because bx comes right
673 * before the last saved register in that structure, and not bp. If the
674 * base pointer were in the place bx is today, this would not be needed.
675 */
676 movq %rbp, -8(%rsp)
670 CFI_ADJUST_CFA_OFFSET 8 677 CFI_ADJUST_CFA_OFFSET 8
671 CFI_REL_OFFSET rbp, 0 678 CFI_REL_OFFSET rbp, 0
672 movq %rsp,%rbp 679 movq %rsp,%rbp
@@ -932,6 +939,9 @@ END(spurious_interrupt)
932 .if \ist 939 .if \ist
933 movq %gs:pda_data_offset, %rbp 940 movq %gs:pda_data_offset, %rbp
934 .endif 941 .endif
942 .if \irqtrace
943 TRACE_IRQS_OFF
944 .endif
935 movq %rsp,%rdi 945 movq %rsp,%rdi
936 movq ORIG_RAX(%rsp),%rsi 946 movq ORIG_RAX(%rsp),%rsi
937 movq $-1,ORIG_RAX(%rsp) 947 movq $-1,ORIG_RAX(%rsp)
@@ -1058,7 +1068,8 @@ KPROBE_ENTRY(error_entry)
1058 je error_kernelspace 1068 je error_kernelspace
1059error_swapgs: 1069error_swapgs:
1060 SWAPGS 1070 SWAPGS
1061error_sti: 1071error_sti:
1072 TRACE_IRQS_OFF
1062 movq %rdi,RDI(%rsp) 1073 movq %rdi,RDI(%rsp)
1063 CFI_REL_OFFSET rdi,RDI 1074 CFI_REL_OFFSET rdi,RDI
1064 movq %rsp,%rdi 1075 movq %rsp,%rdi
@@ -1232,7 +1243,7 @@ ENTRY(simd_coprocessor_error)
1232END(simd_coprocessor_error) 1243END(simd_coprocessor_error)
1233 1244
1234ENTRY(device_not_available) 1245ENTRY(device_not_available)
1235 zeroentry math_state_restore 1246 zeroentry do_device_not_available
1236END(device_not_available) 1247END(device_not_available)
1237 1248
1238 /* runs on exception stack */ 1249 /* runs on exception stack */
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 849e5cd485b8..f454c78fcef6 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -109,6 +109,7 @@ struct oem_table {
109}; 109};
110 110
111extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); 111extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
112extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
112#endif 113#endif
113 114
114struct mip_reg { 115struct mip_reg {
@@ -243,21 +244,38 @@ parse_unisys_oem (char *oemptr)
243} 244}
244 245
245#ifdef CONFIG_ACPI 246#ifdef CONFIG_ACPI
246int __init 247static unsigned long oem_addrX;
247find_unisys_acpi_oem_table(unsigned long *oem_addr) 248static unsigned long oem_size;
249int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
248{ 250{
249 struct acpi_table_header *header = NULL; 251 struct acpi_table_header *header = NULL;
250 int i = 0; 252 int i = 0;
251 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { 253 acpi_size tbl_size;
254
255 while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
252 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { 256 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
253 struct oem_table *t = (struct oem_table *)header; 257 struct oem_table *t = (struct oem_table *)header;
254 *oem_addr = (unsigned long)__acpi_map_table(t->OEMTableAddr, 258
255 t->OEMTableSize); 259 oem_addrX = t->OEMTableAddr;
260 oem_size = t->OEMTableSize;
261 early_acpi_os_unmap_memory(header, tbl_size);
262
263 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
264 oem_size);
256 return 0; 265 return 0;
257 } 266 }
267 early_acpi_os_unmap_memory(header, tbl_size);
258 } 268 }
259 return -1; 269 return -1;
260} 270}
271
272void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
273{
274 if (!oem_addr)
275 return;
276
277 __acpi_unmap_table((char *)oem_addr, oem_size);
278}
261#endif 279#endif
262 280
263static void 281static void
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index ae2ffc8a400c..33581d94a90e 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -114,7 +114,7 @@ static void uv_send_IPI_one(int cpu, int vector)
114 unsigned long val, apicid, lapicid; 114 unsigned long val, apicid, lapicid;
115 int pnode; 115 int pnode;
116 116
117 apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ 117 apicid = per_cpu(x86_cpu_to_apicid, cpu);
118 lapicid = apicid & 0x3f; /* ZZZ macro needed */ 118 lapicid = apicid & 0x3f; /* ZZZ macro needed */
119 pnode = uv_apicid_to_pnode(apicid); 119 pnode = uv_apicid_to_pnode(apicid);
120 val = 120 val =
@@ -202,12 +202,10 @@ static unsigned int phys_pkg_id(int index_msb)
202 return uv_read_apic_id() >> index_msb; 202 return uv_read_apic_id() >> index_msb;
203} 203}
204 204
205#ifdef ZZZ /* Needs x2apic patch */
206static void uv_send_IPI_self(int vector) 205static void uv_send_IPI_self(int vector)
207{ 206{
208 apic_write(APIC_SELF_IPI, vector); 207 apic_write(APIC_SELF_IPI, vector);
209} 208}
210#endif
211 209
212struct genapic apic_x2apic_uv_x = { 210struct genapic apic_x2apic_uv_x = {
213 .name = "UV large system", 211 .name = "UV large system",
@@ -215,15 +213,15 @@ struct genapic apic_x2apic_uv_x = {
215 .int_delivery_mode = dest_Fixed, 213 .int_delivery_mode = dest_Fixed,
216 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 214 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
217 .target_cpus = uv_target_cpus, 215 .target_cpus = uv_target_cpus,
218 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 216 .vector_allocation_domain = uv_vector_allocation_domain,
219 .apic_id_registered = uv_apic_id_registered, 217 .apic_id_registered = uv_apic_id_registered,
220 .init_apic_ldr = uv_init_apic_ldr, 218 .init_apic_ldr = uv_init_apic_ldr,
221 .send_IPI_all = uv_send_IPI_all, 219 .send_IPI_all = uv_send_IPI_all,
222 .send_IPI_allbutself = uv_send_IPI_allbutself, 220 .send_IPI_allbutself = uv_send_IPI_allbutself,
223 .send_IPI_mask = uv_send_IPI_mask, 221 .send_IPI_mask = uv_send_IPI_mask,
224 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 222 .send_IPI_self = uv_send_IPI_self,
225 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 223 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
226 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 224 .phys_pkg_id = phys_pkg_id,
227 .get_apic_id = get_apic_id, 225 .get_apic_id = get_apic_id,
228 .set_apic_id = set_apic_id, 226 .set_apic_id = set_apic_id,
229 .apic_id_mask = (0xFFFFFFFFu), 227 .apic_id_mask = (0xFFFFFFFFu),
@@ -286,12 +284,13 @@ static __init void map_low_mmrs(void)
286 284
287enum map_type {map_wb, map_uc}; 285enum map_type {map_wb, map_uc};
288 286
289static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) 287static __init void map_high(char *id, unsigned long base, int shift,
288 int max_pnode, enum map_type map_type)
290{ 289{
291 unsigned long bytes, paddr; 290 unsigned long bytes, paddr;
292 291
293 paddr = base << shift; 292 paddr = base << shift;
294 bytes = (1UL << shift); 293 bytes = (1UL << shift) * (max_pnode + 1);
295 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, 294 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
296 paddr + bytes); 295 paddr + bytes);
297 if (map_type == map_uc) 296 if (map_type == map_uc)
@@ -307,7 +306,7 @@ static __init void map_gru_high(int max_pnode)
307 306
308 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); 307 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
309 if (gru.s.enable) 308 if (gru.s.enable)
310 map_high("GRU", gru.s.base, shift, map_wb); 309 map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
311} 310}
312 311
313static __init void map_config_high(int max_pnode) 312static __init void map_config_high(int max_pnode)
@@ -317,7 +316,7 @@ static __init void map_config_high(int max_pnode)
317 316
318 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); 317 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
319 if (cfg.s.enable) 318 if (cfg.s.enable)
320 map_high("CONFIG", cfg.s.base, shift, map_uc); 319 map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc);
321} 320}
322 321
323static __init void map_mmr_high(int max_pnode) 322static __init void map_mmr_high(int max_pnode)
@@ -327,7 +326,7 @@ static __init void map_mmr_high(int max_pnode)
327 326
328 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); 327 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
329 if (mmr.s.enable) 328 if (mmr.s.enable)
330 map_high("MMR", mmr.s.base, shift, map_uc); 329 map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
331} 330}
332 331
333static __init void map_mmioh_high(int max_pnode) 332static __init void map_mmioh_high(int max_pnode)
@@ -337,7 +336,7 @@ static __init void map_mmioh_high(int max_pnode)
337 336
338 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); 337 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
339 if (mmioh.s.enable) 338 if (mmioh.s.enable)
340 map_high("MMIOH", mmioh.s.base, shift, map_uc); 339 map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc);
341} 340}
342 341
343static __init void uv_rtc_init(void) 342static __init void uv_rtc_init(void)
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
index 3e66bd364a9d..1dcb0f13897e 100644
--- a/arch/x86/kernel/head.c
+++ b/arch/x86/kernel/head.c
@@ -35,6 +35,7 @@ void __init reserve_ebda_region(void)
35 35
36 /* start of EBDA area */ 36 /* start of EBDA area */
37 ebda_addr = get_bios_ebda(); 37 ebda_addr = get_bios_ebda();
38 printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
38 39
39 /* Fixup: bios puts an EBDA in the top 64K segment */ 40 /* Fixup: bios puts an EBDA in the top 64K segment */
40 /* of conventional memory, but does not adjust lowmem. */ 41 /* of conventional memory, but does not adjust lowmem. */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 73deaffadd03..acf62fc233da 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -115,13 +115,17 @@ static void hpet_reserve_platform_timers(unsigned long id)
115 hd.hd_phys_address = hpet_address; 115 hd.hd_phys_address = hpet_address;
116 hd.hd_address = hpet; 116 hd.hd_address = hpet;
117 hd.hd_nirqs = nrtimers; 117 hd.hd_nirqs = nrtimers;
118 hd.hd_flags = HPET_DATA_PLATFORM;
119 hpet_reserve_timer(&hd, 0); 118 hpet_reserve_timer(&hd, 0);
120 119
121#ifdef CONFIG_HPET_EMULATE_RTC 120#ifdef CONFIG_HPET_EMULATE_RTC
122 hpet_reserve_timer(&hd, 1); 121 hpet_reserve_timer(&hd, 1);
123#endif 122#endif
124 123
124 /*
125 * NOTE that hd_irq[] reflects IOAPIC input pins (LEGACY_8254
126 * is wrong for i8259!) not the output IRQ. Many BIOS writers
127 * don't bother configuring *any* comparator interrupts.
128 */
125 hd.hd_irq[0] = HPET_LEGACY_8254; 129 hd.hd_irq[0] = HPET_LEGACY_8254;
126 hd.hd_irq[1] = HPET_LEGACY_RTC; 130 hd.hd_irq[1] = HPET_LEGACY_RTC;
127 131
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 1f26fd9ec4f4..5b5be9d43c2a 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -135,7 +135,7 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
135 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 135 [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
136}; 136};
137 137
138static void __init init_ISA_irqs (void) 138void __init init_ISA_irqs(void)
139{ 139{
140 int i; 140 int i;
141 141
@@ -164,22 +164,8 @@ static void __init init_ISA_irqs (void)
164 164
165void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 165void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
166 166
167void __init native_init_IRQ(void) 167static void __init smp_intr_init(void)
168{ 168{
169 int i;
170
171 init_ISA_irqs();
172 /*
173 * Cover the whole vector space, no vector can escape
174 * us. (some of these will be overridden and become
175 * 'special' SMP interrupts)
176 */
177 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
178 int vector = FIRST_EXTERNAL_VECTOR + i;
179 if (vector != IA32_SYSCALL_VECTOR)
180 set_intr_gate(vector, interrupt[i]);
181 }
182
183#ifdef CONFIG_SMP 169#ifdef CONFIG_SMP
184 /* 170 /*
185 * The reschedule interrupt is a CPU-to-CPU reschedule-helper 171 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
@@ -207,6 +193,12 @@ void __init native_init_IRQ(void)
207 /* Low priority IPI to cleanup after moving an irq */ 193 /* Low priority IPI to cleanup after moving an irq */
208 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 194 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
209#endif 195#endif
196}
197
198static void __init apic_intr_init(void)
199{
200 smp_intr_init();
201
210 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 202 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
211 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 203 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
212 204
@@ -216,6 +208,25 @@ void __init native_init_IRQ(void)
216 /* IPI vectors for APIC spurious and error interrupts */ 208 /* IPI vectors for APIC spurious and error interrupts */
217 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); 209 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
218 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); 210 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
211}
212
213void __init native_init_IRQ(void)
214{
215 int i;
216
217 init_ISA_irqs();
218 /*
219 * Cover the whole vector space, no vector can escape
220 * us. (some of these will be overridden and become
221 * 'special' SMP interrupts)
222 */
223 for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
224 int vector = FIRST_EXTERNAL_VECTOR + i;
225 if (vector != IA32_SYSCALL_VECTOR)
226 set_intr_gate(vector, interrupt[i]);
227 }
228
229 apic_intr_init();
219 230
220 if (!acpi_ioapic) 231 if (!acpi_ioapic)
221 setup_irq(2, &irq2); 232 setup_irq(2, &irq2);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 922c14058f97..0a1302fe6d45 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -123,7 +123,7 @@ void cpu_idle(void)
123 } 123 }
124} 124}
125 125
126void __show_registers(struct pt_regs *regs, int all) 126void __show_regs(struct pt_regs *regs, int all)
127{ 127{
128 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; 128 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
129 unsigned long d0, d1, d2, d3, d6, d7; 129 unsigned long d0, d1, d2, d3, d6, d7;
@@ -189,7 +189,7 @@ void __show_registers(struct pt_regs *regs, int all)
189 189
190void show_regs(struct pt_regs *regs) 190void show_regs(struct pt_regs *regs)
191{ 191{
192 __show_registers(regs, 1); 192 __show_regs(regs, 1);
193 show_trace(NULL, regs, &regs->sp, regs->bp); 193 show_trace(NULL, regs, &regs->sp, regs->bp);
194} 194}
195 195
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca80394ef5b8..cd8c0ed02b7e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,7 +136,7 @@ void cpu_idle(void)
136} 136}
137 137
138/* Prints also some state that isn't saved in the pt_regs */ 138/* Prints also some state that isn't saved in the pt_regs */
139void __show_regs(struct pt_regs *regs) 139void __show_regs(struct pt_regs *regs, int all)
140{ 140{
141 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; 141 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
142 unsigned long d0, d1, d2, d3, d6, d7; 142 unsigned long d0, d1, d2, d3, d6, d7;
@@ -175,6 +175,9 @@ void __show_regs(struct pt_regs *regs)
175 rdmsrl(MSR_GS_BASE, gs); 175 rdmsrl(MSR_GS_BASE, gs);
176 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 176 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
177 177
178 if (!all)
179 return;
180
178 cr0 = read_cr0(); 181 cr0 = read_cr0();
179 cr2 = read_cr2(); 182 cr2 = read_cr2();
180 cr3 = read_cr3(); 183 cr3 = read_cr3();
@@ -200,7 +203,7 @@ void __show_regs(struct pt_regs *regs)
200void show_regs(struct pt_regs *regs) 203void show_regs(struct pt_regs *regs)
201{ 204{
202 printk(KERN_INFO "CPU %d:", smp_processor_id()); 205 printk(KERN_INFO "CPU %d:", smp_processor_id());
203 __show_regs(regs); 206 __show_regs(regs, 1);
204 show_trace(NULL, regs, (void *)(regs + 1), regs->bp); 207 show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
205} 208}
206 209
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index d13858818100..f6a11b9b1f98 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -354,9 +354,27 @@ static void ati_force_hpet_resume(void)
354 printk(KERN_DEBUG "Force enabled HPET at resume\n"); 354 printk(KERN_DEBUG "Force enabled HPET at resume\n");
355} 355}
356 356
357static u32 ati_ixp4x0_rev(struct pci_dev *dev)
358{
359 u32 d;
360 u8 b;
361
362 pci_read_config_byte(dev, 0xac, &b);
363 b &= ~(1<<5);
364 pci_write_config_byte(dev, 0xac, b);
365 pci_read_config_dword(dev, 0x70, &d);
366 d |= 1<<8;
367 pci_write_config_dword(dev, 0x70, d);
368 pci_read_config_dword(dev, 0x8, &d);
369 d &= 0xff;
370 dev_printk(KERN_DEBUG, &dev->dev, "SB4X0 revision 0x%x\n", d);
371 return d;
372}
373
357static void ati_force_enable_hpet(struct pci_dev *dev) 374static void ati_force_enable_hpet(struct pci_dev *dev)
358{ 375{
359 u32 uninitialized_var(val); 376 u32 d, val;
377 u8 b;
360 378
361 if (hpet_address || force_hpet_address) 379 if (hpet_address || force_hpet_address)
362 return; 380 return;
@@ -366,14 +384,33 @@ static void ati_force_enable_hpet(struct pci_dev *dev)
366 return; 384 return;
367 } 385 }
368 386
387 d = ati_ixp4x0_rev(dev);
388 if (d < 0x82)
389 return;
390
391 /* base address */
369 pci_write_config_dword(dev, 0x14, 0xfed00000); 392 pci_write_config_dword(dev, 0x14, 0xfed00000);
370 pci_read_config_dword(dev, 0x14, &val); 393 pci_read_config_dword(dev, 0x14, &val);
394
395 /* enable interrupt */
396 outb(0x72, 0xcd6); b = inb(0xcd7);
397 b |= 0x1;
398 outb(0x72, 0xcd6); outb(b, 0xcd7);
399 outb(0x72, 0xcd6); b = inb(0xcd7);
400 if (!(b & 0x1))
401 return;
402 pci_read_config_dword(dev, 0x64, &d);
403 d |= (1<<10);
404 pci_write_config_dword(dev, 0x64, d);
405 pci_read_config_dword(dev, 0x64, &d);
406 if (!(d & (1<<10)))
407 return;
408
371 force_hpet_address = val; 409 force_hpet_address = val;
372 force_hpet_resume_type = ATI_FORCE_HPET_RESUME; 410 force_hpet_resume_type = ATI_FORCE_HPET_RESUME;
373 dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", 411 dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
374 force_hpet_address); 412 force_hpet_address);
375 cached_dev = dev; 413 cached_dev = dev;
376 return;
377} 414}
378DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS, 415DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
379 ati_force_enable_hpet); 416 ati_force_enable_hpet);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 21b8e0a59780..2255782e8d4b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -302,7 +302,7 @@ static void __init relocate_initrd(void)
302 if (clen > MAX_MAP_CHUNK-slop) 302 if (clen > MAX_MAP_CHUNK-slop)
303 clen = MAX_MAP_CHUNK-slop; 303 clen = MAX_MAP_CHUNK-slop;
304 mapaddr = ramdisk_image & PAGE_MASK; 304 mapaddr = ramdisk_image & PAGE_MASK;
305 p = early_ioremap(mapaddr, clen+slop); 305 p = early_memremap(mapaddr, clen+slop);
306 memcpy(q, p+slop, clen); 306 memcpy(q, p+slop, clen);
307 early_iounmap(p, clen+slop); 307 early_iounmap(p, clen+slop);
308 q += clen; 308 q += clen;
@@ -379,7 +379,7 @@ static void __init parse_setup_data(void)
379 return; 379 return;
380 pa_data = boot_params.hdr.setup_data; 380 pa_data = boot_params.hdr.setup_data;
381 while (pa_data) { 381 while (pa_data) {
382 data = early_ioremap(pa_data, PAGE_SIZE); 382 data = early_memremap(pa_data, PAGE_SIZE);
383 switch (data->type) { 383 switch (data->type) {
384 case SETUP_E820_EXT: 384 case SETUP_E820_EXT:
385 parse_e820_ext(data, pa_data); 385 parse_e820_ext(data, pa_data);
@@ -402,7 +402,7 @@ static void __init e820_reserve_setup_data(void)
402 return; 402 return;
403 pa_data = boot_params.hdr.setup_data; 403 pa_data = boot_params.hdr.setup_data;
404 while (pa_data) { 404 while (pa_data) {
405 data = early_ioremap(pa_data, sizeof(*data)); 405 data = early_memremap(pa_data, sizeof(*data));
406 e820_update_range(pa_data, sizeof(*data)+data->len, 406 e820_update_range(pa_data, sizeof(*data)+data->len,
407 E820_RAM, E820_RESERVED_KERN); 407 E820_RAM, E820_RESERVED_KERN);
408 found = 1; 408 found = 1;
@@ -428,7 +428,7 @@ static void __init reserve_early_setup_data(void)
428 return; 428 return;
429 pa_data = boot_params.hdr.setup_data; 429 pa_data = boot_params.hdr.setup_data;
430 while (pa_data) { 430 while (pa_data) {
431 data = early_ioremap(pa_data, sizeof(*data)); 431 data = early_memremap(pa_data, sizeof(*data));
432 sprintf(buf, "setup data %x", data->type); 432 sprintf(buf, "setup data %x", data->type);
433 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); 433 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
434 pa_data = data->next; 434 pa_data = data->next;
@@ -998,6 +998,8 @@ void __init setup_arch(char **cmdline_p)
998 */ 998 */
999 acpi_boot_table_init(); 999 acpi_boot_table_init();
1000 1000
1001 early_acpi_boot_init();
1002
1001#ifdef CONFIG_ACPI_NUMA 1003#ifdef CONFIG_ACPI_NUMA
1002 /* 1004 /*
1003 * Parse SRAT to discover nodes. 1005 * Parse SRAT to discover nodes.
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 76b6f50978f7..8c3aca7cb343 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -334,14 +334,17 @@ static void __cpuinit start_secondary(void *unused)
334 * does not change while we are assigning vectors to cpus. Holding 334 * does not change while we are assigning vectors to cpus. Holding
335 * this lock ensures we don't half assign or remove an irq from a cpu. 335 * this lock ensures we don't half assign or remove an irq from a cpu.
336 */ 336 */
337 ipi_call_lock_irq(); 337 ipi_call_lock();
338 lock_vector_lock(); 338 lock_vector_lock();
339 __setup_vector_irq(smp_processor_id()); 339 __setup_vector_irq(smp_processor_id());
340 cpu_set(smp_processor_id(), cpu_online_map); 340 cpu_set(smp_processor_id(), cpu_online_map);
341 unlock_vector_lock(); 341 unlock_vector_lock();
342 ipi_call_unlock_irq(); 342 ipi_call_unlock();
343 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 343 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
344 344
345 /* enable local interrupts */
346 local_irq_enable();
347
345 setup_secondary_clock(); 348 setup_secondary_clock();
346 349
347 wmb(); 350 wmb();
@@ -596,10 +599,12 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
596 * Give the other CPU some time to accept the IPI. 599 * Give the other CPU some time to accept the IPI.
597 */ 600 */
598 udelay(200); 601 udelay(200);
599 maxlvt = lapic_get_maxlvt(); 602 if (APIC_INTEGRATED(apic_version[phys_apicid])) {
600 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 603 maxlvt = lapic_get_maxlvt();
601 apic_write(APIC_ESR, 0); 604 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
602 accept_status = (apic_read(APIC_ESR) & 0xEF); 605 apic_write(APIC_ESR, 0);
606 accept_status = (apic_read(APIC_ESR) & 0xEF);
607 }
603 pr_debug("NMI sent.\n"); 608 pr_debug("NMI sent.\n");
604 609
605 if (send_status) 610 if (send_status)
@@ -1256,39 +1261,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1256 check_nmi_watchdog(); 1261 check_nmi_watchdog();
1257} 1262}
1258 1263
1259#ifdef CONFIG_HOTPLUG_CPU
1260
1261static void remove_siblinginfo(int cpu)
1262{
1263 int sibling;
1264 struct cpuinfo_x86 *c = &cpu_data(cpu);
1265
1266 for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
1267 cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
1268 /*/
1269 * last thread sibling in this cpu core going down
1270 */
1271 if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
1272 cpu_data(sibling).booted_cores--;
1273 }
1274
1275 for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
1276 cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
1277 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1278 cpus_clear(per_cpu(cpu_core_map, cpu));
1279 c->phys_proc_id = 0;
1280 c->cpu_core_id = 0;
1281 cpu_clear(cpu, cpu_sibling_setup_map);
1282}
1283
1284static int additional_cpus __initdata = -1;
1285
1286static __init int setup_additional_cpus(char *s)
1287{
1288 return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
1289}
1290early_param("additional_cpus", setup_additional_cpus);
1291
1292/* 1264/*
1293 * cpu_possible_map should be static, it cannot change as cpu's 1265 * cpu_possible_map should be static, it cannot change as cpu's
1294 * are onlined, or offlined. The reason is per-cpu data-structures 1266 * are onlined, or offlined. The reason is per-cpu data-structures
@@ -1308,21 +1280,13 @@ early_param("additional_cpus", setup_additional_cpus);
1308 */ 1280 */
1309__init void prefill_possible_map(void) 1281__init void prefill_possible_map(void)
1310{ 1282{
1311 int i; 1283 int i, possible;
1312 int possible;
1313 1284
1314 /* no processor from mptable or madt */ 1285 /* no processor from mptable or madt */
1315 if (!num_processors) 1286 if (!num_processors)
1316 num_processors = 1; 1287 num_processors = 1;
1317 1288
1318 if (additional_cpus == -1) { 1289 possible = num_processors + disabled_cpus;
1319 if (disabled_cpus > 0)
1320 additional_cpus = disabled_cpus;
1321 else
1322 additional_cpus = 0;
1323 }
1324
1325 possible = num_processors + additional_cpus;
1326 if (possible > NR_CPUS) 1290 if (possible > NR_CPUS)
1327 possible = NR_CPUS; 1291 possible = NR_CPUS;
1328 1292
@@ -1335,6 +1299,31 @@ __init void prefill_possible_map(void)
1335 nr_cpu_ids = possible; 1299 nr_cpu_ids = possible;
1336} 1300}
1337 1301
1302#ifdef CONFIG_HOTPLUG_CPU
1303
1304static void remove_siblinginfo(int cpu)
1305{
1306 int sibling;
1307 struct cpuinfo_x86 *c = &cpu_data(cpu);
1308
1309 for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) {
1310 cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
1311 /*/
1312 * last thread sibling in this cpu core going down
1313 */
1314 if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
1315 cpu_data(sibling).booted_cores--;
1316 }
1317
1318 for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu))
1319 cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
1320 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1321 cpus_clear(per_cpu(cpu_core_map, cpu));
1322 c->phys_proc_id = 0;
1323 c->cpu_core_id = 0;
1324 cpu_clear(cpu, cpu_sibling_setup_map);
1325}
1326
1338static void __ref remove_cpu_from_maps(int cpu) 1327static void __ref remove_cpu_from_maps(int cpu)
1339{ 1328{
1340 cpu_clear(cpu, cpu_online_map); 1329 cpu_clear(cpu, cpu_online_map);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index bbecf8b6bf96..77b400f06ea2 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -47,10 +47,9 @@ unsigned long profile_pc(struct pt_regs *regs)
47 unsigned long pc = instruction_pointer(regs); 47 unsigned long pc = instruction_pointer(regs);
48 48
49#ifdef CONFIG_SMP 49#ifdef CONFIG_SMP
50 if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs) && 50 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
51 in_lock_functions(pc)) {
52#ifdef CONFIG_FRAME_POINTER 51#ifdef CONFIG_FRAME_POINTER
53 return *(unsigned long *)(regs->bp + 4); 52 return *(unsigned long *)(regs->bp + sizeof(long));
54#else 53#else
55 unsigned long *sp = (unsigned long *)&regs->sp; 54 unsigned long *sp = (unsigned long *)&regs->sp;
56 55
@@ -95,6 +94,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
95 94
96 do_timer_interrupt_hook(); 95 do_timer_interrupt_hook();
97 96
97#ifdef CONFIG_MCA
98 if (MCA_bus) { 98 if (MCA_bus) {
99 /* The PS/2 uses level-triggered interrupts. You can't 99 /* The PS/2 uses level-triggered interrupts. You can't
100 turn them off, nor would you want to (any attempt to 100 turn them off, nor would you want to (any attempt to
@@ -108,6 +108,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id)
108 u8 irq_v = inb_p( 0x61 ); /* read the current state */ 108 u8 irq_v = inb_p( 0x61 ); /* read the current state */
109 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */ 109 outb_p( irq_v|0x80, 0x61 ); /* reset the IRQ */
110 } 110 }
111#endif
111 112
112 return IRQ_HANDLED; 113 return IRQ_HANDLED;
113} 114}
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index e3d49c553af2..cb19d650c216 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -16,6 +16,7 @@
16#include <linux/interrupt.h> 16#include <linux/interrupt.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/mca.h>
19 20
20#include <asm/i8253.h> 21#include <asm/i8253.h>
21#include <asm/hpet.h> 22#include <asm/hpet.h>
@@ -33,23 +34,34 @@ unsigned long profile_pc(struct pt_regs *regs)
33 /* Assume the lock function has either no stack frame or a copy 34 /* Assume the lock function has either no stack frame or a copy
34 of flags from PUSHF 35 of flags from PUSHF
35 Eflags always has bits 22 and up cleared unlike kernel addresses. */ 36 Eflags always has bits 22 and up cleared unlike kernel addresses. */
36 if (!user_mode(regs) && in_lock_functions(pc)) { 37 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
38#ifdef CONFIG_FRAME_POINTER
39 return *(unsigned long *)(regs->bp + sizeof(long));
40#else
37 unsigned long *sp = (unsigned long *)regs->sp; 41 unsigned long *sp = (unsigned long *)regs->sp;
38 if (sp[0] >> 22) 42 if (sp[0] >> 22)
39 return sp[0]; 43 return sp[0];
40 if (sp[1] >> 22) 44 if (sp[1] >> 22)
41 return sp[1]; 45 return sp[1];
46#endif
42 } 47 }
43 return pc; 48 return pc;
44} 49}
45EXPORT_SYMBOL(profile_pc); 50EXPORT_SYMBOL(profile_pc);
46 51
47static irqreturn_t timer_event_interrupt(int irq, void *dev_id) 52irqreturn_t timer_interrupt(int irq, void *dev_id)
48{ 53{
49 add_pda(irq0_irqs, 1); 54 add_pda(irq0_irqs, 1);
50 55
51 global_clock_event->event_handler(global_clock_event); 56 global_clock_event->event_handler(global_clock_event);
52 57
58#ifdef CONFIG_MCA
59 if (MCA_bus) {
60 u8 irq_v = inb_p(0x61); /* read the current state */
61 outb_p(irq_v|0x80, 0x61); /* reset the IRQ */
62 }
63#endif
64
53 return IRQ_HANDLED; 65 return IRQ_HANDLED;
54} 66}
55 67
@@ -100,7 +112,7 @@ unsigned long __init calibrate_cpu(void)
100} 112}
101 113
102static struct irqaction irq0 = { 114static struct irqaction irq0 = {
103 .handler = timer_event_interrupt, 115 .handler = timer_interrupt,
104 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING, 116 .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING,
105 .mask = CPU_MASK_NONE, 117 .mask = CPU_MASK_NONE,
106 .name = "timer" 118 .name = "timer"
@@ -111,16 +123,13 @@ void __init hpet_time_init(void)
111 if (!hpet_enable()) 123 if (!hpet_enable())
112 setup_pit_timer(); 124 setup_pit_timer();
113 125
126 irq0.mask = cpumask_of_cpu(0);
114 setup_irq(0, &irq0); 127 setup_irq(0, &irq0);
115} 128}
116 129
117void __init time_init(void) 130void __init time_init(void)
118{ 131{
119 tsc_init(); 132 tsc_init();
120 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
121 vgetcpu_mode = VGETCPU_RDTSCP;
122 else
123 vgetcpu_mode = VGETCPU_LSL;
124 133
125 late_time_init = choose_time_init(); 134 late_time_init = choose_time_init();
126} 135}
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps.c
index 0429c5de5ea9..e062974cce34 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps.c
@@ -7,13 +7,11 @@
7 */ 7 */
8 8
9/* 9/*
10 * 'Traps.c' handles hardware traps and faults after we have saved some 10 * Handle hardware traps and faults.
11 * state in 'asm.s'.
12 */ 11 */
13#include <linux/interrupt.h> 12#include <linux/interrupt.h>
14#include <linux/kallsyms.h> 13#include <linux/kallsyms.h>
15#include <linux/spinlock.h> 14#include <linux/spinlock.h>
16#include <linux/highmem.h>
17#include <linux/kprobes.h> 15#include <linux/kprobes.h>
18#include <linux/uaccess.h> 16#include <linux/uaccess.h>
19#include <linux/utsname.h> 17#include <linux/utsname.h>
@@ -32,6 +30,8 @@
32#include <linux/bug.h> 30#include <linux/bug.h>
33#include <linux/nmi.h> 31#include <linux/nmi.h>
34#include <linux/mm.h> 32#include <linux/mm.h>
33#include <linux/smp.h>
34#include <linux/io.h>
35 35
36#ifdef CONFIG_EISA 36#ifdef CONFIG_EISA
37#include <linux/ioport.h> 37#include <linux/ioport.h>
@@ -46,21 +46,31 @@
46#include <linux/edac.h> 46#include <linux/edac.h>
47#endif 47#endif
48 48
49#include <asm/arch_hooks.h>
50#include <asm/stacktrace.h> 49#include <asm/stacktrace.h>
51#include <asm/processor.h> 50#include <asm/processor.h>
52#include <asm/debugreg.h> 51#include <asm/debugreg.h>
53#include <asm/atomic.h> 52#include <asm/atomic.h>
54#include <asm/system.h> 53#include <asm/system.h>
55#include <asm/unwind.h> 54#include <asm/unwind.h>
55#include <asm/traps.h>
56#include <asm/desc.h> 56#include <asm/desc.h>
57#include <asm/i387.h> 57#include <asm/i387.h>
58
59#include <mach_traps.h>
60
61#ifdef CONFIG_X86_64
62#include <asm/pgalloc.h>
63#include <asm/proto.h>
64#include <asm/pda.h>
65#else
66#include <asm/processor-flags.h>
67#include <asm/arch_hooks.h>
58#include <asm/nmi.h> 68#include <asm/nmi.h>
59#include <asm/smp.h> 69#include <asm/smp.h>
60#include <asm/io.h> 70#include <asm/io.h>
61#include <asm/traps.h> 71#include <asm/traps.h>
62 72
63#include "mach_traps.h" 73#include "cpu/mcheck/mce.h"
64 74
65DECLARE_BITMAP(used_vectors, NR_VECTORS); 75DECLARE_BITMAP(used_vectors, NR_VECTORS);
66EXPORT_SYMBOL_GPL(used_vectors); 76EXPORT_SYMBOL_GPL(used_vectors);
@@ -77,418 +87,104 @@ char ignore_fpu_irq;
77 */ 87 */
78gate_desc idt_table[256] 88gate_desc idt_table[256]
79 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 89 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
80
81int panic_on_unrecovered_nmi;
82int kstack_depth_to_print = 24;
83static unsigned int code_bytes = 64;
84static int ignore_nmis;
85static int die_counter;
86
87void printk_address(unsigned long address, int reliable)
88{
89#ifdef CONFIG_KALLSYMS
90 unsigned long offset = 0;
91 unsigned long symsize;
92 const char *symname;
93 char *modname;
94 char *delim = ":";
95 char namebuf[KSYM_NAME_LEN];
96 char reliab[4] = "";
97
98 symname = kallsyms_lookup(address, &symsize, &offset,
99 &modname, namebuf);
100 if (!symname) {
101 printk(" [<%08lx>]\n", address);
102 return;
103 }
104 if (!reliable)
105 strcpy(reliab, "? ");
106
107 if (!modname)
108 modname = delim = "";
109 printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
110 address, reliab, delim, modname, delim, symname, offset, symsize);
111#else
112 printk(" [<%08lx>]\n", address);
113#endif 90#endif
114}
115
116static inline int valid_stack_ptr(struct thread_info *tinfo,
117 void *p, unsigned int size)
118{
119 void *t = tinfo;
120 return p > t && p <= t + THREAD_SIZE - size;
121}
122
123/* The form of the top of the frame on the stack */
124struct stack_frame {
125 struct stack_frame *next_frame;
126 unsigned long return_address;
127};
128
129static inline unsigned long
130print_context_stack(struct thread_info *tinfo,
131 unsigned long *stack, unsigned long bp,
132 const struct stacktrace_ops *ops, void *data)
133{
134 struct stack_frame *frame = (struct stack_frame *)bp;
135
136 while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
137 unsigned long addr;
138
139 addr = *stack;
140 if (__kernel_text_address(addr)) {
141 if ((unsigned long) stack == bp + 4) {
142 ops->address(data, addr, 1);
143 frame = frame->next_frame;
144 bp = (unsigned long) frame;
145 } else {
146 ops->address(data, addr, bp == 0);
147 }
148 }
149 stack++;
150 }
151 return bp;
152}
153
154void dump_trace(struct task_struct *task, struct pt_regs *regs,
155 unsigned long *stack, unsigned long bp,
156 const struct stacktrace_ops *ops, void *data)
157{
158 if (!task)
159 task = current;
160
161 if (!stack) {
162 unsigned long dummy;
163 stack = &dummy;
164 if (task != current)
165 stack = (unsigned long *)task->thread.sp;
166 }
167
168#ifdef CONFIG_FRAME_POINTER
169 if (!bp) {
170 if (task == current) {
171 /* Grab bp right from our regs */
172 asm("movl %%ebp, %0" : "=r" (bp) :);
173 } else {
174 /* bp is the last reg pushed by switch_to */
175 bp = *(unsigned long *) task->thread.sp;
176 }
177 }
178#endif
179
180 for (;;) {
181 struct thread_info *context;
182
183 context = (struct thread_info *)
184 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
185 bp = print_context_stack(context, stack, bp, ops, data);
186 /*
187 * Should be after the line below, but somewhere
188 * in early boot context comes out corrupted and we
189 * can't reference it:
190 */
191 if (ops->stack(data, "IRQ") < 0)
192 break;
193 stack = (unsigned long *)context->previous_esp;
194 if (!stack)
195 break;
196 touch_nmi_watchdog();
197 }
198}
199EXPORT_SYMBOL(dump_trace);
200
201static void
202print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
203{
204 printk(data);
205 print_symbol(msg, symbol);
206 printk("\n");
207}
208
209static void print_trace_warning(void *data, char *msg)
210{
211 printk("%s%s\n", (char *)data, msg);
212}
213 91
214static int print_trace_stack(void *data, char *name) 92static int ignore_nmis;
215{
216 return 0;
217}
218
219/*
220 * Print one address/symbol entries per line.
221 */
222static void print_trace_address(void *data, unsigned long addr, int reliable)
223{
224 printk("%s [<%08lx>] ", (char *)data, addr);
225 if (!reliable)
226 printk("? ");
227 print_symbol("%s\n", addr);
228 touch_nmi_watchdog();
229}
230
231static const struct stacktrace_ops print_trace_ops = {
232 .warning = print_trace_warning,
233 .warning_symbol = print_trace_warning_symbol,
234 .stack = print_trace_stack,
235 .address = print_trace_address,
236};
237 93
238static void 94static inline void conditional_sti(struct pt_regs *regs)
239show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
240 unsigned long *stack, unsigned long bp, char *log_lvl)
241{ 95{
242 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); 96 if (regs->flags & X86_EFLAGS_IF)
243 printk("%s =======================\n", log_lvl); 97 local_irq_enable();
244} 98}
245 99
246void show_trace(struct task_struct *task, struct pt_regs *regs, 100static inline void preempt_conditional_sti(struct pt_regs *regs)
247 unsigned long *stack, unsigned long bp)
248{ 101{
249 show_trace_log_lvl(task, regs, stack, bp, ""); 102 inc_preempt_count();
103 if (regs->flags & X86_EFLAGS_IF)
104 local_irq_enable();
250} 105}
251 106
252static void 107static inline void preempt_conditional_cli(struct pt_regs *regs)
253show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
254 unsigned long *sp, unsigned long bp, char *log_lvl)
255{ 108{
256 unsigned long *stack; 109 if (regs->flags & X86_EFLAGS_IF)
257 int i; 110 local_irq_disable();
258 111 dec_preempt_count();
259 if (sp == NULL) {
260 if (task)
261 sp = (unsigned long *)task->thread.sp;
262 else
263 sp = (unsigned long *)&sp;
264 }
265
266 stack = sp;
267 for (i = 0; i < kstack_depth_to_print; i++) {
268 if (kstack_end(stack))
269 break;
270 if (i && ((i % 8) == 0))
271 printk("\n%s ", log_lvl);
272 printk("%08lx ", *stack++);
273 }
274 printk("\n%sCall Trace:\n", log_lvl);
275
276 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
277} 112}
278 113
279void show_stack(struct task_struct *task, unsigned long *sp) 114#ifdef CONFIG_X86_32
115static inline void
116die_if_kernel(const char *str, struct pt_regs *regs, long err)
280{ 117{
281 printk(" "); 118 if (!user_mode_vm(regs))
282 show_stack_log_lvl(task, NULL, sp, 0, ""); 119 die(str, regs, err);
283} 120}
284 121
285/* 122/*
286 * The architecture-independent dump_stack generator 123 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
124 * invalid offset set (the LAZY one) and the faulting thread has
125 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
126 * we set the offset field correctly and return 1.
287 */ 127 */
288void dump_stack(void) 128static int lazy_iobitmap_copy(void)
289{ 129{
290 unsigned long bp = 0; 130 struct thread_struct *thread;
291 unsigned long stack; 131 struct tss_struct *tss;
292 132 int cpu;
293#ifdef CONFIG_FRAME_POINTER
294 if (!bp)
295 asm("movl %%ebp, %0" : "=r" (bp):);
296#endif
297
298 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
299 current->pid, current->comm, print_tainted(),
300 init_utsname()->release,
301 (int)strcspn(init_utsname()->version, " "),
302 init_utsname()->version);
303
304 show_trace(current, NULL, &stack, bp);
305}
306
307EXPORT_SYMBOL(dump_stack);
308
309void show_registers(struct pt_regs *regs)
310{
311 int i;
312 133
313 print_modules(); 134 cpu = get_cpu();
314 __show_registers(regs, 0); 135 tss = &per_cpu(init_tss, cpu);
136 thread = &current->thread;
315 137
316 printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", 138 if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
317 TASK_COMM_LEN, current->comm, task_pid_nr(current), 139 thread->io_bitmap_ptr) {
318 current_thread_info(), current, task_thread_info(current)); 140 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
319 /* 141 thread->io_bitmap_max);
320 * When in-kernel, we also print out the stack and code at the 142 /*
321 * time of the fault.. 143 * If the previously set map was extending to higher ports
322 */ 144 * than the current one, pad extra space with 0xff (no access).
323 if (!user_mode_vm(regs)) { 145 */
324 unsigned int code_prologue = code_bytes * 43 / 64; 146 if (thread->io_bitmap_max < tss->io_bitmap_max) {
325 unsigned int code_len = code_bytes; 147 memset((char *) tss->io_bitmap +
326 unsigned char c; 148 thread->io_bitmap_max, 0xff,
327 u8 *ip; 149 tss->io_bitmap_max - thread->io_bitmap_max);
328
329 printk("\n" KERN_EMERG "Stack: ");
330 show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
331
332 printk(KERN_EMERG "Code: ");
333
334 ip = (u8 *)regs->ip - code_prologue;
335 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
336 /* try starting at EIP */
337 ip = (u8 *)regs->ip;
338 code_len = code_len - code_prologue + 1;
339 }
340 for (i = 0; i < code_len; i++, ip++) {
341 if (ip < (u8 *)PAGE_OFFSET ||
342 probe_kernel_address(ip, c)) {
343 printk(" Bad EIP value.");
344 break;
345 }
346 if (ip == (u8 *)regs->ip)
347 printk("<%02x> ", c);
348 else
349 printk("%02x ", c);
350 } 150 }
351 } 151 tss->io_bitmap_max = thread->io_bitmap_max;
352 printk("\n"); 152 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
353} 153 tss->io_bitmap_owner = thread;
354 154 put_cpu();
355int is_valid_bugaddr(unsigned long ip)
356{
357 unsigned short ud2;
358
359 if (ip < PAGE_OFFSET)
360 return 0;
361 if (probe_kernel_address((unsigned short *)ip, ud2))
362 return 0;
363
364 return ud2 == 0x0b0f;
365}
366
367static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
368static int die_owner = -1;
369static unsigned int die_nest_count;
370
371unsigned __kprobes long oops_begin(void)
372{
373 unsigned long flags;
374
375 oops_enter();
376
377 if (die_owner != raw_smp_processor_id()) {
378 console_verbose();
379 raw_local_irq_save(flags);
380 __raw_spin_lock(&die_lock);
381 die_owner = smp_processor_id();
382 die_nest_count = 0;
383 bust_spinlocks(1);
384 } else {
385 raw_local_irq_save(flags);
386 }
387 die_nest_count++;
388 return flags;
389}
390
391void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
392{
393 bust_spinlocks(0);
394 die_owner = -1;
395 add_taint(TAINT_DIE);
396 __raw_spin_unlock(&die_lock);
397 raw_local_irq_restore(flags);
398
399 if (!regs)
400 return;
401
402 if (kexec_should_crash(current))
403 crash_kexec(regs);
404
405 if (in_interrupt())
406 panic("Fatal exception in interrupt");
407
408 if (panic_on_oops)
409 panic("Fatal exception");
410
411 oops_exit();
412 do_exit(signr);
413}
414
415int __kprobes __die(const char *str, struct pt_regs *regs, long err)
416{
417 unsigned short ss;
418 unsigned long sp;
419 155
420 printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
421#ifdef CONFIG_PREEMPT
422 printk("PREEMPT ");
423#endif
424#ifdef CONFIG_SMP
425 printk("SMP ");
426#endif
427#ifdef CONFIG_DEBUG_PAGEALLOC
428 printk("DEBUG_PAGEALLOC");
429#endif
430 printk("\n");
431 if (notify_die(DIE_OOPS, str, regs, err,
432 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
433 return 1; 156 return 1;
434
435 show_registers(regs);
436 /* Executive summary in case the oops scrolled away */
437 sp = (unsigned long) (&regs->sp);
438 savesegment(ss, ss);
439 if (user_mode(regs)) {
440 sp = regs->sp;
441 ss = regs->ss & 0xffff;
442 } 157 }
443 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); 158 put_cpu();
444 print_symbol("%s", regs->ip);
445 printk(" SS:ESP %04x:%08lx\n", ss, sp);
446 return 0;
447}
448
449/*
450 * This is gone through when something in the kernel has done something bad
451 * and is about to be terminated:
452 */
453void die(const char *str, struct pt_regs *regs, long err)
454{
455 unsigned long flags = oops_begin();
456
457 if (die_nest_count < 3) {
458 report_bug(regs->ip, regs);
459
460 if (__die(str, regs, err))
461 regs = NULL;
462 } else {
463 printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
464 }
465
466 oops_end(flags, regs, SIGSEGV);
467}
468 159
469static inline void 160 return 0;
470die_if_kernel(const char *str, struct pt_regs *regs, long err)
471{
472 if (!user_mode_vm(regs))
473 die(str, regs, err);
474} 161}
162#endif
475 163
476static void __kprobes 164static void __kprobes
477do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs, 165do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
478 long error_code, siginfo_t *info) 166 long error_code, siginfo_t *info)
479{ 167{
480 struct task_struct *tsk = current; 168 struct task_struct *tsk = current;
481 169
170#ifdef CONFIG_X86_32
482 if (regs->flags & X86_VM_MASK) { 171 if (regs->flags & X86_VM_MASK) {
483 if (vm86) 172 /*
173 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
174 * On nmi (interrupt 2), do_trap should not be called.
175 */
176 if (trapnr < 6)
484 goto vm86_trap; 177 goto vm86_trap;
485 goto trap_signal; 178 goto trap_signal;
486 } 179 }
180#endif
487 181
488 if (!user_mode(regs)) 182 if (!user_mode(regs))
489 goto kernel_trap; 183 goto kernel_trap;
490 184
185#ifdef CONFIG_X86_32
491trap_signal: 186trap_signal:
187#endif
492 /* 188 /*
493 * We want error_code and trap_no set for userspace faults and 189 * We want error_code and trap_no set for userspace faults and
494 * kernelspace faults which result in die(), but not 190 * kernelspace faults which result in die(), but not
@@ -501,6 +197,18 @@ trap_signal:
501 tsk->thread.error_code = error_code; 197 tsk->thread.error_code = error_code;
502 tsk->thread.trap_no = trapnr; 198 tsk->thread.trap_no = trapnr;
503 199
200#ifdef CONFIG_X86_64
201 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
202 printk_ratelimit()) {
203 printk(KERN_INFO
204 "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
205 tsk->comm, tsk->pid, str,
206 regs->ip, regs->sp, error_code);
207 print_vma_addr(" in ", regs->ip);
208 printk("\n");
209 }
210#endif
211
504 if (info) 212 if (info)
505 force_sig_info(signr, info, tsk); 213 force_sig_info(signr, info, tsk);
506 else 214 else
@@ -515,29 +223,29 @@ kernel_trap:
515 } 223 }
516 return; 224 return;
517 225
226#ifdef CONFIG_X86_32
518vm86_trap: 227vm86_trap:
519 if (handle_vm86_trap((struct kernel_vm86_regs *) regs, 228 if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
520 error_code, trapnr)) 229 error_code, trapnr))
521 goto trap_signal; 230 goto trap_signal;
522 return; 231 return;
232#endif
523} 233}
524 234
525#define DO_ERROR(trapnr, signr, str, name) \ 235#define DO_ERROR(trapnr, signr, str, name) \
526void do_##name(struct pt_regs *regs, long error_code) \ 236dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
527{ \ 237{ \
528 trace_hardirqs_fixup(); \
529 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 238 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
530 == NOTIFY_STOP) \ 239 == NOTIFY_STOP) \
531 return; \ 240 return; \
532 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ 241 conditional_sti(regs); \
242 do_trap(trapnr, signr, str, regs, error_code, NULL); \
533} 243}
534 244
535#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \ 245#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
536void do_##name(struct pt_regs *regs, long error_code) \ 246dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
537{ \ 247{ \
538 siginfo_t info; \ 248 siginfo_t info; \
539 if (irq) \
540 local_irq_enable(); \
541 info.si_signo = signr; \ 249 info.si_signo = signr; \
542 info.si_errno = 0; \ 250 info.si_errno = 0; \
543 info.si_code = sicode; \ 251 info.si_code = sicode; \
@@ -545,90 +253,68 @@ void do_##name(struct pt_regs *regs, long error_code) \
545 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 253 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
546 == NOTIFY_STOP) \ 254 == NOTIFY_STOP) \
547 return; \ 255 return; \
548 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ 256 conditional_sti(regs); \
257 do_trap(trapnr, signr, str, regs, error_code, &info); \
549} 258}
550 259
551#define DO_VM86_ERROR(trapnr, signr, str, name) \ 260DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
552void do_##name(struct pt_regs *regs, long error_code) \ 261DO_ERROR(4, SIGSEGV, "overflow", overflow)
553{ \ 262DO_ERROR(5, SIGSEGV, "bounds", bounds)
554 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 263DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
555 == NOTIFY_STOP) \
556 return; \
557 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
558}
559
560#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
561void do_##name(struct pt_regs *regs, long error_code) \
562{ \
563 siginfo_t info; \
564 info.si_signo = signr; \
565 info.si_errno = 0; \
566 info.si_code = sicode; \
567 info.si_addr = (void __user *)siaddr; \
568 trace_hardirqs_fixup(); \
569 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
570 == NOTIFY_STOP) \
571 return; \
572 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
573}
574
575DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
576#ifndef CONFIG_KPROBES
577DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
578#endif
579DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
580DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
581DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
582DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 264DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
583DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 265DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
584DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 266DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
267#ifdef CONFIG_X86_32
585DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 268DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
586DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) 269#endif
587DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) 270DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
271
272#ifdef CONFIG_X86_64
273/* Runs on IST stack */
274dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
275{
276 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
277 12, SIGBUS) == NOTIFY_STOP)
278 return;
279 preempt_conditional_sti(regs);
280 do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
281 preempt_conditional_cli(regs);
282}
283
284dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
285{
286 static const char str[] = "double fault";
287 struct task_struct *tsk = current;
288
289 /* Return not checked because double check cannot be ignored */
290 notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
588 291
589void __kprobes 292 tsk->thread.error_code = error_code;
293 tsk->thread.trap_no = 8;
294
295 /* This is always a kernel trap and never fixable (and thus must
296 never return). */
297 for (;;)
298 die(str, regs, error_code);
299}
300#endif
301
302dotraplinkage void __kprobes
590do_general_protection(struct pt_regs *regs, long error_code) 303do_general_protection(struct pt_regs *regs, long error_code)
591{ 304{
592 struct task_struct *tsk; 305 struct task_struct *tsk;
593 struct thread_struct *thread;
594 struct tss_struct *tss;
595 int cpu;
596 306
597 cpu = get_cpu(); 307 conditional_sti(regs);
598 tss = &per_cpu(init_tss, cpu);
599 thread = &current->thread;
600
601 /*
602 * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
603 * invalid offset set (the LAZY one) and the faulting thread has
604 * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS
605 * and we set the offset field correctly. Then we let the CPU to
606 * restart the faulting instruction.
607 */
608 if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
609 thread->io_bitmap_ptr) {
610 memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
611 thread->io_bitmap_max);
612 /*
613 * If the previously set map was extending to higher ports
614 * than the current one, pad extra space with 0xff (no access).
615 */
616 if (thread->io_bitmap_max < tss->io_bitmap_max) {
617 memset((char *) tss->io_bitmap +
618 thread->io_bitmap_max, 0xff,
619 tss->io_bitmap_max - thread->io_bitmap_max);
620 }
621 tss->io_bitmap_max = thread->io_bitmap_max;
622 tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
623 tss->io_bitmap_owner = thread;
624 put_cpu();
625 308
309#ifdef CONFIG_X86_32
310 if (lazy_iobitmap_copy()) {
311 /* restart the faulting instruction */
626 return; 312 return;
627 } 313 }
628 put_cpu();
629 314
630 if (regs->flags & X86_VM_MASK) 315 if (regs->flags & X86_VM_MASK)
631 goto gp_in_vm86; 316 goto gp_in_vm86;
317#endif
632 318
633 tsk = current; 319 tsk = current;
634 if (!user_mode(regs)) 320 if (!user_mode(regs))
@@ -650,10 +336,12 @@ do_general_protection(struct pt_regs *regs, long error_code)
650 force_sig(SIGSEGV, tsk); 336 force_sig(SIGSEGV, tsk);
651 return; 337 return;
652 338
339#ifdef CONFIG_X86_32
653gp_in_vm86: 340gp_in_vm86:
654 local_irq_enable(); 341 local_irq_enable();
655 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); 342 handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
656 return; 343 return;
344#endif
657 345
658gp_in_kernel: 346gp_in_kernel:
659 if (fixup_exception(regs)) 347 if (fixup_exception(regs))
@@ -690,7 +378,8 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
690 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 378 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
691 379
692 /* Clear and disable the memory parity error line. */ 380 /* Clear and disable the memory parity error line. */
693 clear_mem_error(reason); 381 reason = (reason & 0xf) | 4;
382 outb(reason, 0x61);
694} 383}
695 384
696static notrace __kprobes void 385static notrace __kprobes void
@@ -716,7 +405,8 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
716static notrace __kprobes void 405static notrace __kprobes void
717unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 406unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
718{ 407{
719 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 408 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
409 NOTIFY_STOP)
720 return; 410 return;
721#ifdef CONFIG_MCA 411#ifdef CONFIG_MCA
722 /* 412 /*
@@ -739,41 +429,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
739 printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 429 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
740} 430}
741 431
742static DEFINE_SPINLOCK(nmi_print_lock);
743
744void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
745{
746 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
747 return;
748
749 spin_lock(&nmi_print_lock);
750 /*
751 * We are in trouble anyway, lets at least try
752 * to get a message out:
753 */
754 bust_spinlocks(1);
755 printk(KERN_EMERG "%s", str);
756 printk(" on CPU%d, ip %08lx, registers:\n",
757 smp_processor_id(), regs->ip);
758 show_registers(regs);
759 if (do_panic)
760 panic("Non maskable interrupt");
761 console_silent();
762 spin_unlock(&nmi_print_lock);
763 bust_spinlocks(0);
764
765 /*
766 * If we are in kernel we are probably nested up pretty bad
767 * and might aswell get out now while we still can:
768 */
769 if (!user_mode_vm(regs)) {
770 current->thread.trap_no = 2;
771 crash_kexec(regs);
772 }
773
774 do_exit(SIGSEGV);
775}
776
777static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 432static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
778{ 433{
779 unsigned char reason = 0; 434 unsigned char reason = 0;
@@ -812,22 +467,25 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
812 mem_parity_error(reason, regs); 467 mem_parity_error(reason, regs);
813 if (reason & 0x40) 468 if (reason & 0x40)
814 io_check_error(reason, regs); 469 io_check_error(reason, regs);
470#ifdef CONFIG_X86_32
815 /* 471 /*
816 * Reassert NMI in case it became active meanwhile 472 * Reassert NMI in case it became active meanwhile
817 * as it's edge-triggered: 473 * as it's edge-triggered:
818 */ 474 */
819 reassert_nmi(); 475 reassert_nmi();
476#endif
820} 477}
821 478
822notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) 479dotraplinkage notrace __kprobes void
480do_nmi(struct pt_regs *regs, long error_code)
823{ 481{
824 int cpu;
825
826 nmi_enter(); 482 nmi_enter();
827 483
828 cpu = smp_processor_id(); 484#ifdef CONFIG_X86_32
829 485 { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
830 ++nmi_count(cpu); 486#else
487 add_pda(__nmi_count, 1);
488#endif
831 489
832 if (!ignore_nmis) 490 if (!ignore_nmis)
833 default_do_nmi(regs); 491 default_do_nmi(regs);
@@ -847,21 +505,44 @@ void restart_nmi(void)
847 acpi_nmi_enable(); 505 acpi_nmi_enable();
848} 506}
849 507
850#ifdef CONFIG_KPROBES 508/* May run on IST stack. */
851void __kprobes do_int3(struct pt_regs *regs, long error_code) 509dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
852{ 510{
853 trace_hardirqs_fixup(); 511#ifdef CONFIG_KPROBES
854
855 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) 512 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
856 == NOTIFY_STOP) 513 == NOTIFY_STOP)
857 return; 514 return;
858 /* 515#else
859 * This is an interrupt gate, because kprobes wants interrupts 516 if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
860 * disabled. Normal trap handlers don't. 517 == NOTIFY_STOP)
861 */ 518 return;
862 restore_interrupts(regs); 519#endif
520
521 preempt_conditional_sti(regs);
522 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
523 preempt_conditional_cli(regs);
524}
863 525
864 do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); 526#ifdef CONFIG_X86_64
527/* Help handler running on IST stack to switch back to user stack
528 for scheduling or signal handling. The actual stack switch is done in
529 entry.S */
530asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
531{
532 struct pt_regs *regs = eregs;
533 /* Did already sync */
534 if (eregs == (struct pt_regs *)eregs->sp)
535 ;
536 /* Exception from user space */
537 else if (user_mode(eregs))
538 regs = task_pt_regs(current);
539 /* Exception from kernel and interrupts are enabled. Move to
540 kernel process stack. */
541 else if (eregs->flags & X86_EFLAGS_IF)
542 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
543 if (eregs != regs)
544 *regs = *eregs;
545 return regs;
865} 546}
866#endif 547#endif
867 548
@@ -886,15 +567,15 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code)
886 * about restoring all the debug state, and ptrace doesn't have to 567 * about restoring all the debug state, and ptrace doesn't have to
887 * find every occurrence of the TF bit that could be saved away even 568 * find every occurrence of the TF bit that could be saved away even
888 * by user code) 569 * by user code)
570 *
571 * May run on IST stack.
889 */ 572 */
890void __kprobes do_debug(struct pt_regs *regs, long error_code) 573dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
891{ 574{
892 struct task_struct *tsk = current; 575 struct task_struct *tsk = current;
893 unsigned int condition; 576 unsigned long condition;
894 int si_code; 577 int si_code;
895 578
896 trace_hardirqs_fixup();
897
898 get_debugreg(condition, 6); 579 get_debugreg(condition, 6);
899 580
900 /* 581 /*
@@ -906,9 +587,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
906 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 587 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
907 SIGTRAP) == NOTIFY_STOP) 588 SIGTRAP) == NOTIFY_STOP)
908 return; 589 return;
590
909 /* It's safe to allow irq's after DR6 has been saved */ 591 /* It's safe to allow irq's after DR6 has been saved */
910 if (regs->flags & X86_EFLAGS_IF) 592 preempt_conditional_sti(regs);
911 local_irq_enable();
912 593
913 /* Mask out spurious debug traps due to lazy DR7 setting */ 594 /* Mask out spurious debug traps due to lazy DR7 setting */
914 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 595 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
@@ -916,8 +597,10 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
916 goto clear_dr7; 597 goto clear_dr7;
917 } 598 }
918 599
600#ifdef CONFIG_X86_32
919 if (regs->flags & X86_VM_MASK) 601 if (regs->flags & X86_VM_MASK)
920 goto debug_vm86; 602 goto debug_vm86;
603#endif
921 604
922 /* Save debug status register where ptrace can see it */ 605 /* Save debug status register where ptrace can see it */
923 tsk->thread.debugreg6 = condition; 606 tsk->thread.debugreg6 = condition;
@@ -927,16 +610,11 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
927 * kernel space (but re-enable TF when returning to user mode). 610 * kernel space (but re-enable TF when returning to user mode).
928 */ 611 */
929 if (condition & DR_STEP) { 612 if (condition & DR_STEP) {
930 /*
931 * We already checked v86 mode above, so we can
932 * check for kernel mode by just checking the CPL
933 * of CS.
934 */
935 if (!user_mode(regs)) 613 if (!user_mode(regs))
936 goto clear_TF_reenable; 614 goto clear_TF_reenable;
937 } 615 }
938 616
939 si_code = get_si_code((unsigned long)condition); 617 si_code = get_si_code(condition);
940 /* Ok, finally something we can handle */ 618 /* Ok, finally something we can handle */
941 send_sigtrap(tsk, regs, error_code, si_code); 619 send_sigtrap(tsk, regs, error_code, si_code);
942 620
@@ -946,18 +624,37 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
946 */ 624 */
947clear_dr7: 625clear_dr7:
948 set_debugreg(0, 7); 626 set_debugreg(0, 7);
627 preempt_conditional_cli(regs);
949 return; 628 return;
950 629
630#ifdef CONFIG_X86_32
951debug_vm86: 631debug_vm86:
952 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); 632 handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
633 preempt_conditional_cli(regs);
953 return; 634 return;
635#endif
954 636
955clear_TF_reenable: 637clear_TF_reenable:
956 set_tsk_thread_flag(tsk, TIF_SINGLESTEP); 638 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
957 regs->flags &= ~X86_EFLAGS_TF; 639 regs->flags &= ~X86_EFLAGS_TF;
640 preempt_conditional_cli(regs);
958 return; 641 return;
959} 642}
960 643
644#ifdef CONFIG_X86_64
645static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
646{
647 if (fixup_exception(regs))
648 return 1;
649
650 notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
651 /* Illegal floating point operation in the kernel */
652 current->thread.trap_no = trapnr;
653 die(str, regs, 0);
654 return 0;
655}
656#endif
657
961/* 658/*
962 * Note that we play around with the 'TS' bit in an attempt to get 659 * Note that we play around with the 'TS' bit in an attempt to get
963 * the correct behaviour even in the presence of the asynchronous 660 * the correct behaviour even in the presence of the asynchronous
@@ -994,7 +691,9 @@ void math_error(void __user *ip)
994 swd = get_fpu_swd(task); 691 swd = get_fpu_swd(task);
995 switch (swd & ~cwd & 0x3f) { 692 switch (swd & ~cwd & 0x3f) {
996 case 0x000: /* No unmasked exception */ 693 case 0x000: /* No unmasked exception */
694#ifdef CONFIG_X86_32
997 return; 695 return;
696#endif
998 default: /* Multiple exceptions */ 697 default: /* Multiple exceptions */
999 break; 698 break;
1000 case 0x001: /* Invalid Op */ 699 case 0x001: /* Invalid Op */
@@ -1022,9 +721,18 @@ void math_error(void __user *ip)
1022 force_sig_info(SIGFPE, &info, task); 721 force_sig_info(SIGFPE, &info, task);
1023} 722}
1024 723
1025void do_coprocessor_error(struct pt_regs *regs, long error_code) 724dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
1026{ 725{
726 conditional_sti(regs);
727
728#ifdef CONFIG_X86_32
1027 ignore_fpu_irq = 1; 729 ignore_fpu_irq = 1;
730#else
731 if (!user_mode(regs) &&
732 kernel_math_error(regs, "kernel x87 math error", 16))
733 return;
734#endif
735
1028 math_error((void __user *)regs->ip); 736 math_error((void __user *)regs->ip);
1029} 737}
1030 738
@@ -1076,8 +784,12 @@ static void simd_math_error(void __user *ip)
1076 force_sig_info(SIGFPE, &info, task); 784 force_sig_info(SIGFPE, &info, task);
1077} 785}
1078 786
1079void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) 787dotraplinkage void
788do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
1080{ 789{
790 conditional_sti(regs);
791
792#ifdef CONFIG_X86_32
1081 if (cpu_has_xmm) { 793 if (cpu_has_xmm) {
1082 /* Handle SIMD FPU exceptions on PIII+ processors. */ 794 /* Handle SIMD FPU exceptions on PIII+ processors. */
1083 ignore_fpu_irq = 1; 795 ignore_fpu_irq = 1;
@@ -1096,16 +808,25 @@ void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
1096 current->thread.error_code = error_code; 808 current->thread.error_code = error_code;
1097 die_if_kernel("cache flush denied", regs, error_code); 809 die_if_kernel("cache flush denied", regs, error_code);
1098 force_sig(SIGSEGV, current); 810 force_sig(SIGSEGV, current);
811#else
812 if (!user_mode(regs) &&
813 kernel_math_error(regs, "kernel simd math error", 19))
814 return;
815 simd_math_error((void __user *)regs->ip);
816#endif
1099} 817}
1100 818
1101void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) 819dotraplinkage void
820do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
1102{ 821{
822 conditional_sti(regs);
1103#if 0 823#if 0
1104 /* No need to warn about this any longer. */ 824 /* No need to warn about this any longer. */
1105 printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); 825 printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
1106#endif 826#endif
1107} 827}
1108 828
829#ifdef CONFIG_X86_32
1109unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) 830unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
1110{ 831{
1111 struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); 832 struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
@@ -1124,6 +845,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
1124 845
1125 return new_kesp; 846 return new_kesp;
1126} 847}
848#else
849asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
850{
851}
852
853asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
854{
855}
856#endif
1127 857
1128/* 858/*
1129 * 'math_state_restore()' saves the current math information in the 859 * 'math_state_restore()' saves the current math information in the
@@ -1156,14 +886,24 @@ asmlinkage void math_state_restore(void)
1156 } 886 }
1157 887
1158 clts(); /* Allow maths ops (or we recurse) */ 888 clts(); /* Allow maths ops (or we recurse) */
889#ifdef CONFIG_X86_32
1159 restore_fpu(tsk); 890 restore_fpu(tsk);
891#else
892 /*
893 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
894 */
895 if (unlikely(restore_fpu_checking(tsk))) {
896 stts();
897 force_sig(SIGSEGV, tsk);
898 return;
899 }
900#endif
1160 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 901 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
1161 tsk->fpu_counter++; 902 tsk->fpu_counter++;
1162} 903}
1163EXPORT_SYMBOL_GPL(math_state_restore); 904EXPORT_SYMBOL_GPL(math_state_restore);
1164 905
1165#ifndef CONFIG_MATH_EMULATION 906#ifndef CONFIG_MATH_EMULATION
1166
1167asmlinkage void math_emulate(long arg) 907asmlinkage void math_emulate(long arg)
1168{ 908{
1169 printk(KERN_EMERG 909 printk(KERN_EMERG
@@ -1172,12 +912,54 @@ asmlinkage void math_emulate(long arg)
1172 force_sig(SIGFPE, current); 912 force_sig(SIGFPE, current);
1173 schedule(); 913 schedule();
1174} 914}
1175
1176#endif /* CONFIG_MATH_EMULATION */ 915#endif /* CONFIG_MATH_EMULATION */
1177 916
917dotraplinkage void __kprobes
918do_device_not_available(struct pt_regs *regs, long error)
919{
920#ifdef CONFIG_X86_32
921 if (read_cr0() & X86_CR0_EM) {
922 conditional_sti(regs);
923 math_emulate(0);
924 } else {
925 math_state_restore(); /* interrupts still off */
926 conditional_sti(regs);
927 }
928#else
929 math_state_restore();
930#endif
931}
932
933#ifdef CONFIG_X86_32
934#ifdef CONFIG_X86_MCE
935dotraplinkage void __kprobes do_machine_check(struct pt_regs *regs, long error)
936{
937 conditional_sti(regs);
938 machine_check_vector(regs, error);
939}
940#endif
941
942dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
943{
944 siginfo_t info;
945 local_irq_enable();
946
947 info.si_signo = SIGILL;
948 info.si_errno = 0;
949 info.si_code = ILL_BADSTK;
950 info.si_addr = 0;
951 if (notify_die(DIE_TRAP, "iret exception",
952 regs, error_code, 32, SIGILL) == NOTIFY_STOP)
953 return;
954 do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
955}
956#endif
957
1178void __init trap_init(void) 958void __init trap_init(void)
1179{ 959{
960#ifdef CONFIG_X86_32
1180 int i; 961 int i;
962#endif
1181 963
1182#ifdef CONFIG_EISA 964#ifdef CONFIG_EISA
1183 void __iomem *p = early_ioremap(0x0FFFD9, 4); 965 void __iomem *p = early_ioremap(0x0FFFD9, 4);
@@ -1187,29 +969,40 @@ void __init trap_init(void)
1187 early_iounmap(p, 4); 969 early_iounmap(p, 4);
1188#endif 970#endif
1189 971
1190 set_trap_gate(0, &divide_error); 972 set_intr_gate(0, &divide_error);
1191 set_intr_gate(1, &debug); 973 set_intr_gate_ist(1, &debug, DEBUG_STACK);
1192 set_intr_gate(2, &nmi); 974 set_intr_gate_ist(2, &nmi, NMI_STACK);
1193 set_system_intr_gate(3, &int3); /* int3 can be called from all */ 975 /* int3 can be called from all */
1194 set_system_gate(4, &overflow); /* int4 can be called from all */ 976 set_system_intr_gate_ist(3, &int3, DEBUG_STACK);
1195 set_trap_gate(5, &bounds); 977 /* int4 can be called from all */
1196 set_trap_gate(6, &invalid_op); 978 set_system_intr_gate(4, &overflow);
1197 set_trap_gate(7, &device_not_available); 979 set_intr_gate(5, &bounds);
980 set_intr_gate(6, &invalid_op);
981 set_intr_gate(7, &device_not_available);
982#ifdef CONFIG_X86_32
1198 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); 983 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
1199 set_trap_gate(9, &coprocessor_segment_overrun); 984#else
1200 set_trap_gate(10, &invalid_TSS); 985 set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
1201 set_trap_gate(11, &segment_not_present); 986#endif
1202 set_trap_gate(12, &stack_segment); 987 set_intr_gate(9, &coprocessor_segment_overrun);
1203 set_trap_gate(13, &general_protection); 988 set_intr_gate(10, &invalid_TSS);
989 set_intr_gate(11, &segment_not_present);
990 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
991 set_intr_gate(13, &general_protection);
1204 set_intr_gate(14, &page_fault); 992 set_intr_gate(14, &page_fault);
1205 set_trap_gate(15, &spurious_interrupt_bug); 993 set_intr_gate(15, &spurious_interrupt_bug);
1206 set_trap_gate(16, &coprocessor_error); 994 set_intr_gate(16, &coprocessor_error);
1207 set_trap_gate(17, &alignment_check); 995 set_intr_gate(17, &alignment_check);
1208#ifdef CONFIG_X86_MCE 996#ifdef CONFIG_X86_MCE
1209 set_trap_gate(18, &machine_check); 997 set_intr_gate_ist(18, &machine_check, MCE_STACK);
1210#endif 998#endif
1211 set_trap_gate(19, &simd_coprocessor_error); 999 set_intr_gate(19, &simd_coprocessor_error);
1212 1000
1001#ifdef CONFIG_IA32_EMULATION
1002 set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1003#endif
1004
1005#ifdef CONFIG_X86_32
1213 if (cpu_has_fxsr) { 1006 if (cpu_has_fxsr) {
1214 printk(KERN_INFO "Enabling fast FPU save and restore... "); 1007 printk(KERN_INFO "Enabling fast FPU save and restore... ");
1215 set_in_cr4(X86_CR4_OSFXSR); 1008 set_in_cr4(X86_CR4_OSFXSR);
@@ -1222,36 +1015,20 @@ void __init trap_init(void)
1222 printk("done.\n"); 1015 printk("done.\n");
1223 } 1016 }
1224 1017
1225 set_system_gate(SYSCALL_VECTOR, &system_call); 1018 set_system_trap_gate(SYSCALL_VECTOR, &system_call);
1226 1019
1227 /* Reserve all the builtin and the syscall vector: */ 1020 /* Reserve all the builtin and the syscall vector: */
1228 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) 1021 for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
1229 set_bit(i, used_vectors); 1022 set_bit(i, used_vectors);
1230 1023
1231 set_bit(SYSCALL_VECTOR, used_vectors); 1024 set_bit(SYSCALL_VECTOR, used_vectors);
1232 1025#endif
1233 /* 1026 /*
1234 * Should be a barrier for any external CPU state: 1027 * Should be a barrier for any external CPU state:
1235 */ 1028 */
1236 cpu_init(); 1029 cpu_init();
1237 1030
1031#ifdef CONFIG_X86_32
1238 trap_init_hook(); 1032 trap_init_hook();
1033#endif
1239} 1034}
1240
1241static int __init kstack_setup(char *s)
1242{
1243 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1244
1245 return 1;
1246}
1247__setup("kstack=", kstack_setup);
1248
1249static int __init code_bytes_setup(char *s)
1250{
1251 code_bytes = simple_strtoul(s, NULL, 0);
1252 if (code_bytes > 8192)
1253 code_bytes = 8192;
1254
1255 return 1;
1256}
1257__setup("code_bytes=", code_bytes_setup);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
deleted file mode 100644
index 9c0ac0cab013..000000000000
--- a/arch/x86/kernel/traps_64.c
+++ /dev/null
@@ -1,1214 +0,0 @@
1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
4 *
5 * Pentium III FXSR, SSE support
6 * Gareth Hughes <gareth@valinux.com>, May 2000
7 */
8
9/*
10 * 'Traps.c' handles hardware traps and faults after we have saved some
11 * state in 'entry.S'.
12 */
13#include <linux/moduleparam.h>
14#include <linux/interrupt.h>
15#include <linux/kallsyms.h>
16#include <linux/spinlock.h>
17#include <linux/kprobes.h>
18#include <linux/uaccess.h>
19#include <linux/utsname.h>
20#include <linux/kdebug.h>
21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/ptrace.h>
24#include <linux/string.h>
25#include <linux/unwind.h>
26#include <linux/delay.h>
27#include <linux/errno.h>
28#include <linux/kexec.h>
29#include <linux/sched.h>
30#include <linux/timer.h>
31#include <linux/init.h>
32#include <linux/bug.h>
33#include <linux/nmi.h>
34#include <linux/mm.h>
35#include <linux/smp.h>
36#include <linux/io.h>
37
38#if defined(CONFIG_EDAC)
39#include <linux/edac.h>
40#endif
41
42#include <asm/stacktrace.h>
43#include <asm/processor.h>
44#include <asm/debugreg.h>
45#include <asm/atomic.h>
46#include <asm/system.h>
47#include <asm/unwind.h>
48#include <asm/desc.h>
49#include <asm/i387.h>
50#include <asm/pgalloc.h>
51#include <asm/proto.h>
52#include <asm/pda.h>
53#include <asm/traps.h>
54
55#include <mach_traps.h>
56
57int panic_on_unrecovered_nmi;
58int kstack_depth_to_print = 12;
59static unsigned int code_bytes = 64;
60static int ignore_nmis;
61static int die_counter;
62
63static inline void conditional_sti(struct pt_regs *regs)
64{
65 if (regs->flags & X86_EFLAGS_IF)
66 local_irq_enable();
67}
68
69static inline void preempt_conditional_sti(struct pt_regs *regs)
70{
71 inc_preempt_count();
72 if (regs->flags & X86_EFLAGS_IF)
73 local_irq_enable();
74}
75
76static inline void preempt_conditional_cli(struct pt_regs *regs)
77{
78 if (regs->flags & X86_EFLAGS_IF)
79 local_irq_disable();
80 /* Make sure to not schedule here because we could be running
81 on an exception stack. */
82 dec_preempt_count();
83}
84
85void printk_address(unsigned long address, int reliable)
86{
87 printk(" [<%016lx>] %s%pS\n",
88 address, reliable ? "" : "? ", (void *) address);
89}
90
91static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
92 unsigned *usedp, char **idp)
93{
94 static char ids[][8] = {
95 [DEBUG_STACK - 1] = "#DB",
96 [NMI_STACK - 1] = "NMI",
97 [DOUBLEFAULT_STACK - 1] = "#DF",
98 [STACKFAULT_STACK - 1] = "#SS",
99 [MCE_STACK - 1] = "#MC",
100#if DEBUG_STKSZ > EXCEPTION_STKSZ
101 [N_EXCEPTION_STACKS ...
102 N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
103#endif
104 };
105 unsigned k;
106
107 /*
108 * Iterate over all exception stacks, and figure out whether
109 * 'stack' is in one of them:
110 */
111 for (k = 0; k < N_EXCEPTION_STACKS; k++) {
112 unsigned long end = per_cpu(orig_ist, cpu).ist[k];
113 /*
114 * Is 'stack' above this exception frame's end?
115 * If yes then skip to the next frame.
116 */
117 if (stack >= end)
118 continue;
119 /*
120 * Is 'stack' above this exception frame's start address?
121 * If yes then we found the right frame.
122 */
123 if (stack >= end - EXCEPTION_STKSZ) {
124 /*
125 * Make sure we only iterate through an exception
126 * stack once. If it comes up for the second time
127 * then there's something wrong going on - just
128 * break out and return NULL:
129 */
130 if (*usedp & (1U << k))
131 break;
132 *usedp |= 1U << k;
133 *idp = ids[k];
134 return (unsigned long *)end;
135 }
136 /*
137 * If this is a debug stack, and if it has a larger size than
138 * the usual exception stacks, then 'stack' might still
139 * be within the lower portion of the debug stack:
140 */
141#if DEBUG_STKSZ > EXCEPTION_STKSZ
142 if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
143 unsigned j = N_EXCEPTION_STACKS - 1;
144
145 /*
146 * Black magic. A large debug stack is composed of
147 * multiple exception stack entries, which we
148 * iterate through now. Dont look:
149 */
150 do {
151 ++j;
152 end -= EXCEPTION_STKSZ;
153 ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
154 } while (stack < end - EXCEPTION_STKSZ);
155 if (*usedp & (1U << j))
156 break;
157 *usedp |= 1U << j;
158 *idp = ids[j];
159 return (unsigned long *)end;
160 }
161#endif
162 }
163 return NULL;
164}
165
166/*
167 * x86-64 can have up to three kernel stacks:
168 * process stack
169 * interrupt stack
170 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
171 */
172
173static inline int valid_stack_ptr(struct thread_info *tinfo,
174 void *p, unsigned int size, void *end)
175{
176 void *t = tinfo;
177 if (end) {
178 if (p < end && p >= (end-THREAD_SIZE))
179 return 1;
180 else
181 return 0;
182 }
183 return p > t && p < t + THREAD_SIZE - size;
184}
185
186/* The form of the top of the frame on the stack */
187struct stack_frame {
188 struct stack_frame *next_frame;
189 unsigned long return_address;
190};
191
192static inline unsigned long
193print_context_stack(struct thread_info *tinfo,
194 unsigned long *stack, unsigned long bp,
195 const struct stacktrace_ops *ops, void *data,
196 unsigned long *end)
197{
198 struct stack_frame *frame = (struct stack_frame *)bp;
199
200 while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
201 unsigned long addr;
202
203 addr = *stack;
204 if (__kernel_text_address(addr)) {
205 if ((unsigned long) stack == bp + 8) {
206 ops->address(data, addr, 1);
207 frame = frame->next_frame;
208 bp = (unsigned long) frame;
209 } else {
210 ops->address(data, addr, bp == 0);
211 }
212 }
213 stack++;
214 }
215 return bp;
216}
217
218void dump_trace(struct task_struct *task, struct pt_regs *regs,
219 unsigned long *stack, unsigned long bp,
220 const struct stacktrace_ops *ops, void *data)
221{
222 const unsigned cpu = get_cpu();
223 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
224 unsigned used = 0;
225 struct thread_info *tinfo;
226
227 if (!task)
228 task = current;
229
230 if (!stack) {
231 unsigned long dummy;
232 stack = &dummy;
233 if (task && task != current)
234 stack = (unsigned long *)task->thread.sp;
235 }
236
237#ifdef CONFIG_FRAME_POINTER
238 if (!bp) {
239 if (task == current) {
240 /* Grab bp right from our regs */
241 asm("movq %%rbp, %0" : "=r" (bp) : );
242 } else {
243 /* bp is the last reg pushed by switch_to */
244 bp = *(unsigned long *) task->thread.sp;
245 }
246 }
247#endif
248
249 /*
250 * Print function call entries in all stacks, starting at the
251 * current stack address. If the stacks consist of nested
252 * exceptions
253 */
254 tinfo = task_thread_info(task);
255 for (;;) {
256 char *id;
257 unsigned long *estack_end;
258 estack_end = in_exception_stack(cpu, (unsigned long)stack,
259 &used, &id);
260
261 if (estack_end) {
262 if (ops->stack(data, id) < 0)
263 break;
264
265 bp = print_context_stack(tinfo, stack, bp, ops,
266 data, estack_end);
267 ops->stack(data, "<EOE>");
268 /*
269 * We link to the next stack via the
270 * second-to-last pointer (index -2 to end) in the
271 * exception stack:
272 */
273 stack = (unsigned long *) estack_end[-2];
274 continue;
275 }
276 if (irqstack_end) {
277 unsigned long *irqstack;
278 irqstack = irqstack_end -
279 (IRQSTACKSIZE - 64) / sizeof(*irqstack);
280
281 if (stack >= irqstack && stack < irqstack_end) {
282 if (ops->stack(data, "IRQ") < 0)
283 break;
284 bp = print_context_stack(tinfo, stack, bp,
285 ops, data, irqstack_end);
286 /*
287 * We link to the next stack (which would be
288 * the process stack normally) the last
289 * pointer (index -1 to end) in the IRQ stack:
290 */
291 stack = (unsigned long *) (irqstack_end[-1]);
292 irqstack_end = NULL;
293 ops->stack(data, "EOI");
294 continue;
295 }
296 }
297 break;
298 }
299
300 /*
301 * This handles the process stack:
302 */
303 bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
304 put_cpu();
305}
306EXPORT_SYMBOL(dump_trace);
307
308static void
309print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
310{
311 print_symbol(msg, symbol);
312 printk("\n");
313}
314
315static void print_trace_warning(void *data, char *msg)
316{
317 printk("%s\n", msg);
318}
319
320static int print_trace_stack(void *data, char *name)
321{
322 printk(" <%s> ", name);
323 return 0;
324}
325
326static void print_trace_address(void *data, unsigned long addr, int reliable)
327{
328 touch_nmi_watchdog();
329 printk_address(addr, reliable);
330}
331
332static const struct stacktrace_ops print_trace_ops = {
333 .warning = print_trace_warning,
334 .warning_symbol = print_trace_warning_symbol,
335 .stack = print_trace_stack,
336 .address = print_trace_address,
337};
338
339static void
340show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
341 unsigned long *stack, unsigned long bp, char *log_lvl)
342{
343 printk("Call Trace:\n");
344 dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
345}
346
347void show_trace(struct task_struct *task, struct pt_regs *regs,
348 unsigned long *stack, unsigned long bp)
349{
350 show_trace_log_lvl(task, regs, stack, bp, "");
351}
352
353static void
354show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
355 unsigned long *sp, unsigned long bp, char *log_lvl)
356{
357 unsigned long *stack;
358 int i;
359 const int cpu = smp_processor_id();
360 unsigned long *irqstack_end =
361 (unsigned long *) (cpu_pda(cpu)->irqstackptr);
362 unsigned long *irqstack =
363 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
364
365 /*
366 * debugging aid: "show_stack(NULL, NULL);" prints the
367 * back trace for this cpu.
368 */
369
370 if (sp == NULL) {
371 if (task)
372 sp = (unsigned long *)task->thread.sp;
373 else
374 sp = (unsigned long *)&sp;
375 }
376
377 stack = sp;
378 for (i = 0; i < kstack_depth_to_print; i++) {
379 if (stack >= irqstack && stack <= irqstack_end) {
380 if (stack == irqstack_end) {
381 stack = (unsigned long *) (irqstack_end[-1]);
382 printk(" <EOI> ");
383 }
384 } else {
385 if (((long) stack & (THREAD_SIZE-1)) == 0)
386 break;
387 }
388 if (i && ((i % 4) == 0))
389 printk("\n");
390 printk(" %016lx", *stack++);
391 touch_nmi_watchdog();
392 }
393 printk("\n");
394 show_trace_log_lvl(task, regs, sp, bp, log_lvl);
395}
396
397void show_stack(struct task_struct *task, unsigned long *sp)
398{
399 show_stack_log_lvl(task, NULL, sp, 0, "");
400}
401
402/*
403 * The architecture-independent dump_stack generator
404 */
405void dump_stack(void)
406{
407 unsigned long bp = 0;
408 unsigned long stack;
409
410#ifdef CONFIG_FRAME_POINTER
411 if (!bp)
412 asm("movq %%rbp, %0" : "=r" (bp) : );
413#endif
414
415 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
416 current->pid, current->comm, print_tainted(),
417 init_utsname()->release,
418 (int)strcspn(init_utsname()->version, " "),
419 init_utsname()->version);
420 show_trace(NULL, NULL, &stack, bp);
421}
422EXPORT_SYMBOL(dump_stack);
423
424void show_registers(struct pt_regs *regs)
425{
426 int i;
427 unsigned long sp;
428 const int cpu = smp_processor_id();
429 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
430
431 sp = regs->sp;
432 printk("CPU %d ", cpu);
433 __show_regs(regs);
434 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
435 cur->comm, cur->pid, task_thread_info(cur), cur);
436
437 /*
438 * When in-kernel, we also print out the stack and code at the
439 * time of the fault..
440 */
441 if (!user_mode(regs)) {
442 unsigned int code_prologue = code_bytes * 43 / 64;
443 unsigned int code_len = code_bytes;
444 unsigned char c;
445 u8 *ip;
446
447 printk("Stack: ");
448 show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
449 regs->bp, "");
450
451 printk(KERN_EMERG "Code: ");
452
453 ip = (u8 *)regs->ip - code_prologue;
454 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
455 /* try starting at RIP */
456 ip = (u8 *)regs->ip;
457 code_len = code_len - code_prologue + 1;
458 }
459 for (i = 0; i < code_len; i++, ip++) {
460 if (ip < (u8 *)PAGE_OFFSET ||
461 probe_kernel_address(ip, c)) {
462 printk(" Bad RIP value.");
463 break;
464 }
465 if (ip == (u8 *)regs->ip)
466 printk("<%02x> ", c);
467 else
468 printk("%02x ", c);
469 }
470 }
471 printk("\n");
472}
473
474int is_valid_bugaddr(unsigned long ip)
475{
476 unsigned short ud2;
477
478 if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
479 return 0;
480
481 return ud2 == 0x0b0f;
482}
483
484static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
485static int die_owner = -1;
486static unsigned int die_nest_count;
487
488unsigned __kprobes long oops_begin(void)
489{
490 int cpu;
491 unsigned long flags;
492
493 oops_enter();
494
495 /* racy, but better than risking deadlock. */
496 raw_local_irq_save(flags);
497 cpu = smp_processor_id();
498 if (!__raw_spin_trylock(&die_lock)) {
499 if (cpu == die_owner)
500 /* nested oops. should stop eventually */;
501 else
502 __raw_spin_lock(&die_lock);
503 }
504 die_nest_count++;
505 die_owner = cpu;
506 console_verbose();
507 bust_spinlocks(1);
508 return flags;
509}
510
511void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
512{
513 die_owner = -1;
514 bust_spinlocks(0);
515 die_nest_count--;
516 if (!die_nest_count)
517 /* Nest count reaches zero, release the lock. */
518 __raw_spin_unlock(&die_lock);
519 raw_local_irq_restore(flags);
520 if (!regs) {
521 oops_exit();
522 return;
523 }
524 if (panic_on_oops)
525 panic("Fatal exception");
526 oops_exit();
527 do_exit(signr);
528}
529
530int __kprobes __die(const char *str, struct pt_regs *regs, long err)
531{
532 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
533#ifdef CONFIG_PREEMPT
534 printk("PREEMPT ");
535#endif
536#ifdef CONFIG_SMP
537 printk("SMP ");
538#endif
539#ifdef CONFIG_DEBUG_PAGEALLOC
540 printk("DEBUG_PAGEALLOC");
541#endif
542 printk("\n");
543 if (notify_die(DIE_OOPS, str, regs, err,
544 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
545 return 1;
546
547 show_registers(regs);
548 add_taint(TAINT_DIE);
549 /* Executive summary in case the oops scrolled away */
550 printk(KERN_ALERT "RIP ");
551 printk_address(regs->ip, 1);
552 printk(" RSP <%016lx>\n", regs->sp);
553 if (kexec_should_crash(current))
554 crash_kexec(regs);
555 return 0;
556}
557
558void die(const char *str, struct pt_regs *regs, long err)
559{
560 unsigned long flags = oops_begin();
561
562 if (!user_mode(regs))
563 report_bug(regs->ip, regs);
564
565 if (__die(str, regs, err))
566 regs = NULL;
567 oops_end(flags, regs, SIGSEGV);
568}
569
570notrace __kprobes void
571die_nmi(char *str, struct pt_regs *regs, int do_panic)
572{
573 unsigned long flags;
574
575 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
576 return;
577
578 flags = oops_begin();
579 /*
580 * We are in trouble anyway, lets at least try
581 * to get a message out.
582 */
583 printk(KERN_EMERG "%s", str);
584 printk(" on CPU%d, ip %08lx, registers:\n",
585 smp_processor_id(), regs->ip);
586 show_registers(regs);
587 if (kexec_should_crash(current))
588 crash_kexec(regs);
589 if (do_panic || panic_on_oops)
590 panic("Non maskable interrupt");
591 oops_end(flags, NULL, SIGBUS);
592 nmi_exit();
593 local_irq_enable();
594 do_exit(SIGBUS);
595}
596
597static void __kprobes
598do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
599 long error_code, siginfo_t *info)
600{
601 struct task_struct *tsk = current;
602
603 if (!user_mode(regs))
604 goto kernel_trap;
605
606 /*
607 * We want error_code and trap_no set for userspace faults and
608 * kernelspace faults which result in die(), but not
609 * kernelspace faults which are fixed up. die() gives the
610 * process no chance to handle the signal and notice the
611 * kernel fault information, so that won't result in polluting
612 * the information about previously queued, but not yet
613 * delivered, faults. See also do_general_protection below.
614 */
615 tsk->thread.error_code = error_code;
616 tsk->thread.trap_no = trapnr;
617
618 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
619 printk_ratelimit()) {
620 printk(KERN_INFO
621 "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
622 tsk->comm, tsk->pid, str,
623 regs->ip, regs->sp, error_code);
624 print_vma_addr(" in ", regs->ip);
625 printk("\n");
626 }
627
628 if (info)
629 force_sig_info(signr, info, tsk);
630 else
631 force_sig(signr, tsk);
632 return;
633
634kernel_trap:
635 if (!fixup_exception(regs)) {
636 tsk->thread.error_code = error_code;
637 tsk->thread.trap_no = trapnr;
638 die(str, regs, error_code);
639 }
640 return;
641}
642
643#define DO_ERROR(trapnr, signr, str, name) \
644asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
645{ \
646 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
647 == NOTIFY_STOP) \
648 return; \
649 conditional_sti(regs); \
650 do_trap(trapnr, signr, str, regs, error_code, NULL); \
651}
652
653#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
654asmlinkage void do_##name(struct pt_regs *regs, long error_code) \
655{ \
656 siginfo_t info; \
657 info.si_signo = signr; \
658 info.si_errno = 0; \
659 info.si_code = sicode; \
660 info.si_addr = (void __user *)siaddr; \
661 trace_hardirqs_fixup(); \
662 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
663 == NOTIFY_STOP) \
664 return; \
665 conditional_sti(regs); \
666 do_trap(trapnr, signr, str, regs, error_code, &info); \
667}
668
669DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
670DO_ERROR(4, SIGSEGV, "overflow", overflow)
671DO_ERROR(5, SIGSEGV, "bounds", bounds)
672DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
673DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
674DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
675DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
676DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
677
678/* Runs on IST stack */
679asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
680{
681 if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
682 12, SIGBUS) == NOTIFY_STOP)
683 return;
684 preempt_conditional_sti(regs);
685 do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
686 preempt_conditional_cli(regs);
687}
688
689asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
690{
691 static const char str[] = "double fault";
692 struct task_struct *tsk = current;
693
694 /* Return not checked because double check cannot be ignored */
695 notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
696
697 tsk->thread.error_code = error_code;
698 tsk->thread.trap_no = 8;
699
700 /* This is always a kernel trap and never fixable (and thus must
701 never return). */
702 for (;;)
703 die(str, regs, error_code);
704}
705
706asmlinkage void __kprobes
707do_general_protection(struct pt_regs *regs, long error_code)
708{
709 struct task_struct *tsk;
710
711 conditional_sti(regs);
712
713 tsk = current;
714 if (!user_mode(regs))
715 goto gp_in_kernel;
716
717 tsk->thread.error_code = error_code;
718 tsk->thread.trap_no = 13;
719
720 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
721 printk_ratelimit()) {
722 printk(KERN_INFO
723 "%s[%d] general protection ip:%lx sp:%lx error:%lx",
724 tsk->comm, tsk->pid,
725 regs->ip, regs->sp, error_code);
726 print_vma_addr(" in ", regs->ip);
727 printk("\n");
728 }
729
730 force_sig(SIGSEGV, tsk);
731 return;
732
733gp_in_kernel:
734 if (fixup_exception(regs))
735 return;
736
737 tsk->thread.error_code = error_code;
738 tsk->thread.trap_no = 13;
739 if (notify_die(DIE_GPF, "general protection fault", regs,
740 error_code, 13, SIGSEGV) == NOTIFY_STOP)
741 return;
742 die("general protection fault", regs, error_code);
743}
744
745static notrace __kprobes void
746mem_parity_error(unsigned char reason, struct pt_regs *regs)
747{
748 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
749 reason);
750 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
751
752#if defined(CONFIG_EDAC)
753 if (edac_handler_set()) {
754 edac_atomic_assert_error();
755 return;
756 }
757#endif
758
759 if (panic_on_unrecovered_nmi)
760 panic("NMI: Not continuing");
761
762 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
763
764 /* Clear and disable the memory parity error line. */
765 reason = (reason & 0xf) | 4;
766 outb(reason, 0x61);
767}
768
769static notrace __kprobes void
770io_check_error(unsigned char reason, struct pt_regs *regs)
771{
772 printk("NMI: IOCK error (debug interrupt?)\n");
773 show_registers(regs);
774
775 /* Re-enable the IOCK line, wait for a few seconds */
776 reason = (reason & 0xf) | 8;
777 outb(reason, 0x61);
778 mdelay(2000);
779 reason &= ~8;
780 outb(reason, 0x61);
781}
782
783static notrace __kprobes void
784unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
785{
786 if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
787 NOTIFY_STOP)
788 return;
789 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
790 reason);
791 printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
792
793 if (panic_on_unrecovered_nmi)
794 panic("NMI: Not continuing");
795
796 printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
797}
798
799/* Runs on IST stack. This code must keep interrupts off all the time.
800 Nested NMIs are prevented by the CPU. */
801asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
802{
803 unsigned char reason = 0;
804 int cpu;
805
806 cpu = smp_processor_id();
807
808 /* Only the BSP gets external NMIs from the system. */
809 if (!cpu)
810 reason = get_nmi_reason();
811
812 if (!(reason & 0xc0)) {
813 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
814 == NOTIFY_STOP)
815 return;
816 /*
817 * Ok, so this is none of the documented NMI sources,
818 * so it must be the NMI watchdog.
819 */
820 if (nmi_watchdog_tick(regs, reason))
821 return;
822 if (!do_nmi_callback(regs, cpu))
823 unknown_nmi_error(reason, regs);
824
825 return;
826 }
827 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
828 return;
829
830 /* AK: following checks seem to be broken on modern chipsets. FIXME */
831 if (reason & 0x80)
832 mem_parity_error(reason, regs);
833 if (reason & 0x40)
834 io_check_error(reason, regs);
835}
836
837asmlinkage notrace __kprobes void
838do_nmi(struct pt_regs *regs, long error_code)
839{
840 nmi_enter();
841
842 add_pda(__nmi_count, 1);
843
844 if (!ignore_nmis)
845 default_do_nmi(regs);
846
847 nmi_exit();
848}
849
850void stop_nmi(void)
851{
852 acpi_nmi_disable();
853 ignore_nmis++;
854}
855
856void restart_nmi(void)
857{
858 ignore_nmis--;
859 acpi_nmi_enable();
860}
861
862/* runs on IST stack. */
863asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
864{
865 trace_hardirqs_fixup();
866
867 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
868 == NOTIFY_STOP)
869 return;
870
871 preempt_conditional_sti(regs);
872 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
873 preempt_conditional_cli(regs);
874}
875
876/* Help handler running on IST stack to switch back to user stack
877 for scheduling or signal handling. The actual stack switch is done in
878 entry.S */
879asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
880{
881 struct pt_regs *regs = eregs;
882 /* Did already sync */
883 if (eregs == (struct pt_regs *)eregs->sp)
884 ;
885 /* Exception from user space */
886 else if (user_mode(eregs))
887 regs = task_pt_regs(current);
888 /* Exception from kernel and interrupts are enabled. Move to
889 kernel process stack. */
890 else if (eregs->flags & X86_EFLAGS_IF)
891 regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
892 if (eregs != regs)
893 *regs = *eregs;
894 return regs;
895}
896
897/* runs on IST stack. */
898asmlinkage void __kprobes do_debug(struct pt_regs *regs,
899 unsigned long error_code)
900{
901 struct task_struct *tsk = current;
902 unsigned long condition;
903 siginfo_t info;
904
905 trace_hardirqs_fixup();
906
907 get_debugreg(condition, 6);
908
909 /*
910 * The processor cleared BTF, so don't mark that we need it set.
911 */
912 clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
913 tsk->thread.debugctlmsr = 0;
914
915 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
916 SIGTRAP) == NOTIFY_STOP)
917 return;
918
919 preempt_conditional_sti(regs);
920
921 /* Mask out spurious debug traps due to lazy DR7 setting */
922 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
923 if (!tsk->thread.debugreg7)
924 goto clear_dr7;
925 }
926
927 tsk->thread.debugreg6 = condition;
928
929 /*
930 * Single-stepping through TF: make sure we ignore any events in
931 * kernel space (but re-enable TF when returning to user mode).
932 */
933 if (condition & DR_STEP) {
934 if (!user_mode(regs))
935 goto clear_TF_reenable;
936 }
937
938 /* Ok, finally something we can handle */
939 tsk->thread.trap_no = 1;
940 tsk->thread.error_code = error_code;
941 info.si_signo = SIGTRAP;
942 info.si_errno = 0;
943 info.si_code = get_si_code(condition);
944 info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
945 force_sig_info(SIGTRAP, &info, tsk);
946
947clear_dr7:
948 set_debugreg(0, 7);
949 preempt_conditional_cli(regs);
950 return;
951
952clear_TF_reenable:
953 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
954 regs->flags &= ~X86_EFLAGS_TF;
955 preempt_conditional_cli(regs);
956 return;
957}
958
959static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
960{
961 if (fixup_exception(regs))
962 return 1;
963
964 notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
965 /* Illegal floating point operation in the kernel */
966 current->thread.trap_no = trapnr;
967 die(str, regs, 0);
968 return 0;
969}
970
971/*
972 * Note that we play around with the 'TS' bit in an attempt to get
973 * the correct behaviour even in the presence of the asynchronous
974 * IRQ13 behaviour
975 */
976asmlinkage void do_coprocessor_error(struct pt_regs *regs)
977{
978 void __user *ip = (void __user *)(regs->ip);
979 struct task_struct *task;
980 siginfo_t info;
981 unsigned short cwd, swd;
982
983 conditional_sti(regs);
984 if (!user_mode(regs) &&
985 kernel_math_error(regs, "kernel x87 math error", 16))
986 return;
987
988 /*
989 * Save the info for the exception handler and clear the error.
990 */
991 task = current;
992 save_init_fpu(task);
993 task->thread.trap_no = 16;
994 task->thread.error_code = 0;
995 info.si_signo = SIGFPE;
996 info.si_errno = 0;
997 info.si_code = __SI_FAULT;
998 info.si_addr = ip;
999 /*
1000 * (~cwd & swd) will mask out exceptions that are not set to unmasked
1001 * status. 0x3f is the exception bits in these regs, 0x200 is the
1002 * C1 reg you need in case of a stack fault, 0x040 is the stack
1003 * fault bit. We should only be taking one exception at a time,
1004 * so if this combination doesn't produce any single exception,
1005 * then we have a bad program that isn't synchronizing its FPU usage
1006 * and it will suffer the consequences since we won't be able to
1007 * fully reproduce the context of the exception
1008 */
1009 cwd = get_fpu_cwd(task);
1010 swd = get_fpu_swd(task);
1011 switch (swd & ~cwd & 0x3f) {
1012 case 0x000: /* No unmasked exception */
1013 default: /* Multiple exceptions */
1014 break;
1015 case 0x001: /* Invalid Op */
1016 /*
1017 * swd & 0x240 == 0x040: Stack Underflow
1018 * swd & 0x240 == 0x240: Stack Overflow
1019 * User must clear the SF bit (0x40) if set
1020 */
1021 info.si_code = FPE_FLTINV;
1022 break;
1023 case 0x002: /* Denormalize */
1024 case 0x010: /* Underflow */
1025 info.si_code = FPE_FLTUND;
1026 break;
1027 case 0x004: /* Zero Divide */
1028 info.si_code = FPE_FLTDIV;
1029 break;
1030 case 0x008: /* Overflow */
1031 info.si_code = FPE_FLTOVF;
1032 break;
1033 case 0x020: /* Precision */
1034 info.si_code = FPE_FLTRES;
1035 break;
1036 }
1037 force_sig_info(SIGFPE, &info, task);
1038}
1039
1040asmlinkage void bad_intr(void)
1041{
1042 printk("bad interrupt");
1043}
1044
1045asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
1046{
1047 void __user *ip = (void __user *)(regs->ip);
1048 struct task_struct *task;
1049 siginfo_t info;
1050 unsigned short mxcsr;
1051
1052 conditional_sti(regs);
1053 if (!user_mode(regs) &&
1054 kernel_math_error(regs, "kernel simd math error", 19))
1055 return;
1056
1057 /*
1058 * Save the info for the exception handler and clear the error.
1059 */
1060 task = current;
1061 save_init_fpu(task);
1062 task->thread.trap_no = 19;
1063 task->thread.error_code = 0;
1064 info.si_signo = SIGFPE;
1065 info.si_errno = 0;
1066 info.si_code = __SI_FAULT;
1067 info.si_addr = ip;
1068 /*
1069 * The SIMD FPU exceptions are handled a little differently, as there
1070 * is only a single status/control register. Thus, to determine which
1071 * unmasked exception was caught we must mask the exception mask bits
1072 * at 0x1f80, and then use these to mask the exception bits at 0x3f.
1073 */
1074 mxcsr = get_fpu_mxcsr(task);
1075 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
1076 case 0x000:
1077 default:
1078 break;
1079 case 0x001: /* Invalid Op */
1080 info.si_code = FPE_FLTINV;
1081 break;
1082 case 0x002: /* Denormalize */
1083 case 0x010: /* Underflow */
1084 info.si_code = FPE_FLTUND;
1085 break;
1086 case 0x004: /* Zero Divide */
1087 info.si_code = FPE_FLTDIV;
1088 break;
1089 case 0x008: /* Overflow */
1090 info.si_code = FPE_FLTOVF;
1091 break;
1092 case 0x020: /* Precision */
1093 info.si_code = FPE_FLTRES;
1094 break;
1095 }
1096 force_sig_info(SIGFPE, &info, task);
1097}
1098
1099asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
1100{
1101}
1102
1103asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
1104{
1105}
1106
1107asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
1108{
1109}
1110
1111/*
1112 * 'math_state_restore()' saves the current math information in the
1113 * old math state array, and gets the new ones from the current task
1114 *
1115 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
1116 * Don't touch unless you *really* know how it works.
1117 */
1118asmlinkage void math_state_restore(void)
1119{
1120 struct task_struct *me = current;
1121
1122 if (!used_math()) {
1123 local_irq_enable();
1124 /*
1125 * does a slab alloc which can sleep
1126 */
1127 if (init_fpu(me)) {
1128 /*
1129 * ran out of memory!
1130 */
1131 do_group_exit(SIGKILL);
1132 return;
1133 }
1134 local_irq_disable();
1135 }
1136
1137 clts(); /* Allow maths ops (or we recurse) */
1138 /*
1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1140 */
1141 if (unlikely(restore_fpu_checking(me))) {
1142 stts();
1143 force_sig(SIGSEGV, me);
1144 return;
1145 }
1146 task_thread_info(me)->status |= TS_USEDFPU;
1147 me->fpu_counter++;
1148}
1149EXPORT_SYMBOL_GPL(math_state_restore);
1150
1151void __init trap_init(void)
1152{
1153 set_intr_gate(0, &divide_error);
1154 set_intr_gate_ist(1, &debug, DEBUG_STACK);
1155 set_intr_gate_ist(2, &nmi, NMI_STACK);
1156 /* int3 can be called from all */
1157 set_system_gate_ist(3, &int3, DEBUG_STACK);
1158 /* int4 can be called from all */
1159 set_system_gate(4, &overflow);
1160 set_intr_gate(5, &bounds);
1161 set_intr_gate(6, &invalid_op);
1162 set_intr_gate(7, &device_not_available);
1163 set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
1164 set_intr_gate(9, &coprocessor_segment_overrun);
1165 set_intr_gate(10, &invalid_TSS);
1166 set_intr_gate(11, &segment_not_present);
1167 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
1168 set_intr_gate(13, &general_protection);
1169 set_intr_gate(14, &page_fault);
1170 set_intr_gate(15, &spurious_interrupt_bug);
1171 set_intr_gate(16, &coprocessor_error);
1172 set_intr_gate(17, &alignment_check);
1173#ifdef CONFIG_X86_MCE
1174 set_intr_gate_ist(18, &machine_check, MCE_STACK);
1175#endif
1176 set_intr_gate(19, &simd_coprocessor_error);
1177
1178#ifdef CONFIG_IA32_EMULATION
1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1180#endif
1181 /*
1182 * Should be a barrier for any external CPU state:
1183 */
1184 cpu_init();
1185}
1186
1187static int __init oops_setup(char *s)
1188{
1189 if (!s)
1190 return -EINVAL;
1191 if (!strcmp(s, "panic"))
1192 panic_on_oops = 1;
1193 return 0;
1194}
1195early_param("oops", oops_setup);
1196
1197static int __init kstack_setup(char *s)
1198{
1199 if (!s)
1200 return -EINVAL;
1201 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1202 return 0;
1203}
1204early_param("kstack", kstack_setup);
1205
1206static int __init code_bytes_setup(char *s)
1207{
1208 code_bytes = simple_strtoul(s, NULL, 0);
1209 if (code_bytes > 8192)
1210 code_bytes = 8192;
1211
1212 return 1;
1213}
1214__setup("code_bytes=", code_bytes_setup);