diff options
-rw-r--r-- | arch/x86_64/kernel/entry.S | 6 | ||||
-rw-r--r-- | arch/x86_64/kernel/mce.c | 105 | ||||
-rw-r--r-- | arch/x86_64/kernel/signal.c | 7 | ||||
-rw-r--r-- | include/asm-x86_64/mce.h | 2 | ||||
-rw-r--r-- | include/asm-x86_64/thread_info.h | 2 |
5 files changed, 82 insertions, 40 deletions
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index a67f87bf401..830cfc6ee8c 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -282,7 +282,7 @@ sysret_careful: | |||
282 | sysret_signal: | 282 | sysret_signal: |
283 | TRACE_IRQS_ON | 283 | TRACE_IRQS_ON |
284 | sti | 284 | sti |
285 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 285 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx |
286 | jz 1f | 286 | jz 1f |
287 | 287 | ||
288 | /* Really a signal */ | 288 | /* Really a signal */ |
@@ -375,7 +375,7 @@ int_very_careful: | |||
375 | jmp int_restore_rest | 375 | jmp int_restore_rest |
376 | 376 | ||
377 | int_signal: | 377 | int_signal: |
378 | testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx | 378 | testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx |
379 | jz 1f | 379 | jz 1f |
380 | movq %rsp,%rdi # &ptregs -> arg1 | 380 | movq %rsp,%rdi # &ptregs -> arg1 |
381 | xorl %esi,%esi # oldset -> arg2 | 381 | xorl %esi,%esi # oldset -> arg2 |
@@ -599,7 +599,7 @@ retint_careful: | |||
599 | jmp retint_check | 599 | jmp retint_check |
600 | 600 | ||
601 | retint_signal: | 601 | retint_signal: |
602 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx | 602 | testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx |
603 | jz retint_swapgs | 603 | jz retint_swapgs |
604 | TRACE_IRQS_ON | 604 | TRACE_IRQS_ON |
605 | sti | 605 | sti |
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 77fee481be4..968613572b9 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/capability.h> | 18 | #include <linux/capability.h> |
19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/poll.h> | ||
22 | #include <linux/thread_info.h> | ||
21 | #include <linux/ctype.h> | 23 | #include <linux/ctype.h> |
22 | #include <linux/kmod.h> | 24 | #include <linux/kmod.h> |
23 | #include <linux/kdebug.h> | 25 | #include <linux/kdebug.h> |
@@ -26,6 +28,7 @@ | |||
26 | #include <asm/mce.h> | 28 | #include <asm/mce.h> |
27 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
28 | #include <asm/smp.h> | 30 | #include <asm/smp.h> |
31 | #include <asm/idle.h> | ||
29 | 32 | ||
30 | #define MISC_MCELOG_MINOR 227 | 33 | #define MISC_MCELOG_MINOR 227 |
31 | #define NR_BANKS 6 | 34 | #define NR_BANKS 6 |
@@ -39,8 +42,7 @@ static int mce_dont_init; | |||
39 | static int tolerant = 1; | 42 | static int tolerant = 1; |
40 | static int banks; | 43 | static int banks; |
41 | static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; | 44 | static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; |
42 | static unsigned long console_logged; | 45 | static unsigned long notify_user; |
43 | static int notify_user; | ||
44 | static int rip_msr; | 46 | static int rip_msr; |
45 | static int mce_bootlog = 1; | 47 | static int mce_bootlog = 1; |
46 | static atomic_t mce_events; | 48 | static atomic_t mce_events; |
@@ -48,6 +50,8 @@ static atomic_t mce_events; | |||
48 | static char trigger[128]; | 50 | static char trigger[128]; |
49 | static char *trigger_argv[2] = { trigger, NULL }; | 51 | static char *trigger_argv[2] = { trigger, NULL }; |
50 | 52 | ||
53 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | ||
54 | |||
51 | /* | 55 | /* |
52 | * Lockless MCE logging infrastructure. | 56 | * Lockless MCE logging infrastructure. |
53 | * This avoids deadlocks on printk locks without having to break locks. Also | 57 | * This avoids deadlocks on printk locks without having to break locks. Also |
@@ -94,8 +98,7 @@ void mce_log(struct mce *mce) | |||
94 | mcelog.entry[entry].finished = 1; | 98 | mcelog.entry[entry].finished = 1; |
95 | wmb(); | 99 | wmb(); |
96 | 100 | ||
97 | if (!test_and_set_bit(0, &console_logged)) | 101 | set_bit(0, &notify_user); |
98 | notify_user = 1; | ||
99 | } | 102 | } |
100 | 103 | ||
101 | static void print_mce(struct mce *m) | 104 | static void print_mce(struct mce *m) |
@@ -128,6 +131,10 @@ static void print_mce(struct mce *m) | |||
128 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) | 131 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) |
129 | { | 132 | { |
130 | int i; | 133 | int i; |
134 | |||
135 | if (tolerant >= 3) | ||
136 | return; | ||
137 | |||
131 | oops_begin(); | 138 | oops_begin(); |
132 | for (i = 0; i < MCE_LOG_LEN; i++) { | 139 | for (i = 0; i < MCE_LOG_LEN; i++) { |
133 | unsigned long tsc = mcelog.entry[i].tsc; | 140 | unsigned long tsc = mcelog.entry[i].tsc; |
@@ -139,10 +146,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start) | |||
139 | } | 146 | } |
140 | if (backup) | 147 | if (backup) |
141 | print_mce(backup); | 148 | print_mce(backup); |
142 | if (tolerant >= 3) | 149 | panic(msg); |
143 | printk("Fake panic: %s\n", msg); | ||
144 | else | ||
145 | panic(msg); | ||
146 | } | 150 | } |
147 | 151 | ||
148 | static int mce_available(struct cpuinfo_x86 *c) | 152 | static int mce_available(struct cpuinfo_x86 *c) |
@@ -167,17 +171,6 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | |||
167 | } | 171 | } |
168 | } | 172 | } |
169 | 173 | ||
170 | static void do_mce_trigger(void) | ||
171 | { | ||
172 | static atomic_t mce_logged; | ||
173 | int events = atomic_read(&mce_events); | ||
174 | if (events != atomic_read(&mce_logged) && trigger[0]) { | ||
175 | /* Small race window, but should be harmless. */ | ||
176 | atomic_set(&mce_logged, events); | ||
177 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | ||
178 | } | ||
179 | } | ||
180 | |||
181 | /* | 174 | /* |
182 | * The actual machine check handler | 175 | * The actual machine check handler |
183 | */ | 176 | */ |
@@ -251,12 +244,8 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
251 | } | 244 | } |
252 | 245 | ||
253 | /* Never do anything final in the polling timer */ | 246 | /* Never do anything final in the polling timer */ |
254 | if (!regs) { | 247 | if (!regs) |
255 | /* Normal interrupt context here. Call trigger for any new | ||
256 | events. */ | ||
257 | do_mce_trigger(); | ||
258 | goto out; | 248 | goto out; |
259 | } | ||
260 | 249 | ||
261 | /* If we didn't find an uncorrectable error, pick | 250 | /* If we didn't find an uncorrectable error, pick |
262 | the last one (shouldn't happen, just being safe). */ | 251 | the last one (shouldn't happen, just being safe). */ |
@@ -288,6 +277,9 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
288 | do_exit(SIGBUS); | 277 | do_exit(SIGBUS); |
289 | } | 278 | } |
290 | 279 | ||
280 | /* notify userspace ASAP */ | ||
281 | set_thread_flag(TIF_MCE_NOTIFY); | ||
282 | |||
291 | out: | 283 | out: |
292 | /* Last thing done in the machine check exception to clear state. */ | 284 | /* Last thing done in the machine check exception to clear state. */ |
293 | wrmsrl(MSR_IA32_MCG_STATUS, 0); | 285 | wrmsrl(MSR_IA32_MCG_STATUS, 0); |
@@ -344,37 +336,67 @@ static void mcheck_timer(struct work_struct *work) | |||
344 | on_each_cpu(mcheck_check_cpu, NULL, 1, 1); | 336 | on_each_cpu(mcheck_check_cpu, NULL, 1, 1); |
345 | 337 | ||
346 | /* | 338 | /* |
347 | * It's ok to read stale data here for notify_user and | 339 | * Alert userspace if needed. If we logged an MCE, reduce the |
348 | * console_logged as we'll simply get the updated versions | 340 | * polling interval, otherwise increase the polling interval. |
349 | * on the next mcheck_timer execution and atomic operations | ||
350 | * on console_logged act as synchronization for notify_user | ||
351 | * writes. | ||
352 | */ | 341 | */ |
353 | if (notify_user && console_logged) { | 342 | if (mce_notify_user()) { |
343 | next_interval = max(next_interval/2, HZ/100); | ||
344 | } else { | ||
345 | next_interval = min(next_interval*2, check_interval*HZ); | ||
346 | } | ||
347 | |||
348 | schedule_delayed_work(&mcheck_work, next_interval); | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * This is only called from process context. This is where we do | ||
353 | * anything we need to alert userspace about new MCEs. This is called | ||
354 | * directly from the poller and also from entry.S and idle, thanks to | ||
355 | * TIF_MCE_NOTIFY. | ||
356 | */ | ||
357 | int mce_notify_user(void) | ||
358 | { | ||
359 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
360 | if (test_and_clear_bit(0, &notify_user)) { |
354 | static unsigned long last_print; | 361 | static unsigned long last_print; |
355 | unsigned long now = jiffies; | 362 | unsigned long now = jiffies; |
356 | 363 | ||
357 | /* if we logged an MCE, reduce the polling interval */ | 364 | wake_up_interruptible(&mce_wait); |
358 | next_interval = max(next_interval/2, HZ/100); | 365 | if (trigger[0]) |
359 | notify_user = 0; | 366 | call_usermodehelper(trigger, trigger_argv, NULL, |
360 | clear_bit(0, &console_logged); | 367 | UMH_NO_WAIT); |
368 | |||
361 | if (time_after_eq(now, last_print + (check_interval*HZ))) { | 369 | if (time_after_eq(now, last_print + (check_interval*HZ))) { |
362 | last_print = now; | 370 | last_print = now; |
363 | printk(KERN_INFO "Machine check events logged\n"); | 371 | printk(KERN_INFO "Machine check events logged\n"); |
364 | } | 372 | } |
365 | } else { | 373 | |
366 | next_interval = min(next_interval*2, check_interval*HZ); | 374 | return 1; |
367 | } | 375 | } |
376 | return 0; | ||
377 | } | ||
368 | 378 | ||
369 | schedule_delayed_work(&mcheck_work, next_interval); | 379 | /* see if the idle task needs to notify userspace */ |
380 | static int | ||
381 | mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) | ||
382 | { | ||
383 | /* IDLE_END should be safe - interrupts are back on */ | ||
384 | if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) | ||
385 | mce_notify_user(); | ||
386 | |||
387 | return NOTIFY_OK; | ||
370 | } | 388 | } |
371 | 389 | ||
390 | static struct notifier_block mce_idle_notifier = { | ||
391 | .notifier_call = mce_idle_callback, | ||
392 | }; | ||
372 | 393 | ||
373 | static __init int periodic_mcheck_init(void) | 394 | static __init int periodic_mcheck_init(void) |
374 | { | 395 | { |
375 | next_interval = check_interval * HZ; | 396 | next_interval = check_interval * HZ; |
376 | if (next_interval) | 397 | if (next_interval) |
377 | schedule_delayed_work(&mcheck_work, next_interval); | 398 | schedule_delayed_work(&mcheck_work, next_interval); |
399 | idle_notifier_register(&mce_idle_notifier); | ||
378 | return 0; | 400 | return 0; |
379 | } | 401 | } |
380 | __initcall(periodic_mcheck_init); | 402 | __initcall(periodic_mcheck_init); |
@@ -566,6 +588,14 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff | |||
566 | return err ? -EFAULT : buf - ubuf; | 588 | return err ? -EFAULT : buf - ubuf; |
567 | } | 589 | } |
568 | 590 | ||
591 | static unsigned int mce_poll(struct file *file, poll_table *wait) | ||
592 | { | ||
593 | poll_wait(file, &mce_wait, wait); | ||
594 | if (rcu_dereference(mcelog.next)) | ||
595 | return POLLIN | POLLRDNORM; | ||
596 | return 0; | ||
597 | } | ||
598 | |||
569 | static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg) | 599 | static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg) |
570 | { | 600 | { |
571 | int __user *p = (int __user *)arg; | 601 | int __user *p = (int __user *)arg; |
@@ -592,6 +622,7 @@ static const struct file_operations mce_chrdev_ops = { | |||
592 | .open = mce_open, | 622 | .open = mce_open, |
593 | .release = mce_release, | 623 | .release = mce_release, |
594 | .read = mce_read, | 624 | .read = mce_read, |
625 | .poll = mce_poll, | ||
595 | .ioctl = mce_ioctl, | 626 | .ioctl = mce_ioctl, |
596 | }; | 627 | }; |
597 | 628 | ||
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 290f5d8037c..4886afcd628 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/i387.h> | 26 | #include <asm/i387.h> |
27 | #include <asm/proto.h> | 27 | #include <asm/proto.h> |
28 | #include <asm/ia32_unistd.h> | 28 | #include <asm/ia32_unistd.h> |
29 | #include <asm/mce.h> | ||
29 | 30 | ||
30 | /* #define DEBUG_SIG 1 */ | 31 | /* #define DEBUG_SIG 1 */ |
31 | 32 | ||
@@ -472,6 +473,12 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
472 | clear_thread_flag(TIF_SINGLESTEP); | 473 | clear_thread_flag(TIF_SINGLESTEP); |
473 | } | 474 | } |
474 | 475 | ||
476 | #ifdef CONFIG_X86_MCE | ||
477 | /* notify userspace of pending MCEs */ | ||
478 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
479 | mce_notify_user(); | ||
480 | #endif /* CONFIG_X86_MCE */ | ||
481 | |||
475 | /* deal with pending signal delivery */ | 482 | /* deal with pending signal delivery */ |
476 | if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) | 483 | if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK)) |
477 | do_signal(regs); | 484 | do_signal(regs); |
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h index 177e92b4019..556be5563e3 100644 --- a/include/asm-x86_64/mce.h +++ b/include/asm-x86_64/mce.h | |||
@@ -105,6 +105,8 @@ extern atomic_t mce_entry; | |||
105 | 105 | ||
106 | extern void do_machine_check(struct pt_regs *, long); | 106 | extern void do_machine_check(struct pt_regs *, long); |
107 | 107 | ||
108 | extern int mce_notify_user(void); | ||
109 | |||
108 | #endif | 110 | #endif |
109 | 111 | ||
110 | #endif | 112 | #endif |
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h index 10bb5a8ed68..33c72ef15a0 100644 --- a/include/asm-x86_64/thread_info.h +++ b/include/asm-x86_64/thread_info.h | |||
@@ -115,6 +115,7 @@ static inline struct thread_info *stack_thread_info(void) | |||
115 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ | 115 | #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ |
116 | #define TIF_SECCOMP 8 /* secure computing */ | 116 | #define TIF_SECCOMP 8 /* secure computing */ |
117 | #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ | 117 | #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ |
118 | #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ | ||
118 | /* 16 free */ | 119 | /* 16 free */ |
119 | #define TIF_IA32 17 /* 32bit process */ | 120 | #define TIF_IA32 17 /* 32bit process */ |
120 | #define TIF_FORK 18 /* ret_from_fork */ | 121 | #define TIF_FORK 18 /* ret_from_fork */ |
@@ -133,6 +134,7 @@ static inline struct thread_info *stack_thread_info(void) | |||
133 | #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) | 134 | #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) |
134 | #define _TIF_SECCOMP (1<<TIF_SECCOMP) | 135 | #define _TIF_SECCOMP (1<<TIF_SECCOMP) |
135 | #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) | 136 | #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) |
137 | #define _TIF_MCE_NOTIFY (1<<TIF_MCE_NOTIFY) | ||
136 | #define _TIF_IA32 (1<<TIF_IA32) | 138 | #define _TIF_IA32 (1<<TIF_IA32) |
137 | #define _TIF_FORK (1<<TIF_FORK) | 139 | #define _TIF_FORK (1<<TIF_FORK) |
138 | #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) | 140 | #define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING) |