 arch/x86_64/kernel/entry.S       |   6 +-
 arch/x86_64/kernel/mce.c         | 105 ++++++++++++++++++++++-----------
 arch/x86_64/kernel/signal.c      |   7 ++
 include/asm-x86_64/mce.h         |   2 +
 include/asm-x86_64/thread_info.h |   2 +
 5 files changed, 82 insertions(+), 40 deletions(-)
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index a67f87bf401..830cfc6ee8c 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -282,7 +282,7 @@ sysret_careful:
 sysret_signal:
 	TRACE_IRQS_ON
 	sti
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
 	jz    1f
 
 	/* Really a signal */
@@ -375,7 +375,7 @@ int_very_careful:
 	jmp int_restore_rest
 
 int_signal:
-	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
 	jz 1f
 	movq %rsp,%rdi		# &ptregs -> arg1
 	xorl %esi,%esi		# oldset -> arg2
@@ -599,7 +599,7 @@ retint_careful:
 	jmp retint_check
 
 retint_signal:
-	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
 	jz retint_swapgs
 	TRACE_IRQS_ON
 	sti
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 77fee481be4..968613572b9 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -18,6 +18,8 @@
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/percpu.h>
+#include <linux/poll.h>
+#include <linux/thread_info.h>
 #include <linux/ctype.h>
 #include <linux/kmod.h>
 #include <linux/kdebug.h>
@@ -26,6 +28,7 @@
 #include <asm/mce.h>
 #include <asm/uaccess.h>
 #include <asm/smp.h>
+#include <asm/idle.h>
 
 #define MISC_MCELOG_MINOR 227
 #define NR_BANKS 6
@@ -39,8 +42,7 @@ static int mce_dont_init;
 static int tolerant = 1;
 static int banks;
 static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
-static unsigned long console_logged;
-static int notify_user;
+static unsigned long notify_user;
 static int rip_msr;
 static int mce_bootlog = 1;
 static atomic_t mce_events;
@@ -48,6 +50,8 @@ static atomic_t mce_events;
 static char trigger[128];
 static char *trigger_argv[2] = { trigger, NULL };
 
+static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
@@ -94,8 +98,7 @@ void mce_log(struct mce *mce)
 	mcelog.entry[entry].finished = 1;
 	wmb();
 
-	if (!test_and_set_bit(0, &console_logged))
-		notify_user = 1;
+	set_bit(0, &notify_user);
 }
 
 static void print_mce(struct mce *m)
@@ -128,6 +131,10 @@ static void print_mce(struct mce *m)
 static void mce_panic(char *msg, struct mce *backup, unsigned long start)
 {
 	int i;
+
+	if (tolerant >= 3)
+		return;
+
 	oops_begin();
 	for (i = 0; i < MCE_LOG_LEN; i++) {
 		unsigned long tsc = mcelog.entry[i].tsc;
@@ -139,10 +146,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
 	}
 	if (backup)
 		print_mce(backup);
-	if (tolerant >= 3)
-		printk("Fake panic: %s\n", msg);
-	else
-		panic(msg);
+	panic(msg);
 }
 
 static int mce_available(struct cpuinfo_x86 *c)
@@ -167,17 +171,6 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
 	}
 }
 
-static void do_mce_trigger(void)
-{
-	static atomic_t mce_logged;
-	int events = atomic_read(&mce_events);
-	if (events != atomic_read(&mce_logged) && trigger[0]) {
-		/* Small race window, but should be harmless. */
-		atomic_set(&mce_logged, events);
-		call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
-	}
-}
-
 /*
  * The actual machine check handler
 */
@@ -251,12 +244,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 	}
 
 	/* Never do anything final in the polling timer */
-	if (!regs) {
-		/* Normal interrupt context here. Call trigger for any new
-		   events. */
-		do_mce_trigger();
+	if (!regs)
 		goto out;
-	}
 
 	/* If we didn't find an uncorrectable error, pick
 	   the last one (shouldn't happen, just being safe). */
@@ -288,6 +277,9 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 		do_exit(SIGBUS);
 	}
 
+	/* notify userspace ASAP */
+	set_thread_flag(TIF_MCE_NOTIFY);
+
  out:
 	/* Last thing done in the machine check exception to clear state. */
 	wrmsrl(MSR_IA32_MCG_STATUS, 0);
@@ -344,37 +336,67 @@ static void mcheck_timer(struct work_struct *work)
 	on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
 
 	/*
-	 * It's ok to read stale data here for notify_user and
-	 * console_logged as we'll simply get the updated versions
-	 * on the next mcheck_timer execution and atomic operations
-	 * on console_logged act as synchronization for notify_user
-	 * writes.
+	 * Alert userspace if needed. If we logged an MCE, reduce the
+	 * polling interval, otherwise increase the polling interval.
 	 */
-	if (notify_user && console_logged) {
+	if (mce_notify_user()) {
+		next_interval = max(next_interval/2, HZ/100);
+	} else {
+		next_interval = min(next_interval*2, check_interval*HZ);
+	}
+
+	schedule_delayed_work(&mcheck_work, next_interval);
+}
+
+/*
+ * This is only called from process context. This is where we do
+ * anything we need to alert userspace about new MCEs. This is called
+ * directly from the poller and also from entry.S and idle, thanks to
+ * TIF_MCE_NOTIFY.
+ */
+int mce_notify_user(void)
+{
+	clear_thread_flag(TIF_MCE_NOTIFY);
+	if (test_and_clear_bit(0, &notify_user)) {
 		static unsigned long last_print;
 		unsigned long now = jiffies;
 
-		/* if we logged an MCE, reduce the polling interval */
-		next_interval = max(next_interval/2, HZ/100);
-		notify_user = 0;
-		clear_bit(0, &console_logged);
+		wake_up_interruptible(&mce_wait);
+		if (trigger[0])
+			call_usermodehelper(trigger, trigger_argv, NULL,
+						UMH_NO_WAIT);
+
 		if (time_after_eq(now, last_print + (check_interval*HZ))) {
 			last_print = now;
 			printk(KERN_INFO "Machine check events logged\n");
 		}
-	} else {
-		next_interval = min(next_interval*2, check_interval*HZ);
+
+		return 1;
 	}
+	return 0;
+}
 
-	schedule_delayed_work(&mcheck_work, next_interval);
+/* see if the idle task needs to notify userspace */
+static int
+mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
+{
+	/* IDLE_END should be safe - interrupts are back on */
+	if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
+		mce_notify_user();
+
+	return NOTIFY_OK;
 }
 
+static struct notifier_block mce_idle_notifier = {
+	.notifier_call = mce_idle_callback,
+};
 
 static __init int periodic_mcheck_init(void)
 {
 	next_interval = check_interval * HZ;
 	if (next_interval)
 		schedule_delayed_work(&mcheck_work, next_interval);
+	idle_notifier_register(&mce_idle_notifier);
 	return 0;
 }
 __initcall(periodic_mcheck_init);
@@ -566,6 +588,14 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
 	return err ? -EFAULT : buf - ubuf;
 }
 
+static unsigned int mce_poll(struct file *file, poll_table *wait)
+{
+	poll_wait(file, &mce_wait, wait);
+	if (rcu_dereference(mcelog.next))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
+
 static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
 {
 	int __user *p = (int __user *)arg;
@@ -592,6 +622,7 @@ static const struct file_operations mce_chrdev_ops = {
 	.open = mce_open,
 	.release = mce_release,
 	.read = mce_read,
+	.poll = mce_poll,
 	.ioctl = mce_ioctl,
 };
 
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 290f5d8037c..4886afcd628 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -26,6 +26,7 @@
 #include <asm/i387.h>
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
+#include <asm/mce.h>
 
 /* #define DEBUG_SIG 1 */
 
@@ -472,6 +473,12 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 		clear_thread_flag(TIF_SINGLESTEP);
 	}
 
+#ifdef CONFIG_X86_MCE
+	/* notify userspace of pending MCEs */
+	if (thread_info_flags & _TIF_MCE_NOTIFY)
+		mce_notify_user();
+#endif /* CONFIG_X86_MCE */
+
 	/* deal with pending signal delivery */
 	if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
 		do_signal(regs);
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h
index 177e92b4019..556be5563e3 100644
--- a/include/asm-x86_64/mce.h
+++ b/include/asm-x86_64/mce.h
@@ -105,6 +105,8 @@ extern atomic_t mce_entry;
 
 extern void do_machine_check(struct pt_regs *, long);
 
+extern int mce_notify_user(void);
+
 #endif
 
 #endif
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 10bb5a8ed68..33c72ef15a0 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -115,6 +115,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal */
+#define TIF_MCE_NOTIFY		10	/* notify userspace of an MCE */
 /* 16 free */
 #define TIF_IA32		17	/* 32bit process */
 #define TIF_FORK		18	/* ret_from_fork */
@@ -133,6 +134,7 @@ static inline struct thread_info *stack_thread_info(void)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+#define _TIF_MCE_NOTIFY		(1<<TIF_MCE_NOTIFY)
 #define _TIF_IA32		(1<<TIF_IA32)
 #define _TIF_FORK		(1<<TIF_FORK)
 #define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
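---

Illustration, not part of the patch: a minimal userspace sketch of how the poll support added to the mcelog character device above could be consumed. It assumes the misc device is exposed as /dev/mcelog; the buffer size, error handling, and output format are arbitrary choices for the example.

/*
 * Sleep in poll() until mce_poll() reports POLLIN (i.e. mcelog.next is
 * non-zero after a wake-up on mce_wait), then drain the buffered records.
 */
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64 * 1024];		/* generously sized for the log buffer */
	int fd = open("/dev/mcelog", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/mcelog");
		return 1;
	}
	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN };
		ssize_t n;

		if (poll(&pfd, 1, -1) < 0)	/* woken via mce_wait */
			break;
		n = read(fd, buf, sizeof(buf));	/* returns struct mce records */
		if (n > 0)
			printf("read %zd bytes of machine check records\n", n);
	}
	close(fd);
	return 0;
}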