about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_64.c 164
1 files changed, 86 insertions, 78 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 07bbfe7aa7f7..b9f802e35209 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * Machine check handler. 2 * Machine check handler.
3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. 3 * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 * Rest from unknown author(s). 4 * Rest from unknown author(s).
5 * 2004 Andi Kleen. Rewrote most of it. 5 * 2004 Andi Kleen. Rewrote most of it.
6 */ 6 */
7 7
8#include <linux/init.h> 8#include <linux/init.h>
@@ -23,7 +23,7 @@
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/kmod.h> 24#include <linux/kmod.h>
25#include <linux/kdebug.h> 25#include <linux/kdebug.h>
26#include <asm/processor.h> 26#include <asm/processor.h>
27#include <asm/msr.h> 27#include <asm/msr.h>
28#include <asm/mce.h> 28#include <asm/mce.h>
29#include <asm/uaccess.h> 29#include <asm/uaccess.h>
@@ -63,10 +63,10 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
63 * separate MCEs from kernel messages to avoid bogus bug reports. 63 * separate MCEs from kernel messages to avoid bogus bug reports.
64 */ 64 */
65 65
66struct mce_log mcelog = { 66struct mce_log mcelog = {
67 MCE_LOG_SIGNATURE, 67 MCE_LOG_SIGNATURE,
68 MCE_LOG_LEN, 68 MCE_LOG_LEN,
69}; 69};
70 70
71void mce_log(struct mce *mce) 71void mce_log(struct mce *mce)
72{ 72{
@@ -111,42 +111,42 @@ static void print_mce(struct mce *m)
111 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", 111 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
112 m->cpu, m->mcgstatus, m->bank, m->status); 112 m->cpu, m->mcgstatus, m->bank, m->status);
113 if (m->rip) { 113 if (m->rip) {
114 printk(KERN_EMERG 114 printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
115 "RIP%s %02x:<%016Lx> ",
116 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", 115 !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
117 m->cs, m->rip); 116 m->cs, m->rip);
118 if (m->cs == __KERNEL_CS) 117 if (m->cs == __KERNEL_CS)
119 print_symbol("{%s}", m->rip); 118 print_symbol("{%s}", m->rip);
120 printk("\n"); 119 printk("\n");
121 } 120 }
122 printk(KERN_EMERG "TSC %Lx ", m->tsc); 121 printk(KERN_EMERG "TSC %Lx ", m->tsc);
123 if (m->addr) 122 if (m->addr)
124 printk("ADDR %Lx ", m->addr); 123 printk("ADDR %Lx ", m->addr);
125 if (m->misc) 124 if (m->misc)
126 printk("MISC %Lx ", m->misc); 125 printk("MISC %Lx ", m->misc);
127 printk("\n"); 126 printk("\n");
128 printk(KERN_EMERG "This is not a software problem!\n"); 127 printk(KERN_EMERG "This is not a software problem!\n");
129 printk(KERN_EMERG 128 printk(KERN_EMERG "Run through mcelog --ascii to decode "
130 "Run through mcelog --ascii to decode and contact your hardware vendor\n"); 129 "and contact your hardware vendor\n");
131} 130}
132 131
133static void mce_panic(char *msg, struct mce *backup, unsigned long start) 132static void mce_panic(char *msg, struct mce *backup, unsigned long start)
134{ 133{
135 int i; 134 int i;
136 135
137 oops_begin(); 136 oops_begin();
138 for (i = 0; i < MCE_LOG_LEN; i++) { 137 for (i = 0; i < MCE_LOG_LEN; i++) {
139 unsigned long tsc = mcelog.entry[i].tsc; 138 unsigned long tsc = mcelog.entry[i].tsc;
139
140 if (time_before(tsc, start)) 140 if (time_before(tsc, start))
141 continue; 141 continue;
142 print_mce(&mcelog.entry[i]); 142 print_mce(&mcelog.entry[i]);
143 if (backup && mcelog.entry[i].tsc == backup->tsc) 143 if (backup && mcelog.entry[i].tsc == backup->tsc)
144 backup = NULL; 144 backup = NULL;
145 } 145 }
146 if (backup) 146 if (backup)
147 print_mce(backup); 147 print_mce(backup);
148 panic(msg); 148 panic(msg);
149} 149}
150 150
151static int mce_available(struct cpuinfo_x86 *c) 151static int mce_available(struct cpuinfo_x86 *c)
152{ 152{
@@ -170,10 +170,9 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
170 } 170 }
171} 171}
172 172
173/* 173/*
174 * The actual machine check handler 174 * The actual machine check handler
175 */ 175 */
176
177void do_machine_check(struct pt_regs * regs, long error_code) 176void do_machine_check(struct pt_regs * regs, long error_code)
178{ 177{
179 struct mce m, panicm; 178 struct mce m, panicm;
@@ -194,7 +193,8 @@ void do_machine_check(struct pt_regs * regs, long error_code)
194 atomic_inc(&mce_entry); 193 atomic_inc(&mce_entry);
195 194
196 if (regs) 195 if (regs)
197 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); 196 notify_die(DIE_NMI, "machine check", regs, error_code, 18,
197 SIGKILL);
198 if (!banks) 198 if (!banks)
199 goto out2; 199 goto out2;
200 200
@@ -204,15 +204,15 @@ void do_machine_check(struct pt_regs * regs, long error_code)
204 /* if the restart IP is not valid, we're done for */ 204 /* if the restart IP is not valid, we're done for */
205 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 205 if (!(m.mcgstatus & MCG_STATUS_RIPV))
206 no_way_out = 1; 206 no_way_out = 1;
207 207
208 rdtscll(mcestart); 208 rdtscll(mcestart);
209 barrier(); 209 barrier();
210 210
211 for (i = 0; i < banks; i++) { 211 for (i = 0; i < banks; i++) {
212 if (!bank[i]) 212 if (!bank[i])
213 continue; 213 continue;
214 214
215 m.misc = 0; 215 m.misc = 0;
216 m.addr = 0; 216 m.addr = 0;
217 m.bank = i; 217 m.bank = i;
218 m.tsc = 0; 218 m.tsc = 0;
@@ -372,7 +372,7 @@ static void mcheck_timer(struct work_struct *work)
372 if (mce_notify_user()) { 372 if (mce_notify_user()) {
373 next_interval = max(next_interval/2, HZ/100); 373 next_interval = max(next_interval/2, HZ/100);
374 } else { 374 } else {
375 next_interval = min(next_interval*2, 375 next_interval = min(next_interval * 2,
376 (int)round_jiffies_relative(check_interval*HZ)); 376 (int)round_jiffies_relative(check_interval*HZ));
377 } 377 }
378 378
@@ -423,18 +423,18 @@ static struct notifier_block mce_idle_notifier = {
423}; 423};
424 424
425static __init int periodic_mcheck_init(void) 425static __init int periodic_mcheck_init(void)
426{ 426{
427 next_interval = check_interval * HZ; 427 next_interval = check_interval * HZ;
428 if (next_interval) 428 if (next_interval)
429 schedule_delayed_work(&mcheck_work, 429 schedule_delayed_work(&mcheck_work,
430 round_jiffies_relative(next_interval)); 430 round_jiffies_relative(next_interval));
431 idle_notifier_register(&mce_idle_notifier); 431 idle_notifier_register(&mce_idle_notifier);
432 return 0; 432 return 0;
433} 433}
434__initcall(periodic_mcheck_init); 434__initcall(periodic_mcheck_init);
435 435
436 436
437/* 437/*
438 * Initialize Machine Checks for a CPU. 438 * Initialize Machine Checks for a CPU.
439 */ 439 */
440static void mce_init(void *dummy) 440static void mce_init(void *dummy)
@@ -444,9 +444,9 @@ static void mce_init(void *dummy)
444 444
445 rdmsrl(MSR_IA32_MCG_CAP, cap); 445 rdmsrl(MSR_IA32_MCG_CAP, cap);
446 banks = cap & 0xff; 446 banks = cap & 0xff;
447 if (banks > NR_BANKS) { 447 if (banks > NR_BANKS) {
448 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks); 448 printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
449 banks = NR_BANKS; 449 banks = NR_BANKS;
450 } 450 }
451 /* Use accurate RIP reporting if available. */ 451 /* Use accurate RIP reporting if available. */
452 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 452 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -464,15 +464,15 @@ static void mce_init(void *dummy)
464 for (i = 0; i < banks; i++) { 464 for (i = 0; i < banks; i++) {
465 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 465 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
466 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 466 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
467 } 467 }
468} 468}
469 469
470/* Add per CPU specific workarounds here */ 470/* Add per CPU specific workarounds here */
471static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) 471static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
472{ 472{
473 /* This should be disabled by the BIOS, but isn't always */ 473 /* This should be disabled by the BIOS, but isn't always */
474 if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { 474 if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
475 /* disable GART TBL walk error reporting, which trips off 475 /* disable GART TBL walk error reporting, which trips off
476 incorrectly with the IOMMU & 3ware & Cerberus. */ 476 incorrectly with the IOMMU & 3ware & Cerberus. */
477 clear_bit(10, &bank[4]); 477 clear_bit(10, &bank[4]);
478 /* Lots of broken BIOS around that don't clear them 478 /* Lots of broken BIOS around that don't clear them
@@ -480,7 +480,7 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
480 mce_bootlog = 0; 480 mce_bootlog = 0;
481 } 481 }
482 482
483} 483}
484 484
485static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) 485static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
486{ 486{
@@ -496,15 +496,15 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
496 } 496 }
497} 497}
498 498
499/* 499/*
500 * Called for each booted CPU to set up machine checks. 500 * Called for each booted CPU to set up machine checks.
501 * Must be called with preempt off. 501 * Must be called with preempt off.
502 */ 502 */
503void __cpuinit mcheck_init(struct cpuinfo_x86 *c) 503void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
504{ 504{
505 static cpumask_t mce_cpus = CPU_MASK_NONE; 505 static cpumask_t mce_cpus = CPU_MASK_NONE;
506 506
507 mce_cpu_quirks(c); 507 mce_cpu_quirks(c);
508 508
509 if (mce_dont_init || 509 if (mce_dont_init ||
510 cpu_test_and_set(smp_processor_id(), mce_cpus) || 510 cpu_test_and_set(smp_processor_id(), mce_cpus) ||
@@ -553,13 +553,15 @@ static int mce_release(struct inode *inode, struct file *file)
553 return 0; 553 return 0;
554} 554}
555 555
556static void collect_tscs(void *data) 556static void collect_tscs(void *data)
557{ 557{
558 unsigned long *cpu_tsc = (unsigned long *)data; 558 unsigned long *cpu_tsc = (unsigned long *)data;
559
559 rdtscll(cpu_tsc[smp_processor_id()]); 560 rdtscll(cpu_tsc[smp_processor_id()]);
560} 561}
561 562
562static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) 563static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
564 loff_t *off)
563{ 565{
564 unsigned long *cpu_tsc; 566 unsigned long *cpu_tsc;
565 static DECLARE_MUTEX(mce_read_sem); 567 static DECLARE_MUTEX(mce_read_sem);
@@ -571,19 +573,20 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
571 if (!cpu_tsc) 573 if (!cpu_tsc)
572 return -ENOMEM; 574 return -ENOMEM;
573 575
574 down(&mce_read_sem); 576 down(&mce_read_sem);
575 next = rcu_dereference(mcelog.next); 577 next = rcu_dereference(mcelog.next);
576 578
577 /* Only supports full reads right now */ 579 /* Only supports full reads right now */
578 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { 580 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) {
579 up(&mce_read_sem); 581 up(&mce_read_sem);
580 kfree(cpu_tsc); 582 kfree(cpu_tsc);
581 return -EINVAL; 583 return -EINVAL;
582 } 584 }
583 585
584 err = 0; 586 err = 0;
585 for (i = 0; i < next; i++) { 587 for (i = 0; i < next; i++) {
586 unsigned long start = jiffies; 588 unsigned long start = jiffies;
589
587 while (!mcelog.entry[i].finished) { 590 while (!mcelog.entry[i].finished) {
588 if (time_after_eq(jiffies, start + 2)) { 591 if (time_after_eq(jiffies, start + 2)) {
589 memset(mcelog.entry + i,0, sizeof(struct mce)); 592 memset(mcelog.entry + i,0, sizeof(struct mce));
@@ -593,31 +596,34 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
593 } 596 }
594 smp_rmb(); 597 smp_rmb();
595 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); 598 err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
596 buf += sizeof(struct mce); 599 buf += sizeof(struct mce);
597 timeout: 600 timeout:
598 ; 601 ;
599 } 602 }
600 603
601 memset(mcelog.entry, 0, next * sizeof(struct mce)); 604 memset(mcelog.entry, 0, next * sizeof(struct mce));
602 mcelog.next = 0; 605 mcelog.next = 0;
603 606
604 synchronize_sched(); 607 synchronize_sched();
605 608
606 /* Collect entries that were still getting written before the synchronize. */ 609 /*
607 610 * Collect entries that were still getting written before the
611 * synchronize.
612 */
608 on_each_cpu(collect_tscs, cpu_tsc, 1, 1); 613 on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
609 for (i = next; i < MCE_LOG_LEN; i++) { 614 for (i = next; i < MCE_LOG_LEN; i++) {
610 if (mcelog.entry[i].finished && 615 if (mcelog.entry[i].finished &&
611 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { 616 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
612 err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce)); 617 err |= copy_to_user(buf, mcelog.entry+i,
618 sizeof(struct mce));
613 smp_rmb(); 619 smp_rmb();
614 buf += sizeof(struct mce); 620 buf += sizeof(struct mce);
615 memset(&mcelog.entry[i], 0, sizeof(struct mce)); 621 memset(&mcelog.entry[i], 0, sizeof(struct mce));
616 } 622 }
617 } 623 }
618 up(&mce_read_sem); 624 up(&mce_read_sem);
619 kfree(cpu_tsc); 625 kfree(cpu_tsc);
620 return err ? -EFAULT : buf - ubuf; 626 return err ? -EFAULT : buf - ubuf;
621} 627}
622 628
623static unsigned int mce_poll(struct file *file, poll_table *wait) 629static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -628,26 +634,29 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
628 return 0; 634 return 0;
629} 635}
630 636
631static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg) 637static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd,
638 unsigned long arg)
632{ 639{
633 int __user *p = (int __user *)arg; 640 int __user *p = (int __user *)arg;
641
634 if (!capable(CAP_SYS_ADMIN)) 642 if (!capable(CAP_SYS_ADMIN))
635 return -EPERM; 643 return -EPERM;
636 switch (cmd) { 644 switch (cmd) {
637 case MCE_GET_RECORD_LEN: 645 case MCE_GET_RECORD_LEN:
638 return put_user(sizeof(struct mce), p); 646 return put_user(sizeof(struct mce), p);
639 case MCE_GET_LOG_LEN: 647 case MCE_GET_LOG_LEN:
640 return put_user(MCE_LOG_LEN, p); 648 return put_user(MCE_LOG_LEN, p);
641 case MCE_GETCLEAR_FLAGS: { 649 case MCE_GETCLEAR_FLAGS: {
642 unsigned flags; 650 unsigned flags;
643 do { 651
652 do {
644 flags = mcelog.flags; 653 flags = mcelog.flags;
645 } while (cmpxchg(&mcelog.flags, flags, 0) != flags); 654 } while (cmpxchg(&mcelog.flags, flags, 0) != flags);
646 return put_user(flags, p); 655 return put_user(flags, p);
647 } 656 }
648 default: 657 default:
649 return -ENOTTY; 658 return -ENOTTY;
650 } 659 }
651} 660}
652 661
653static const struct file_operations mce_chrdev_ops = { 662static const struct file_operations mce_chrdev_ops = {
@@ -678,10 +687,9 @@ void __init restart_mce(void)
678 set_in_cr4(X86_CR4_MCE); 687 set_in_cr4(X86_CR4_MCE);
679} 688}
680 689
681/* 690/*
682 * Old style boot options parsing. Only for compatibility. 691 * Old style boot options parsing. Only for compatibility.
683 */ 692 */
684
685static int __init mcheck_disable(char *str) 693static int __init mcheck_disable(char *str)
686{ 694{
687 mce_dont_init = 1; 695 mce_dont_init = 1;
@@ -702,16 +710,16 @@ static int __init mcheck_enable(char *str)
702 else if (isdigit(str[0])) 710 else if (isdigit(str[0]))
703 get_option(&str, &tolerant); 711 get_option(&str, &tolerant);
704 else 712 else
705 printk("mce= argument %s ignored. Please use /sys", str); 713 printk("mce= argument %s ignored. Please use /sys", str);
706 return 1; 714 return 1;
707} 715}
708 716
709__setup("nomce", mcheck_disable); 717__setup("nomce", mcheck_disable);
710__setup("mce=", mcheck_enable); 718__setup("mce=", mcheck_enable);
711 719
712/* 720/*
713 * Sysfs support 721 * Sysfs support
714 */ 722 */
715 723
716/* On resume clear all MCE state. Don't want to see leftovers from the BIOS. 724/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
717 Only one CPU is active at this time, the others get readded later using 725 Only one CPU is active at this time, the others get readded later using
@@ -723,12 +731,12 @@ static int mce_resume(struct sys_device *dev)
723} 731}
724 732
725/* Reinit MCEs after user configuration changes */ 733/* Reinit MCEs after user configuration changes */
726static void mce_restart(void) 734static void mce_restart(void)
727{ 735{
728 if (next_interval) 736 if (next_interval)
729 cancel_delayed_work(&mcheck_work); 737 cancel_delayed_work(&mcheck_work);
730 /* Timer race is harmless here */ 738 /* Timer race is harmless here */
731 on_each_cpu(mce_init, NULL, 1, 1); 739 on_each_cpu(mce_init, NULL, 1, 1);
732 next_interval = check_interval * HZ; 740 next_interval = check_interval * HZ;
733 if (next_interval) 741 if (next_interval)
734 schedule_delayed_work(&mcheck_work, 742 schedule_delayed_work(&mcheck_work,
@@ -744,17 +752,17 @@ DEFINE_PER_CPU(struct sys_device, device_mce);
744 752
745/* Why are there no generic functions for this? */ 753/* Why are there no generic functions for this? */
746#define ACCESSOR(name, var, start) \ 754#define ACCESSOR(name, var, start) \
747 static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ 755 static ssize_t show_ ## name(struct sys_device *s, char *buf) { \
748 return sprintf(buf, "%lx\n", (unsigned long)var); \ 756 return sprintf(buf, "%lx\n", (unsigned long)var); \
749 } \ 757 } \
750 static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ 758 static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \
751 char *end; \ 759 char *end; \
752 unsigned long new = simple_strtoul(buf, &end, 0); \ 760 unsigned long new = simple_strtoul(buf, &end, 0); \
753 if (end == buf) return -EINVAL; \ 761 if (end == buf) return -EINVAL; \
754 var = new; \ 762 var = new; \
755 start; \ 763 start; \
756 return end-buf; \ 764 return end-buf; \
757 } \ 765 } \
758 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 766 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
759 767
760/* TBD should generate these dynamically based on number of available banks */ 768/* TBD should generate these dynamically based on number of available banks */