about summary refs log tree commit diff stats
path: root/arch/x86_64/kernel/mce.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64/kernel/mce.c')
-rw-r--r-- arch/x86_64/kernel/mce.c 52
1 files changed, 34 insertions, 18 deletions
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 69541db5ff2c..13a2eada6c95 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -15,6 +15,7 @@
15#include <linux/sysdev.h> 15#include <linux/sysdev.h>
16#include <linux/miscdevice.h> 16#include <linux/miscdevice.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/capability.h>
18#include <linux/cpu.h> 19#include <linux/cpu.h>
19#include <linux/percpu.h> 20#include <linux/percpu.h>
20#include <linux/ctype.h> 21#include <linux/ctype.h>
@@ -23,9 +24,10 @@
23#include <asm/mce.h> 24#include <asm/mce.h>
24#include <asm/kdebug.h> 25#include <asm/kdebug.h>
25#include <asm/uaccess.h> 26#include <asm/uaccess.h>
27#include <asm/smp.h>
26 28
27#define MISC_MCELOG_MINOR 227 29#define MISC_MCELOG_MINOR 227
28#define NR_BANKS 5 30#define NR_BANKS 6
29 31
30static int mce_dont_init; 32static int mce_dont_init;
31 33
@@ -37,7 +39,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
37static unsigned long console_logged; 39static unsigned long console_logged;
38static int notify_user; 40static int notify_user;
39static int rip_msr; 41static int rip_msr;
40static int mce_bootlog; 42static int mce_bootlog = 1;
41 43
42/* 44/*
43 * Lockless MCE logging infrastructure. 45 * Lockless MCE logging infrastructure.
@@ -91,6 +93,7 @@ void mce_log(struct mce *mce)
91static void print_mce(struct mce *m) 93static void print_mce(struct mce *m)
92{ 94{
93 printk(KERN_EMERG "\n" 95 printk(KERN_EMERG "\n"
96 KERN_EMERG "HARDWARE ERROR\n"
94 KERN_EMERG 97 KERN_EMERG
95 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", 98 "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
96 m->cpu, m->mcgstatus, m->bank, m->status); 99 m->cpu, m->mcgstatus, m->bank, m->status);
@@ -109,6 +112,9 @@ static void print_mce(struct mce *m)
109 if (m->misc) 112 if (m->misc)
110 printk("MISC %Lx ", m->misc); 113 printk("MISC %Lx ", m->misc);
111 printk("\n"); 114 printk("\n");
115 printk(KERN_EMERG "This is not a software problem!\n");
116 printk(KERN_EMERG
117 "Run through mcelog --ascii to decode and contact your hardware vendor\n");
112} 118}
113 119
114static void mce_panic(char *msg, struct mce *backup, unsigned long start) 120static void mce_panic(char *msg, struct mce *backup, unsigned long start)
@@ -168,12 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code)
168 int panicm_found = 0; 174 int panicm_found = 0;
169 175
170 if (regs) 176 if (regs)
171 notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL); 177 notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL);
172 if (!banks) 178 if (!banks)
173 return; 179 return;
174 180
175 memset(&m, 0, sizeof(struct mce)); 181 memset(&m, 0, sizeof(struct mce));
176 m.cpu = hard_smp_processor_id(); 182 m.cpu = safe_smp_processor_id();
177 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); 183 rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
178 if (!(m.mcgstatus & MCG_STATUS_RIPV)) 184 if (!(m.mcgstatus & MCG_STATUS_RIPV))
179 kill_it = 1; 185 kill_it = 1;
@@ -347,7 +353,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
347 /* disable GART TBL walk error reporting, which trips off 353 /* disable GART TBL walk error reporting, which trips off
348 incorrectly with the IOMMU & 3ware & Cerberus. */ 354 incorrectly with the IOMMU & 3ware & Cerberus. */
349 clear_bit(10, &bank[4]); 355 clear_bit(10, &bank[4]);
356 /* Lots of broken BIOS around that don't clear them
357 by default and leave crap in there. Don't log. */
358 mce_bootlog = 0;
350 } 359 }
360
351} 361}
352 362
353static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) 363static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
@@ -356,6 +366,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
356 case X86_VENDOR_INTEL: 366 case X86_VENDOR_INTEL:
357 mce_intel_feature_init(c); 367 mce_intel_feature_init(c);
358 break; 368 break;
369 case X86_VENDOR_AMD:
370 mce_amd_feature_init(c);
371 break;
359 default: 372 default:
360 break; 373 break;
361 } 374 }
@@ -495,16 +508,16 @@ static int __init mcheck_disable(char *str)
495/* mce=off disables machine check. Note you can reenable it later 508/* mce=off disables machine check. Note you can reenable it later
496 using sysfs. 509 using sysfs.
497 mce=TOLERANCELEVEL (number, see above) 510 mce=TOLERANCELEVEL (number, see above)
498 mce=bootlog Log MCEs from before booting. Disabled by default to work 511 mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
499 around buggy BIOS that leave bogus MCEs. */ 512 mce=nobootlog Don't log MCEs from before booting. */
500static int __init mcheck_enable(char *str) 513static int __init mcheck_enable(char *str)
501{ 514{
502 if (*str == '=') 515 if (*str == '=')
503 str++; 516 str++;
504 if (!strcmp(str, "off")) 517 if (!strcmp(str, "off"))
505 mce_dont_init = 1; 518 mce_dont_init = 1;
506 else if (!strcmp(str, "bootlog")) 519 else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
507 mce_bootlog = 1; 520 mce_bootlog = str[0] == 'b';
508 else if (isdigit(str[0])) 521 else if (isdigit(str[0]))
509 get_option(&str, &tolerant); 522 get_option(&str, &tolerant);
510 else 523 else
@@ -566,6 +579,10 @@ ACCESSOR(bank1ctl,bank[1],mce_restart())
566ACCESSOR(bank2ctl,bank[2],mce_restart()) 579ACCESSOR(bank2ctl,bank[2],mce_restart())
567ACCESSOR(bank3ctl,bank[3],mce_restart()) 580ACCESSOR(bank3ctl,bank[3],mce_restart())
568ACCESSOR(bank4ctl,bank[4],mce_restart()) 581ACCESSOR(bank4ctl,bank[4],mce_restart())
582ACCESSOR(bank5ctl,bank[5],mce_restart())
583static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
584 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
585 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
569ACCESSOR(tolerant,tolerant,) 586ACCESSOR(tolerant,tolerant,)
570ACCESSOR(check_interval,check_interval,mce_restart()) 587ACCESSOR(check_interval,check_interval,mce_restart())
571 588
@@ -573,6 +590,7 @@ ACCESSOR(check_interval,check_interval,mce_restart())
573static __cpuinit int mce_create_device(unsigned int cpu) 590static __cpuinit int mce_create_device(unsigned int cpu)
574{ 591{
575 int err; 592 int err;
593 int i;
576 if (!mce_available(&cpu_data[cpu])) 594 if (!mce_available(&cpu_data[cpu]))
577 return -EIO; 595 return -EIO;
578 596
@@ -582,11 +600,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
582 err = sysdev_register(&per_cpu(device_mce,cpu)); 600 err = sysdev_register(&per_cpu(device_mce,cpu));
583 601
584 if (!err) { 602 if (!err) {
585 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); 603 for (i = 0; i < banks; i++)
586 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); 604 sysdev_create_file(&per_cpu(device_mce,cpu),
587 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); 605 bank_attributes[i]);
588 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl);
589 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl);
590 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); 606 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
591 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); 607 sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
592 } 608 }
@@ -596,11 +612,11 @@ static __cpuinit int mce_create_device(unsigned int cpu)
596#ifdef CONFIG_HOTPLUG_CPU 612#ifdef CONFIG_HOTPLUG_CPU
597static __cpuinit void mce_remove_device(unsigned int cpu) 613static __cpuinit void mce_remove_device(unsigned int cpu)
598{ 614{
599 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); 615 int i;
600 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); 616
601 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); 617 for (i = 0; i < banks; i++)
602 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); 618 sysdev_remove_file(&per_cpu(device_mce,cpu),
603 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); 619 bank_attributes[i]);
604 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); 620 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
605 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); 621 sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
606 sysdev_unregister(&per_cpu(device_mce,cpu)); 622 sysdev_unregister(&per_cpu(device_mce,cpu));