diff options
Diffstat (limited to 'arch/x86_64/kernel/mce.c')
-rw-r--r-- | arch/x86_64/kernel/mce.c | 52 |
1 files changed, 34 insertions, 18 deletions
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 69541db5ff2c..13a2eada6c95 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/sysdev.h> | 15 | #include <linux/sysdev.h> |
16 | #include <linux/miscdevice.h> | 16 | #include <linux/miscdevice.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/capability.h> | ||
18 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
19 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
20 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
@@ -23,9 +24,10 @@ | |||
23 | #include <asm/mce.h> | 24 | #include <asm/mce.h> |
24 | #include <asm/kdebug.h> | 25 | #include <asm/kdebug.h> |
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
27 | #include <asm/smp.h> | ||
26 | 28 | ||
27 | #define MISC_MCELOG_MINOR 227 | 29 | #define MISC_MCELOG_MINOR 227 |
28 | #define NR_BANKS 5 | 30 | #define NR_BANKS 6 |
29 | 31 | ||
30 | static int mce_dont_init; | 32 | static int mce_dont_init; |
31 | 33 | ||
@@ -37,7 +39,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; | |||
37 | static unsigned long console_logged; | 39 | static unsigned long console_logged; |
38 | static int notify_user; | 40 | static int notify_user; |
39 | static int rip_msr; | 41 | static int rip_msr; |
40 | static int mce_bootlog; | 42 | static int mce_bootlog = 1; |
41 | 43 | ||
42 | /* | 44 | /* |
43 | * Lockless MCE logging infrastructure. | 45 | * Lockless MCE logging infrastructure. |
@@ -91,6 +93,7 @@ void mce_log(struct mce *mce) | |||
91 | static void print_mce(struct mce *m) | 93 | static void print_mce(struct mce *m) |
92 | { | 94 | { |
93 | printk(KERN_EMERG "\n" | 95 | printk(KERN_EMERG "\n" |
96 | KERN_EMERG "HARDWARE ERROR\n" | ||
94 | KERN_EMERG | 97 | KERN_EMERG |
95 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 98 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", |
96 | m->cpu, m->mcgstatus, m->bank, m->status); | 99 | m->cpu, m->mcgstatus, m->bank, m->status); |
@@ -109,6 +112,9 @@ static void print_mce(struct mce *m) | |||
109 | if (m->misc) | 112 | if (m->misc) |
110 | printk("MISC %Lx ", m->misc); | 113 | printk("MISC %Lx ", m->misc); |
111 | printk("\n"); | 114 | printk("\n"); |
115 | printk(KERN_EMERG "This is not a software problem!\n"); | ||
116 | printk(KERN_EMERG | ||
117 | "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | ||
112 | } | 118 | } |
113 | 119 | ||
114 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) | 120 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) |
@@ -168,12 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
168 | int panicm_found = 0; | 174 | int panicm_found = 0; |
169 | 175 | ||
170 | if (regs) | 176 | if (regs) |
171 | notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL); | 177 | notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); |
172 | if (!banks) | 178 | if (!banks) |
173 | return; | 179 | return; |
174 | 180 | ||
175 | memset(&m, 0, sizeof(struct mce)); | 181 | memset(&m, 0, sizeof(struct mce)); |
176 | m.cpu = hard_smp_processor_id(); | 182 | m.cpu = safe_smp_processor_id(); |
177 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | 183 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); |
178 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | 184 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
179 | kill_it = 1; | 185 | kill_it = 1; |
@@ -347,7 +353,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
347 | /* disable GART TBL walk error reporting, which trips off | 353 | /* disable GART TBL walk error reporting, which trips off |
348 | incorrectly with the IOMMU & 3ware & Cerberus. */ | 354 | incorrectly with the IOMMU & 3ware & Cerberus. */ |
349 | clear_bit(10, &bank[4]); | 355 | clear_bit(10, &bank[4]); |
356 | /* Lots of broken BIOS around that don't clear them | ||
357 | by default and leave crap in there. Don't log. */ | ||
358 | mce_bootlog = 0; | ||
350 | } | 359 | } |
360 | |||
351 | } | 361 | } |
352 | 362 | ||
353 | static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) | 363 | static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) |
@@ -356,6 +366,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) | |||
356 | case X86_VENDOR_INTEL: | 366 | case X86_VENDOR_INTEL: |
357 | mce_intel_feature_init(c); | 367 | mce_intel_feature_init(c); |
358 | break; | 368 | break; |
369 | case X86_VENDOR_AMD: | ||
370 | mce_amd_feature_init(c); | ||
371 | break; | ||
359 | default: | 372 | default: |
360 | break; | 373 | break; |
361 | } | 374 | } |
@@ -495,16 +508,16 @@ static int __init mcheck_disable(char *str) | |||
495 | /* mce=off disables machine check. Note you can reenable it later | 508 | /* mce=off disables machine check. Note you can reenable it later |
496 | using sysfs. | 509 | using sysfs. |
497 | mce=TOLERANCELEVEL (number, see above) | 510 | mce=TOLERANCELEVEL (number, see above) |
498 | mce=bootlog Log MCEs from before booting. Disabled by default to work | 511 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
499 | around buggy BIOS that leave bogus MCEs. */ | 512 | mce=nobootlog Don't log MCEs from before booting. */ |
500 | static int __init mcheck_enable(char *str) | 513 | static int __init mcheck_enable(char *str) |
501 | { | 514 | { |
502 | if (*str == '=') | 515 | if (*str == '=') |
503 | str++; | 516 | str++; |
504 | if (!strcmp(str, "off")) | 517 | if (!strcmp(str, "off")) |
505 | mce_dont_init = 1; | 518 | mce_dont_init = 1; |
506 | else if (!strcmp(str, "bootlog")) | 519 | else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) |
507 | mce_bootlog = 1; | 520 | mce_bootlog = str[0] == 'b'; |
508 | else if (isdigit(str[0])) | 521 | else if (isdigit(str[0])) |
509 | get_option(&str, &tolerant); | 522 | get_option(&str, &tolerant); |
510 | else | 523 | else |
@@ -566,6 +579,10 @@ ACCESSOR(bank1ctl,bank[1],mce_restart()) | |||
566 | ACCESSOR(bank2ctl,bank[2],mce_restart()) | 579 | ACCESSOR(bank2ctl,bank[2],mce_restart()) |
567 | ACCESSOR(bank3ctl,bank[3],mce_restart()) | 580 | ACCESSOR(bank3ctl,bank[3],mce_restart()) |
568 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 581 | ACCESSOR(bank4ctl,bank[4],mce_restart()) |
582 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | ||
583 | static struct sysdev_attribute * bank_attributes[NR_BANKS] = { | ||
584 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | ||
585 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl}; | ||
569 | ACCESSOR(tolerant,tolerant,) | 586 | ACCESSOR(tolerant,tolerant,) |
570 | ACCESSOR(check_interval,check_interval,mce_restart()) | 587 | ACCESSOR(check_interval,check_interval,mce_restart()) |
571 | 588 | ||
@@ -573,6 +590,7 @@ ACCESSOR(check_interval,check_interval,mce_restart()) | |||
573 | static __cpuinit int mce_create_device(unsigned int cpu) | 590 | static __cpuinit int mce_create_device(unsigned int cpu) |
574 | { | 591 | { |
575 | int err; | 592 | int err; |
593 | int i; | ||
576 | if (!mce_available(&cpu_data[cpu])) | 594 | if (!mce_available(&cpu_data[cpu])) |
577 | return -EIO; | 595 | return -EIO; |
578 | 596 | ||
@@ -582,11 +600,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
582 | err = sysdev_register(&per_cpu(device_mce,cpu)); | 600 | err = sysdev_register(&per_cpu(device_mce,cpu)); |
583 | 601 | ||
584 | if (!err) { | 602 | if (!err) { |
585 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); | 603 | for (i = 0; i < banks; i++) |
586 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); | 604 | sysdev_create_file(&per_cpu(device_mce,cpu), |
587 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); | 605 | bank_attributes[i]); |
588 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); | ||
589 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); | ||
590 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); | 606 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); |
591 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); | 607 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); |
592 | } | 608 | } |
@@ -596,11 +612,11 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
596 | #ifdef CONFIG_HOTPLUG_CPU | 612 | #ifdef CONFIG_HOTPLUG_CPU |
597 | static __cpuinit void mce_remove_device(unsigned int cpu) | 613 | static __cpuinit void mce_remove_device(unsigned int cpu) |
598 | { | 614 | { |
599 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); | 615 | int i; |
600 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); | 616 | |
601 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); | 617 | for (i = 0; i < banks; i++) |
602 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); | 618 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
603 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); | 619 | bank_attributes[i]); |
604 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); | 620 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); |
605 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); | 621 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); |
606 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 622 | sysdev_unregister(&per_cpu(device_mce,cpu)); |