diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce_64.c')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_64.c | 147 |
1 files changed, 115 insertions, 32 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 870d08deccf..2297730bb51 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -24,6 +24,8 @@ | |||
24 | #include <linux/ctype.h> | 24 | #include <linux/ctype.h> |
25 | #include <linux/kmod.h> | 25 | #include <linux/kmod.h> |
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <linux/kobject.h> | ||
28 | #include <linux/sysfs.h> | ||
27 | #include <asm/processor.h> | 29 | #include <asm/processor.h> |
28 | #include <asm/msr.h> | 30 | #include <asm/msr.h> |
29 | #include <asm/mce.h> | 31 | #include <asm/mce.h> |
@@ -32,7 +34,12 @@ | |||
32 | #include <asm/idle.h> | 34 | #include <asm/idle.h> |
33 | 35 | ||
34 | #define MISC_MCELOG_MINOR 227 | 36 | #define MISC_MCELOG_MINOR 227 |
35 | #define NR_SYSFS_BANKS 6 | 37 | |
38 | /* | ||
39 | * To support more than 128 would need to escape the predefined | ||
40 | * Linux defined extended banks first. | ||
41 | */ | ||
42 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) | ||
36 | 43 | ||
37 | atomic_t mce_entry; | 44 | atomic_t mce_entry; |
38 | 45 | ||
@@ -47,7 +54,7 @@ static int mce_dont_init; | |||
47 | */ | 54 | */ |
48 | static int tolerant = 1; | 55 | static int tolerant = 1; |
49 | static int banks; | 56 | static int banks; |
50 | static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; | 57 | static u64 *bank; |
51 | static unsigned long notify_user; | 58 | static unsigned long notify_user; |
52 | static int rip_msr; | 59 | static int rip_msr; |
53 | static int mce_bootlog = -1; | 60 | static int mce_bootlog = -1; |
@@ -212,7 +219,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
212 | barrier(); | 219 | barrier(); |
213 | 220 | ||
214 | for (i = 0; i < banks; i++) { | 221 | for (i = 0; i < banks; i++) { |
215 | if (i < NR_SYSFS_BANKS && !bank[i]) | 222 | if (!bank[i]) |
216 | continue; | 223 | continue; |
217 | 224 | ||
218 | m.misc = 0; | 225 | m.misc = 0; |
@@ -446,37 +453,54 @@ __initcall(periodic_mcheck_init); | |||
446 | /* | 453 | /* |
447 | * Initialize Machine Checks for a CPU. | 454 | * Initialize Machine Checks for a CPU. |
448 | */ | 455 | */ |
449 | static void mce_init(void *dummy) | 456 | static int mce_cap_init(void) |
450 | { | 457 | { |
451 | u64 cap; | 458 | u64 cap; |
452 | int i; | 459 | unsigned b; |
453 | 460 | ||
454 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 461 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
455 | banks = cap & 0xff; | 462 | b = cap & 0xff; |
456 | if (banks > MCE_EXTENDED_BANK) { | 463 | if (b > MAX_NR_BANKS) { |
457 | banks = MCE_EXTENDED_BANK; | 464 | printk(KERN_WARNING |
458 | printk(KERN_INFO "MCE: warning: using only %d banks\n", | 465 | "MCE: Using only %u machine check banks out of %u\n", |
459 | MCE_EXTENDED_BANK); | 466 | MAX_NR_BANKS, b); |
467 | b = MAX_NR_BANKS; | ||
468 | } | ||
469 | |||
470 | /* Don't support asymmetric configurations today */ | ||
471 | WARN_ON(banks != 0 && b != banks); | ||
472 | banks = b; | ||
473 | if (!bank) { | ||
474 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
475 | if (!bank) | ||
476 | return -ENOMEM; | ||
477 | memset(bank, 0xff, banks * sizeof(u64)); | ||
460 | } | 478 | } |
479 | |||
461 | /* Use accurate RIP reporting if available. */ | 480 | /* Use accurate RIP reporting if available. */ |
462 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | 481 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) |
463 | rip_msr = MSR_IA32_MCG_EIP; | 482 | rip_msr = MSR_IA32_MCG_EIP; |
464 | 483 | ||
484 | return 0; | ||
485 | } | ||
486 | |||
487 | static void mce_init(void *dummy) | ||
488 | { | ||
489 | u64 cap; | ||
490 | int i; | ||
491 | |||
465 | /* Log the machine checks left over from the previous reset. | 492 | /* Log the machine checks left over from the previous reset. |
466 | This also clears all registers */ | 493 | This also clears all registers */ |
467 | do_machine_check(NULL, mce_bootlog ? -1 : -2); | 494 | do_machine_check(NULL, mce_bootlog ? -1 : -2); |
468 | 495 | ||
469 | set_in_cr4(X86_CR4_MCE); | 496 | set_in_cr4(X86_CR4_MCE); |
470 | 497 | ||
498 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
471 | if (cap & MCG_CTL_P) | 499 | if (cap & MCG_CTL_P) |
472 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 500 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
473 | 501 | ||
474 | for (i = 0; i < banks; i++) { | 502 | for (i = 0; i < banks; i++) { |
475 | if (i < NR_SYSFS_BANKS) | 503 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); |
476 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
477 | else | ||
478 | wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); | ||
479 | |||
480 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 504 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); |
481 | } | 505 | } |
482 | } | 506 | } |
@@ -486,10 +510,10 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
486 | { | 510 | { |
487 | /* This should be disabled by the BIOS, but isn't always */ | 511 | /* This should be disabled by the BIOS, but isn't always */ |
488 | if (c->x86_vendor == X86_VENDOR_AMD) { | 512 | if (c->x86_vendor == X86_VENDOR_AMD) { |
489 | if(c->x86 == 15) | 513 | if (c->x86 == 15 && banks > 4) |
490 | /* disable GART TBL walk error reporting, which trips off | 514 | /* disable GART TBL walk error reporting, which trips off |
491 | incorrectly with the IOMMU & 3ware & Cerberus. */ | 515 | incorrectly with the IOMMU & 3ware & Cerberus. */ |
492 | clear_bit(10, &bank[4]); | 516 | clear_bit(10, (unsigned long *)&bank[4]); |
493 | if(c->x86 <= 17 && mce_bootlog < 0) | 517 | if(c->x86 <= 17 && mce_bootlog < 0) |
494 | /* Lots of broken BIOS around that don't clear them | 518 | /* Lots of broken BIOS around that don't clear them |
495 | by default and leave crap in there. Don't log. */ | 519 | by default and leave crap in there. Don't log. */ |
@@ -532,11 +556,15 @@ static void mce_init_timer(void) | |||
532 | */ | 556 | */ |
533 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 557 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) |
534 | { | 558 | { |
535 | mce_cpu_quirks(c); | ||
536 | |||
537 | if (!mce_available(c)) | 559 | if (!mce_available(c)) |
538 | return; | 560 | return; |
539 | 561 | ||
562 | if (mce_cap_init() < 0) { | ||
563 | mce_dont_init = 1; | ||
564 | return; | ||
565 | } | ||
566 | mce_cpu_quirks(c); | ||
567 | |||
540 | mce_init(NULL); | 568 | mce_init(NULL); |
541 | mce_cpu_features(c); | 569 | mce_cpu_features(c); |
542 | mce_init_timer(); | 570 | mce_init_timer(); |
@@ -819,16 +847,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit | |||
819 | } \ | 847 | } \ |
820 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | 848 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); |
821 | 849 | ||
822 | /* | 850 | static struct sysdev_attribute *bank_attrs; |
823 | * TBD should generate these dynamically based on number of available banks. | 851 | |
824 | * Have only 6 contol banks in /sysfs until then. | 852 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, |
825 | */ | 853 | char *buf) |
826 | ACCESSOR(bank0ctl,bank[0],mce_restart()) | 854 | { |
827 | ACCESSOR(bank1ctl,bank[1],mce_restart()) | 855 | u64 b = bank[attr - bank_attrs]; |
828 | ACCESSOR(bank2ctl,bank[2],mce_restart()) | 856 | return sprintf(buf, "%Lx\n", b); |
829 | ACCESSOR(bank3ctl,bank[3],mce_restart()) | 857 | } |
830 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 858 | |
831 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | 859 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, |
860 | const char *buf, size_t siz) | ||
861 | { | ||
862 | char *end; | ||
863 | u64 new = simple_strtoull(buf, &end, 0); | ||
864 | if (end == buf) | ||
865 | return -EINVAL; | ||
866 | bank[attr - bank_attrs] = new; | ||
867 | mce_restart(); | ||
868 | return end-buf; | ||
869 | } | ||
832 | 870 | ||
833 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 871 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
834 | char *buf) | 872 | char *buf) |
@@ -855,8 +893,6 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | |||
855 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 893 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
856 | ACCESSOR(check_interval,check_interval,mce_restart()) | 894 | ACCESSOR(check_interval,check_interval,mce_restart()) |
857 | static struct sysdev_attribute *mce_attributes[] = { | 895 | static struct sysdev_attribute *mce_attributes[] = { |
858 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | ||
859 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, | ||
860 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, | 896 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, |
861 | NULL | 897 | NULL |
862 | }; | 898 | }; |
@@ -886,11 +922,22 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
886 | if (err) | 922 | if (err) |
887 | goto error; | 923 | goto error; |
888 | } | 924 | } |
925 | for (i = 0; i < banks; i++) { | ||
926 | err = sysdev_create_file(&per_cpu(device_mce, cpu), | ||
927 | &bank_attrs[i]); | ||
928 | if (err) | ||
929 | goto error2; | ||
930 | } | ||
889 | cpu_set(cpu, mce_device_initialized); | 931 | cpu_set(cpu, mce_device_initialized); |
890 | 932 | ||
891 | return 0; | 933 | return 0; |
934 | error2: | ||
935 | while (--i >= 0) { | ||
936 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
937 | &bank_attrs[i]); | ||
938 | } | ||
892 | error: | 939 | error: |
893 | while (i--) { | 940 | while (--i >= 0) { |
894 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 941 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
895 | mce_attributes[i]); | 942 | mce_attributes[i]); |
896 | } | 943 | } |
@@ -909,6 +956,9 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
909 | for (i = 0; mce_attributes[i]; i++) | 956 | for (i = 0; mce_attributes[i]; i++) |
910 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 957 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
911 | mce_attributes[i]); | 958 | mce_attributes[i]); |
959 | for (i = 0; i < banks; i++) | ||
960 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
961 | &bank_attrs[i]); | ||
912 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 962 | sysdev_unregister(&per_cpu(device_mce,cpu)); |
913 | cpu_clear(cpu, mce_device_initialized); | 963 | cpu_clear(cpu, mce_device_initialized); |
914 | } | 964 | } |
@@ -973,6 +1023,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { | |||
973 | .notifier_call = mce_cpu_callback, | 1023 | .notifier_call = mce_cpu_callback, |
974 | }; | 1024 | }; |
975 | 1025 | ||
1026 | static __init int mce_init_banks(void) | ||
1027 | { | ||
1028 | int i; | ||
1029 | |||
1030 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1031 | GFP_KERNEL); | ||
1032 | if (!bank_attrs) | ||
1033 | return -ENOMEM; | ||
1034 | |||
1035 | for (i = 0; i < banks; i++) { | ||
1036 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
1037 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
1038 | if (!a->attr.name) | ||
1039 | goto nomem; | ||
1040 | a->attr.mode = 0644; | ||
1041 | a->show = show_bank; | ||
1042 | a->store = set_bank; | ||
1043 | } | ||
1044 | return 0; | ||
1045 | |||
1046 | nomem: | ||
1047 | while (--i >= 0) | ||
1048 | kfree(bank_attrs[i].attr.name); | ||
1049 | kfree(bank_attrs); | ||
1050 | bank_attrs = NULL; | ||
1051 | return -ENOMEM; | ||
1052 | } | ||
1053 | |||
976 | static __init int mce_init_device(void) | 1054 | static __init int mce_init_device(void) |
977 | { | 1055 | { |
978 | int err; | 1056 | int err; |
@@ -980,6 +1058,11 @@ static __init int mce_init_device(void) | |||
980 | 1058 | ||
981 | if (!mce_available(&boot_cpu_data)) | 1059 | if (!mce_available(&boot_cpu_data)) |
982 | return -EIO; | 1060 | return -EIO; |
1061 | |||
1062 | err = mce_init_banks(); | ||
1063 | if (err) | ||
1064 | return err; | ||
1065 | |||
983 | err = sysdev_class_register(&mce_sysclass); | 1066 | err = sysdev_class_register(&mce_sysclass); |
984 | if (err) | 1067 | if (err) |
985 | return err; | 1068 | return err; |