diff options
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_64.c | 147 |
1 files changed, 115 insertions, 32 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 870d08deccf7..2297730bb514 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
| @@ -24,6 +24,8 @@ | |||
| 24 | #include <linux/ctype.h> | 24 | #include <linux/ctype.h> |
| 25 | #include <linux/kmod.h> | 25 | #include <linux/kmod.h> |
| 26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
| 27 | #include <linux/kobject.h> | ||
| 28 | #include <linux/sysfs.h> | ||
| 27 | #include <asm/processor.h> | 29 | #include <asm/processor.h> |
| 28 | #include <asm/msr.h> | 30 | #include <asm/msr.h> |
| 29 | #include <asm/mce.h> | 31 | #include <asm/mce.h> |
| @@ -32,7 +34,12 @@ | |||
| 32 | #include <asm/idle.h> | 34 | #include <asm/idle.h> |
| 33 | 35 | ||
| 34 | #define MISC_MCELOG_MINOR 227 | 36 | #define MISC_MCELOG_MINOR 227 |
| 35 | #define NR_SYSFS_BANKS 6 | 37 | |
| 38 | /* | ||
| 39 | * To support more than 128 would need to escape the predefined | ||
| 40 | * Linux defined extended banks first. | ||
| 41 | */ | ||
| 42 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) | ||
| 36 | 43 | ||
| 37 | atomic_t mce_entry; | 44 | atomic_t mce_entry; |
| 38 | 45 | ||
| @@ -47,7 +54,7 @@ static int mce_dont_init; | |||
| 47 | */ | 54 | */ |
| 48 | static int tolerant = 1; | 55 | static int tolerant = 1; |
| 49 | static int banks; | 56 | static int banks; |
| 50 | static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; | 57 | static u64 *bank; |
| 51 | static unsigned long notify_user; | 58 | static unsigned long notify_user; |
| 52 | static int rip_msr; | 59 | static int rip_msr; |
| 53 | static int mce_bootlog = -1; | 60 | static int mce_bootlog = -1; |
| @@ -212,7 +219,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
| 212 | barrier(); | 219 | barrier(); |
| 213 | 220 | ||
| 214 | for (i = 0; i < banks; i++) { | 221 | for (i = 0; i < banks; i++) { |
| 215 | if (i < NR_SYSFS_BANKS && !bank[i]) | 222 | if (!bank[i]) |
| 216 | continue; | 223 | continue; |
| 217 | 224 | ||
| 218 | m.misc = 0; | 225 | m.misc = 0; |
| @@ -446,37 +453,54 @@ __initcall(periodic_mcheck_init); | |||
| 446 | /* | 453 | /* |
| 447 | * Initialize Machine Checks for a CPU. | 454 | * Initialize Machine Checks for a CPU. |
| 448 | */ | 455 | */ |
| 449 | static void mce_init(void *dummy) | 456 | static int mce_cap_init(void) |
| 450 | { | 457 | { |
| 451 | u64 cap; | 458 | u64 cap; |
| 452 | int i; | 459 | unsigned b; |
| 453 | 460 | ||
| 454 | rdmsrl(MSR_IA32_MCG_CAP, cap); | 461 | rdmsrl(MSR_IA32_MCG_CAP, cap); |
| 455 | banks = cap & 0xff; | 462 | b = cap & 0xff; |
| 456 | if (banks > MCE_EXTENDED_BANK) { | 463 | if (b > MAX_NR_BANKS) { |
| 457 | banks = MCE_EXTENDED_BANK; | 464 | printk(KERN_WARNING |
| 458 | printk(KERN_INFO "MCE: warning: using only %d banks\n", | 465 | "MCE: Using only %u machine check banks out of %u\n", |
| 459 | MCE_EXTENDED_BANK); | 466 | MAX_NR_BANKS, b); |
| 467 | b = MAX_NR_BANKS; | ||
| 468 | } | ||
| 469 | |||
| 470 | /* Don't support asymmetric configurations today */ | ||
| 471 | WARN_ON(banks != 0 && b != banks); | ||
| 472 | banks = b; | ||
| 473 | if (!bank) { | ||
| 474 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
| 475 | if (!bank) | ||
| 476 | return -ENOMEM; | ||
| 477 | memset(bank, 0xff, banks * sizeof(u64)); | ||
| 460 | } | 478 | } |
| 479 | |||
| 461 | /* Use accurate RIP reporting if available. */ | 480 | /* Use accurate RIP reporting if available. */ |
| 462 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | 481 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) |
| 463 | rip_msr = MSR_IA32_MCG_EIP; | 482 | rip_msr = MSR_IA32_MCG_EIP; |
| 464 | 483 | ||
| 484 | return 0; | ||
| 485 | } | ||
| 486 | |||
| 487 | static void mce_init(void *dummy) | ||
| 488 | { | ||
| 489 | u64 cap; | ||
| 490 | int i; | ||
| 491 | |||
| 465 | /* Log the machine checks left over from the previous reset. | 492 | /* Log the machine checks left over from the previous reset. |
| 466 | This also clears all registers */ | 493 | This also clears all registers */ |
| 467 | do_machine_check(NULL, mce_bootlog ? -1 : -2); | 494 | do_machine_check(NULL, mce_bootlog ? -1 : -2); |
| 468 | 495 | ||
| 469 | set_in_cr4(X86_CR4_MCE); | 496 | set_in_cr4(X86_CR4_MCE); |
| 470 | 497 | ||
| 498 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
| 471 | if (cap & MCG_CTL_P) | 499 | if (cap & MCG_CTL_P) |
| 472 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 500 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
| 473 | 501 | ||
| 474 | for (i = 0; i < banks; i++) { | 502 | for (i = 0; i < banks; i++) { |
| 475 | if (i < NR_SYSFS_BANKS) | 503 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); |
| 476 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
| 477 | else | ||
| 478 | wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); | ||
| 479 | |||
| 480 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 504 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); |
| 481 | } | 505 | } |
| 482 | } | 506 | } |
| @@ -486,10 +510,10 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
| 486 | { | 510 | { |
| 487 | /* This should be disabled by the BIOS, but isn't always */ | 511 | /* This should be disabled by the BIOS, but isn't always */ |
| 488 | if (c->x86_vendor == X86_VENDOR_AMD) { | 512 | if (c->x86_vendor == X86_VENDOR_AMD) { |
| 489 | if(c->x86 == 15) | 513 | if (c->x86 == 15 && banks > 4) |
| 490 | /* disable GART TBL walk error reporting, which trips off | 514 | /* disable GART TBL walk error reporting, which trips off |
| 491 | incorrectly with the IOMMU & 3ware & Cerberus. */ | 515 | incorrectly with the IOMMU & 3ware & Cerberus. */ |
| 492 | clear_bit(10, &bank[4]); | 516 | clear_bit(10, (unsigned long *)&bank[4]); |
| 493 | if(c->x86 <= 17 && mce_bootlog < 0) | 517 | if(c->x86 <= 17 && mce_bootlog < 0) |
| 494 | /* Lots of broken BIOS around that don't clear them | 518 | /* Lots of broken BIOS around that don't clear them |
| 495 | by default and leave crap in there. Don't log. */ | 519 | by default and leave crap in there. Don't log. */ |
| @@ -532,11 +556,15 @@ static void mce_init_timer(void) | |||
| 532 | */ | 556 | */ |
| 533 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 557 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) |
| 534 | { | 558 | { |
| 535 | mce_cpu_quirks(c); | ||
| 536 | |||
| 537 | if (!mce_available(c)) | 559 | if (!mce_available(c)) |
| 538 | return; | 560 | return; |
| 539 | 561 | ||
| 562 | if (mce_cap_init() < 0) { | ||
| 563 | mce_dont_init = 1; | ||
| 564 | return; | ||
| 565 | } | ||
| 566 | mce_cpu_quirks(c); | ||
| 567 | |||
| 540 | mce_init(NULL); | 568 | mce_init(NULL); |
| 541 | mce_cpu_features(c); | 569 | mce_cpu_features(c); |
| 542 | mce_init_timer(); | 570 | mce_init_timer(); |
| @@ -819,16 +847,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit | |||
| 819 | } \ | 847 | } \ |
| 820 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | 848 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); |
| 821 | 849 | ||
| 822 | /* | 850 | static struct sysdev_attribute *bank_attrs; |
| 823 | * TBD should generate these dynamically based on number of available banks. | 851 | |
| 824 | * Have only 6 contol banks in /sysfs until then. | 852 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, |
| 825 | */ | 853 | char *buf) |
| 826 | ACCESSOR(bank0ctl,bank[0],mce_restart()) | 854 | { |
| 827 | ACCESSOR(bank1ctl,bank[1],mce_restart()) | 855 | u64 b = bank[attr - bank_attrs]; |
| 828 | ACCESSOR(bank2ctl,bank[2],mce_restart()) | 856 | return sprintf(buf, "%Lx\n", b); |
| 829 | ACCESSOR(bank3ctl,bank[3],mce_restart()) | 857 | } |
| 830 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 858 | |
| 831 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | 859 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, |
| 860 | const char *buf, size_t siz) | ||
| 861 | { | ||
| 862 | char *end; | ||
| 863 | u64 new = simple_strtoull(buf, &end, 0); | ||
| 864 | if (end == buf) | ||
| 865 | return -EINVAL; | ||
| 866 | bank[attr - bank_attrs] = new; | ||
| 867 | mce_restart(); | ||
| 868 | return end-buf; | ||
| 869 | } | ||
| 832 | 870 | ||
| 833 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 871 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
| 834 | char *buf) | 872 | char *buf) |
| @@ -855,8 +893,6 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | |||
| 855 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 893 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
| 856 | ACCESSOR(check_interval,check_interval,mce_restart()) | 894 | ACCESSOR(check_interval,check_interval,mce_restart()) |
| 857 | static struct sysdev_attribute *mce_attributes[] = { | 895 | static struct sysdev_attribute *mce_attributes[] = { |
| 858 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | ||
| 859 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, | ||
| 860 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, | 896 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, |
| 861 | NULL | 897 | NULL |
| 862 | }; | 898 | }; |
| @@ -886,11 +922,22 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
| 886 | if (err) | 922 | if (err) |
| 887 | goto error; | 923 | goto error; |
| 888 | } | 924 | } |
| 925 | for (i = 0; i < banks; i++) { | ||
| 926 | err = sysdev_create_file(&per_cpu(device_mce, cpu), | ||
| 927 | &bank_attrs[i]); | ||
| 928 | if (err) | ||
| 929 | goto error2; | ||
| 930 | } | ||
| 889 | cpu_set(cpu, mce_device_initialized); | 931 | cpu_set(cpu, mce_device_initialized); |
| 890 | 932 | ||
| 891 | return 0; | 933 | return 0; |
| 934 | error2: | ||
| 935 | while (--i >= 0) { | ||
| 936 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
| 937 | &bank_attrs[i]); | ||
| 938 | } | ||
| 892 | error: | 939 | error: |
| 893 | while (i--) { | 940 | while (--i >= 0) { |
| 894 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 941 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
| 895 | mce_attributes[i]); | 942 | mce_attributes[i]); |
| 896 | } | 943 | } |
| @@ -909,6 +956,9 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
| 909 | for (i = 0; mce_attributes[i]; i++) | 956 | for (i = 0; mce_attributes[i]; i++) |
| 910 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 957 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
| 911 | mce_attributes[i]); | 958 | mce_attributes[i]); |
| 959 | for (i = 0; i < banks; i++) | ||
| 960 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
| 961 | &bank_attrs[i]); | ||
| 912 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 962 | sysdev_unregister(&per_cpu(device_mce,cpu)); |
| 913 | cpu_clear(cpu, mce_device_initialized); | 963 | cpu_clear(cpu, mce_device_initialized); |
| 914 | } | 964 | } |
| @@ -973,6 +1023,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { | |||
| 973 | .notifier_call = mce_cpu_callback, | 1023 | .notifier_call = mce_cpu_callback, |
| 974 | }; | 1024 | }; |
| 975 | 1025 | ||
| 1026 | static __init int mce_init_banks(void) | ||
| 1027 | { | ||
| 1028 | int i; | ||
| 1029 | |||
| 1030 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
| 1031 | GFP_KERNEL); | ||
| 1032 | if (!bank_attrs) | ||
| 1033 | return -ENOMEM; | ||
| 1034 | |||
| 1035 | for (i = 0; i < banks; i++) { | ||
| 1036 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
| 1037 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
| 1038 | if (!a->attr.name) | ||
| 1039 | goto nomem; | ||
| 1040 | a->attr.mode = 0644; | ||
| 1041 | a->show = show_bank; | ||
| 1042 | a->store = set_bank; | ||
| 1043 | } | ||
| 1044 | return 0; | ||
| 1045 | |||
| 1046 | nomem: | ||
| 1047 | while (--i >= 0) | ||
| 1048 | kfree(bank_attrs[i].attr.name); | ||
| 1049 | kfree(bank_attrs); | ||
| 1050 | bank_attrs = NULL; | ||
| 1051 | return -ENOMEM; | ||
| 1052 | } | ||
| 1053 | |||
| 976 | static __init int mce_init_device(void) | 1054 | static __init int mce_init_device(void) |
| 977 | { | 1055 | { |
| 978 | int err; | 1056 | int err; |
| @@ -980,6 +1058,11 @@ static __init int mce_init_device(void) | |||
| 980 | 1058 | ||
| 981 | if (!mce_available(&boot_cpu_data)) | 1059 | if (!mce_available(&boot_cpu_data)) |
| 982 | return -EIO; | 1060 | return -EIO; |
| 1061 | |||
| 1062 | err = mce_init_banks(); | ||
| 1063 | if (err) | ||
| 1064 | return err; | ||
| 1065 | |||
| 983 | err = sysdev_class_register(&mce_sysclass); | 1066 | err = sysdev_class_register(&mce_sysclass); |
| 984 | if (err) | 1067 | if (err) |
| 985 | return err; | 1068 | return err; |
