aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
authorAndi Kleen <andi@firstfloor.org>2009-02-17 17:07:13 -0500
committerH. Peter Anvin <hpa@linux.intel.com>2009-02-19 17:50:58 -0500
commit0d7482e3d76522157c9d741d79fce22c401fa0c5 (patch)
treebe87d0d3dcf9c44c7e706ce40f23515471a0b16e /arch/x86/kernel/cpu
parente35849e910a6543d37c0d13648ef166678d03565 (diff)
x86, mce: implement dynamic machine check banks support
Impact: cleanup; making code future-proof; memory saving on small systems. This patch replaces the hardcoded maximum number of machine check banks with dynamic allocation depending on what the CPU reports. The sysfs data structures and the banks array are dynamically allocated. There is still a hard bank limit (128) because the mcelog protocol uses banks >= 128 as pseudo banks to escape other events. But we expect that 128 banks is beyond any reasonable CPU for now. This supersedes an earlier patch by Venki, but it solves the problem more completely by making the limit fully dynamic (up to the 128 boundary). This saves some memory on machines with fewer than 6 banks because they won't need sysdevs for unused ones, and it also allows using sysfs to control these banks on possible future CPUs with more than 6 banks. This is an updated patch addressing Venki's comments. I also added in another patch from Thomas which fixed the error allocation path (that patch was previously separate). Cc: Venki Pallipadi <venkatesh.pallipadi@intel.com> Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c147
1 files changed, 115 insertions, 32 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 870d08deccf..2297730bb51 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -24,6 +24,8 @@
24#include <linux/ctype.h> 24#include <linux/ctype.h>
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <linux/kobject.h>
28#include <linux/sysfs.h>
27#include <asm/processor.h> 29#include <asm/processor.h>
28#include <asm/msr.h> 30#include <asm/msr.h>
29#include <asm/mce.h> 31#include <asm/mce.h>
@@ -32,7 +34,12 @@
32#include <asm/idle.h> 34#include <asm/idle.h>
33 35
34#define MISC_MCELOG_MINOR 227 36#define MISC_MCELOG_MINOR 227
35#define NR_SYSFS_BANKS 6 37
38/*
39 * To support more than 128 would need to escape the predefined
40 * Linux defined extended banks first.
41 */
42#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
36 43
37atomic_t mce_entry; 44atomic_t mce_entry;
38 45
@@ -47,7 +54,7 @@ static int mce_dont_init;
47 */ 54 */
48static int tolerant = 1; 55static int tolerant = 1;
49static int banks; 56static int banks;
50static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; 57static u64 *bank;
51static unsigned long notify_user; 58static unsigned long notify_user;
52static int rip_msr; 59static int rip_msr;
53static int mce_bootlog = -1; 60static int mce_bootlog = -1;
@@ -212,7 +219,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
212 barrier(); 219 barrier();
213 220
214 for (i = 0; i < banks; i++) { 221 for (i = 0; i < banks; i++) {
215 if (i < NR_SYSFS_BANKS && !bank[i]) 222 if (!bank[i])
216 continue; 223 continue;
217 224
218 m.misc = 0; 225 m.misc = 0;
@@ -446,37 +453,54 @@ __initcall(periodic_mcheck_init);
446/* 453/*
447 * Initialize Machine Checks for a CPU. 454 * Initialize Machine Checks for a CPU.
448 */ 455 */
449static void mce_init(void *dummy) 456static int mce_cap_init(void)
450{ 457{
451 u64 cap; 458 u64 cap;
452 int i; 459 unsigned b;
453 460
454 rdmsrl(MSR_IA32_MCG_CAP, cap); 461 rdmsrl(MSR_IA32_MCG_CAP, cap);
455 banks = cap & 0xff; 462 b = cap & 0xff;
456 if (banks > MCE_EXTENDED_BANK) { 463 if (b > MAX_NR_BANKS) {
457 banks = MCE_EXTENDED_BANK; 464 printk(KERN_WARNING
458 printk(KERN_INFO "MCE: warning: using only %d banks\n", 465 "MCE: Using only %u machine check banks out of %u\n",
459 MCE_EXTENDED_BANK); 466 MAX_NR_BANKS, b);
467 b = MAX_NR_BANKS;
468 }
469
470 /* Don't support asymmetric configurations today */
471 WARN_ON(banks != 0 && b != banks);
472 banks = b;
473 if (!bank) {
474 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
475 if (!bank)
476 return -ENOMEM;
477 memset(bank, 0xff, banks * sizeof(u64));
460 } 478 }
479
461 /* Use accurate RIP reporting if available. */ 480 /* Use accurate RIP reporting if available. */
462 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) 481 if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
463 rip_msr = MSR_IA32_MCG_EIP; 482 rip_msr = MSR_IA32_MCG_EIP;
464 483
484 return 0;
485}
486
487static void mce_init(void *dummy)
488{
489 u64 cap;
490 int i;
491
465 /* Log the machine checks left over from the previous reset. 492 /* Log the machine checks left over from the previous reset.
466 This also clears all registers */ 493 This also clears all registers */
467 do_machine_check(NULL, mce_bootlog ? -1 : -2); 494 do_machine_check(NULL, mce_bootlog ? -1 : -2);
468 495
469 set_in_cr4(X86_CR4_MCE); 496 set_in_cr4(X86_CR4_MCE);
470 497
498 rdmsrl(MSR_IA32_MCG_CAP, cap);
471 if (cap & MCG_CTL_P) 499 if (cap & MCG_CTL_P)
472 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 500 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
473 501
474 for (i = 0; i < banks; i++) { 502 for (i = 0; i < banks; i++) {
475 if (i < NR_SYSFS_BANKS) 503 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
476 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
477 else
478 wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
479
480 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 504 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
481 } 505 }
482} 506}
@@ -486,10 +510,10 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
486{ 510{
487 /* This should be disabled by the BIOS, but isn't always */ 511 /* This should be disabled by the BIOS, but isn't always */
488 if (c->x86_vendor == X86_VENDOR_AMD) { 512 if (c->x86_vendor == X86_VENDOR_AMD) {
489 if(c->x86 == 15) 513 if (c->x86 == 15 && banks > 4)
490 /* disable GART TBL walk error reporting, which trips off 514 /* disable GART TBL walk error reporting, which trips off
491 incorrectly with the IOMMU & 3ware & Cerberus. */ 515 incorrectly with the IOMMU & 3ware & Cerberus. */
492 clear_bit(10, &bank[4]); 516 clear_bit(10, (unsigned long *)&bank[4]);
493 if(c->x86 <= 17 && mce_bootlog < 0) 517 if(c->x86 <= 17 && mce_bootlog < 0)
494 /* Lots of broken BIOS around that don't clear them 518 /* Lots of broken BIOS around that don't clear them
495 by default and leave crap in there. Don't log. */ 519 by default and leave crap in there. Don't log. */
@@ -532,11 +556,15 @@ static void mce_init_timer(void)
532 */ 556 */
533void __cpuinit mcheck_init(struct cpuinfo_x86 *c) 557void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
534{ 558{
535 mce_cpu_quirks(c);
536
537 if (!mce_available(c)) 559 if (!mce_available(c))
538 return; 560 return;
539 561
562 if (mce_cap_init() < 0) {
563 mce_dont_init = 1;
564 return;
565 }
566 mce_cpu_quirks(c);
567
540 mce_init(NULL); 568 mce_init(NULL);
541 mce_cpu_features(c); 569 mce_cpu_features(c);
542 mce_init_timer(); 570 mce_init_timer();
@@ -819,16 +847,26 @@ void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinit
819 } \ 847 } \
820 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); 848 static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
821 849
822/* 850static struct sysdev_attribute *bank_attrs;
823 * TBD should generate these dynamically based on number of available banks. 851
824 * Have only 6 contol banks in /sysfs until then. 852static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
825 */ 853 char *buf)
826ACCESSOR(bank0ctl,bank[0],mce_restart()) 854{
827ACCESSOR(bank1ctl,bank[1],mce_restart()) 855 u64 b = bank[attr - bank_attrs];
828ACCESSOR(bank2ctl,bank[2],mce_restart()) 856 return sprintf(buf, "%Lx\n", b);
829ACCESSOR(bank3ctl,bank[3],mce_restart()) 857}
830ACCESSOR(bank4ctl,bank[4],mce_restart()) 858
831ACCESSOR(bank5ctl,bank[5],mce_restart()) 859static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
860 const char *buf, size_t siz)
861{
862 char *end;
863 u64 new = simple_strtoull(buf, &end, 0);
864 if (end == buf)
865 return -EINVAL;
866 bank[attr - bank_attrs] = new;
867 mce_restart();
868 return end-buf;
869}
832 870
833static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, 871static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr,
834 char *buf) 872 char *buf)
@@ -855,8 +893,6 @@ static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
855static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); 893static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
856ACCESSOR(check_interval,check_interval,mce_restart()) 894ACCESSOR(check_interval,check_interval,mce_restart())
857static struct sysdev_attribute *mce_attributes[] = { 895static struct sysdev_attribute *mce_attributes[] = {
858 &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
859 &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl,
860 &attr_tolerant.attr, &attr_check_interval, &attr_trigger, 896 &attr_tolerant.attr, &attr_check_interval, &attr_trigger,
861 NULL 897 NULL
862}; 898};
@@ -886,11 +922,22 @@ static __cpuinit int mce_create_device(unsigned int cpu)
886 if (err) 922 if (err)
887 goto error; 923 goto error;
888 } 924 }
925 for (i = 0; i < banks; i++) {
926 err = sysdev_create_file(&per_cpu(device_mce, cpu),
927 &bank_attrs[i]);
928 if (err)
929 goto error2;
930 }
889 cpu_set(cpu, mce_device_initialized); 931 cpu_set(cpu, mce_device_initialized);
890 932
891 return 0; 933 return 0;
934error2:
935 while (--i >= 0) {
936 sysdev_remove_file(&per_cpu(device_mce, cpu),
937 &bank_attrs[i]);
938 }
892error: 939error:
893 while (i--) { 940 while (--i >= 0) {
894 sysdev_remove_file(&per_cpu(device_mce,cpu), 941 sysdev_remove_file(&per_cpu(device_mce,cpu),
895 mce_attributes[i]); 942 mce_attributes[i]);
896 } 943 }
@@ -909,6 +956,9 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
909 for (i = 0; mce_attributes[i]; i++) 956 for (i = 0; mce_attributes[i]; i++)
910 sysdev_remove_file(&per_cpu(device_mce,cpu), 957 sysdev_remove_file(&per_cpu(device_mce,cpu),
911 mce_attributes[i]); 958 mce_attributes[i]);
959 for (i = 0; i < banks; i++)
960 sysdev_remove_file(&per_cpu(device_mce, cpu),
961 &bank_attrs[i]);
912 sysdev_unregister(&per_cpu(device_mce,cpu)); 962 sysdev_unregister(&per_cpu(device_mce,cpu));
913 cpu_clear(cpu, mce_device_initialized); 963 cpu_clear(cpu, mce_device_initialized);
914} 964}
@@ -973,6 +1023,34 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
973 .notifier_call = mce_cpu_callback, 1023 .notifier_call = mce_cpu_callback,
974}; 1024};
975 1025
1026static __init int mce_init_banks(void)
1027{
1028 int i;
1029
1030 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1031 GFP_KERNEL);
1032 if (!bank_attrs)
1033 return -ENOMEM;
1034
1035 for (i = 0; i < banks; i++) {
1036 struct sysdev_attribute *a = &bank_attrs[i];
1037 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i);
1038 if (!a->attr.name)
1039 goto nomem;
1040 a->attr.mode = 0644;
1041 a->show = show_bank;
1042 a->store = set_bank;
1043 }
1044 return 0;
1045
1046nomem:
1047 while (--i >= 0)
1048 kfree(bank_attrs[i].attr.name);
1049 kfree(bank_attrs);
1050 bank_attrs = NULL;
1051 return -ENOMEM;
1052}
1053
976static __init int mce_init_device(void) 1054static __init int mce_init_device(void)
977{ 1055{
978 int err; 1056 int err;
@@ -980,6 +1058,11 @@ static __init int mce_init_device(void)
980 1058
981 if (!mce_available(&boot_cpu_data)) 1059 if (!mce_available(&boot_cpu_data))
982 return -EIO; 1060 return -EIO;
1061
1062 err = mce_init_banks();
1063 if (err)
1064 return err;
1065
983 err = sysdev_class_register(&mce_sysclass); 1066 err = sysdev_class_register(&mce_sysclass);
984 if (err) 1067 if (err)
985 return err; 1068 return err;