about summary refs log tree commit diff stats
path: root/drivers/edac/amd64_edac.c
diff options
context:
space:
mode:
authorBorislav Petkov <borislav.petkov@amd.com>2009-12-11 12:14:40 -0500
committerH. Peter Anvin <hpa@zytor.com>2009-12-11 13:59:21 -0500
commit505422517d3f126bb939439e9d15dece94e11d2c (patch)
treea2d58c0b3cdf2b1c6b66eee6d78a283224ae1ac3 /drivers/edac/amd64_edac.c
parent5c6baba84e1ac6a79b266b40e17e692aab6604a1 (diff)
x86, msr: Add support for non-contiguous cpumasks
The current rd/wrmsr_on_cpus helpers assume that the supplied cpumasks are contiguous. However, there are machines out there like some K8 multinode Opterons which have a non-contiguous core enumeration on each node (e.g. cores 0,2 on node 0 instead of 0,1), see http://www.gossamer-threads.com/lists/linux/kernel/1160268. This patch fixes out-of-bounds writes (see URL above) by adding per-CPU msr structs which are used on the respective cores. Additionally, two helpers, msrs_{alloc,free}, are provided for use by the callers of the MSR accessors. Cc: H. Peter Anvin <hpa@zytor.com> Cc: Mauro Carvalho Chehab <mchehab@redhat.com> Cc: Aristeu Rozanski <aris@redhat.com> Cc: Randy Dunlap <randy.dunlap@oracle.com> Cc: Doug Thompson <dougthompson@xmission.com> Signed-off-by: Borislav Petkov <borislav.petkov@amd.com> LKML-Reference: <20091211171440.GD31998@aftab> Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r--drivers/edac/amd64_edac.c46
1 file changed, 17 insertions(+), 29 deletions(-)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 5fdd6daa40ea..df5b68433f34 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -13,6 +13,8 @@ module_param(report_gart_errors, int, 0644);
13static int ecc_enable_override; 13static int ecc_enable_override;
14module_param(ecc_enable_override, int, 0644); 14module_param(ecc_enable_override, int, 0644);
15 15
16static struct msr *msrs;
17
16/* Lookup table for all possible MC control instances */ 18/* Lookup table for all possible MC control instances */
17struct amd64_pvt; 19struct amd64_pvt;
18static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES]; 20static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES];
@@ -2495,8 +2497,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, int nid)
2495static bool amd64_nb_mce_bank_enabled_on_node(int nid) 2497static bool amd64_nb_mce_bank_enabled_on_node(int nid)
2496{ 2498{
2497 cpumask_var_t mask; 2499 cpumask_var_t mask;
2498 struct msr *msrs; 2500 int cpu, nbe;
2499 int cpu, nbe, idx = 0;
2500 bool ret = false; 2501 bool ret = false;
2501 2502
2502 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 2503 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
@@ -2507,32 +2508,22 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid)
2507 2508
2508 get_cpus_on_this_dct_cpumask(mask, nid); 2509 get_cpus_on_this_dct_cpumask(mask, nid);
2509 2510
2510 msrs = kzalloc(sizeof(struct msr) * cpumask_weight(mask), GFP_KERNEL);
2511 if (!msrs) {
2512 amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
2513 __func__);
2514 free_cpumask_var(mask);
2515 return false;
2516 }
2517
2518 rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs); 2511 rdmsr_on_cpus(mask, MSR_IA32_MCG_CTL, msrs);
2519 2512
2520 for_each_cpu(cpu, mask) { 2513 for_each_cpu(cpu, mask) {
2521 nbe = msrs[idx].l & K8_MSR_MCGCTL_NBE; 2514 struct msr *reg = per_cpu_ptr(msrs, cpu);
2515 nbe = reg->l & K8_MSR_MCGCTL_NBE;
2522 2516
2523 debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n", 2517 debugf0("core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
2524 cpu, msrs[idx].q, 2518 cpu, reg->q,
2525 (nbe ? "enabled" : "disabled")); 2519 (nbe ? "enabled" : "disabled"));
2526 2520
2527 if (!nbe) 2521 if (!nbe)
2528 goto out; 2522 goto out;
2529
2530 idx++;
2531 } 2523 }
2532 ret = true; 2524 ret = true;
2533 2525
2534out: 2526out:
2535 kfree(msrs);
2536 free_cpumask_var(mask); 2527 free_cpumask_var(mask);
2537 return ret; 2528 return ret;
2538} 2529}
@@ -2540,8 +2531,7 @@ out:
2540static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on) 2531static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
2541{ 2532{
2542 cpumask_var_t cmask; 2533 cpumask_var_t cmask;
2543 struct msr *msrs = NULL; 2534 int cpu;
2544 int cpu, idx = 0;
2545 2535
2546 if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) { 2536 if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
2547 amd64_printk(KERN_WARNING, "%s: error allocating mask\n", 2537 amd64_printk(KERN_WARNING, "%s: error allocating mask\n",
@@ -2551,34 +2541,27 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
2551 2541
2552 get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id); 2542 get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id);
2553 2543
2554 msrs = kzalloc(sizeof(struct msr) * cpumask_weight(cmask), GFP_KERNEL);
2555 if (!msrs) {
2556 amd64_printk(KERN_WARNING, "%s: error allocating msrs\n",
2557 __func__);
2558 return -ENOMEM;
2559 }
2560
2561 rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); 2544 rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2562 2545
2563 for_each_cpu(cpu, cmask) { 2546 for_each_cpu(cpu, cmask) {
2564 2547
2548 struct msr *reg = per_cpu_ptr(msrs, cpu);
2549
2565 if (on) { 2550 if (on) {
2566 if (msrs[idx].l & K8_MSR_MCGCTL_NBE) 2551 if (reg->l & K8_MSR_MCGCTL_NBE)
2567 pvt->flags.ecc_report = 1; 2552 pvt->flags.ecc_report = 1;
2568 2553
2569 msrs[idx].l |= K8_MSR_MCGCTL_NBE; 2554 reg->l |= K8_MSR_MCGCTL_NBE;
2570 } else { 2555 } else {
2571 /* 2556 /*
2572 * Turn off ECC reporting only when it was off before 2557 * Turn off ECC reporting only when it was off before
2573 */ 2558 */
2574 if (!pvt->flags.ecc_report) 2559 if (!pvt->flags.ecc_report)
2575 msrs[idx].l &= ~K8_MSR_MCGCTL_NBE; 2560 reg->l &= ~K8_MSR_MCGCTL_NBE;
2576 } 2561 }
2577 idx++;
2578 } 2562 }
2579 wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs); 2563 wrmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
2580 2564
2581 kfree(msrs);
2582 free_cpumask_var(cmask); 2565 free_cpumask_var(cmask);
2583 2566
2584 return 0; 2567 return 0;
@@ -3036,6 +3019,8 @@ static int __init amd64_edac_init(void)
3036 if (cache_k8_northbridges() < 0) 3019 if (cache_k8_northbridges() < 0)
3037 return err; 3020 return err;
3038 3021
3022 msrs = msrs_alloc();
3023
3039 err = pci_register_driver(&amd64_pci_driver); 3024 err = pci_register_driver(&amd64_pci_driver);
3040 if (err) 3025 if (err)
3041 return err; 3026 return err;
@@ -3071,6 +3056,9 @@ static void __exit amd64_edac_exit(void)
3071 edac_pci_release_generic_ctl(amd64_ctl_pci); 3056 edac_pci_release_generic_ctl(amd64_ctl_pci);
3072 3057
3073 pci_unregister_driver(&amd64_pci_driver); 3058 pci_unregister_driver(&amd64_pci_driver);
3059
3060 msrs_free(msrs);
3061 msrs = NULL;
3074} 3062}
3075 3063
3076module_init(amd64_edac_init); 3064module_init(amd64_edac_init);