aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
author Borislav Petkov <borislav.petkov@amd.com> 2010-10-15 11:44:04 -0400
committer Borislav Petkov <borislav.petkov@amd.com> 2011-01-07 05:34:02 -0500
commit 2299ef7114000f8e403797b7f9a972f54bc05fad (patch)
tree bcce876e2bcd05d168e43422d3ad82a1a2419ae3 /drivers/edac
parent ae7bb7c679e7ddba6c52d1a78a30f9bc868d9738 (diff)
amd64_edac: Check ECC capabilities initially
Rework the code to check the hardware ECC capabilities at PCI probing time. We do all further initialization only if we actually can/have ECC enabled. While at it: 0. Fix function naming. 1. Simplify/clarify debug output. 2. Remove amd64_ prefix from the static functions. 3. Reorganize code. Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- drivers/edac/amd64_edac.c 141
1 files changed, 75 insertions, 66 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9bc0299e8c74..4632081eaea6 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2188,18 +2188,19 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
2188static int amd64_init_csrows(struct mem_ctl_info *mci) 2188static int amd64_init_csrows(struct mem_ctl_info *mci)
2189{ 2189{
2190 struct csrow_info *csrow; 2190 struct csrow_info *csrow;
2191 struct amd64_pvt *pvt; 2191 struct amd64_pvt *pvt = mci->pvt_info;
2192 u64 input_addr_min, input_addr_max, sys_addr; 2192 u64 input_addr_min, input_addr_max, sys_addr;
2193 u32 val;
2193 int i, empty = 1; 2194 int i, empty = 1;
2194 2195
2195 pvt = mci->pvt_info; 2196 amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &val);
2196 2197
2197 amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &pvt->nbcfg); 2198 pvt->nbcfg = val;
2199 pvt->ctl_error_info.nbcfg = val;
2198 2200
2199 debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg, 2201 debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2200 (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", 2202 pvt->mc_node_id, val,
2201 (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled" 2203 !!(val & K8_NBCFG_CHIPKILL), !!(val & K8_NBCFG_ECC_ENABLE));
2202 );
2203 2204
2204 for (i = 0; i < pvt->cs_count; i++) { 2205 for (i = 0; i < pvt->cs_count; i++) {
2205 csrow = &mci->csrows[i]; 2206 csrow = &mci->csrows[i];
@@ -2294,7 +2295,7 @@ out:
2294 return ret; 2295 return ret;
2295} 2296}
2296 2297
2297static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) 2298static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
2298{ 2299{
2299 cpumask_var_t cmask; 2300 cpumask_var_t cmask;
2300 int cpu; 2301 int cpu;
@@ -2332,30 +2333,31 @@ static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool o
2332 return 0; 2333 return 0;
2333} 2334}
2334 2335
2335static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) 2336static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
2337 struct pci_dev *F3)
2336{ 2338{
2337 struct amd64_pvt *pvt = mci->pvt_info; 2339 bool ret = true;
2338 u8 nid = pvt->mc_node_id;
2339 struct ecc_settings *s = ecc_stngs[nid];
2340 u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; 2340 u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
2341 2341
2342 amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value); 2342 if (toggle_ecc_err_reporting(s, nid, ON)) {
2343 amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2344 return false;
2345 }
2346
2347 amd64_read_pci_cfg(F3, K8_NBCTL, &value);
2343 2348
2344 /* turn on UECCEn and CECCEn bits */ 2349 /* turn on UECCEn and CECCEn bits */
2345 s->old_nbctl = value & mask; 2350 s->old_nbctl = value & mask;
2346 s->nbctl_valid = true; 2351 s->nbctl_valid = true;
2347 2352
2348 value |= mask; 2353 value |= mask;
2349 pci_write_config_dword(pvt->F3, K8_NBCTL, value); 2354 pci_write_config_dword(F3, K8_NBCTL, value);
2350 2355
2351 if (amd64_toggle_ecc_err_reporting(s, nid, ON)) 2356 amd64_read_pci_cfg(F3, K8_NBCFG, &value);
2352 amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
2353 2357
2354 amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); 2358 debugf0("1: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2355 2359 nid, value,
2356 debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, 2360 !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
2357 (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
2358 (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
2359 2361
2360 if (!(value & K8_NBCFG_ECC_ENABLE)) { 2362 if (!(value & K8_NBCFG_ECC_ENABLE)) {
2361 amd64_warn("DRAM ECC disabled on this node, enabling...\n"); 2363 amd64_warn("DRAM ECC disabled on this node, enabling...\n");
@@ -2364,13 +2366,14 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
2364 2366
2365 /* Attempt to turn on DRAM ECC Enable */ 2367 /* Attempt to turn on DRAM ECC Enable */
2366 value |= K8_NBCFG_ECC_ENABLE; 2368 value |= K8_NBCFG_ECC_ENABLE;
2367 pci_write_config_dword(pvt->F3, K8_NBCFG, value); 2369 pci_write_config_dword(F3, K8_NBCFG, value);
2368 2370
2369 amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); 2371 amd64_read_pci_cfg(F3, K8_NBCFG, &value);
2370 2372
2371 if (!(value & K8_NBCFG_ECC_ENABLE)) { 2373 if (!(value & K8_NBCFG_ECC_ENABLE)) {
2372 amd64_warn("Hardware rejected DRAM ECC enable," 2374 amd64_warn("Hardware rejected DRAM ECC enable,"
2373 "check memory DIMM configuration.\n"); 2375 "check memory DIMM configuration.\n");
2376 ret = false;
2374 } else { 2377 } else {
2375 amd64_info("Hardware accepted DRAM ECC Enable\n"); 2378 amd64_info("Hardware accepted DRAM ECC Enable\n");
2376 } 2379 }
@@ -2378,11 +2381,11 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
2378 s->flags.nb_ecc_prev = 1; 2381 s->flags.nb_ecc_prev = 1;
2379 } 2382 }
2380 2383
2381 debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, 2384 debugf0("2: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
2382 (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", 2385 nid, value,
2383 (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); 2386 !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
2384 2387
2385 pvt->ctl_error_info.nbcfg = value; 2388 return ret;
2386} 2389}
2387 2390
2388static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) 2391static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
@@ -2408,15 +2411,15 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
2408 } 2411 }
2409 2412
2410 /* restore the NB Enable MCGCTL bit */ 2413 /* restore the NB Enable MCGCTL bit */
2411 if (amd64_toggle_ecc_err_reporting(s, nid, OFF)) 2414 if (toggle_ecc_err_reporting(s, nid, OFF))
2412 amd64_warn("Error restoring NB MCGCTL settings!\n"); 2415 amd64_warn("Error restoring NB MCGCTL settings!\n");
2413} 2416}
2414 2417
2415/* 2418/*
2416 * EDAC requires that the BIOS have ECC enabled before taking over the 2419 * EDAC requires that the BIOS have ECC enabled before
2417 * processing of ECC errors. This is because the BIOS can properly initialize 2420 * taking over the processing of ECC errors. A command line
2418 * the memory system completely. A command line option allows to force-enable 2421 * option allows to force-enable hardware ECC later in
2419 * hardware ECC later in amd64_enable_ecc_error_reporting(). 2422 * enable_ecc_error_reporting().
2420 */ 2423 */
2421static const char *ecc_msg = 2424static const char *ecc_msg =
2422 "ECC disabled in the BIOS or no ECC capability, module will not load.\n" 2425 "ECC disabled in the BIOS or no ECC capability, module will not load.\n"
@@ -2424,33 +2427,28 @@ static const char *ecc_msg =
2424 "'ecc_enable_override'.\n" 2427 "'ecc_enable_override'.\n"
2425 " (Note that use of the override may cause unknown side effects.)\n"; 2428 " (Note that use of the override may cause unknown side effects.)\n";
2426 2429
2427static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) 2430static bool ecc_enabled(struct pci_dev *F3, u8 nid)
2428{ 2431{
2429 u32 value; 2432 u32 value;
2430 u8 ecc_enabled = 0; 2433 u8 ecc_en = 0;
2431 bool nb_mce_en = false; 2434 bool nb_mce_en = false;
2432 2435
2433 amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); 2436 amd64_read_pci_cfg(F3, K8_NBCFG, &value);
2434 2437
2435 ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); 2438 ecc_en = !!(value & K8_NBCFG_ECC_ENABLE);
2436 amd64_info("DRAM ECC %s.\n", (ecc_enabled ? "enabled" : "disabled")); 2439 amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
2437 2440
2438 nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id); 2441 nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
2439 if (!nb_mce_en) 2442 if (!nb_mce_en)
2440 amd64_notice("NB MCE bank disabled, " 2443 amd64_notice("NB MCE bank disabled, set MSR "
2441 "set MSR 0x%08x[4] on node %d to enable.\n", 2444 "0x%08x[4] on node %d to enable.\n",
2442 MSR_IA32_MCG_CTL, pvt->mc_node_id); 2445 MSR_IA32_MCG_CTL, nid);
2443
2444 if (!ecc_enabled || !nb_mce_en) {
2445 if (!ecc_enable_override) {
2446 amd64_notice("%s", ecc_msg);
2447 return -ENODEV;
2448 } else {
2449 amd64_warn("Forcing ECC on!\n");
2450 }
2451 }
2452 2446
2453 return 0; 2447 if (!ecc_en || !nb_mce_en) {
2448 amd64_notice("%s", ecc_msg);
2449 return false;
2450 }
2451 return true;
2454} 2452}
2455 2453
2456struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + 2454struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
@@ -2536,7 +2534,7 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
2536 return fam_type; 2534 return fam_type;
2537} 2535}
2538 2536
2539static int amd64_probe_one_instance(struct pci_dev *F2) 2537static int amd64_init_one_instance(struct pci_dev *F2)
2540{ 2538{
2541 struct amd64_pvt *pvt = NULL; 2539 struct amd64_pvt *pvt = NULL;
2542 struct amd64_family_type *fam_type = NULL; 2540 struct amd64_family_type *fam_type = NULL;
@@ -2561,11 +2559,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2)
2561 if (err) 2559 if (err)
2562 goto err_free; 2560 goto err_free;
2563 2561
2564 ret = -EINVAL;
2565 err = amd64_check_ecc_enabled(pvt);
2566 if (err)
2567 goto err_put;
2568
2569 /* 2562 /*
2570 * Save the pointer to the private data for use in 2nd initialization 2563 * Save the pointer to the private data for use in 2nd initialization
2571 * stage 2564 * stage
@@ -2574,9 +2567,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2)
2574 2567
2575 return 0; 2568 return 0;
2576 2569
2577err_put:
2578 amd64_free_mc_sibling_devices(pvt);
2579
2580err_free: 2570err_free:
2581 kfree(pvt); 2571 kfree(pvt);
2582 2572
@@ -2618,7 +2608,6 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
2618 if (amd64_init_csrows(mci)) 2608 if (amd64_init_csrows(mci))
2619 mci->edac_cap = EDAC_FLAG_NONE; 2609 mci->edac_cap = EDAC_FLAG_NONE;
2620 2610
2621 amd64_enable_ecc_error_reporting(mci);
2622 amd64_set_mc_sysfs_attributes(mci); 2611 amd64_set_mc_sysfs_attributes(mci);
2623 2612
2624 ret = -ENODEV; 2613 ret = -ENODEV;
@@ -2655,12 +2644,13 @@ err_exit:
2655} 2644}
2656 2645
2657 2646
2658static int __devinit amd64_init_one_instance(struct pci_dev *pdev, 2647static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
2659 const struct pci_device_id *mc_type) 2648 const struct pci_device_id *mc_type)
2660{ 2649{
2661 int ret = 0;
2662 u8 nid = get_node_id(pdev); 2650 u8 nid = get_node_id(pdev);
2651 struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
2663 struct ecc_settings *s; 2652 struct ecc_settings *s;
2653 int ret = 0;
2664 2654
2665 ret = pci_enable_device(pdev); 2655 ret = pci_enable_device(pdev);
2666 if (ret < 0) { 2656 if (ret < 0) {
@@ -2671,15 +2661,34 @@ static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
2671 ret = -ENOMEM; 2661 ret = -ENOMEM;
2672 s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); 2662 s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
2673 if (!s) 2663 if (!s)
2674 return ret; 2664 goto err_out;
2675 2665
2676 ecc_stngs[nid] = s; 2666 ecc_stngs[nid] = s;
2677 2667
2678 ret = amd64_probe_one_instance(pdev); 2668 if (!ecc_enabled(F3, nid)) {
2669 ret = -ENODEV;
2670
2671 if (!ecc_enable_override)
2672 goto err_enable;
2673
2674 amd64_warn("Forcing ECC on!\n");
2675
2676 if (!enable_ecc_error_reporting(s, nid, F3))
2677 goto err_enable;
2678 }
2679
2680 ret = amd64_init_one_instance(pdev);
2679 if (ret < 0) 2681 if (ret < 0)
2680 amd64_err("Error probing instance: %d\n", nid); 2682 amd64_err("Error probing instance: %d\n", nid);
2681 2683
2682 return ret; 2684 return ret;
2685
2686err_enable:
2687 kfree(s);
2688 ecc_stngs[nid] = NULL;
2689
2690err_out:
2691 return ret;
2683} 2692}
2684 2693
2685static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) 2694static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
@@ -2741,7 +2750,7 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table);
2741 2750
2742static struct pci_driver amd64_pci_driver = { 2751static struct pci_driver amd64_pci_driver = {
2743 .name = EDAC_MOD_STR, 2752 .name = EDAC_MOD_STR,
2744 .probe = amd64_init_one_instance, 2753 .probe = amd64_probe_one_instance,
2745 .remove = __devexit_p(amd64_remove_one_instance), 2754 .remove = __devexit_p(amd64_remove_one_instance),
2746 .id_table = amd64_pci_table, 2755 .id_table = amd64_pci_table,
2747}; 2756};