diff options
author | Borislav Petkov <borislav.petkov@amd.com> | 2010-10-15 11:44:04 -0400 |
---|---|---|
committer | Borislav Petkov <borislav.petkov@amd.com> | 2011-01-07 05:34:02 -0500 |
commit | 2299ef7114000f8e403797b7f9a972f54bc05fad (patch) | |
tree | bcce876e2bcd05d168e43422d3ad82a1a2419ae3 /drivers/edac | |
parent | ae7bb7c679e7ddba6c52d1a78a30f9bc868d9738 (diff) |
amd64_edac: Check ECC capabilities initially
Rework the code to check the hardware ECC capabilities at PCI probing
time. All further initialization is done only if ECC is already enabled
or can be force-enabled.
While at it:
0. Fix function naming.
1. Simplify/clarify debug output.
2. Remove amd64_ prefix from the static functions.
3. Reorganize code.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/amd64_edac.c | 141 |
1 file changed, 75 insertions, 66 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9bc0299e8c74..4632081eaea6 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -2188,18 +2188,19 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt) | |||
2188 | static int amd64_init_csrows(struct mem_ctl_info *mci) | 2188 | static int amd64_init_csrows(struct mem_ctl_info *mci) |
2189 | { | 2189 | { |
2190 | struct csrow_info *csrow; | 2190 | struct csrow_info *csrow; |
2191 | struct amd64_pvt *pvt; | 2191 | struct amd64_pvt *pvt = mci->pvt_info; |
2192 | u64 input_addr_min, input_addr_max, sys_addr; | 2192 | u64 input_addr_min, input_addr_max, sys_addr; |
2193 | u32 val; | ||
2193 | int i, empty = 1; | 2194 | int i, empty = 1; |
2194 | 2195 | ||
2195 | pvt = mci->pvt_info; | 2196 | amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &val); |
2196 | 2197 | ||
2197 | amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &pvt->nbcfg); | 2198 | pvt->nbcfg = val; |
2199 | pvt->ctl_error_info.nbcfg = val; | ||
2198 | 2200 | ||
2199 | debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg, | 2201 | debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", |
2200 | (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", | 2202 | pvt->mc_node_id, val, |
2201 | (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled" | 2203 | !!(val & K8_NBCFG_CHIPKILL), !!(val & K8_NBCFG_ECC_ENABLE)); |
2202 | ); | ||
2203 | 2204 | ||
2204 | for (i = 0; i < pvt->cs_count; i++) { | 2205 | for (i = 0; i < pvt->cs_count; i++) { |
2205 | csrow = &mci->csrows[i]; | 2206 | csrow = &mci->csrows[i]; |
@@ -2294,7 +2295,7 @@ out: | |||
2294 | return ret; | 2295 | return ret; |
2295 | } | 2296 | } |
2296 | 2297 | ||
2297 | static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) | 2298 | static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on) |
2298 | { | 2299 | { |
2299 | cpumask_var_t cmask; | 2300 | cpumask_var_t cmask; |
2300 | int cpu; | 2301 | int cpu; |
@@ -2332,30 +2333,31 @@ static int amd64_toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool o | |||
2332 | return 0; | 2333 | return 0; |
2333 | } | 2334 | } |
2334 | 2335 | ||
2335 | static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) | 2336 | static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid, |
2337 | struct pci_dev *F3) | ||
2336 | { | 2338 | { |
2337 | struct amd64_pvt *pvt = mci->pvt_info; | 2339 | bool ret = true; |
2338 | u8 nid = pvt->mc_node_id; | ||
2339 | struct ecc_settings *s = ecc_stngs[nid]; | ||
2340 | u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; | 2340 | u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn; |
2341 | 2341 | ||
2342 | amd64_read_pci_cfg(pvt->F3, K8_NBCTL, &value); | 2342 | if (toggle_ecc_err_reporting(s, nid, ON)) { |
2343 | amd64_warn("Error enabling ECC reporting over MCGCTL!\n"); | ||
2344 | return false; | ||
2345 | } | ||
2346 | |||
2347 | amd64_read_pci_cfg(F3, K8_NBCTL, &value); | ||
2343 | 2348 | ||
2344 | /* turn on UECCEn and CECCEn bits */ | 2349 | /* turn on UECCEn and CECCEn bits */ |
2345 | s->old_nbctl = value & mask; | 2350 | s->old_nbctl = value & mask; |
2346 | s->nbctl_valid = true; | 2351 | s->nbctl_valid = true; |
2347 | 2352 | ||
2348 | value |= mask; | 2353 | value |= mask; |
2349 | pci_write_config_dword(pvt->F3, K8_NBCTL, value); | 2354 | pci_write_config_dword(F3, K8_NBCTL, value); |
2350 | 2355 | ||
2351 | if (amd64_toggle_ecc_err_reporting(s, nid, ON)) | 2356 | amd64_read_pci_cfg(F3, K8_NBCFG, &value); |
2352 | amd64_warn("Error enabling ECC reporting over MCGCTL!\n"); | ||
2353 | 2357 | ||
2354 | amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); | 2358 | debugf0("1: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", |
2355 | 2359 | nid, value, | |
2356 | debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, | 2360 | !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE)); |
2357 | (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", | ||
2358 | (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); | ||
2359 | 2361 | ||
2360 | if (!(value & K8_NBCFG_ECC_ENABLE)) { | 2362 | if (!(value & K8_NBCFG_ECC_ENABLE)) { |
2361 | amd64_warn("DRAM ECC disabled on this node, enabling...\n"); | 2363 | amd64_warn("DRAM ECC disabled on this node, enabling...\n"); |
@@ -2364,13 +2366,14 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) | |||
2364 | 2366 | ||
2365 | /* Attempt to turn on DRAM ECC Enable */ | 2367 | /* Attempt to turn on DRAM ECC Enable */ |
2366 | value |= K8_NBCFG_ECC_ENABLE; | 2368 | value |= K8_NBCFG_ECC_ENABLE; |
2367 | pci_write_config_dword(pvt->F3, K8_NBCFG, value); | 2369 | pci_write_config_dword(F3, K8_NBCFG, value); |
2368 | 2370 | ||
2369 | amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); | 2371 | amd64_read_pci_cfg(F3, K8_NBCFG, &value); |
2370 | 2372 | ||
2371 | if (!(value & K8_NBCFG_ECC_ENABLE)) { | 2373 | if (!(value & K8_NBCFG_ECC_ENABLE)) { |
2372 | amd64_warn("Hardware rejected DRAM ECC enable," | 2374 | amd64_warn("Hardware rejected DRAM ECC enable," |
2373 | "check memory DIMM configuration.\n"); | 2375 | "check memory DIMM configuration.\n"); |
2376 | ret = false; | ||
2374 | } else { | 2377 | } else { |
2375 | amd64_info("Hardware accepted DRAM ECC Enable\n"); | 2378 | amd64_info("Hardware accepted DRAM ECC Enable\n"); |
2376 | } | 2379 | } |
@@ -2378,11 +2381,11 @@ static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci) | |||
2378 | s->flags.nb_ecc_prev = 1; | 2381 | s->flags.nb_ecc_prev = 1; |
2379 | } | 2382 | } |
2380 | 2383 | ||
2381 | debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value, | 2384 | debugf0("2: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n", |
2382 | (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled", | 2385 | nid, value, |
2383 | (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"); | 2386 | !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE)); |
2384 | 2387 | ||
2385 | pvt->ctl_error_info.nbcfg = value; | 2388 | return ret; |
2386 | } | 2389 | } |
2387 | 2390 | ||
2388 | static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) | 2391 | static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) |
@@ -2408,15 +2411,15 @@ static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt) | |||
2408 | } | 2411 | } |
2409 | 2412 | ||
2410 | /* restore the NB Enable MCGCTL bit */ | 2413 | /* restore the NB Enable MCGCTL bit */ |
2411 | if (amd64_toggle_ecc_err_reporting(s, nid, OFF)) | 2414 | if (toggle_ecc_err_reporting(s, nid, OFF)) |
2412 | amd64_warn("Error restoring NB MCGCTL settings!\n"); | 2415 | amd64_warn("Error restoring NB MCGCTL settings!\n"); |
2413 | } | 2416 | } |
2414 | 2417 | ||
2415 | /* | 2418 | /* |
2416 | * EDAC requires that the BIOS have ECC enabled before taking over the | 2419 | * EDAC requires that the BIOS have ECC enabled before |
2417 | * processing of ECC errors. This is because the BIOS can properly initialize | 2420 | * taking over the processing of ECC errors. A command line |
2418 | * the memory system completely. A command line option allows to force-enable | 2421 | * option allows to force-enable hardware ECC later in |
2419 | * hardware ECC later in amd64_enable_ecc_error_reporting(). | 2422 | * enable_ecc_error_reporting(). |
2420 | */ | 2423 | */ |
2421 | static const char *ecc_msg = | 2424 | static const char *ecc_msg = |
2422 | "ECC disabled in the BIOS or no ECC capability, module will not load.\n" | 2425 | "ECC disabled in the BIOS or no ECC capability, module will not load.\n" |
@@ -2424,33 +2427,28 @@ static const char *ecc_msg = | |||
2424 | "'ecc_enable_override'.\n" | 2427 | "'ecc_enable_override'.\n" |
2425 | " (Note that use of the override may cause unknown side effects.)\n"; | 2428 | " (Note that use of the override may cause unknown side effects.)\n"; |
2426 | 2429 | ||
2427 | static int amd64_check_ecc_enabled(struct amd64_pvt *pvt) | 2430 | static bool ecc_enabled(struct pci_dev *F3, u8 nid) |
2428 | { | 2431 | { |
2429 | u32 value; | 2432 | u32 value; |
2430 | u8 ecc_enabled = 0; | 2433 | u8 ecc_en = 0; |
2431 | bool nb_mce_en = false; | 2434 | bool nb_mce_en = false; |
2432 | 2435 | ||
2433 | amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &value); | 2436 | amd64_read_pci_cfg(F3, K8_NBCFG, &value); |
2434 | 2437 | ||
2435 | ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE); | 2438 | ecc_en = !!(value & K8_NBCFG_ECC_ENABLE); |
2436 | amd64_info("DRAM ECC %s.\n", (ecc_enabled ? "enabled" : "disabled")); | 2439 | amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled")); |
2437 | 2440 | ||
2438 | nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id); | 2441 | nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid); |
2439 | if (!nb_mce_en) | 2442 | if (!nb_mce_en) |
2440 | amd64_notice("NB MCE bank disabled, " | 2443 | amd64_notice("NB MCE bank disabled, set MSR " |
2441 | "set MSR 0x%08x[4] on node %d to enable.\n", | 2444 | "0x%08x[4] on node %d to enable.\n", |
2442 | MSR_IA32_MCG_CTL, pvt->mc_node_id); | 2445 | MSR_IA32_MCG_CTL, nid); |
2443 | |||
2444 | if (!ecc_enabled || !nb_mce_en) { | ||
2445 | if (!ecc_enable_override) { | ||
2446 | amd64_notice("%s", ecc_msg); | ||
2447 | return -ENODEV; | ||
2448 | } else { | ||
2449 | amd64_warn("Forcing ECC on!\n"); | ||
2450 | } | ||
2451 | } | ||
2452 | 2446 | ||
2453 | return 0; | 2447 | if (!ecc_en || !nb_mce_en) { |
2448 | amd64_notice("%s", ecc_msg); | ||
2449 | return false; | ||
2450 | } | ||
2451 | return true; | ||
2454 | } | 2452 | } |
2455 | 2453 | ||
2456 | struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + | 2454 | struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) + |
@@ -2536,7 +2534,7 @@ static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt) | |||
2536 | return fam_type; | 2534 | return fam_type; |
2537 | } | 2535 | } |
2538 | 2536 | ||
2539 | static int amd64_probe_one_instance(struct pci_dev *F2) | 2537 | static int amd64_init_one_instance(struct pci_dev *F2) |
2540 | { | 2538 | { |
2541 | struct amd64_pvt *pvt = NULL; | 2539 | struct amd64_pvt *pvt = NULL; |
2542 | struct amd64_family_type *fam_type = NULL; | 2540 | struct amd64_family_type *fam_type = NULL; |
@@ -2561,11 +2559,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2) | |||
2561 | if (err) | 2559 | if (err) |
2562 | goto err_free; | 2560 | goto err_free; |
2563 | 2561 | ||
2564 | ret = -EINVAL; | ||
2565 | err = amd64_check_ecc_enabled(pvt); | ||
2566 | if (err) | ||
2567 | goto err_put; | ||
2568 | |||
2569 | /* | 2562 | /* |
2570 | * Save the pointer to the private data for use in 2nd initialization | 2563 | * Save the pointer to the private data for use in 2nd initialization |
2571 | * stage | 2564 | * stage |
@@ -2574,9 +2567,6 @@ static int amd64_probe_one_instance(struct pci_dev *F2) | |||
2574 | 2567 | ||
2575 | return 0; | 2568 | return 0; |
2576 | 2569 | ||
2577 | err_put: | ||
2578 | amd64_free_mc_sibling_devices(pvt); | ||
2579 | |||
2580 | err_free: | 2570 | err_free: |
2581 | kfree(pvt); | 2571 | kfree(pvt); |
2582 | 2572 | ||
@@ -2618,7 +2608,6 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt) | |||
2618 | if (amd64_init_csrows(mci)) | 2608 | if (amd64_init_csrows(mci)) |
2619 | mci->edac_cap = EDAC_FLAG_NONE; | 2609 | mci->edac_cap = EDAC_FLAG_NONE; |
2620 | 2610 | ||
2621 | amd64_enable_ecc_error_reporting(mci); | ||
2622 | amd64_set_mc_sysfs_attributes(mci); | 2611 | amd64_set_mc_sysfs_attributes(mci); |
2623 | 2612 | ||
2624 | ret = -ENODEV; | 2613 | ret = -ENODEV; |
@@ -2655,12 +2644,13 @@ err_exit: | |||
2655 | } | 2644 | } |
2656 | 2645 | ||
2657 | 2646 | ||
2658 | static int __devinit amd64_init_one_instance(struct pci_dev *pdev, | 2647 | static int __devinit amd64_probe_one_instance(struct pci_dev *pdev, |
2659 | const struct pci_device_id *mc_type) | 2648 | const struct pci_device_id *mc_type) |
2660 | { | 2649 | { |
2661 | int ret = 0; | ||
2662 | u8 nid = get_node_id(pdev); | 2650 | u8 nid = get_node_id(pdev); |
2651 | struct pci_dev *F3 = node_to_amd_nb(nid)->misc; | ||
2663 | struct ecc_settings *s; | 2652 | struct ecc_settings *s; |
2653 | int ret = 0; | ||
2664 | 2654 | ||
2665 | ret = pci_enable_device(pdev); | 2655 | ret = pci_enable_device(pdev); |
2666 | if (ret < 0) { | 2656 | if (ret < 0) { |
@@ -2671,15 +2661,34 @@ static int __devinit amd64_init_one_instance(struct pci_dev *pdev, | |||
2671 | ret = -ENOMEM; | 2661 | ret = -ENOMEM; |
2672 | s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); | 2662 | s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL); |
2673 | if (!s) | 2663 | if (!s) |
2674 | return ret; | 2664 | goto err_out; |
2675 | 2665 | ||
2676 | ecc_stngs[nid] = s; | 2666 | ecc_stngs[nid] = s; |
2677 | 2667 | ||
2678 | ret = amd64_probe_one_instance(pdev); | 2668 | if (!ecc_enabled(F3, nid)) { |
2669 | ret = -ENODEV; | ||
2670 | |||
2671 | if (!ecc_enable_override) | ||
2672 | goto err_enable; | ||
2673 | |||
2674 | amd64_warn("Forcing ECC on!\n"); | ||
2675 | |||
2676 | if (!enable_ecc_error_reporting(s, nid, F3)) | ||
2677 | goto err_enable; | ||
2678 | } | ||
2679 | |||
2680 | ret = amd64_init_one_instance(pdev); | ||
2679 | if (ret < 0) | 2681 | if (ret < 0) |
2680 | amd64_err("Error probing instance: %d\n", nid); | 2682 | amd64_err("Error probing instance: %d\n", nid); |
2681 | 2683 | ||
2682 | return ret; | 2684 | return ret; |
2685 | |||
2686 | err_enable: | ||
2687 | kfree(s); | ||
2688 | ecc_stngs[nid] = NULL; | ||
2689 | |||
2690 | err_out: | ||
2691 | return ret; | ||
2683 | } | 2692 | } |
2684 | 2693 | ||
2685 | static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) | 2694 | static void __devexit amd64_remove_one_instance(struct pci_dev *pdev) |
@@ -2741,7 +2750,7 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table); | |||
2741 | 2750 | ||
2742 | static struct pci_driver amd64_pci_driver = { | 2751 | static struct pci_driver amd64_pci_driver = { |
2743 | .name = EDAC_MOD_STR, | 2752 | .name = EDAC_MOD_STR, |
2744 | .probe = amd64_init_one_instance, | 2753 | .probe = amd64_probe_one_instance, |
2745 | .remove = __devexit_p(amd64_remove_one_instance), | 2754 | .remove = __devexit_p(amd64_remove_one_instance), |
2746 | .id_table = amd64_pci_table, | 2755 | .id_table = amd64_pci_table, |
2747 | }; | 2756 | }; |