about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/edac/amd64_edac.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r-- drivers/edac/amd64_edac.c | 98
1 files changed, 24 insertions, 74 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 82f48ee90f11..2080b1e2e8a2 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -2282,8 +2282,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
2282 } 2282 }
2283} 2283}
2284 2284
2285static void amd64_decode_bus_error(struct mem_ctl_info *mci, 2285static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
2286 struct err_regs *info, int ecc_type) 2286 struct err_regs *info, int ecc_type)
2287{ 2287{
2288 u32 ec = ERROR_CODE(info->nbsl); 2288 u32 ec = ERROR_CODE(info->nbsl);
2289 u32 xec = EXT_ERROR_CODE(info->nbsl); 2289 u32 xec = EXT_ERROR_CODE(info->nbsl);
@@ -2316,86 +2316,23 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci,
2316 edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow"); 2316 edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow");
2317} 2317}
2318 2318
2319void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs, 2319void amd64_decode_bus_error(int node_id, struct err_regs *regs,
2320 int handle_errors) 2320 int ecc_type)
2321{ 2321{
2322 struct amd64_pvt *pvt = mci->pvt_info; 2322 struct mem_ctl_info *mci = mci_lookup[node_id];
2323 int ecc;
2324 u32 ec = ERROR_CODE(regs->nbsl);
2325 u32 xec = EXT_ERROR_CODE(regs->nbsl);
2326
2327 if (!handle_errors)
2328 return;
2329
2330 pr_emerg(" Northbridge ERROR, mc node %d", pvt->mc_node_id);
2331
2332 /*
2333 * F10h, revD can disable ErrCpu[3:0] so check that first and also the
2334 * value encoding has changed so interpret those differently
2335 */
2336 if ((boot_cpu_data.x86 == 0x10) &&
2337 (boot_cpu_data.x86_model > 8)) {
2338 if (regs->nbsh & K8_NBSH_ERR_CPU_VAL)
2339 pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf));
2340 } else {
2341 pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf)));
2342 }
2343
2344 pr_emerg(" Error: %sorrected",
2345 ((regs->nbsh & K8_NBSH_UC_ERR) ? "Unc" : "C"));
2346 pr_cont(", Report Error: %s",
2347 ((regs->nbsh & K8_NBSH_ERR_EN) ? "yes" : "no"));
2348 pr_cont(", MiscV: %svalid, CPU context corrupt: %s",
2349 ((regs->nbsh & K8_NBSH_MISCV) ? "" : "In"),
2350 ((regs->nbsh & K8_NBSH_PCC) ? "yes" : "no"));
2351
2352 /* do the two bits[14:13] together */
2353 ecc = regs->nbsh & (0x3 << 13);
2354 if (ecc)
2355 pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
2356
2357 pr_cont("\n");
2358
2359 if (TLB_ERROR(ec)) {
2360 /*
2361 * GART errors are intended to help graphics driver developers
2362 * to detect bad GART PTEs. It is recommended by AMD to disable
2363 * GART table walk error reporting by default[1] (currently
2364 * being disabled in mce_cpu_quirks()) and according to the
2365 * comment in mce_cpu_quirks(), such GART errors can be
2366 * incorrectly triggered. We may see these errors anyway and
2367 * unless requested by the user, they won't be reported.
2368 *
2369 * [1] section 13.10.1 on BIOS and Kernel Developers Guide for
2370 * AMD NPT family 0Fh processors
2371 */
2372 if (!report_gart_errors)
2373 return;
2374
2375 pr_emerg(" GART TLB error, Transaction: %s, Cache Level %s\n",
2376 TT_MSG(ec), LL_MSG(ec));
2377 } else if (MEM_ERROR(ec)) {
2378 pr_emerg(" Memory/Cache error, Transaction: %s, Type: %s,"
2379 " Cache Level: %s",
2380 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
2381 } else if (BUS_ERROR(ec)) {
2382 pr_emerg(" Bus (Link/DRAM) error\n");
2383 amd64_decode_bus_error(mci, regs, ecc);
2384 } else {
2385 /* shouldn't reach here! */
2386 amd64_mc_printk(mci, KERN_WARNING,
2387 "%s(): unknown MCE error 0x%x\n", __func__, ec);
2388 }
2389 2323
2390 pr_emerg("%s.\n", EXT_ERR_MSG(xec)); 2324 __amd64_decode_bus_error(mci, regs, ecc_type);
2391 2325
2392 /* 2326 /*
2393 * Check the UE bit of the NB status high register, if set generate some 2327 * Check the UE bit of the NB status high register, if set generate some
2394 * logs. If NOT a GART error, then process the event as a NO-INFO event. 2328 * logs. If NOT a GART error, then process the event as a NO-INFO event.
2395 * If it was a GART error, skip that process. 2329 * If it was a GART error, skip that process.
2330 *
2331 * FIXME: this should go somewhere else, if at all.
2396 */ 2332 */
2397 if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2333 if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors)
2398 edac_mc_handle_ue_no_info(mci, "UE bit is set"); 2334 edac_mc_handle_ue_no_info(mci, "UE bit is set");
2335
2399} 2336}
2400 2337
2401/* 2338/*
@@ -2406,8 +2343,10 @@ static void amd64_check(struct mem_ctl_info *mci)
2406{ 2343{
2407 struct err_regs regs; 2344 struct err_regs regs;
2408 2345
2409 if (amd64_get_error_info(mci, &regs)) 2346 if (amd64_get_error_info(mci, &regs)) {
2410 amd64_decode_nb_mce(mci, &regs, 1); 2347 struct amd64_pvt *pvt = mci->pvt_info;
2348 amd_decode_nb_mce(pvt->mc_node_id, &regs, 1);
2349 }
2411} 2350}
2412 2351
2413/* 2352/*
@@ -3103,6 +3042,13 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
3103 3042
3104 mci_lookup[node_id] = mci; 3043 mci_lookup[node_id] = mci;
3105 pvt_lookup[node_id] = NULL; 3044 pvt_lookup[node_id] = NULL;
3045
3046 /* register stuff with EDAC MCE */
3047 if (report_gart_errors)
3048 amd_report_gart_errors(true);
3049
3050 amd_register_ecc_decoder(amd64_decode_bus_error);
3051
3106 return 0; 3052 return 0;
3107 3053
3108err_add_mc: 3054err_add_mc:
@@ -3169,6 +3115,10 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
3169 3115
3170 mci_lookup[pvt->mc_node_id] = NULL; 3116 mci_lookup[pvt->mc_node_id] = NULL;
3171 3117
3118 /* unregister from EDAC MCE */
3119 amd_report_gart_errors(false);
3120 amd_unregister_ecc_decoder(amd64_decode_bus_error);
3121
3172 /* Free the EDAC CORE resources */ 3122 /* Free the EDAC CORE resources */
3173 edac_mc_free(mci); 3123 edac_mc_free(mci);
3174} 3124}