diff options
-rw-r--r-- | drivers/edac/amd64_edac.c | 125 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 26 | ||||
-rw-r--r-- | drivers/edac/amd64_edac_dbg.c | 2 | ||||
-rw-r--r-- | drivers/edac/edac_mce_amd.h | 2 |
4 files changed, 56 insertions, 99 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c9b88d829701..5af87d44c80c 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c | |||
@@ -2355,62 +2355,47 @@ static void amd64_decode_bus_error(struct mem_ctl_info *mci, | |||
2355 | "Error Overflow set"); | 2355 | "Error Overflow set"); |
2356 | } | 2356 | } |
2357 | 2357 | ||
2358 | int amd64_process_error_info(struct mem_ctl_info *mci, | 2358 | void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *regs, |
2359 | struct err_regs *regs, | 2359 | int handle_errors) |
2360 | int handle_errors) | ||
2361 | { | 2360 | { |
2362 | struct amd64_pvt *pvt; | 2361 | struct amd64_pvt *pvt = mci->pvt_info; |
2363 | u32 err_code, ext_ec; | 2362 | int ecc; |
2364 | int gart_tlb_error = 0; | 2363 | u32 ec = ERROR_CODE(regs->nbsl); |
2365 | 2364 | u32 xec = EXT_ERROR_CODE(regs->nbsl); | |
2366 | pvt = mci->pvt_info; | ||
2367 | 2365 | ||
2368 | if (!handle_errors) | 2366 | if (!handle_errors) |
2369 | return 1; | 2367 | return; |
2370 | 2368 | ||
2371 | debugf1("NorthBridge ERROR: mci(0x%p)\n", mci); | 2369 | pr_emerg(" Northbridge ERROR, mc node %d", pvt->mc_node_id); |
2372 | debugf1(" MC node(%d) Error-Address(0x%.8x-%.8x)\n", | ||
2373 | pvt->mc_node_id, regs->nbeah, regs->nbeal); | ||
2374 | debugf1(" nbsh(0x%.8x) nbsl(0x%.8x)\n", | ||
2375 | regs->nbsh, regs->nbsl); | ||
2376 | debugf1(" Valid Error=%s Overflow=%s\n", | ||
2377 | (regs->nbsh & K8_NBSH_VALID_BIT) ? "True" : "False", | ||
2378 | (regs->nbsh & K8_NBSH_OVERFLOW) ? "True" : "False"); | ||
2379 | debugf1(" Err Uncorrected=%s MCA Error Reporting=%s\n", | ||
2380 | (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) ? | ||
2381 | "True" : "False", | ||
2382 | (regs->nbsh & K8_NBSH_ERR_ENABLE) ? | ||
2383 | "True" : "False"); | ||
2384 | debugf1(" MiscErr Valid=%s ErrAddr Valid=%s PCC=%s\n", | ||
2385 | (regs->nbsh & K8_NBSH_MISC_ERR_VALID) ? | ||
2386 | "True" : "False", | ||
2387 | (regs->nbsh & K8_NBSH_VALID_ERROR_ADDR) ? | ||
2388 | "True" : "False", | ||
2389 | (regs->nbsh & K8_NBSH_PCC) ? | ||
2390 | "True" : "False"); | ||
2391 | debugf1(" CECC=%s UECC=%s Found by Scruber=%s\n", | ||
2392 | (regs->nbsh & K8_NBSH_CECC) ? | ||
2393 | "True" : "False", | ||
2394 | (regs->nbsh & K8_NBSH_UECC) ? | ||
2395 | "True" : "False", | ||
2396 | (regs->nbsh & K8_NBSH_ERR_SCRUBER) ? | ||
2397 | "True" : "False"); | ||
2398 | debugf1(" CORE0=%s CORE1=%s CORE2=%s CORE3=%s\n", | ||
2399 | (regs->nbsh & K8_NBSH_CORE0) ? "True" : "False", | ||
2400 | (regs->nbsh & K8_NBSH_CORE1) ? "True" : "False", | ||
2401 | (regs->nbsh & K8_NBSH_CORE2) ? "True" : "False", | ||
2402 | (regs->nbsh & K8_NBSH_CORE3) ? "True" : "False"); | ||
2403 | 2370 | ||
2371 | /* | ||
2372 | * F10h, revD can disable ErrCpu[3:0] so check that first and also the | ||
2373 | * value encoding has changed so interpret those differently | ||
2374 | */ | ||
2375 | if ((boot_cpu_data.x86 == 0x10) && | ||
2376 | (boot_cpu_data.x86_model > 8)) { | ||
2377 | if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) | ||
2378 | pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); | ||
2379 | } else { | ||
2380 | pr_cont(", core: %d\n", ilog2((regs->nbsh & 0xf))); | ||
2381 | } | ||
2404 | 2382 | ||
2405 | err_code = ERROR_CODE(regs->nbsl); | 2383 | pr_emerg(" Error: %sorrected", |
2384 | ((regs->nbsh & K8_NBSH_UC_ERR) ? "Unc" : "C")); | ||
2385 | pr_cont(", Report Error: %s", | ||
2386 | ((regs->nbsh & K8_NBSH_ERR_EN) ? "yes" : "no")); | ||
2387 | pr_cont(", MiscV: %svalid, CPU context corrupt: %s", | ||
2388 | ((regs->nbsh & K8_NBSH_MISCV) ? "" : "In"), | ||
2389 | ((regs->nbsh & K8_NBSH_PCC) ? "yes" : "no")); | ||
2406 | 2390 | ||
2407 | /* Determine which error type: | 2391 | /* do the two bits[14:13] together */ |
2408 | * 1) GART errors - non-fatal, developmental events | 2392 | ecc = regs->nbsh & (0x3 << 13); |
2409 | * 2) MEMORY errors | 2393 | if (ecc) |
2410 | * 3) BUS errors | 2394 | pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); |
2411 | * 4) Unknown error | 2395 | |
2412 | */ | 2396 | pr_cont("\n"); |
2413 | if (TLB_ERROR(err_code)) { | 2397 | |
2398 | if (TLB_ERROR(ec)) { | ||
2414 | /* | 2399 | /* |
2415 | * GART errors are intended to help graphics driver developers | 2400 | * GART errors are intended to help graphics driver developers |
2416 | * to detect bad GART PTEs. It is recommended by AMD to disable | 2401 | * to detect bad GART PTEs. It is recommended by AMD to disable |
@@ -2423,52 +2408,34 @@ int amd64_process_error_info(struct mem_ctl_info *mci, | |||
2423 | * [1] section 13.10.1 on BIOS and Kernel Developers Guide for | 2408 | * [1] section 13.10.1 on BIOS and Kernel Developers Guide for |
2424 | * AMD NPT family 0Fh processors | 2409 | * AMD NPT family 0Fh processors |
2425 | */ | 2410 | */ |
2426 | if (report_gart_errors == 0) | 2411 | if (!report_gart_errors) |
2427 | return 1; | 2412 | return; |
2428 | |||
2429 | /* | ||
2430 | * Only if GART error reporting is requested should we generate | ||
2431 | * any logs. | ||
2432 | */ | ||
2433 | gart_tlb_error = 1; | ||
2434 | 2413 | ||
2435 | debugf1("GART TLB error\n"); | 2414 | pr_emerg("GART TLB error\n"); |
2436 | amd64_decode_gart_tlb_error(mci, regs); | 2415 | amd64_decode_gart_tlb_error(mci, regs); |
2437 | } else if (MEM_ERROR(err_code)) { | 2416 | } else if (MEM_ERROR(ec)) { |
2438 | debugf1("Memory/Cache error\n"); | 2417 | pr_emerg("Memory/Cache error\n"); |
2439 | amd64_decode_mem_cache_error(mci, regs); | 2418 | amd64_decode_mem_cache_error(mci, regs); |
2440 | } else if (BUS_ERROR(err_code)) { | 2419 | } else if (BUS_ERROR(ec)) { |
2441 | debugf1("Bus (Link/DRAM) error\n"); | 2420 | pr_emerg("Bus (Link/DRAM) error\n"); |
2442 | amd64_decode_bus_error(mci, regs); | 2421 | amd64_decode_bus_error(mci, regs); |
2443 | } else { | 2422 | } else { |
2444 | /* shouldn't reach here! */ | 2423 | /* shouldn't reach here! */ |
2445 | amd64_mc_printk(mci, KERN_WARNING, | 2424 | amd64_mc_printk(mci, KERN_WARNING, |
2446 | "%s(): unknown MCE error 0x%x\n", __func__, | 2425 | "%s(): unknown MCE error 0x%x\n", __func__, |
2447 | err_code); | 2426 | ec); |
2448 | } | 2427 | } |
2449 | 2428 | ||
2450 | ext_ec = EXT_ERROR_CODE(regs->nbsl); | 2429 | pr_emerg("%s.\n", EXT_ERR_MSG(xec)); |
2451 | amd64_mc_printk(mci, KERN_ERR, | ||
2452 | "ExtErr=(0x%x) %s\n", ext_ec, ext_msgs[ext_ec]); | ||
2453 | 2430 | ||
2454 | /* | 2431 | /* |
2455 | * Check the UE bit of the NB status high register, if set generate some | 2432 | * Check the UE bit of the NB status high register, if set generate some |
2456 | * logs. If NOT a GART error, then process the event as a NO-INFO event. | 2433 | * logs. If NOT a GART error, then process the event as a NO-INFO event. |
2457 | * If it was a GART error, skip that process. | 2434 | * If it was a GART error, skip that process. |
2458 | */ | 2435 | */ |
2459 | if (regs->nbsh & K8_NBSH_UNCORRECTED_ERR) { | 2436 | if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) |
2460 | amd64_mc_printk(mci, KERN_CRIT, "uncorrected error\n"); | 2437 | edac_mc_handle_ue_no_info(mci, "UE bit is set"); |
2461 | if (!gart_tlb_error) | ||
2462 | edac_mc_handle_ue_no_info(mci, "UE bit is set\n"); | ||
2463 | } | ||
2464 | |||
2465 | if (regs->nbsh & K8_NBSH_PCC) | ||
2466 | amd64_mc_printk(mci, KERN_CRIT, | ||
2467 | "PCC (processor context corrupt) set\n"); | ||
2468 | |||
2469 | return 1; | ||
2470 | } | 2438 | } |
2471 | EXPORT_SYMBOL_GPL(amd64_process_error_info); | ||
2472 | 2439 | ||
2473 | /* | 2440 | /* |
2474 | * The main polling 'check' function, called FROM the edac core to perform the | 2441 | * The main polling 'check' function, called FROM the edac core to perform the |
@@ -2479,7 +2446,7 @@ static void amd64_check(struct mem_ctl_info *mci) | |||
2479 | struct err_regs regs; | 2446 | struct err_regs regs; |
2480 | 2447 | ||
2481 | if (amd64_get_error_info(mci, &regs)) | 2448 | if (amd64_get_error_info(mci, &regs)) |
2482 | amd64_process_error_info(mci, &regs, 1); | 2449 | amd64_decode_nb_mce(mci, &regs, 1); |
2483 | } | 2450 | } |
2484 | 2451 | ||
2485 | /* | 2452 | /* |
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index bde8f78551f9..ecab0c9fd14e 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h | |||
@@ -306,16 +306,7 @@ enum { | |||
306 | 306 | ||
307 | /* Family F10h: Normalized Extended Error Codes */ | 307 | /* Family F10h: Normalized Extended Error Codes */ |
308 | #define F10_NBSL_EXT_ERR_RES 0x0 | 308 | #define F10_NBSL_EXT_ERR_RES 0x0 |
309 | #define F10_NBSL_EXT_ERR_CRC 0x1 | ||
310 | #define F10_NBSL_EXT_ERR_SYNC 0x2 | ||
311 | #define F10_NBSL_EXT_ERR_MST 0x3 | ||
312 | #define F10_NBSL_EXT_ERR_TGT 0x4 | ||
313 | #define F10_NBSL_EXT_ERR_GART 0x5 | ||
314 | #define F10_NBSL_EXT_ERR_RMW 0x6 | ||
315 | #define F10_NBSL_EXT_ERR_WDT 0x7 | ||
316 | #define F10_NBSL_EXT_ERR_ECC 0x8 | 309 | #define F10_NBSL_EXT_ERR_ECC 0x8 |
317 | #define F10_NBSL_EXT_ERR_DEV 0x9 | ||
318 | #define F10_NBSL_EXT_ERR_LINK_DATA 0xA | ||
319 | 310 | ||
320 | /* Next two are overloaded values */ | 311 | /* Next two are overloaded values */ |
321 | #define F10_NBSL_EXT_ERR_LINK_PROTO 0xB | 312 | #define F10_NBSL_EXT_ERR_LINK_PROTO 0xB |
@@ -360,18 +351,15 @@ enum { | |||
360 | 351 | ||
361 | #define K8_NBSH_VALID_BIT BIT(31) | 352 | #define K8_NBSH_VALID_BIT BIT(31) |
362 | #define K8_NBSH_OVERFLOW BIT(30) | 353 | #define K8_NBSH_OVERFLOW BIT(30) |
363 | #define K8_NBSH_UNCORRECTED_ERR BIT(29) | 354 | #define K8_NBSH_UC_ERR BIT(29) |
364 | #define K8_NBSH_ERR_ENABLE BIT(28) | 355 | #define K8_NBSH_ERR_EN BIT(28) |
365 | #define K8_NBSH_MISC_ERR_VALID BIT(27) | 356 | #define K8_NBSH_MISCV BIT(27) |
366 | #define K8_NBSH_VALID_ERROR_ADDR BIT(26) | 357 | #define K8_NBSH_VALID_ERROR_ADDR BIT(26) |
367 | #define K8_NBSH_PCC BIT(25) | 358 | #define K8_NBSH_PCC BIT(25) |
359 | #define K8_NBSH_ERR_CPU_VAL BIT(24) | ||
368 | #define K8_NBSH_CECC BIT(14) | 360 | #define K8_NBSH_CECC BIT(14) |
369 | #define K8_NBSH_UECC BIT(13) | 361 | #define K8_NBSH_UECC BIT(13) |
370 | #define K8_NBSH_ERR_SCRUBER BIT(8) | 362 | #define K8_NBSH_ERR_SCRUBER BIT(8) |
371 | #define K8_NBSH_CORE3 BIT(3) | ||
372 | #define K8_NBSH_CORE2 BIT(2) | ||
373 | #define K8_NBSH_CORE1 BIT(1) | ||
374 | #define K8_NBSH_CORE0 BIT(0) | ||
375 | 363 | ||
376 | #define EXTRACT_ERR_CPU_MAP(x) ((x) & 0xF) | 364 | #define EXTRACT_ERR_CPU_MAP(x) ((x) & 0xF) |
377 | 365 | ||
@@ -622,8 +610,8 @@ static inline struct low_ops *family_ops(int index) | |||
622 | #define F10_MIN_SCRUB_RATE_BITS 0x5 | 610 | #define F10_MIN_SCRUB_RATE_BITS 0x5 |
623 | #define F11_MIN_SCRUB_RATE_BITS 0x6 | 611 | #define F11_MIN_SCRUB_RATE_BITS 0x6 |
624 | 612 | ||
625 | int amd64_process_error_info(struct mem_ctl_info *mci, | 613 | void amd64_decode_nb_mce(struct mem_ctl_info *mci, struct err_regs *info, |
626 | struct err_regs *info, | 614 | int handle_errors); |
627 | int handle_errors); | 615 | |
628 | int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, | 616 | int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, |
629 | u64 *hole_offset, u64 *hole_size); | 617 | u64 *hole_offset, u64 *hole_size); |
diff --git a/drivers/edac/amd64_edac_dbg.c b/drivers/edac/amd64_edac_dbg.c index 0a41b248a4ad..bcb4e2eba3dc 100644 --- a/drivers/edac/amd64_edac_dbg.c +++ b/drivers/edac/amd64_edac_dbg.c | |||
@@ -24,7 +24,7 @@ static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data, | |||
24 | 24 | ||
25 | /* Process the Mapping request */ | 25 | /* Process the Mapping request */ |
26 | /* TODO: Add race prevention */ | 26 | /* TODO: Add race prevention */ |
27 | amd64_process_error_info(mci, &pvt->ctl_error_info, 1); | 27 | amd64_decode_nb_mce(mci, &pvt->ctl_error_info, 1); |
28 | 28 | ||
29 | return count; | 29 | return count; |
30 | } | 30 | } |
diff --git a/drivers/edac/edac_mce_amd.h b/drivers/edac/edac_mce_amd.h index 81f9dcf9990a..39971cdabb51 100644 --- a/drivers/edac/edac_mce_amd.h +++ b/drivers/edac/edac_mce_amd.h | |||
@@ -1,5 +1,7 @@ | |||
1 | #define ERROR_CODE(x) ((x) & 0xffff) | 1 | #define ERROR_CODE(x) ((x) & 0xffff) |
2 | #define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) | 2 | #define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) |
3 | #define EXT_ERR_MSG(x) ext_msgs[EXT_ERROR_CODE(x)] | ||
4 | |||
3 | #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) | 5 | #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) |
4 | #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) | 6 | #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) |
5 | 7 | ||