diff options
-rw-r--r-- | drivers/edac/i5000_edac.c | 181 |
1 files changed, 119 insertions, 62 deletions
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index 4a16b5b61cfb..7d045ad0a1fe 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c | |||
@@ -119,6 +119,7 @@ | |||
119 | #define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \ | 119 | #define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \ |
120 | FERR_NF_M11ERR | \ | 120 | FERR_NF_M11ERR | \ |
121 | FERR_NF_M10ERR | \ | 121 | FERR_NF_M10ERR | \ |
122 | FERR_NF_M9ERR | \ | ||
122 | FERR_NF_M8ERR | \ | 123 | FERR_NF_M8ERR | \ |
123 | FERR_NF_M7ERR | \ | 124 | FERR_NF_M7ERR | \ |
124 | FERR_NF_M6ERR | \ | 125 | FERR_NF_M6ERR | \ |
@@ -301,6 +302,9 @@ static char *numcol_toString[] = { | |||
301 | }; | 302 | }; |
302 | #endif | 303 | #endif |
303 | 304 | ||
305 | /* enables the report of miscellaneous messages as CE errors - default off */ | ||
306 | static int misc_messages; | ||
307 | |||
304 | /* Enumeration of supported devices */ | 308 | /* Enumeration of supported devices */ |
305 | enum i5000_chips { | 309 | enum i5000_chips { |
306 | I5000P = 0, | 310 | I5000P = 0, |
@@ -466,7 +470,8 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, | |||
466 | struct i5000_error_info *info, | 470 | struct i5000_error_info *info, |
467 | int handle_errors) | 471 | int handle_errors) |
468 | { | 472 | { |
469 | char msg[EDAC_MC_LABEL_LEN + 1 + 90]; | 473 | char msg[EDAC_MC_LABEL_LEN + 1 + 160]; |
474 | char *specific = NULL; | ||
470 | u32 allErrors; | 475 | u32 allErrors; |
471 | int branch; | 476 | int branch; |
472 | int channel; | 477 | int channel; |
@@ -480,11 +485,6 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, | |||
480 | if (!allErrors) | 485 | if (!allErrors) |
481 | return; /* if no error, return now */ | 486 | return; /* if no error, return now */ |
482 | 487 | ||
483 | /* ONLY ONE of the possible error bits will be set, as per the docs */ | ||
484 | i5000_mc_printk(mci, KERN_ERR, | ||
485 | "FATAL ERRORS Found!!! 1st FATAL Err Reg= 0x%x\n", | ||
486 | allErrors); | ||
487 | |||
488 | branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); | 488 | branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); |
489 | channel = branch; | 489 | channel = branch; |
490 | 490 | ||
@@ -501,28 +501,27 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci, | |||
501 | rdwr ? "Write" : "Read", ras, cas); | 501 | rdwr ? "Write" : "Read", ras, cas); |
502 | 502 | ||
503 | /* Only 1 bit will be on */ | 503 | /* Only 1 bit will be on */ |
504 | if (allErrors & FERR_FAT_M1ERR) { | 504 | switch (allErrors) { |
505 | i5000_mc_printk(mci, KERN_ERR, | 505 | case FERR_FAT_M1ERR: |
506 | "Alert on non-redundant retry or fast " | 506 | specific = "Alert on non-redundant retry or fast " |
507 | "reset timeout\n"); | 507 | "reset timeout"; |
508 | 508 | break; | |
509 | } else if (allErrors & FERR_FAT_M2ERR) { | 509 | case FERR_FAT_M2ERR: |
510 | i5000_mc_printk(mci, KERN_ERR, | 510 | specific = "Northbound CRC error on non-redundant " |
511 | "Northbound CRC error on non-redundant " | 511 | "retry"; |
512 | "retry\n"); | 512 | break; |
513 | 513 | case FERR_FAT_M3ERR: | |
514 | } else if (allErrors & FERR_FAT_M3ERR) { | 514 | specific = ">Tmid Thermal event with intelligent " |
515 | i5000_mc_printk(mci, KERN_ERR, | 515 | "throttling disabled"; |
516 | ">Tmid Thermal event with intelligent " | 516 | break; |
517 | "throttling disabled\n"); | ||
518 | } | 517 | } |
519 | 518 | ||
520 | /* Form out message */ | 519 | /* Form out message */ |
521 | snprintf(msg, sizeof(msg), | 520 | snprintf(msg, sizeof(msg), |
522 | "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d " | 521 | "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d " |
523 | "FATAL Err=0x%x)", | 522 | "FATAL Err=0x%x (%s))", |
524 | branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, | 523 | branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, |
525 | allErrors); | 524 | allErrors, specific); |
526 | 525 | ||
527 | /* Call the helper to output message */ | 526 | /* Call the helper to output message */ |
528 | edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); | 527 | edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); |
@@ -539,7 +538,8 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, | |||
539 | struct i5000_error_info *info, | 538 | struct i5000_error_info *info, |
540 | int handle_errors) | 539 | int handle_errors) |
541 | { | 540 | { |
542 | char msg[EDAC_MC_LABEL_LEN + 1 + 90]; | 541 | char msg[EDAC_MC_LABEL_LEN + 1 + 170]; |
542 | char *specific = NULL; | ||
543 | u32 allErrors; | 543 | u32 allErrors; |
544 | u32 ue_errors; | 544 | u32 ue_errors; |
545 | u32 ce_errors; | 545 | u32 ce_errors; |
@@ -557,10 +557,6 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, | |||
557 | return; /* if no error, return now */ | 557 | return; /* if no error, return now */ |
558 | 558 | ||
559 | /* ONLY ONE of the possible error bits will be set, as per the docs */ | 559 | /* ONLY ONE of the possible error bits will be set, as per the docs */ |
560 | i5000_mc_printk(mci, KERN_WARNING, | ||
561 | "NON-FATAL ERRORS Found!!! 1st NON-FATAL Err " | ||
562 | "Reg= 0x%x\n", allErrors); | ||
563 | |||
564 | ue_errors = allErrors & FERR_NF_UNCORRECTABLE; | 560 | ue_errors = allErrors & FERR_NF_UNCORRECTABLE; |
565 | if (ue_errors) { | 561 | if (ue_errors) { |
566 | debugf0("\tUncorrected bits= 0x%x\n", ue_errors); | 562 | debugf0("\tUncorrected bits= 0x%x\n", ue_errors); |
@@ -579,12 +575,47 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, | |||
579 | rank, channel, channel + 1, branch >> 1, bank, | 575 | rank, channel, channel + 1, branch >> 1, bank, |
580 | rdwr ? "Write" : "Read", ras, cas); | 576 | rdwr ? "Write" : "Read", ras, cas); |
581 | 577 | ||
578 | switch (ue_errors) { | ||
579 | case FERR_NF_M12ERR: | ||
580 | specific = "Non-Aliased Uncorrectable Patrol Data ECC"; | ||
581 | break; | ||
582 | case FERR_NF_M11ERR: | ||
583 | specific = "Non-Aliased Uncorrectable Spare-Copy " | ||
584 | "Data ECC"; | ||
585 | break; | ||
586 | case FERR_NF_M10ERR: | ||
587 | specific = "Non-Aliased Uncorrectable Mirrored Demand " | ||
588 | "Data ECC"; | ||
589 | break; | ||
590 | case FERR_NF_M9ERR: | ||
591 | specific = "Non-Aliased Uncorrectable Non-Mirrored " | ||
592 | "Demand Data ECC"; | ||
593 | break; | ||
594 | case FERR_NF_M8ERR: | ||
595 | specific = "Aliased Uncorrectable Patrol Data ECC"; | ||
596 | break; | ||
597 | case FERR_NF_M7ERR: | ||
598 | specific = "Aliased Uncorrectable Spare-Copy Data ECC"; | ||
599 | break; | ||
600 | case FERR_NF_M6ERR: | ||
601 | specific = "Aliased Uncorrectable Mirrored Demand " | ||
602 | "Data ECC"; | ||
603 | break; | ||
604 | case FERR_NF_M5ERR: | ||
605 | specific = "Aliased Uncorrectable Non-Mirrored Demand " | ||
606 | "Data ECC"; | ||
607 | break; | ||
608 | case FERR_NF_M4ERR: | ||
609 | specific = "Uncorrectable Data ECC on Replay"; | ||
610 | break; | ||
611 | } | ||
612 | |||
582 | /* Form out message */ | 613 | /* Form out message */ |
583 | snprintf(msg, sizeof(msg), | 614 | snprintf(msg, sizeof(msg), |
584 | "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " | 615 | "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " |
585 | "CAS=%d, UE Err=0x%x)", | 616 | "CAS=%d, UE Err=0x%x (%s))", |
586 | branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, | 617 | branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, |
587 | ue_errors); | 618 | ue_errors, specific); |
588 | 619 | ||
589 | /* Call the helper to output message */ | 620 | /* Call the helper to output message */ |
590 | edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); | 621 | edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); |
@@ -616,51 +647,74 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci, | |||
616 | rank, channel, branch >> 1, bank, | 647 | rank, channel, branch >> 1, bank, |
617 | rdwr ? "Write" : "Read", ras, cas); | 648 | rdwr ? "Write" : "Read", ras, cas); |
618 | 649 | ||
650 | switch (ce_errors) { | ||
651 | case FERR_NF_M17ERR: | ||
652 | specific = "Correctable Non-Mirrored Demand Data ECC"; | ||
653 | break; | ||
654 | case FERR_NF_M18ERR: | ||
655 | specific = "Correctable Mirrored Demand Data ECC"; | ||
656 | break; | ||
657 | case FERR_NF_M19ERR: | ||
658 | specific = "Correctable Spare-Copy Data ECC"; | ||
659 | break; | ||
660 | case FERR_NF_M20ERR: | ||
661 | specific = "Correctable Patrol Data ECC"; | ||
662 | break; | ||
663 | } | ||
664 | |||
619 | /* Form out message */ | 665 | /* Form out message */ |
620 | snprintf(msg, sizeof(msg), | 666 | snprintf(msg, sizeof(msg), |
621 | "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " | 667 | "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " |
622 | "CAS=%d, CE Err=0x%x)", branch >> 1, bank, | 668 | "CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank, |
623 | rdwr ? "Write" : "Read", ras, cas, ce_errors); | 669 | rdwr ? "Write" : "Read", ras, cas, ce_errors, |
670 | specific); | ||
624 | 671 | ||
625 | /* Call the helper to output message */ | 672 | /* Call the helper to output message */ |
626 | edac_mc_handle_fbd_ce(mci, rank, channel, msg); | 673 | edac_mc_handle_fbd_ce(mci, rank, channel, msg); |
627 | } | 674 | } |
628 | 675 | ||
629 | /* See if any of the thermal errors have fired */ | 676 | if (!misc_messages) |
630 | misc_errors = allErrors & FERR_NF_THERMAL; | 677 | return; |
631 | if (misc_errors) { | ||
632 | i5000_printk(KERN_WARNING, "\tTHERMAL Error, bits= 0x%x\n", | ||
633 | misc_errors); | ||
634 | } | ||
635 | |||
636 | /* See if any of the thermal errors have fired */ | ||
637 | misc_errors = allErrors & FERR_NF_NON_RETRY; | ||
638 | if (misc_errors) { | ||
639 | i5000_printk(KERN_WARNING, "\tNON-Retry Errors, bits= 0x%x\n", | ||
640 | misc_errors); | ||
641 | } | ||
642 | 678 | ||
643 | /* See if any of the thermal errors have fired */ | 679 | misc_errors = allErrors & (FERR_NF_NON_RETRY | FERR_NF_NORTH_CRC | |
644 | misc_errors = allErrors & FERR_NF_NORTH_CRC; | 680 | FERR_NF_SPD_PROTOCOL | FERR_NF_DIMM_SPARE); |
645 | if (misc_errors) { | 681 | if (misc_errors) { |
646 | i5000_printk(KERN_WARNING, | 682 | switch (misc_errors) { |
647 | "\tNORTHBOUND CRC Error, bits= 0x%x\n", | 683 | case FERR_NF_M13ERR: |
648 | misc_errors); | 684 | specific = "Non-Retry or Redundant Retry FBD Memory " |
649 | } | 685 | "Alert or Redundant Fast Reset Timeout"; |
686 | break; | ||
687 | case FERR_NF_M14ERR: | ||
688 | specific = "Non-Retry or Redundant Retry FBD " | ||
689 | "Configuration Alert"; | ||
690 | break; | ||
691 | case FERR_NF_M15ERR: | ||
692 | specific = "Non-Retry or Redundant Retry FBD " | ||
693 | "Northbound CRC error on read data"; | ||
694 | break; | ||
695 | case FERR_NF_M21ERR: | ||
696 | specific = "FBD Northbound CRC error on " | ||
697 | "FBD Sync Status"; | ||
698 | break; | ||
699 | case FERR_NF_M22ERR: | ||
700 | specific = "SPD protocol error"; | ||
701 | break; | ||
702 | case FERR_NF_M27ERR: | ||
703 | specific = "DIMM-spare copy started"; | ||
704 | break; | ||
705 | case FERR_NF_M28ERR: | ||
706 | specific = "DIMM-spare copy completed"; | ||
707 | break; | ||
708 | } | ||
709 | branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd); | ||
650 | 710 | ||
651 | /* See if any of the thermal errors have fired */ | 711 | /* Form out message */ |
652 | misc_errors = allErrors & FERR_NF_SPD_PROTOCOL; | 712 | snprintf(msg, sizeof(msg), |
653 | if (misc_errors) { | 713 | "(Branch=%d Err=%#x (%s))", branch >> 1, |
654 | i5000_printk(KERN_WARNING, | 714 | misc_errors, specific); |
655 | "\tSPD Protocol Error, bits= 0x%x\n", | ||
656 | misc_errors); | ||
657 | } | ||
658 | 715 | ||
659 | /* See if any of the thermal errors have fired */ | 716 | /* Call the helper to output message */ |
660 | misc_errors = allErrors & FERR_NF_DIMM_SPARE; | 717 | edac_mc_handle_fbd_ce(mci, 0, 0, msg); |
661 | if (misc_errors) { | ||
662 | i5000_printk(KERN_WARNING, "\tDIMM-Spare Error, bits= 0x%x\n", | ||
663 | misc_errors); | ||
664 | } | 718 | } |
665 | } | 719 | } |
666 | 720 | ||
@@ -1497,3 +1551,6 @@ MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " | |||
1497 | 1551 | ||
1498 | module_param(edac_op_state, int, 0444); | 1552 | module_param(edac_op_state, int, 0444); |
1499 | MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); | 1553 | MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); |
1554 | module_param(misc_messages, int, 0444); | ||
1555 | MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages"); | ||
1556 | |||