aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/i5000_edac.c181
1 files changed, 119 insertions, 62 deletions
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 4a16b5b61cfb..7d045ad0a1fe 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -119,6 +119,7 @@
119#define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \ 119#define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \
120 FERR_NF_M11ERR | \ 120 FERR_NF_M11ERR | \
121 FERR_NF_M10ERR | \ 121 FERR_NF_M10ERR | \
122 FERR_NF_M9ERR | \
122 FERR_NF_M8ERR | \ 123 FERR_NF_M8ERR | \
123 FERR_NF_M7ERR | \ 124 FERR_NF_M7ERR | \
124 FERR_NF_M6ERR | \ 125 FERR_NF_M6ERR | \
@@ -301,6 +302,9 @@ static char *numcol_toString[] = {
301}; 302};
302#endif 303#endif
303 304
305/* enables the report of miscellaneous messages as CE errors - default off */
306static int misc_messages;
307
304/* Enumeration of supported devices */ 308/* Enumeration of supported devices */
305enum i5000_chips { 309enum i5000_chips {
306 I5000P = 0, 310 I5000P = 0,
@@ -466,7 +470,8 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
466 struct i5000_error_info *info, 470 struct i5000_error_info *info,
467 int handle_errors) 471 int handle_errors)
468{ 472{
469 char msg[EDAC_MC_LABEL_LEN + 1 + 90]; 473 char msg[EDAC_MC_LABEL_LEN + 1 + 160];
474 char *specific = NULL;
470 u32 allErrors; 475 u32 allErrors;
471 int branch; 476 int branch;
472 int channel; 477 int channel;
@@ -480,11 +485,6 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
480 if (!allErrors) 485 if (!allErrors)
481 return; /* if no error, return now */ 486 return; /* if no error, return now */
482 487
483 /* ONLY ONE of the possible error bits will be set, as per the docs */
484 i5000_mc_printk(mci, KERN_ERR,
485 "FATAL ERRORS Found!!! 1st FATAL Err Reg= 0x%x\n",
486 allErrors);
487
488 branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); 488 branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
489 channel = branch; 489 channel = branch;
490 490
@@ -501,28 +501,27 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
501 rdwr ? "Write" : "Read", ras, cas); 501 rdwr ? "Write" : "Read", ras, cas);
502 502
503 /* Only 1 bit will be on */ 503 /* Only 1 bit will be on */
504 if (allErrors & FERR_FAT_M1ERR) { 504 switch (allErrors) {
505 i5000_mc_printk(mci, KERN_ERR, 505 case FERR_FAT_M1ERR:
506 "Alert on non-redundant retry or fast " 506 specific = "Alert on non-redundant retry or fast "
507 "reset timeout\n"); 507 "reset timeout";
508 508 break;
509 } else if (allErrors & FERR_FAT_M2ERR) { 509 case FERR_FAT_M2ERR:
510 i5000_mc_printk(mci, KERN_ERR, 510 specific = "Northbound CRC error on non-redundant "
511 "Northbound CRC error on non-redundant " 511 "retry";
512 "retry\n"); 512 break;
513 513 case FERR_FAT_M3ERR:
514 } else if (allErrors & FERR_FAT_M3ERR) { 514 specific = ">Tmid Thermal event with intelligent "
515 i5000_mc_printk(mci, KERN_ERR, 515 "throttling disabled";
516 ">Tmid Thermal event with intelligent " 516 break;
517 "throttling disabled\n");
518 } 517 }
519 518
520 /* Form out message */ 519 /* Form out message */
521 snprintf(msg, sizeof(msg), 520 snprintf(msg, sizeof(msg),
522 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d " 521 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d "
523 "FATAL Err=0x%x)", 522 "FATAL Err=0x%x (%s))",
524 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, 523 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
525 allErrors); 524 allErrors, specific);
526 525
527 /* Call the helper to output message */ 526 /* Call the helper to output message */
528 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); 527 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
@@ -539,7 +538,8 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
539 struct i5000_error_info *info, 538 struct i5000_error_info *info,
540 int handle_errors) 539 int handle_errors)
541{ 540{
542 char msg[EDAC_MC_LABEL_LEN + 1 + 90]; 541 char msg[EDAC_MC_LABEL_LEN + 1 + 170];
542 char *specific = NULL;
543 u32 allErrors; 543 u32 allErrors;
544 u32 ue_errors; 544 u32 ue_errors;
545 u32 ce_errors; 545 u32 ce_errors;
@@ -557,10 +557,6 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
557 return; /* if no error, return now */ 557 return; /* if no error, return now */
558 558
559 /* ONLY ONE of the possible error bits will be set, as per the docs */ 559 /* ONLY ONE of the possible error bits will be set, as per the docs */
560 i5000_mc_printk(mci, KERN_WARNING,
561 "NON-FATAL ERRORS Found!!! 1st NON-FATAL Err "
562 "Reg= 0x%x\n", allErrors);
563
564 ue_errors = allErrors & FERR_NF_UNCORRECTABLE; 560 ue_errors = allErrors & FERR_NF_UNCORRECTABLE;
565 if (ue_errors) { 561 if (ue_errors) {
566 debugf0("\tUncorrected bits= 0x%x\n", ue_errors); 562 debugf0("\tUncorrected bits= 0x%x\n", ue_errors);
@@ -579,12 +575,47 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
579 rank, channel, channel + 1, branch >> 1, bank, 575 rank, channel, channel + 1, branch >> 1, bank,
580 rdwr ? "Write" : "Read", ras, cas); 576 rdwr ? "Write" : "Read", ras, cas);
581 577
578 switch (ue_errors) {
579 case FERR_NF_M12ERR:
580 specific = "Non-Aliased Uncorrectable Patrol Data ECC";
581 break;
582 case FERR_NF_M11ERR:
583 specific = "Non-Aliased Uncorrectable Spare-Copy "
584 "Data ECC";
585 break;
586 case FERR_NF_M10ERR:
587 specific = "Non-Aliased Uncorrectable Mirrored Demand "
588 "Data ECC";
589 break;
590 case FERR_NF_M9ERR:
591 specific = "Non-Aliased Uncorrectable Non-Mirrored "
592 "Demand Data ECC";
593 break;
594 case FERR_NF_M8ERR:
595 specific = "Aliased Uncorrectable Patrol Data ECC";
596 break;
597 case FERR_NF_M7ERR:
598 specific = "Aliased Uncorrectable Spare-Copy Data ECC";
599 break;
600 case FERR_NF_M6ERR:
601 specific = "Aliased Uncorrectable Mirrored Demand "
602 "Data ECC";
603 break;
604 case FERR_NF_M5ERR:
605 specific = "Aliased Uncorrectable Non-Mirrored Demand "
606 "Data ECC";
607 break;
608 case FERR_NF_M4ERR:
609 specific = "Uncorrectable Data ECC on Replay";
610 break;
611 }
612
582 /* Form out message */ 613 /* Form out message */
583 snprintf(msg, sizeof(msg), 614 snprintf(msg, sizeof(msg),
584 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " 615 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
585 "CAS=%d, UE Err=0x%x)", 616 "CAS=%d, UE Err=0x%x (%s))",
586 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, 617 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
587 ue_errors); 618 ue_errors, specific);
588 619
589 /* Call the helper to output message */ 620 /* Call the helper to output message */
590 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); 621 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
@@ -616,51 +647,74 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
616 rank, channel, branch >> 1, bank, 647 rank, channel, branch >> 1, bank,
617 rdwr ? "Write" : "Read", ras, cas); 648 rdwr ? "Write" : "Read", ras, cas);
618 649
650 switch (ce_errors) {
651 case FERR_NF_M17ERR:
652 specific = "Correctable Non-Mirrored Demand Data ECC";
653 break;
654 case FERR_NF_M18ERR:
655 specific = "Correctable Mirrored Demand Data ECC";
656 break;
657 case FERR_NF_M19ERR:
658 specific = "Correctable Spare-Copy Data ECC";
659 break;
660 case FERR_NF_M20ERR:
661 specific = "Correctable Patrol Data ECC";
662 break;
663 }
664
619 /* Form out message */ 665 /* Form out message */
620 snprintf(msg, sizeof(msg), 666 snprintf(msg, sizeof(msg),
621 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " 667 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
622 "CAS=%d, CE Err=0x%x)", branch >> 1, bank, 668 "CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank,
623 rdwr ? "Write" : "Read", ras, cas, ce_errors); 669 rdwr ? "Write" : "Read", ras, cas, ce_errors,
670 specific);
624 671
625 /* Call the helper to output message */ 672 /* Call the helper to output message */
626 edac_mc_handle_fbd_ce(mci, rank, channel, msg); 673 edac_mc_handle_fbd_ce(mci, rank, channel, msg);
627 } 674 }
628 675
629 /* See if any of the thermal errors have fired */ 676 if (!misc_messages)
630 misc_errors = allErrors & FERR_NF_THERMAL; 677 return;
631 if (misc_errors) {
632 i5000_printk(KERN_WARNING, "\tTHERMAL Error, bits= 0x%x\n",
633 misc_errors);
634 }
635
636 /* See if any of the thermal errors have fired */
637 misc_errors = allErrors & FERR_NF_NON_RETRY;
638 if (misc_errors) {
639 i5000_printk(KERN_WARNING, "\tNON-Retry Errors, bits= 0x%x\n",
640 misc_errors);
641 }
642 678
643 /* See if any of the thermal errors have fired */ 679 misc_errors = allErrors & (FERR_NF_NON_RETRY | FERR_NF_NORTH_CRC |
644 misc_errors = allErrors & FERR_NF_NORTH_CRC; 680 FERR_NF_SPD_PROTOCOL | FERR_NF_DIMM_SPARE);
645 if (misc_errors) { 681 if (misc_errors) {
646 i5000_printk(KERN_WARNING, 682 switch (misc_errors) {
647 "\tNORTHBOUND CRC Error, bits= 0x%x\n", 683 case FERR_NF_M13ERR:
648 misc_errors); 684 specific = "Non-Retry or Redundant Retry FBD Memory "
649 } 685 "Alert or Redundant Fast Reset Timeout";
686 break;
687 case FERR_NF_M14ERR:
688 specific = "Non-Retry or Redundant Retry FBD "
689 "Configuration Alert";
690 break;
691 case FERR_NF_M15ERR:
692 specific = "Non-Retry or Redundant Retry FBD "
693 "Northbound CRC error on read data";
694 break;
695 case FERR_NF_M21ERR:
696 specific = "FBD Northbound CRC error on "
697 "FBD Sync Status";
698 break;
699 case FERR_NF_M22ERR:
700 specific = "SPD protocol error";
701 break;
702 case FERR_NF_M27ERR:
703 specific = "DIMM-spare copy started";
704 break;
705 case FERR_NF_M28ERR:
706 specific = "DIMM-spare copy completed";
707 break;
708 }
709 branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
650 710
651 /* See if any of the thermal errors have fired */ 711 /* Form out message */
652 misc_errors = allErrors & FERR_NF_SPD_PROTOCOL; 712 snprintf(msg, sizeof(msg),
653 if (misc_errors) { 713 "(Branch=%d Err=%#x (%s))", branch >> 1,
654 i5000_printk(KERN_WARNING, 714 misc_errors, specific);
655 "\tSPD Protocol Error, bits= 0x%x\n",
656 misc_errors);
657 }
658 715
659 /* See if any of the thermal errors have fired */ 716 /* Call the helper to output message */
660 misc_errors = allErrors & FERR_NF_DIMM_SPARE; 717 edac_mc_handle_fbd_ce(mci, 0, 0, msg);
661 if (misc_errors) {
662 i5000_printk(KERN_WARNING, "\tDIMM-Spare Error, bits= 0x%x\n",
663 misc_errors);
664 } 718 }
665} 719}
666 720
@@ -1497,3 +1551,6 @@ MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - "
1497 1551
1498module_param(edac_op_state, int, 0444); 1552module_param(edac_op_state, int, 0444);
1499MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 1553MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
1554module_param(misc_messages, int, 0444);
1555MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages");
1556