aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
author Aristeu Rozanski <aris@redhat.com> 2008-10-16 01:04:31 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-10-16 14:21:48 -0400
commit c066740739c4251effc349e3beae02ead9049e5b (patch)
tree 4c87cc7a33bcc7a3d86076782136e96a4fe215b3 /drivers/edac
parent 60be75515e45167d48d3677ae05b522ba7762d40 (diff)
edac i5000: fix error messages
Update the i5000_edac messages, making everything pass through the EDAC (so the log controls will work) and being more specific about the errors. Also, this makes the miscellaneous errors optional and disabled by default. As I couldn't find any information about M23ERR-M26ERR (FERR_NF_THERMAL) on FERR_NF_FBD, I'm removing them. Signed-off-by: Aristeu Rozanski <aris@redhat.com> Signed-off-by: Doug Thompson <dougthompson@xmission.com> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/edac')
-rw-r--r-- drivers/edac/i5000_edac.c 181
1 files changed, 119 insertions, 62 deletions
diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c
index 4a16b5b61cfb..7d045ad0a1fe 100644
--- a/drivers/edac/i5000_edac.c
+++ b/drivers/edac/i5000_edac.c
@@ -119,6 +119,7 @@
119#define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \ 119#define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \
120 FERR_NF_M11ERR | \ 120 FERR_NF_M11ERR | \
121 FERR_NF_M10ERR | \ 121 FERR_NF_M10ERR | \
122 FERR_NF_M9ERR | \
122 FERR_NF_M8ERR | \ 123 FERR_NF_M8ERR | \
123 FERR_NF_M7ERR | \ 124 FERR_NF_M7ERR | \
124 FERR_NF_M6ERR | \ 125 FERR_NF_M6ERR | \
@@ -301,6 +302,9 @@ static char *numcol_toString[] = {
301}; 302};
302#endif 303#endif
303 304
305/* enables the report of miscellaneous messages as CE errors - default off */
306static int misc_messages;
307
304/* Enumeration of supported devices */ 308/* Enumeration of supported devices */
305enum i5000_chips { 309enum i5000_chips {
306 I5000P = 0, 310 I5000P = 0,
@@ -466,7 +470,8 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
466 struct i5000_error_info *info, 470 struct i5000_error_info *info,
467 int handle_errors) 471 int handle_errors)
468{ 472{
469 char msg[EDAC_MC_LABEL_LEN + 1 + 90]; 473 char msg[EDAC_MC_LABEL_LEN + 1 + 160];
474 char *specific = NULL;
470 u32 allErrors; 475 u32 allErrors;
471 int branch; 476 int branch;
472 int channel; 477 int channel;
@@ -480,11 +485,6 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
480 if (!allErrors) 485 if (!allErrors)
481 return; /* if no error, return now */ 486 return; /* if no error, return now */
482 487
483 /* ONLY ONE of the possible error bits will be set, as per the docs */
484 i5000_mc_printk(mci, KERN_ERR,
485 "FATAL ERRORS Found!!! 1st FATAL Err Reg= 0x%x\n",
486 allErrors);
487
488 branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd); 488 branch = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
489 channel = branch; 489 channel = branch;
490 490
@@ -501,28 +501,27 @@ static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
501 rdwr ? "Write" : "Read", ras, cas); 501 rdwr ? "Write" : "Read", ras, cas);
502 502
503 /* Only 1 bit will be on */ 503 /* Only 1 bit will be on */
504 if (allErrors & FERR_FAT_M1ERR) { 504 switch (allErrors) {
505 i5000_mc_printk(mci, KERN_ERR, 505 case FERR_FAT_M1ERR:
506 "Alert on non-redundant retry or fast " 506 specific = "Alert on non-redundant retry or fast "
507 "reset timeout\n"); 507 "reset timeout";
508 508 break;
509 } else if (allErrors & FERR_FAT_M2ERR) { 509 case FERR_FAT_M2ERR:
510 i5000_mc_printk(mci, KERN_ERR, 510 specific = "Northbound CRC error on non-redundant "
511 "Northbound CRC error on non-redundant " 511 "retry";
512 "retry\n"); 512 break;
513 513 case FERR_FAT_M3ERR:
514 } else if (allErrors & FERR_FAT_M3ERR) { 514 specific = ">Tmid Thermal event with intelligent "
515 i5000_mc_printk(mci, KERN_ERR, 515 "throttling disabled";
516 ">Tmid Thermal event with intelligent " 516 break;
517 "throttling disabled\n");
518 } 517 }
519 518
520 /* Form out message */ 519 /* Form out message */
521 snprintf(msg, sizeof(msg), 520 snprintf(msg, sizeof(msg),
522 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d " 521 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d CAS=%d "
523 "FATAL Err=0x%x)", 522 "FATAL Err=0x%x (%s))",
524 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, 523 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
525 allErrors); 524 allErrors, specific);
526 525
527 /* Call the helper to output message */ 526 /* Call the helper to output message */
528 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); 527 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
@@ -539,7 +538,8 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
539 struct i5000_error_info *info, 538 struct i5000_error_info *info,
540 int handle_errors) 539 int handle_errors)
541{ 540{
542 char msg[EDAC_MC_LABEL_LEN + 1 + 90]; 541 char msg[EDAC_MC_LABEL_LEN + 1 + 170];
542 char *specific = NULL;
543 u32 allErrors; 543 u32 allErrors;
544 u32 ue_errors; 544 u32 ue_errors;
545 u32 ce_errors; 545 u32 ce_errors;
@@ -557,10 +557,6 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
557 return; /* if no error, return now */ 557 return; /* if no error, return now */
558 558
559 /* ONLY ONE of the possible error bits will be set, as per the docs */ 559 /* ONLY ONE of the possible error bits will be set, as per the docs */
560 i5000_mc_printk(mci, KERN_WARNING,
561 "NON-FATAL ERRORS Found!!! 1st NON-FATAL Err "
562 "Reg= 0x%x\n", allErrors);
563
564 ue_errors = allErrors & FERR_NF_UNCORRECTABLE; 560 ue_errors = allErrors & FERR_NF_UNCORRECTABLE;
565 if (ue_errors) { 561 if (ue_errors) {
566 debugf0("\tUncorrected bits= 0x%x\n", ue_errors); 562 debugf0("\tUncorrected bits= 0x%x\n", ue_errors);
@@ -579,12 +575,47 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
579 rank, channel, channel + 1, branch >> 1, bank, 575 rank, channel, channel + 1, branch >> 1, bank,
580 rdwr ? "Write" : "Read", ras, cas); 576 rdwr ? "Write" : "Read", ras, cas);
581 577
578 switch (ue_errors) {
579 case FERR_NF_M12ERR:
580 specific = "Non-Aliased Uncorrectable Patrol Data ECC";
581 break;
582 case FERR_NF_M11ERR:
583 specific = "Non-Aliased Uncorrectable Spare-Copy "
584 "Data ECC";
585 break;
586 case FERR_NF_M10ERR:
587 specific = "Non-Aliased Uncorrectable Mirrored Demand "
588 "Data ECC";
589 break;
590 case FERR_NF_M9ERR:
591 specific = "Non-Aliased Uncorrectable Non-Mirrored "
592 "Demand Data ECC";
593 break;
594 case FERR_NF_M8ERR:
595 specific = "Aliased Uncorrectable Patrol Data ECC";
596 break;
597 case FERR_NF_M7ERR:
598 specific = "Aliased Uncorrectable Spare-Copy Data ECC";
599 break;
600 case FERR_NF_M6ERR:
601 specific = "Aliased Uncorrectable Mirrored Demand "
602 "Data ECC";
603 break;
604 case FERR_NF_M5ERR:
605 specific = "Aliased Uncorrectable Non-Mirrored Demand "
606 "Data ECC";
607 break;
608 case FERR_NF_M4ERR:
609 specific = "Uncorrectable Data ECC on Replay";
610 break;
611 }
612
582 /* Form out message */ 613 /* Form out message */
583 snprintf(msg, sizeof(msg), 614 snprintf(msg, sizeof(msg),
584 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " 615 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
585 "CAS=%d, UE Err=0x%x)", 616 "CAS=%d, UE Err=0x%x (%s))",
586 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas, 617 branch >> 1, bank, rdwr ? "Write" : "Read", ras, cas,
587 ue_errors); 618 ue_errors, specific);
588 619
589 /* Call the helper to output message */ 620 /* Call the helper to output message */
590 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg); 621 edac_mc_handle_fbd_ue(mci, rank, channel, channel + 1, msg);
@@ -616,51 +647,74 @@ static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
616 rank, channel, branch >> 1, bank, 647 rank, channel, branch >> 1, bank,
617 rdwr ? "Write" : "Read", ras, cas); 648 rdwr ? "Write" : "Read", ras, cas);
618 649
650 switch (ce_errors) {
651 case FERR_NF_M17ERR:
652 specific = "Correctable Non-Mirrored Demand Data ECC";
653 break;
654 case FERR_NF_M18ERR:
655 specific = "Correctable Mirrored Demand Data ECC";
656 break;
657 case FERR_NF_M19ERR:
658 specific = "Correctable Spare-Copy Data ECC";
659 break;
660 case FERR_NF_M20ERR:
661 specific = "Correctable Patrol Data ECC";
662 break;
663 }
664
619 /* Form out message */ 665 /* Form out message */
620 snprintf(msg, sizeof(msg), 666 snprintf(msg, sizeof(msg),
621 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d " 667 "(Branch=%d DRAM-Bank=%d RDWR=%s RAS=%d "
622 "CAS=%d, CE Err=0x%x)", branch >> 1, bank, 668 "CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank,
623 rdwr ? "Write" : "Read", ras, cas, ce_errors); 669 rdwr ? "Write" : "Read", ras, cas, ce_errors,
670 specific);
624 671
625 /* Call the helper to output message */ 672 /* Call the helper to output message */
626 edac_mc_handle_fbd_ce(mci, rank, channel, msg); 673 edac_mc_handle_fbd_ce(mci, rank, channel, msg);
627 } 674 }
628 675
629 /* See if any of the thermal errors have fired */ 676 if (!misc_messages)
630 misc_errors = allErrors & FERR_NF_THERMAL; 677 return;
631 if (misc_errors) {
632 i5000_printk(KERN_WARNING, "\tTHERMAL Error, bits= 0x%x\n",
633 misc_errors);
634 }
635
636 /* See if any of the thermal errors have fired */
637 misc_errors = allErrors & FERR_NF_NON_RETRY;
638 if (misc_errors) {
639 i5000_printk(KERN_WARNING, "\tNON-Retry Errors, bits= 0x%x\n",
640 misc_errors);
641 }
642 678
643 /* See if any of the thermal errors have fired */ 679 misc_errors = allErrors & (FERR_NF_NON_RETRY | FERR_NF_NORTH_CRC |
644 misc_errors = allErrors & FERR_NF_NORTH_CRC; 680 FERR_NF_SPD_PROTOCOL | FERR_NF_DIMM_SPARE);
645 if (misc_errors) { 681 if (misc_errors) {
646 i5000_printk(KERN_WARNING, 682 switch (misc_errors) {
647 "\tNORTHBOUND CRC Error, bits= 0x%x\n", 683 case FERR_NF_M13ERR:
648 misc_errors); 684 specific = "Non-Retry or Redundant Retry FBD Memory "
649 } 685 "Alert or Redundant Fast Reset Timeout";
686 break;
687 case FERR_NF_M14ERR:
688 specific = "Non-Retry or Redundant Retry FBD "
689 "Configuration Alert";
690 break;
691 case FERR_NF_M15ERR:
692 specific = "Non-Retry or Redundant Retry FBD "
693 "Northbound CRC error on read data";
694 break;
695 case FERR_NF_M21ERR:
696 specific = "FBD Northbound CRC error on "
697 "FBD Sync Status";
698 break;
699 case FERR_NF_M22ERR:
700 specific = "SPD protocol error";
701 break;
702 case FERR_NF_M27ERR:
703 specific = "DIMM-spare copy started";
704 break;
705 case FERR_NF_M28ERR:
706 specific = "DIMM-spare copy completed";
707 break;
708 }
709 branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
650 710
651 /* See if any of the thermal errors have fired */ 711 /* Form out message */
652 misc_errors = allErrors & FERR_NF_SPD_PROTOCOL; 712 snprintf(msg, sizeof(msg),
653 if (misc_errors) { 713 "(Branch=%d Err=%#x (%s))", branch >> 1,
654 i5000_printk(KERN_WARNING, 714 misc_errors, specific);
655 "\tSPD Protocol Error, bits= 0x%x\n",
656 misc_errors);
657 }
658 715
659 /* See if any of the thermal errors have fired */ 716 /* Call the helper to output message */
660 misc_errors = allErrors & FERR_NF_DIMM_SPARE; 717 edac_mc_handle_fbd_ce(mci, 0, 0, msg);
661 if (misc_errors) {
662 i5000_printk(KERN_WARNING, "\tDIMM-Spare Error, bits= 0x%x\n",
663 misc_errors);
664 } 718 }
665} 719}
666 720
@@ -1497,3 +1551,6 @@ MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - "
1497 1551
1498module_param(edac_op_state, int, 0444); 1552module_param(edac_op_state, int, 0444);
1499MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); 1553MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
1554module_param(misc_messages, int, 0444);
1555MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages");
1556