aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Tyser <ptyser@xes-inc.com>2010-03-10 18:23:12 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-03-12 18:52:40 -0500
commitdcca7c3d003ce61ed71b412f645cfbe7bd8e882e (patch)
tree8d4c5c5afad17525db0ee09d56850313b7fa7e81
parent21768639be419d00275ac4e58b863361d0c24ee4 (diff)
edac: mpc85xx improve SDRAM error reporting
Add the ability to detect the specific data line or ECC line which failed when printing out SDRAM single-bit errors. An example of a single-bit SDRAM ECC error is below: EDAC MPC85xx MC1: Err Detect Register: 0x80000004 EDAC MPC85xx MC1: Faulty data bit: 59 EDAC MPC85xx MC1: Expected Data / ECC: 0x7f80d000_409effa0 / 0x6d EDAC MPC85xx MC1: Captured Data / ECC: 0x7780d000_409effa0 / 0x6d EDAC MPC85xx MC1: Err addr: 0x00031ca0 EDAC MPC85xx MC1: PFN: 0x00000031 Knowing which specific data or ECC line caused an error can be useful in tracking down hardware issues such as improperly terminated signals, loose pins, etc. Note that this feature is currently only enabled for 64-bit wide data buses; 32-bit wide bus support should be added. I don't have any 32-bit wide systems to test on. If someone has one and is willing to give this patch a shot with the check for a 64-bit data bus removed it would be much appreciated and I can re-submit with both 32 and 64 bit buses supported. Signed-off-by: Peter Tyser <ptyser@xes-inc.com> Signed-off-by: Doug Thompson <dougthompson@xmission.com> Cc: Kumar Gala <galak@gate.crashing.org> Cc: Dave Jiang <djiang@mvista.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/edac/mpc85xx_edac.c146
1 files changed, 138 insertions, 8 deletions
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index 6d0114a1b77e..517042fafa78 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -668,6 +668,111 @@ static struct of_platform_driver mpc85xx_l2_err_driver = {
668 668
669/**************************** MC Err device ***************************/ 669/**************************** MC Err device ***************************/
670 670
/*
 * ECC check-bit masks for a 64-bit data word.
 *
 * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the
 * MPC8572 User's Manual. Each row describes one syndrome bit column as a
 * 64-bit mask, stored as two consecutive 32-bit entries:
 *
 *   ecc_table[2 * row]     - mask for the upper data word (labelled [0:31])
 *   ecc_table[2 * row + 1] - mask for the lower data word (labelled [32:63])
 *
 * The "Syndrome bit" labels below correspond to Freescale's manuals.
 *
 * The table is only ever read, so it is declared const.
 */
static const unsigned int ecc_table[16] = {
	/* MSB           LSB */
	/* [0:31]    [32:63] */
	0xf00fe11e, 0xc33c0ff7,	/* Syndrome bit 7 */
	0x00ff00ff, 0x00fff0ff,
	0x0f0f0f0f, 0x0f0fff00,
	0x11113333, 0x7777000f,
	0x22224444, 0x8888222f,
	0x44448888, 0xffff4441,
	0x8888ffff, 0x11118882,
	0xffff1111, 0x22221114,	/* Syndrome bit 0 */
};
689
690/*
691 * Calculate the correct ECC value for a 64-bit value specified by high:low
692 */
693static u8 calculate_ecc(u32 high, u32 low)
694{
695 u32 mask_low;
696 u32 mask_high;
697 int bit_cnt;
698 u8 ecc = 0;
699 int i;
700 int j;
701
702 for (i = 0; i < 8; i++) {
703 mask_high = ecc_table[i * 2];
704 mask_low = ecc_table[i * 2 + 1];
705 bit_cnt = 0;
706
707 for (j = 0; j < 32; j++) {
708 if ((mask_high >> j) & 1)
709 bit_cnt ^= (high >> j) & 1;
710 if ((mask_low >> j) & 1)
711 bit_cnt ^= (low >> j) & 1;
712 }
713
714 ecc |= bit_cnt << i;
715 }
716
717 return ecc;
718}
719
720/*
721 * Create the syndrome code which is generated if the data line specified by
722 * 'bit' failed. Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641
723 * User's Manual and 9-61 in the MPC8572 User's Manual.
724 */
725static u8 syndrome_from_bit(unsigned int bit) {
726 int i;
727 u8 syndrome = 0;
728
729 /*
730 * Cycle through the upper or lower 32-bit portion of each value in
731 * ecc_table depending on if 'bit' is in the upper or lower half of
732 * 64-bit data.
733 */
734 for (i = bit < 32; i < 16; i += 2)
735 syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2);
736
737 return syndrome;
738}
739
740/*
741 * Decode data and ecc syndrome to determine what went wrong
742 * Note: This can only decode single-bit errors
743 */
744static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc,
745 int *bad_data_bit, int *bad_ecc_bit)
746{
747 int i;
748 u8 syndrome;
749
750 *bad_data_bit = -1;
751 *bad_ecc_bit = -1;
752
753 /*
754 * Calculate the ECC of the captured data and XOR it with the captured
755 * ECC to find an ECC syndrome value we can search for
756 */
757 syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc;
758
759 /* Check if a data line is stuck... */
760 for (i = 0; i < 64; i++) {
761 if (syndrome == syndrome_from_bit(i)) {
762 *bad_data_bit = i;
763 return;
764 }
765 }
766
767 /* If data is correct, check ECC bits for errors... */
768 for (i = 0; i < 8; i++) {
769 if ((syndrome >> i) & 0x1) {
770 *bad_ecc_bit = i;
771 return;
772 }
773 }
774}
775
671static void mpc85xx_mc_check(struct mem_ctl_info *mci) 776static void mpc85xx_mc_check(struct mem_ctl_info *mci)
672{ 777{
673 struct mpc85xx_mc_pdata *pdata = mci->pvt_info; 778 struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
@@ -678,6 +783,10 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
678 u32 err_addr; 783 u32 err_addr;
679 u32 pfn; 784 u32 pfn;
680 int row_index; 785 int row_index;
786 u32 cap_high;
787 u32 cap_low;
788 int bad_data_bit;
789 int bad_ecc_bit;
681 790
682 err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT); 791 err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT);
683 if (!err_detect) 792 if (!err_detect)
@@ -711,14 +820,35 @@ static void mpc85xx_mc_check(struct mem_ctl_info *mci)
711 break; 820 break;
712 } 821 }
713 822
714 mpc85xx_mc_printk(mci, KERN_ERR, "Capture Data High: %#8.8x\n", 823 cap_high = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_HI);
715 in_be32(pdata->mc_vbase + 824 cap_low = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_LO);
716 MPC85XX_MC_CAPTURE_DATA_HI)); 825
717 mpc85xx_mc_printk(mci, KERN_ERR, "Capture Data Low: %#8.8x\n", 826 /*
718 in_be32(pdata->mc_vbase + 827 * Analyze single-bit errors on 64-bit wide buses
719 MPC85XX_MC_CAPTURE_DATA_LO)); 828 * TODO: Add support for 32-bit wide buses
720 mpc85xx_mc_printk(mci, KERN_ERR, "syndrome: %#2.2x\n", syndrome); 829 */
721 mpc85xx_mc_printk(mci, KERN_ERR, "err addr: %#8.8x\n", err_addr); 830 if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) {
831 sbe_ecc_decode(cap_high, cap_low, syndrome,
832 &bad_data_bit, &bad_ecc_bit);
833
834 if (bad_data_bit != -1)
835 mpc85xx_mc_printk(mci, KERN_ERR,
836 "Faulty Data bit: %d\n", bad_data_bit);
837 if (bad_ecc_bit != -1)
838 mpc85xx_mc_printk(mci, KERN_ERR,
839 "Faulty ECC bit: %d\n", bad_ecc_bit);
840
841 mpc85xx_mc_printk(mci, KERN_ERR,
842 "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
843 cap_high ^ (1 << (bad_data_bit - 32)),
844 cap_low ^ (1 << bad_data_bit),
845 syndrome ^ (1 << bad_ecc_bit));
846 }
847
848 mpc85xx_mc_printk(mci, KERN_ERR,
849 "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
850 cap_high, cap_low, syndrome);
851 mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8x\n", err_addr);
722 mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); 852 mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn);
723 853
724 /* we are out of range */ 854 /* we are out of range */