aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/edac/Kconfig2
-rw-r--r--drivers/edac/edac_mc.c6
-rw-r--r--drivers/edac/i5100_edac.c13
-rw-r--r--drivers/edac/i5400_edac.c54
-rw-r--r--drivers/edac/ppc4xx_edac.c4
-rw-r--r--drivers/edac/sb_edac.c52
-rw-r--r--include/linux/edac.h179
7 files changed, 208 insertions, 102 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 5948a2194f50..fdffa1beca17 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -215,7 +215,7 @@ config EDAC_I7300
215config EDAC_SBRIDGE 215config EDAC_SBRIDGE
216 tristate "Intel Sandy-Bridge Integrated MC" 216 tristate "Intel Sandy-Bridge Integrated MC"
217 depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL 217 depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
218 depends on EXPERIMENTAL 218 depends on PCI_MMCONFIG && EXPERIMENTAL
219 help 219 help
220 Support for error detection and correction the Intel 220 Support for error detection and correction the Intel
221 Sandy Bridge Integrated Memory Controller. 221 Sandy Bridge Integrated Memory Controller.
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index da09cd74bc5b..feef7733fae7 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -39,7 +39,7 @@ static LIST_HEAD(mc_devices);
39 39
40#ifdef CONFIG_EDAC_DEBUG 40#ifdef CONFIG_EDAC_DEBUG
41 41
42static void edac_mc_dump_channel(struct channel_info *chan) 42static void edac_mc_dump_channel(struct rank_info *chan)
43{ 43{
44 debugf4("\tchannel = %p\n", chan); 44 debugf4("\tchannel = %p\n", chan);
45 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx); 45 debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
@@ -156,7 +156,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
156{ 156{
157 struct mem_ctl_info *mci; 157 struct mem_ctl_info *mci;
158 struct csrow_info *csi, *csrow; 158 struct csrow_info *csi, *csrow;
159 struct channel_info *chi, *chp, *chan; 159 struct rank_info *chi, *chp, *chan;
160 void *pvt; 160 void *pvt;
161 unsigned size; 161 unsigned size;
162 int row, chn; 162 int row, chn;
@@ -181,7 +181,7 @@ struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
181 * rather than an imaginary chunk of memory located at address 0. 181 * rather than an imaginary chunk of memory located at address 0.
182 */ 182 */
183 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi)); 183 csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
184 chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi)); 184 chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
185 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL; 185 pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
186 186
187 /* setup index and various internal pointers */ 187 /* setup index and various internal pointers */
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 2e23547b2f24..d500749464ea 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -49,7 +49,7 @@
49#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6) 49#define I5100_FERR_NF_MEM_M6ERR_MASK (1 << 6)
50#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5) 50#define I5100_FERR_NF_MEM_M5ERR_MASK (1 << 5)
51#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4) 51#define I5100_FERR_NF_MEM_M4ERR_MASK (1 << 4)
52#define I5100_FERR_NF_MEM_M1ERR_MASK 1 52#define I5100_FERR_NF_MEM_M1ERR_MASK (1 << 1)
53#define I5100_FERR_NF_MEM_ANY_MASK \ 53#define I5100_FERR_NF_MEM_ANY_MASK \
54 (I5100_FERR_NF_MEM_M16ERR_MASK | \ 54 (I5100_FERR_NF_MEM_M16ERR_MASK | \
55 I5100_FERR_NF_MEM_M15ERR_MASK | \ 55 I5100_FERR_NF_MEM_M15ERR_MASK | \
@@ -535,23 +535,20 @@ static void i5100_read_log(struct mem_ctl_info *mci, int chan,
535static void i5100_check_error(struct mem_ctl_info *mci) 535static void i5100_check_error(struct mem_ctl_info *mci)
536{ 536{
537 struct i5100_priv *priv = mci->pvt_info; 537 struct i5100_priv *priv = mci->pvt_info;
538 u32 dw; 538 u32 dw, dw2;
539
540 539
541 pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw); 540 pci_read_config_dword(priv->mc, I5100_FERR_NF_MEM, &dw);
542 if (i5100_ferr_nf_mem_any(dw)) { 541 if (i5100_ferr_nf_mem_any(dw)) {
543 u32 dw2;
544 542
545 pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2); 543 pci_read_config_dword(priv->mc, I5100_NERR_NF_MEM, &dw2);
546 if (dw2)
547 pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM,
548 dw2);
549 pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
550 544
551 i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw), 545 i5100_read_log(mci, i5100_ferr_nf_mem_chan_indx(dw),
552 i5100_ferr_nf_mem_any(dw), 546 i5100_ferr_nf_mem_any(dw),
553 i5100_nerr_nf_mem_any(dw2)); 547 i5100_nerr_nf_mem_any(dw2));
548
549 pci_write_config_dword(priv->mc, I5100_NERR_NF_MEM, dw2);
554 } 550 }
551 pci_write_config_dword(priv->mc, I5100_FERR_NF_MEM, dw);
555} 552}
556 553
557/* The i5100 chipset will scrub the entire memory once, then 554/* The i5100 chipset will scrub the entire memory once, then
diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c
index 67ec9626a330..1869a1018fb5 100644
--- a/drivers/edac/i5400_edac.c
+++ b/drivers/edac/i5400_edac.c
@@ -735,7 +735,7 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
735 735
736 /* Attempt to 'get' the MCH register we want */ 736 /* Attempt to 'get' the MCH register we want */
737 pdev = NULL; 737 pdev = NULL;
738 while (!pvt->branchmap_werrors || !pvt->fsb_error_regs) { 738 while (1) {
739 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 739 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
740 PCI_DEVICE_ID_INTEL_5400_ERR, pdev); 740 PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
741 if (!pdev) { 741 if (!pdev) {
@@ -743,23 +743,42 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
743 i5400_printk(KERN_ERR, 743 i5400_printk(KERN_ERR,
744 "'system address,Process Bus' " 744 "'system address,Process Bus' "
745 "device not found:" 745 "device not found:"
746 "vendor 0x%x device 0x%x ERR funcs " 746 "vendor 0x%x device 0x%x ERR func 1 "
747 "(broken BIOS?)\n", 747 "(broken BIOS?)\n",
748 PCI_VENDOR_ID_INTEL, 748 PCI_VENDOR_ID_INTEL,
749 PCI_DEVICE_ID_INTEL_5400_ERR); 749 PCI_DEVICE_ID_INTEL_5400_ERR);
750 goto error; 750 return -ENODEV;
751 } 751 }
752 752
753 /* Store device 16 funcs 1 and 2 */ 753 /* Store device 16 func 1 */
754 switch (PCI_FUNC(pdev->devfn)) { 754 if (PCI_FUNC(pdev->devfn) == 1)
755 case 1:
756 pvt->branchmap_werrors = pdev;
757 break;
758 case 2:
759 pvt->fsb_error_regs = pdev;
760 break; 755 break;
756 }
757 pvt->branchmap_werrors = pdev;
758
759 pdev = NULL;
760 while (1) {
761 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
762 PCI_DEVICE_ID_INTEL_5400_ERR, pdev);
763 if (!pdev) {
764 /* End of list, leave */
765 i5400_printk(KERN_ERR,
766 "'system address,Process Bus' "
767 "device not found:"
768 "vendor 0x%x device 0x%x ERR func 2 "
769 "(broken BIOS?)\n",
770 PCI_VENDOR_ID_INTEL,
771 PCI_DEVICE_ID_INTEL_5400_ERR);
772
773 pci_dev_put(pvt->branchmap_werrors);
774 return -ENODEV;
761 } 775 }
776
777 /* Store device 16 func 2 */
778 if (PCI_FUNC(pdev->devfn) == 2)
779 break;
762 } 780 }
781 pvt->fsb_error_regs = pdev;
763 782
764 debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n", 783 debugf1("System Address, processor bus- PCI Bus ID: %s %x:%x\n",
765 pci_name(pvt->system_address), 784 pci_name(pvt->system_address),
@@ -778,7 +797,10 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
778 "MC: 'BRANCH 0' device not found:" 797 "MC: 'BRANCH 0' device not found:"
779 "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n", 798 "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
780 PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0); 799 PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_5400_FBD0);
781 goto error; 800
801 pci_dev_put(pvt->fsb_error_regs);
802 pci_dev_put(pvt->branchmap_werrors);
803 return -ENODEV;
782 } 804 }
783 805
784 /* If this device claims to have more than 2 channels then 806 /* If this device claims to have more than 2 channels then
@@ -796,14 +818,14 @@ static int i5400_get_devices(struct mem_ctl_info *mci, int dev_idx)
796 "(broken BIOS?)\n", 818 "(broken BIOS?)\n",
797 PCI_VENDOR_ID_INTEL, 819 PCI_VENDOR_ID_INTEL,
798 PCI_DEVICE_ID_INTEL_5400_FBD1); 820 PCI_DEVICE_ID_INTEL_5400_FBD1);
799 goto error; 821
822 pci_dev_put(pvt->branch_0);
823 pci_dev_put(pvt->fsb_error_regs);
824 pci_dev_put(pvt->branchmap_werrors);
825 return -ENODEV;
800 } 826 }
801 827
802 return 0; 828 return 0;
803
804error:
805 i5400_put_devices(mci);
806 return -ENODEV;
807} 829}
808 830
809/* 831/*
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index fc757069c6af..d427c69bb8b1 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -184,7 +184,7 @@ struct ppc4xx_ecc_status {
184 184
185/* Function Prototypes */ 185/* Function Prototypes */
186 186
187static int ppc4xx_edac_probe(struct platform_device *device) 187static int ppc4xx_edac_probe(struct platform_device *device);
188static int ppc4xx_edac_remove(struct platform_device *device); 188static int ppc4xx_edac_remove(struct platform_device *device);
189 189
190/* Global Variables */ 190/* Global Variables */
@@ -1068,7 +1068,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
1068 1068
1069 mci->mod_name = PPC4XX_EDAC_MODULE_NAME; 1069 mci->mod_name = PPC4XX_EDAC_MODULE_NAME;
1070 mci->mod_ver = PPC4XX_EDAC_MODULE_REVISION; 1070 mci->mod_ver = PPC4XX_EDAC_MODULE_REVISION;
1071 mci->ctl_name = match->compatible, 1071 mci->ctl_name = ppc4xx_edac_match->compatible,
1072 mci->dev_name = np->full_name; 1072 mci->dev_name = np->full_name;
1073 1073
1074 /* Initialize callbacks */ 1074 /* Initialize callbacks */
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 3a605f777712..a203536d90dd 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -20,6 +20,7 @@
20#include <linux/mmzone.h> 20#include <linux/mmzone.h>
21#include <linux/smp.h> 21#include <linux/smp.h>
22#include <linux/bitmap.h> 22#include <linux/bitmap.h>
23#include <linux/math64.h>
23#include <asm/processor.h> 24#include <asm/processor.h>
24#include <asm/mce.h> 25#include <asm/mce.h>
25 26
@@ -670,6 +671,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
670 u32 reg; 671 u32 reg;
671 u64 limit, prv = 0; 672 u64 limit, prv = 0;
672 u64 tmp_mb; 673 u64 tmp_mb;
674 u32 mb, kb;
673 u32 rir_way; 675 u32 rir_way;
674 676
675 /* 677 /*
@@ -682,8 +684,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
682 pvt->tolm = GET_TOLM(reg); 684 pvt->tolm = GET_TOLM(reg);
683 tmp_mb = (1 + pvt->tolm) >> 20; 685 tmp_mb = (1 + pvt->tolm) >> 20;
684 686
685 debugf0("TOLM: %Lu.%03Lu GB (0x%016Lx)\n", 687 mb = div_u64_rem(tmp_mb, 1000, &kb);
686 tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tolm); 688 debugf0("TOLM: %u.%03u GB (0x%016Lx)\n",
689 mb, kb, (u64)pvt->tolm);
687 690
688 /* Address range is already 45:25 */ 691 /* Address range is already 45:25 */
689 pci_read_config_dword(pvt->pci_sad1, TOHM, 692 pci_read_config_dword(pvt->pci_sad1, TOHM,
@@ -691,8 +694,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
691 pvt->tohm = GET_TOHM(reg); 694 pvt->tohm = GET_TOHM(reg);
692 tmp_mb = (1 + pvt->tohm) >> 20; 695 tmp_mb = (1 + pvt->tohm) >> 20;
693 696
694 debugf0("TOHM: %Lu.%03Lu GB (0x%016Lx)", 697 mb = div_u64_rem(tmp_mb, 1000, &kb);
695 tmp_mb / 1000, tmp_mb % 1000, (u64)pvt->tohm); 698 debugf0("TOHM: %u.%03u GB (0x%016Lx)",
699 mb, kb, (u64)pvt->tohm);
696 700
697 /* 701 /*
698 * Step 2) Get SAD range and SAD Interleave list 702 * Step 2) Get SAD range and SAD Interleave list
@@ -714,10 +718,11 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
714 break; 718 break;
715 719
716 tmp_mb = (limit + 1) >> 20; 720 tmp_mb = (limit + 1) >> 20;
717 debugf0("SAD#%d %s up to %Lu.%03Lu GB (0x%016Lx) %s reg=0x%08x\n", 721 mb = div_u64_rem(tmp_mb, 1000, &kb);
722 debugf0("SAD#%d %s up to %u.%03u GB (0x%016Lx) %s reg=0x%08x\n",
718 n_sads, 723 n_sads,
719 get_dram_attr(reg), 724 get_dram_attr(reg),
720 tmp_mb / 1000, tmp_mb % 1000, 725 mb, kb,
721 ((u64)tmp_mb) << 20L, 726 ((u64)tmp_mb) << 20L,
722 INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]", 727 INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]",
723 reg); 728 reg);
@@ -747,8 +752,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
747 break; 752 break;
748 tmp_mb = (limit + 1) >> 20; 753 tmp_mb = (limit + 1) >> 20;
749 754
750 debugf0("TAD#%d: up to %Lu.%03Lu GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", 755 mb = div_u64_rem(tmp_mb, 1000, &kb);
751 n_tads, tmp_mb / 1000, tmp_mb % 1000, 756 debugf0("TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
757 n_tads, mb, kb,
752 ((u64)tmp_mb) << 20L, 758 ((u64)tmp_mb) << 20L,
753 (u32)TAD_SOCK(reg), 759 (u32)TAD_SOCK(reg),
754 (u32)TAD_CH(reg), 760 (u32)TAD_CH(reg),
@@ -757,7 +763,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
757 (u32)TAD_TGT2(reg), 763 (u32)TAD_TGT2(reg),
758 (u32)TAD_TGT3(reg), 764 (u32)TAD_TGT3(reg),
759 reg); 765 reg);
760 prv = tmp_mb; 766 prv = limit;
761 } 767 }
762 768
763 /* 769 /*
@@ -771,9 +777,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
771 tad_ch_nilv_offset[j], 777 tad_ch_nilv_offset[j],
772 &reg); 778 &reg);
773 tmp_mb = TAD_OFFSET(reg) >> 20; 779 tmp_mb = TAD_OFFSET(reg) >> 20;
774 debugf0("TAD CH#%d, offset #%d: %Lu.%03Lu GB (0x%016Lx), reg=0x%08x\n", 780 mb = div_u64_rem(tmp_mb, 1000, &kb);
781 debugf0("TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
775 i, j, 782 i, j,
776 tmp_mb / 1000, tmp_mb % 1000, 783 mb, kb,
777 ((u64)tmp_mb) << 20L, 784 ((u64)tmp_mb) << 20L,
778 reg); 785 reg);
779 } 786 }
@@ -795,9 +802,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
795 802
796 tmp_mb = RIR_LIMIT(reg) >> 20; 803 tmp_mb = RIR_LIMIT(reg) >> 20;
797 rir_way = 1 << RIR_WAY(reg); 804 rir_way = 1 << RIR_WAY(reg);
798 debugf0("CH#%d RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d, reg=0x%08x\n", 805 mb = div_u64_rem(tmp_mb, 1000, &kb);
806 debugf0("CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
799 i, j, 807 i, j,
800 tmp_mb / 1000, tmp_mb % 1000, 808 mb, kb,
801 ((u64)tmp_mb) << 20L, 809 ((u64)tmp_mb) << 20L,
802 rir_way, 810 rir_way,
803 reg); 811 reg);
@@ -808,9 +816,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
808 &reg); 816 &reg);
809 tmp_mb = RIR_OFFSET(reg) << 6; 817 tmp_mb = RIR_OFFSET(reg) << 6;
810 818
811 debugf0("CH#%d RIR#%d INTL#%d, offset %Lu.%03Lu GB (0x%016Lx), tgt: %d, reg=0x%08x\n", 819 mb = div_u64_rem(tmp_mb, 1000, &kb);
820 debugf0("CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
812 i, j, k, 821 i, j, k,
813 tmp_mb / 1000, tmp_mb % 1000, 822 mb, kb,
814 ((u64)tmp_mb) << 20L, 823 ((u64)tmp_mb) << 20L,
815 (u32)RIR_RNK_TGT(reg), 824 (u32)RIR_RNK_TGT(reg),
816 reg); 825 reg);
@@ -848,6 +857,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
848 u8 ch_way,sck_way; 857 u8 ch_way,sck_way;
849 u32 tad_offset; 858 u32 tad_offset;
850 u32 rir_way; 859 u32 rir_way;
860 u32 mb, kb;
851 u64 ch_addr, offset, limit, prv = 0; 861 u64 ch_addr, offset, limit, prv = 0;
852 862
853 863
@@ -858,7 +868,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
858 * range (e. g. VGA addresses). It is unlikely, however, that the 868 * range (e. g. VGA addresses). It is unlikely, however, that the
859 * memory controller would generate an error on that range. 869 * memory controller would generate an error on that range.
860 */ 870 */
861 if ((addr > (u64) pvt->tolm) && (addr < (1L << 32))) { 871 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
862 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); 872 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
863 edac_mc_handle_ce_no_info(mci, msg); 873 edac_mc_handle_ce_no_info(mci, msg);
864 return -EINVAL; 874 return -EINVAL;
@@ -913,7 +923,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
913 addr, 923 addr,
914 limit, 924 limit,
915 sad_way + 7, 925 sad_way + 7,
916 INTERLEAVE_MODE(reg) ? "" : "XOR[18:16]"); 926 interleave_mode ? "" : "XOR[18:16]");
917 if (interleave_mode) 927 if (interleave_mode)
918 idx = ((addr >> 6) ^ (addr >> 16)) & 7; 928 idx = ((addr >> 6) ^ (addr >> 16)) & 7;
919 else 929 else
@@ -1053,7 +1063,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1053 ch_addr = addr & 0x7f; 1063 ch_addr = addr & 0x7f;
1054 /* Remove socket wayness and remove 6 bits */ 1064 /* Remove socket wayness and remove 6 bits */
1055 addr >>= 6; 1065 addr >>= 6;
1056 addr /= sck_xch; 1066 addr = div_u64(addr, sck_xch);
1057#if 0 1067#if 0
1058 /* Divide by channel way */ 1068 /* Divide by channel way */
1059 addr = addr / ch_way; 1069 addr = addr / ch_way;
@@ -1073,10 +1083,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
1073 continue; 1083 continue;
1074 1084
1075 limit = RIR_LIMIT(reg); 1085 limit = RIR_LIMIT(reg);
1076 1086 mb = div_u64_rem(limit >> 20, 1000, &kb);
1077 debugf0("RIR#%d, limit: %Lu.%03Lu GB (0x%016Lx), way: %d\n", 1087 debugf0("RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
1078 n_rir, 1088 n_rir,
1079 (limit >> 20) / 1000, (limit >> 20) % 1000, 1089 mb, kb,
1080 limit, 1090 limit,
1081 1 << RIR_WAY(reg)); 1091 1 << RIR_WAY(reg));
1082 if (ch_addr <= limit) 1092 if (ch_addr <= limit)
diff --git a/include/linux/edac.h b/include/linux/edac.h
index ba317e2930a1..c621d762bb2c 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -70,25 +70,64 @@ enum dev_type {
70#define DEV_FLAG_X32 BIT(DEV_X32) 70#define DEV_FLAG_X32 BIT(DEV_X32)
71#define DEV_FLAG_X64 BIT(DEV_X64) 71#define DEV_FLAG_X64 BIT(DEV_X64)
72 72
73/* memory types */ 73/**
74 * enum mem_type - memory types. For a more detailed reference, please see
75 * http://en.wikipedia.org/wiki/DRAM
76 *
77 * @MEM_EMPTY: Empty csrow
78 * @MEM_RESERVED: Reserved csrow type
79 * @MEM_UNKNOWN: Unknown csrow type
80 * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995.
81 * @MEM_EDO: EDO - Extended data out, used on systems up to 1998.
82 * @MEM_BEDO: BEDO - Burst Extended data out, an EDO variant.
83 * @MEM_SDR: SDR - Single data rate SDRAM
84 * http://en.wikipedia.org/wiki/Synchronous_dynamic_random-access_memory
85 * They use 3 pins for chip select: Pins 0 and 2 are
86 * for rank 0; pins 1 and 3 are for rank 1, if the memory
87 * is dual-rank.
88 * @MEM_RDR: Registered SDR SDRAM
89 * @MEM_DDR: Double data rate SDRAM
90 * http://en.wikipedia.org/wiki/DDR_SDRAM
91 * @MEM_RDDR: Registered Double data rate SDRAM
92 * This is a variant of the DDR memories.
93 * A registered memory has a buffer inside it, hiding
94 * part of the memory details to the memory controller.
95 * @MEM_RMBS: Rambus DRAM, used on a few Pentium III/IV controllers.
96 * @MEM_DDR2: DDR2 RAM, as described at JEDEC JESD79-2F.
97 * Those memories are labeled as "PC2-" instead of "PC" to
98 * differentiate from DDR.
99 * @MEM_FB_DDR2: Fully-Buffered DDR2, as described at JEDEC Std No. 205
100 * and JESD206.
101 * Those memories are accessed per DIMM slot, and not by
102 * a chip select signal.
103 * @MEM_RDDR2: Registered DDR2 RAM
104 * This is a variant of the DDR2 memories.
105 * @MEM_XDR: Rambus XDR
106 * It is an evolution of the original RAMBUS memories,
107 * created to compete with DDR2. It wasn't used on any
108 * x86 arch, but the cell_edac PPC memory controller uses it.
109 * @MEM_DDR3: DDR3 RAM
110 * @MEM_RDDR3: Registered DDR3 RAM
111 * This is a variant of the DDR3 memories.
112 */
74enum mem_type { 113enum mem_type {
75 MEM_EMPTY = 0, /* Empty csrow */ 114 MEM_EMPTY = 0,
76 MEM_RESERVED, /* Reserved csrow type */ 115 MEM_RESERVED,
77 MEM_UNKNOWN, /* Unknown csrow type */ 116 MEM_UNKNOWN,
78 MEM_FPM, /* Fast page mode */ 117 MEM_FPM,
79 MEM_EDO, /* Extended data out */ 118 MEM_EDO,
80 MEM_BEDO, /* Burst Extended data out */ 119 MEM_BEDO,
81 MEM_SDR, /* Single data rate SDRAM */ 120 MEM_SDR,
82 MEM_RDR, /* Registered single data rate SDRAM */ 121 MEM_RDR,
83 MEM_DDR, /* Double data rate SDRAM */ 122 MEM_DDR,
84 MEM_RDDR, /* Registered Double data rate SDRAM */ 123 MEM_RDDR,
85 MEM_RMBS, /* Rambus DRAM */ 124 MEM_RMBS,
86 MEM_DDR2, /* DDR2 RAM */ 125 MEM_DDR2,
87 MEM_FB_DDR2, /* fully buffered DDR2 */ 126 MEM_FB_DDR2,
88 MEM_RDDR2, /* Registered DDR2 RAM */ 127 MEM_RDDR2,
89 MEM_XDR, /* Rambus XDR */ 128 MEM_XDR,
90 MEM_DDR3, /* DDR3 RAM */ 129 MEM_DDR3,
91 MEM_RDDR3, /* Registered DDR3 RAM */ 130 MEM_RDDR3,
92}; 131};
93 132
94#define MEM_FLAG_EMPTY BIT(MEM_EMPTY) 133#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
@@ -166,8 +205,9 @@ enum scrub_type {
166#define OP_OFFLINE 0x300 205#define OP_OFFLINE 0x300
167 206
168/* 207/*
169 * There are several things to be aware of that aren't at all obvious: 208 * Concepts used at the EDAC subsystem
170 * 209 *
210 * There are several things to be aware of that aren't at all obvious:
171 * 211 *
172 * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. 212 * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
173 * 213 *
@@ -176,36 +216,61 @@ enum scrub_type {
176 * creating a common ground for discussion, terms and their definitions 216 * creating a common ground for discussion, terms and their definitions
177 * will be established. 217 * will be established.
178 * 218 *
179 * Memory devices: The individual chip on a memory stick. These devices 219 * Memory devices: The individual DRAM chips on a memory stick. These
180 * commonly output 4 and 8 bits each. Grouping several 220 * devices commonly output 4 and 8 bits each (x4, x8).
181 * of these in parallel provides 64 bits which is common 221 * Grouping several of these in parallel provides the
182 * for a memory stick. 222 * number of bits that the memory controller expects:
223 * typically 72 bits, in order to provide 64 bits +
224 * 8 bits of ECC data.
183 * 225 *
184 * Memory Stick: A printed circuit board that aggregates multiple 226 * Memory Stick: A printed circuit board that aggregates multiple
185 * memory devices in parallel. This is the atomic 227 * memory devices in parallel. In general, this is the
186 * memory component that is purchaseable by Joe consumer 228 * Field Replaceable Unit (FRU) which gets replaced, in
187 * and loaded into a memory socket. 229 * the case of excessive errors. Most often it is also
230 * called DIMM (Dual Inline Memory Module).
231 *
232 * Memory Socket: A physical connector on the motherboard that accepts
233 * a single memory stick. Also called a "slot" in several
234 * datasheets.
188 * 235 *
189 * Socket: A physical connector on the motherboard that accepts 236 * Channel: A memory controller channel, responsible to communicate
190 * a single memory stick. 237 * with a group of DIMMs. Each channel has its own
238 * independent control (command) and data bus, and can
239 * be used independently or grouped with other channels.
191 * 240 *
192 * Channel: Set of memory devices on a memory stick that must be 241 * Branch: It is typically the highest hierarchy on a
193 * grouped in parallel with one or more additional 242 * Fully-Buffered DIMM memory controller.
194 * channels from other memory sticks. This parallel 243 * Typically, it contains two channels.
195 * grouping of the output from multiple channels are 244 * Two channels at the same branch can be used in single
196 * necessary for the smallest granularity of memory access. 245 * mode or in lockstep mode.
197 * Some memory controllers are capable of single channel - 246 * When lockstep is enabled, the cacheline is doubled,
198 * which means that memory sticks can be loaded 247 * but it generally brings some performance penalty.
199 * individually. Other memory controllers are only 248 * Also, it is generally not possible to point to just one
200 * capable of dual channel - which means that memory 249 * memory stick when an error occurs, as the error
201 * sticks must be loaded as pairs (see "socket set"). 250 * correction code is calculated using two DIMMs instead
251 * of one. Due to that, it is capable of correcting more
252 * errors than on single mode.
202 * 253 *
203 * Chip-select row: All of the memory devices that are selected together. 254 * Single-channel: The data accessed by the memory controller is contained
204 * for a single, minimum grain of memory access. 255 * into one dimm only. E. g. if the data is 64 bits-wide,
205 * This selects all of the parallel memory devices across 256 * the data flows to the CPU using one 64 bits parallel
206 * all of the parallel channels. Common chip-select rows 257 * access.
207 * for single channel are 64 bits, for dual channel 128 258 * Typically used with SDR, DDR, DDR2 and DDR3 memories.
208 * bits. 259 * FB-DIMM and RAMBUS use a different concept for channel,
260 * so this concept doesn't apply there.
261 *
262 * Double-channel: The data size accessed by the memory controller is
263 * interlaced into two dimms, accessed at the same time.
264 * E. g. if the DIMM is 64 bits-wide (72 bits with ECC),
265 * the data flows to the CPU using a 128 bits parallel
266 * access.
267 *
268 * Chip-select row: This is the name of the DRAM signal used to select the
269 * DRAM ranks to be accessed. Common chip-select rows for
270 * single channel are 64 bits, for dual channel 128 bits.
271 * It may not be visible by the memory controller, as some
272 * DIMM types have a memory buffer that can hide direct
273 * access to it from the Memory Controller.
209 * 274 *
210 * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. 275 * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
211 * Motherboards commonly drive two chip-select pins to 276 * Motherboards commonly drive two chip-select pins to
@@ -218,8 +283,8 @@ enum scrub_type {
218 * 283 *
219 * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. 284 * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
220 * A double-sided stick has two chip-select rows which 285 * A double-sided stick has two chip-select rows which
221 * access different sets of memory devices. The two 286 * access different sets of memory devices. The two
222 * rows cannot be accessed concurrently. "Double-sided" 287 * rows cannot be accessed concurrently. "Double-sided"
223 * is irrespective of the memory devices being mounted 288 * is irrespective of the memory devices being mounted
224 * on both sides of the memory stick. 289 * on both sides of the memory stick.
225 * 290 *
@@ -247,10 +312,22 @@ enum scrub_type {
247 * PS - I enjoyed writing all that about as much as you enjoyed reading it. 312 * PS - I enjoyed writing all that about as much as you enjoyed reading it.
248 */ 313 */
249 314
250struct channel_info { 315/**
251 int chan_idx; /* channel index */ 316 * struct rank_info - contains the information for one DIMM rank
252 u32 ce_count; /* Correctable Errors for this CHANNEL */ 317 *
253 char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */ 318 * @chan_idx: channel number where the rank is (typically, 0 or 1)
319 * @ce_count: number of correctable errors for this rank
320 * @label: DIMM label. Different ranks for the same DIMM should be
321 * filled, on userspace, with the same label.
322 * FIXME: The core currently won't enforce it.
323 * @csrow: A pointer to the chip select row structure (the parent
324 * structure). The location of the rank is given by
325 * the (csrow->csrow_idx, chan_idx) vector.
326 */
327struct rank_info {
328 int chan_idx;
329 u32 ce_count;
330 char label[EDAC_MC_LABEL_LEN + 1];
254 struct csrow_info *csrow; /* the parent */ 331 struct csrow_info *csrow; /* the parent */
255}; 332};
256 333
@@ -274,7 +351,7 @@ struct csrow_info {
274 351
275 /* channel information for this csrow */ 352 /* channel information for this csrow */
276 u32 nr_channels; 353 u32 nr_channels;
277 struct channel_info *channels; 354 struct rank_info *channels;
278}; 355};
279 356
280struct mcidev_sysfs_group { 357struct mcidev_sysfs_group {